AOIT shader in UE4


/*=============================================================================
    AOITCommon.usf
=============================================================================*/

// Copyright 2011 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.

#ifndef H_COMMON
#define H_COMMON

//////////////////////////////////////////////
// Defines
//////////////////////////////////////////////

#define AVSM_FILTERING_ENABLED
#define LT_BILINEAR_FILTERING
#define EARLY_DEPTH_CULL_ALPHA (0x1UL)

///////////////////////
// Resources
///////////////////////

TextureCube         gIBLTexture;
SamplerState        gIBLSampler;
Texture2DMS<float4> gCompositingBuffer;

//////////////////////////////////////////////
// Constants
//////////////////////////////////////////////

struct UIConstants
{
    uint  faceNormals;
    uint  avsmSortingMethod;
    uint  volumeShadowMethod;
    uint  enableVolumeShadowLookup;
    uint  pauseParticleAnimaton;
    float particleSize;
    uint  particleOpacity;
    uint  hairThickness;
    uint  hairShadowThickness;
    uint  hairOpacity;
    uint  lightingOnly;
    uint  stNumSamples;
};

cbuffer PerFrameConstants
{
    float4x4 mCameraWorldViewProj;
    float4x4 mCameraWorldView;
    float4x4 mCameraViewProj;
    float4x4 mCameraProj;
    float4   mCameraPos;
    float4x4 mLightWorldViewProj;
    float4x4 mAvsmLightWorldViewProj;
    float4x4 mCameraViewToWorld;
    float4x4 mCameraViewToLightProj;
    float4x4 mCameraViewToLightView;
    float4x4 mCameraViewToAvsmLightProj;
    float4x4 mCameraViewToAvsmLightView;
    float4   mLightDir;
    float4   mMSAACoverageNorm;
    float4   mGeometryAlpha;
    float4   mAlphaThreshold;
    UIConstants mUI;
};

// data that we can read or derived from the surface shader outputs
struct SurfaceData
{
    float3 positionView;         // View space position
    float3 positionViewDX;       // Screen space derivatives
    float3 positionViewDY;       // of view space position
    float3 normal;               // View space normal
    float4 albedo;
    float2 lightTexCoord;        // Texture coordinates in light space, [0, 1]
    float2 lightTexCoordDX;      // Screen space partial derivatives
    float2 lightTexCoordDY;      // of light space texture coordinates.
    float  lightSpaceZ;          // Z coordinate (depth) of surface in light space
};

//////////////////////////////////////////////
// Full screen pass
//////////////////////////////////////////////

//struct FullScreenTriangleVSOut
//{
//    float4 positionViewport : SV_Position;
//    float4 positionClip     : positionClip;
//    float2 texCoord         : texCoord;
//};

//////////////////////////////////////////////
// Helper Functions
//////////////////////////////////////////////

float linstep(float min, float max, float v)
{
    return saturate((v - min) / (max - min));
}

float2 ProjectIntoLightTexCoord(float3 positionView)
{
    float4 positionLight = mul(float4(positionView, 1.0f), mCameraViewToLightProj);
    float2 texCoord = (positionLight.xy / positionLight.w) * float2(0.5f, -0.5f) + float2(0.5f, 0.5f);
    return texCoord;
}

float2 ProjectIntoAvsmLightTexCoord(float3 positionView)
{
    float4 positionLight = mul(float4(positionView, 1.0f), mCameraViewToAvsmLightProj);
    float2 texCoord = (positionLight.xy / positionLight.w) * float2(0.5f, -0.5f) + float2(0.5f, 0.5f);
    return texCoord;
}

#endif // H_COMMON
/*=============================================================================
    AOITCaptureShader.usf: capture translucent pixels onto AOIT fragment list
=============================================================================*/
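
// Capture pass overview (summary of the code below): this pass runs the normal
// translucent base-pass shading, but instead of blending into scene color it packs
// the shaded color, depth and blend-mode mask into a FragmentListNode and appends
// it to a per-pixel linked list via FL_AllocNode / FL_InsertNode (FragmentList.usf).
// AOITResolveShader.usf later walks this list to build and composite the adaptive
// visibility function.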

#include "Common.usf"
#include "BasePassPixelCommon.usf"
#include "FragmentList.usf"

#if NEEDS_BASEPASS_FOGGING
    #include "HeightFogCommon.usf"
#if BASEPASS_ATMOSPHERIC_FOG
    #include "AtmosphereCommon.usf"
#endif
#endif

#define DEBUG_AOIT_CAPTURE 0

struct FAOITCaptureVSToPS
{
    FVertexFactoryInterpolantsVSToPS FactoryInterpolants;
    FBasePassInterpolantsVSToPS BasePassInterpolants;
    float4 Position : SV_POSITION;
};

#if USING_TESSELLATION    
    struct FAOITCaptureVSToDS
    {
        FVertexFactoryInterpolantsVSToDS FactoryInterpolants;
        FBasePassInterpolantsVSToDS BasePassInterpolants;
        float4 Position : VS_To_DS_Position;
        OPTIONAL_VertexID_VS_To_DS
    };
    
    #define FAOITCaptureVSOutput FAOITCaptureVSToDS
    #define VertexFactoryGetInterpolants VertexFactoryGetInterpolantsVSToDS
    #define FPassSpecificVSToDS FAOITCaptureVSToDS
    #define FPassSpecificVSToPS FAOITCaptureVSToPS
#else
    #define FAOITCaptureVSOutput FAOITCaptureVSToPS
    #define VertexFactoryGetInterpolants VertexFactoryGetInterpolantsVSToPS
#endif

#if USING_TESSELLATION
FAOITCaptureVSToDS PassInterpolate(FAOITCaptureVSToDS a, float aInterp, FAOITCaptureVSToDS b, float bInterp)
{
    FAOITCaptureVSToDS O;
    
    O.FactoryInterpolants = VertexFactoryInterpolate(a.FactoryInterpolants, aInterp, b.FactoryInterpolants, bInterp);

    #if NEEDS_BASEPASS_FOGGING
        TESSELLATION_INTERPOLATE_MEMBER(BasePassInterpolants.VertexFog);
    #endif

    #if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
        TESSELLATION_INTERPOLATE_MEMBER(BasePassInterpolants.WorldPositionExcludingWPO);
    #endif

    return O;
}

FAOITCaptureVSToPS PassFinalizeTessellationOutput(FAOITCaptureVSToDS Interpolants, float4 WorldPosition, FMaterialTessellationParameters MaterialParameters)
{
    FAOITCaptureVSToPS O;
    
    O.FactoryInterpolants = VertexFactoryAssignInterpolants(Interpolants.FactoryInterpolants);

    // Copy everything in the shared base class over to the pixel shader input
    (FSharedBasePassInterpolants)O.BasePassInterpolants = (FSharedBasePassInterpolants)Interpolants.BasePassInterpolants;

    // Transform position to clip-space
    ISOLATE
    {
        O.Position = mul(WorldPosition, View.TranslatedWorldToClip);
    }
    
    // Calc position-dependent interps
    O.BasePassInterpolants.PixelPosition = WorldPosition;

    #if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
        O.BasePassInterpolants.PixelPositionExcludingWPO = float4(Interpolants.BasePassInterpolants.WorldPositionExcludingWPO, 1);
    #endif

    return O;
}

// This gets us the MainHull and MainDomain shader definitions
#include "Tessellation.usf"
#endif

void VSMain(
    FVertexFactoryInput Input,
    OPTIONAL_VertexID
    out FAOITCaptureVSOutput Output
    )
{
    FVertexFactoryIntermediates VFIntermediates = GetVertexFactoryIntermediates(Input);
    float4 WorldPositionExcludingWPO = VertexFactoryGetWorldPosition(Input, VFIntermediates);
    float4 WorldPosition = WorldPositionExcludingWPO;

    float3x3 TangentToLocal = VertexFactoryGetTangentToLocal(Input, VFIntermediates);    
    FMaterialVertexParameters VertexParameters = GetMaterialVertexParameters(Input, VFIntermediates, WorldPosition.xyz, TangentToLocal);

    // Isolate instructions used for world position offset
    // As these cause the optimizer to generate different position calculating instructions in each pass, resulting in self-z-fighting.
    // This is only necessary for shaders used in passes that have depth testing enabled.
    ISOLATE
    {
        WorldPosition.xyz += GetMaterialWorldPositionOffset(VertexParameters);
    }

#if USING_TESSELLATION
    // We let the Domain Shader convert to post projection when tessellating
    Output.Position = WorldPosition;    

    #if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
        Output.BasePassInterpolants.WorldPositionExcludingWPO = WorldPositionExcludingWPO;
    #endif
#else
    ISOLATE
    {
        float4 RasterizedWorldPosition = VertexFactoryGetRasterizedWorldPosition(Input, VFIntermediates, WorldPosition);
        Output.Position = mul(RasterizedWorldPosition, View.TranslatedWorldToClip);
    }

    Output.BasePassInterpolants.PixelPosition = WorldPosition;

    #if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
        Output.BasePassInterpolants.PixelPositionExcludingWPO = WorldPositionExcludingWPO;
    #endif
#endif

    Output.FactoryInterpolants = VertexFactoryGetInterpolants(Input, VFIntermediates, VertexParameters);

// Calculate the fog needed for translucency
#if NEEDS_BASEPASS_FOGGING
#if BASEPASS_ATMOSPHERIC_FOG
    Output.BasePassInterpolants.VertexFog = CalculateVertexAtmosphericFog(WorldPosition.xyz, View.TranslatedViewOrigin.xyz);
#else
    Output.BasePassInterpolants.VertexFog = CalculateVertexHeightFog(WorldPosition.xyz, View.TranslatedViewOrigin);
#endif
#endif

    OutputVertexID( Output );
}

EARLYDEPTHSTENCIL
void PSMain(
    FVertexFactoryInterpolantsVSToPS Interpolants,
    FBasePassInterpolantsVSToPS BasePassInterpolants,
    float4 InSVPosition : SV_POSITION,
    OPTIONAL_IsFrontFace
#if DEBUG_AOIT_CAPTURE
    ,out float4 OutColor        : SV_Target0
#endif
    )
{
    FMaterialPixelParameters MaterialParameters = GetMaterialPixelParameters(Interpolants, BasePassInterpolants.PixelPosition);

    CalcMaterialParameters(MaterialParameters, InSVPosition, bIsFrontFace, BasePassInterpolants.PixelPosition
#if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
        , BasePassInterpolants.PixelPositionExcludingWPO
#endif
    );


#if EDITOR_PRIMITIVE_MATERIAL && (FEATURE_LEVEL >= FEATURE_LEVEL_SM4 || ES2_EMULATION)
    const bool bEditorWeightedZBuffering = true;
#else
    const bool bEditorWeightedZBuffering = false;
#endif


    //Clip if the blend mode requires it.
    if(!bEditorWeightedZBuffering)
    {
        GetMaterialCoverageAndClipping(MaterialParameters);
    }


    // Store the results in local variables and reuse instead of calling the functions multiple times.
    half3 BaseColor = GetMaterialBaseColor( MaterialParameters );
    half  Metallic = GetMaterialMetallic( MaterialParameters );
    half  Specular = GetMaterialSpecular( MaterialParameters );

    float MaterialAO = GetMaterialAmbientOcclusion(MaterialParameters);
    float Roughness = GetMaterialRoughness(MaterialParameters);

    // 0..1, SubsurfaceProfileId = int(x * 255)
    float SubsurfaceProfile = 0;

    // If we don't use this shading model the color should be black (don't generate shader code for unused data, don't do indirectlighting cache lighting with this color).
    float3 SubsurfaceColor = 0;
#if MATERIAL_SHADINGMODEL_SUBSURFACE || MATERIAL_SHADINGMODEL_PREINTEGRATED_SKIN || MATERIAL_SHADINGMODEL_SUBSURFACE_PROFILE
    {
        float4 SubsurfaceData = GetMaterialSubsurfaceData(MaterialParameters);

#if MATERIAL_SHADINGMODEL_SUBSURFACE || MATERIAL_SHADINGMODEL_PREINTEGRATED_SKIN
        SubsurfaceColor = SubsurfaceData.rgb;
#endif
        SubsurfaceProfile = SubsurfaceData.a;
    }
#endif

#if MATERIAL_FULLY_ROUGH
    Roughness = 1;
#endif

    // So that the following code can still use DiffuseColor and SpecularColor.
#if    MATERIAL_SHADINGMODEL_HAIR
    half3 DiffuseColor = BaseColor;
    half3 SpecularColor = lerp( 0.08 * Specular.xxx, BaseColor, Metallic.xxx );
#else
    half3 DiffuseColor = BaseColor - BaseColor * Metallic;
    half3 SpecularColor = lerp( 0.08 * Specular.xxx, BaseColor, Metallic.xxx );
#endif

// todo: COMPILE_SHADERS_FOR_DEVELOPMENT is unfinished feature, using XBOXONE_PROFILE as workaround
#if COMPILE_SHADERS_FOR_DEVELOPMENT == 1 && !XBOXONE_PROFILE && !ES31_AEP_PROFILE
    {
        // this feature is only needed for development/editor - we can compile it out for a shipping build (see r.CompileShadersForDevelopment cvar help)
        DiffuseColor = DiffuseColor * View.DiffuseOverrideParameter.w + View.DiffuseOverrideParameter.xyz;
        SpecularColor = SpecularColor * View.SpecularOverrideParameter.w + View.SpecularOverrideParameter.xyz;
    }
#endif


    half3 Color = 0;
    float IndirectIrradiance = 0;
    #if !MATERIAL_SHADINGMODEL_UNLIT
        float3 DiffuseIndirectLighting;
        float3 SubsurfaceIndirectLighting;
        GetPrecomputedIndirectLightingAndSkyLight(MaterialParameters, Interpolants, BasePassInterpolants, DiffuseIndirectLighting, SubsurfaceIndirectLighting, IndirectIrradiance);
        #if MATERIAL_SHADINGMODEL_SUBSURFACE || MATERIAL_SHADINGMODEL_PREINTEGRATED_SKIN
            // Add subsurface energy to diffuse
            //@todo - better subsurface handling for these shading models with skylight and precomputed GI
            DiffuseColor += SubsurfaceColor;
        #endif
        Color += (DiffuseIndirectLighting * DiffuseColor + SubsurfaceIndirectLighting * SubsurfaceColor) * MaterialAO;
        
        #if SIMPLE_DYNAMIC_LIGHTING
            // always unshadowed so BiasedNDotL is not needed
            half Lambert = saturate(dot(MaterialParameters.WorldNormal, View.DirectionalLightDirection));
            Color += DiffuseColor * Lambert * View.DirectionalLightColor.rgb;

            Color += GetMaterialHemisphereLightTransferFull(
                DiffuseColor,
                MaterialParameters,
                View.UpperSkyColor.rgb,
                View.LowerSkyColor.rgb
                );
        #endif
    #endif

    half Opacity = GetMaterialOpacity(MaterialParameters);

    #if NEEDS_BASEPASS_FOGGING
        float4 VertexFog = BasePassInterpolants.VertexFog;
    #else
        float4 VertexFog = float4(0,0,0,1);
    #endif
        
    // Volume lighting for lit translucency
    #if (MATERIAL_SHADINGMODEL_DEFAULT_LIT || MATERIAL_SHADINGMODEL_SUBSURFACE) && ( MATERIALBLENDING_TRANSLUCENT || MATERIALBLENDING_ADDITIVE )
        Color += GetTranslucencyLighting(MaterialParameters, DiffuseColor, Roughness, SpecularColor, IndirectIrradiance, MaterialAO);
    #endif

    #if MATERIAL_SHADINGMODEL_HAIR && MATERIALBLENDING_TRANSLUCENT
        Color += GetTranslucentHairLighting(MaterialParameters, DiffuseColor, Roughness, SpecularColor, IndirectIrradiance, MaterialAO);
    #endif

    #if !MATERIAL_SHADINGMODEL_UNLIT
        Color = lerp(Color, DiffuseColor + SpecularColor, View.UnlitViewmodeMask);
    #endif

    half3 Emissive = GetMaterialEmissive(MaterialParameters);

// todo: COMPILE_SHADERS_FOR_DEVELOPMENT is unfinished feature, using XBOXONE_PROFILE as workaround
#if COMPILE_SHADERS_FOR_DEVELOPMENT == 1 && !XBOXONE_PROFILE && !ES31_AEP_PROFILE
    // this feature is only needed for development/editor - we can compile it out for a shipping build (see r.CompileShadersForDevelopment cvar help)
    #if SM5_PROFILE || SM4_PROFILE
        BRANCH
        if (View.OutOfBoundsMask > 0)
        {
            if (any(abs(MaterialParameters.WorldPosition - Primitive.ObjectWorldPositionAndRadius.xyz) > Primitive.ObjectBounds + 1))
            {
                float Gradient = frac(dot(MaterialParameters.WorldPosition, float3(.577f, .577f, .577f)) / 500.0f);
                Emissive = lerp(float3(1,1,0), float3(0,1,1), Gradient.xxx > .5f);
                Opacity = 1;
            }
        }
    #endif
#endif

    Color += Emissive;

    #if MATERIALBLENDING_TRANSLUCENT
        Color = Color * VertexFog.a + VertexFog.rgb;
        Color *= Opacity;
    #elif MATERIALBLENDING_ADDITIVE
        Color = Color * VertexFog.aaa;
        Color *= Opacity;
        Opacity = 0.0;
    #elif MATERIALBLENDING_MODULATE
        Color = lerp(float3(1, 1, 1), Color, VertexFog.aaa * VertexFog.aaa);
    #endif
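    // Note: for the translucent and additive blend modes the color is pre-multiplied
    // by Opacity above, and the per-fragment transmittance stored in the list below
    // is (1 - Opacity); the resolve pass can then composite each node with a simple
    // color += nodeColor * visibility.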
    
    float2 screenPositionF = MaterialParameters.ScreenPosition.xy / MaterialParameters.ScreenPosition.w * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
    int2 screenPosition = int2( screenPositionF * View.RenderTargetSize );
    uint newNodeAddress;
    uint mask;
    if (FL_AllocNode(newNodeAddress))
    {
        // Fill node
        FragmentListNode node;            

        node.color    = FL_PackColor3(Color, mask);
        
#if SUPPORT_ALL_TRANSLUCENCY
    #if MATERIALBLENDING_MODULATE
        mask |= 0x10UL;
    #endif
        node.depth    = FL_PackDepthAndMask5(MaterialParameters.ScreenPosition.w - MaterialParameters.ScreenPosition.z, mask);
#else
        node.depth    = FL_PackDepthAndMask4(MaterialParameters.ScreenPosition.w - MaterialParameters.ScreenPosition.z, mask);
#endif

        // Insert node!
        FL_InsertNode(screenPosition, newNodeAddress, node, 1.0 - Opacity);
    }
    
    #if DEBUG_AOIT_CAPTURE
        
        OutColor = half4(Color, 1);
    #endif
}
/*=============================================================================
    AOITResolveShader.usf: resolve AOIT fragment list to final buffer
=============================================================================*/
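
// Resolve pass overview (summary of the code below): the fragment list written by
// the capture pass is walked twice per pixel. The first loop inserts every fragment
// into the fixed-size AOIT visibility function (AOITInsertFragment); the second loop
// looks up each fragment's accumulated transmittance (AOITFindFragment) and
// composites its color. The output alpha is the total transmittance remaining in
// the last node of the curve.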

#include "Common.usf"
#include "AOIT.usf"
#include "FragmentList.usf"


void Main(
    FScreenVertexOutput Input,
    out float4 OutColor : SV_Target0
    )
{
    uint i;
    uint nodeOffset;
    int2 screenAddress = int2(Input.Position.xy);  

    // Get offset to the first node
    uint firstNodeOffset = FL_GetFirstNodeOffset(screenAddress);
    
    AOITData data;
    // Initialize AOIT data
    [unroll]for (i = 0; i < AOIT_RT_COUNT; ++i) {
        data.depth[i] = AIOT_EMPTY_NODE_DEPTH.xxxx;
#if SUPPORT_ALL_TRANSLUCENCY
        data.trans[i][0] = AOIT_FIRT_NODE_TRANS.xxx;
        data.trans[i][1] = AOIT_FIRT_NODE_TRANS.xxx;
        data.trans[i][2] = AOIT_FIRT_NODE_TRANS.xxx;
        data.trans[i][3] = AOIT_FIRT_NODE_TRANS.xxx;
#else
        data.trans[i] = AOIT_FIRT_NODE_TRANS.xxxx;
#endif
    }
    
    // Fetch all nodes and add them to our visibility function
    nodeOffset = firstNodeOffset;
    [loop] while (nodeOffset != FL_NODE_LIST_NULL) 
    {
        // Get node..
        FragmentListNode node = FL_GetNode(nodeOffset);

        float depth;
        uint mask;
#if SUPPORT_ALL_TRANSLUCENCY
        FL_UnpackDepthAndMask5(node.depth, depth, mask);
#else
        FL_UnpackDepthAndMask4(node.depth, depth, mask);
#endif
    
        uint next;
        float transA;
        next = FL_UnpackIndexAndOpcity(node.next, transA);
        
#if SUPPORT_ALL_TRANSLUCENCY
        float3 trans = transA.xxx;
        float3 nodeColor;
        nodeColor = FL_UnpackColor3(node.color, (uint)(mask & 0xFUL));
        trans = lerp(trans, nodeColor, float(mask>>4));
#else
        float trans = transA;
#endif
        
        AOITInsertFragment(depth, trans, data);      

        // Move to next node
        nodeOffset = next;                    
    }

    float3 color = float3(0, 0, 0);
    // Fetch all nodes again and composite them
    nodeOffset = firstNodeOffset;
    [loop]  while (nodeOffset != FL_NODE_LIST_NULL) {
        // Get node..
        FragmentListNode node = FL_GetNode(nodeOffset);

        float depth;
        uint mask;
#if SUPPORT_ALL_TRANSLUCENCY
        FL_UnpackDepthAndMask5(node.depth, depth, mask);
#else
        FL_UnpackDepthAndMask4(node.depth, depth, mask);
#endif
             
        // Unpack color
        float3 nodeColor;
        nodeColor = FL_UnpackColor3(node.color, (uint)(mask & 0xFUL));        
        uint next;
        float transA;
        next = FL_UnpackIndexAndOpcity(node.next, transA);
        
        AOITFragment frag = AOITFindFragment(data, depth);
#if SUPPORT_ALL_TRANSLUCENCY
        float3 vis = frag.index == 0 ? float3(1.0,1.0,1.0) : frag.transA;
        color += nodeColor * (1.0 - float(mask>>4)) * vis;
#else
        float vis = frag.index == 0 ? 1.0f : frag.transA;
        color += nodeColor * vis.xxx;
#endif

        // Move to next node
        nodeOffset = next;                    
    }
    
#if SUPPORT_ALL_TRANSLUCENCY
    OutColor = float4(color, data.trans[AOIT_RT_COUNT - 1][3][0]);
#else
    OutColor = float4(color, data.trans[AOIT_RT_COUNT - 1][3]);
#endif
    OutColor = RETURN_COLOR(OutColor);
}

/*=============================================================================
    AOIT.usf
=============================================================================*/

  // Copyright 2011 Intel Corporation

// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.

#ifndef H_AOIT
#define H_AOIT
#include "AOITCommon.usf"

//////////////////////////////////////////////
// Defines
//////////////////////////////////////////////

#ifndef AOIT_NODE_COUNT 
#define AOIT_NODE_COUNT            (16)
#endif

#define AOIT_FIRT_NODE_TRANS    (1)
#define AOIT_RT_COUNT            (AOIT_NODE_COUNT / 4)
#define AIOT_EMPTY_NODE_DEPTH    (1E30)

// Forces compression to only work on the second half of the nodes (cheaper and better IQ in most cases)
#define AOIT_DONT_COMPRESS_FIRST_HALF 
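
// Layout note (derived from the code below): with the default AOIT_NODE_COUNT of 16
// the visibility function is stored as AOIT_RT_COUNT = 4 float4 depth vectors plus
// matching transmittance storage (float4s, or float3[4] per RT when
// SUPPORT_ALL_TRANSLUCENCY is enabled), i.e. node k lives at depth[k / 4][k % 4].
// AOITFindFragment exploits this: it first selects the float4 "bucket" the fragment
// depth falls into, then refines to the exact node inside that bucket.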

//////////////////////////////////////////////
// Structs
//////////////////////////////////////////////

struct AOITData 
{
    float4 depth[AOIT_RT_COUNT];
#if SUPPORT_ALL_TRANSLUCENCY
    float3 trans[AOIT_RT_COUNT][4];
#else
    float4 trans[AOIT_RT_COUNT];
#endif
};

struct AOITFragment
{
    int   index;
    float depthA;
#if SUPPORT_ALL_TRANSLUCENCY
    float3 transA;
#else
    float transA;
#endif
};

//////////////////////////////////////////////////
// Two-level search for AT visibility functions
//////////////////////////////////////////////////
 
AOITFragment AOITFindFragment(in AOITData data, in float fragmentDepth)
{
    int    index;
#if SUPPORT_ALL_TRANSLUCENCY
    float4 depth;
    float3 trans[4];
#else
    float4 depth, trans;
#endif
    float  leftDepth;
#if SUPPORT_ALL_TRANSLUCENCY
    float3  leftTrans;
#else
    float  leftTrans;
#endif
    
    AOITFragment Output;      

#if AOIT_RT_COUNT > 7    
    [flatten]if (fragmentDepth > data.depth[6][3])
    {
        depth        = data.depth[7];
        trans        = data.trans[7];
        leftDepth    = data.depth[6][3];
        leftTrans    = data.trans[6][3];
        Output.index = 28;        
    }
    else
#endif  
#if AOIT_RT_COUNT > 6    
    [flatten]if (fragmentDepth > data.depth[5][3])
    {
        depth        = data.depth[6];
        trans        = data.trans[6];
        leftDepth    = data.depth[5][3];
        leftTrans    = data.trans[5][3];
        Output.index = 24;        
    }
    else
#endif  
#if AOIT_RT_COUNT > 5    
    [flatten]if (fragmentDepth > data.depth[4][3])
    {
        depth        = data.depth[5];
        trans        = data.trans[5];
        leftDepth    = data.depth[4][3];
        leftTrans    = data.trans[4][3];
        Output.index = 20;        
    }
    else
#endif  
#if AOIT_RT_COUNT > 4    
    [flatten]if (fragmentDepth > data.depth[3][3])
    {
        depth        = data.depth[4];
        trans        = data.trans[4];
        leftDepth    = data.depth[3][3];
        leftTrans    = data.trans[3][3];    
        Output.index = 16;        
    }
    else
#endif    
#if AOIT_RT_COUNT > 3    
    [flatten]if (fragmentDepth > data.depth[2][3])
    {
        depth        = data.depth[3];
        trans        = data.trans[3];
        leftDepth    = data.depth[2][3];
        leftTrans    = data.trans[2][3];    
        Output.index = 12;        
    }
    else
#endif    
#if AOIT_RT_COUNT > 2    
    [flatten]if (fragmentDepth > data.depth[1][3])
    {
        depth        = data.depth[2];
        trans        = data.trans[2];
        leftDepth    = data.depth[1][3];
        leftTrans    = data.trans[1][3];          
        Output.index = 8;        
    }
    else
#endif    
#if AOIT_RT_COUNT > 1    
    [flatten]if (fragmentDepth > data.depth[0][3])
    {
        depth        = data.depth[1];
        trans        = data.trans[1];
        leftDepth    = data.depth[0][3];
        leftTrans    = data.trans[0][3];       
        Output.index = 4;        
    }
    else
#endif
    {    
        depth        = data.depth[0];
        trans        = data.trans[0];
        leftDepth    = data.depth[0][0];
        leftTrans    = data.trans[0][0];      
        Output.index = 0;        
    } 
      
    [flatten]if (fragmentDepth <= depth[0]) {
        Output.depthA = leftDepth;
        Output.transA = leftTrans;
    } else if (fragmentDepth <= depth[1]) {
        Output.index += 1;
        Output.depthA = depth[0]; 
        Output.transA = trans[0];            
    } else if (fragmentDepth <= depth[2]) {
        Output.index += 2;
        Output.depthA = depth[1];
        Output.transA = trans[1];            
    } else if (fragmentDepth <= depth[3]) {
        Output.index += 3;    
        Output.depthA = depth[2];
        Output.transA = trans[2];            
    } else {
        Output.index += 4;       
        Output.depthA = depth[3];
        Output.transA = trans[3];         
    }
    
    return Output;
}    

////////////////////////////////////////////////////
// Insert a new fragment in the visibility function
////////////////////////////////////////////////////

void AOITInsertFragment(in float fragmentDepth,
#if SUPPORT_ALL_TRANSLUCENCY
                        in float3 fragmentTrans,
#else
                        in float fragmentTrans,
#endif
                        inout AOITData aoitData)
{    
    int i, j;
  
    // Unpack AOIT data    
    float depth[AOIT_NODE_COUNT + 1];    
#if SUPPORT_ALL_TRANSLUCENCY
    float3 trans[AOIT_NODE_COUNT + 1];
#else
    float trans[AOIT_NODE_COUNT + 1];
#endif  
    [unroll] for (i = 0; i < AOIT_RT_COUNT; ++i) {
        [unroll] for (j = 0; j < 4; ++j) {
            depth[4 * i + j] = aoitData.depth[i][j];
            trans[4 * i + j] = aoitData.trans[i][j];                    
        }
    }    

    // Find insertion index 
    AOITFragment tempFragment = AOITFindFragment(aoitData, fragmentDepth);
    const int   index = tempFragment.index;
    // If we are inserting in the first node then use 1.0 as previous transmittance value
    // (we don't store it, but it's implicitly set to 1. This allows us to store one more node)
#if SUPPORT_ALL_TRANSLUCENCY
    const float3 prevTrans = index != 0 ? tempFragment.transA : float3(1.0,1.0,1.0);
#else
    const float prevTrans = index != 0 ? tempFragment.transA : 1.0f;
#endif

    // Make space for the new fragment. Also composite new fragment with the current curve 
    // (except for the node that represents the new fragment)
    [unroll]for (i = AOIT_NODE_COUNT - 1; i >= 0; --i) {
        [flatten]if (index <= i) {
            depth[i + 1] = depth[i];
            trans[i + 1] = trans[i] * fragmentTrans;
        }
    }
    
    // Insert new fragment
    [unroll]for (i = 0; i <= AOIT_NODE_COUNT; ++i) {
        [flatten]if (index == i) {
            depth[i] = fragmentDepth;
            trans[i] = fragmentTrans * prevTrans;
        }
    } 
    
    // pack representation if we have too many nodes
    [flatten]if (depth[AOIT_NODE_COUNT] != AIOT_EMPTY_NODE_DEPTH) {                    
        
        // That's the total number of nodes that can possibly be removed
        const int removalCandidateCount = (AOIT_NODE_COUNT + 1) - 1;

#ifdef AOIT_DONT_COMPRESS_FIRST_HALF
        // To bias our compression scheme in favor of the nodes closest to the eye,
        // we skip the first 50% of the removal candidates
        const int startRemovalIdx = removalCandidateCount / 2;
#else
        const int startRemovalIdx = 1;
#endif

#if AOIT_NODE_COUNT == 8
        float nodeUnderError[removalCandidateCount] = {0,0,0,0 ,0,0,0,0};
#elif AOIT_NODE_COUNT == 16
        float nodeUnderError[removalCandidateCount] = {0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0};
#elif AOIT_NODE_COUNT == 24
        float nodeUnderError[removalCandidateCount] = {0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0};
#elif AOIT_NODE_COUNT == 32
        float nodeUnderError[removalCandidateCount] = {0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0 ,0,0,0,0};
#else
        float nodeUnderError[removalCandidateCount];
#endif
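        // Removal error for node i (see the expression below): the depth gap to the
        // previous node times the squared change in transmittance across it, roughly
        // the area removed from the transmittance curve if node i is merged away.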
        [unroll]for (i = startRemovalIdx; i < removalCandidateCount; ++i) {
            nodeUnderError[i] = (depth[i] - depth[i - 1]) * dot((trans[i - 1] - trans[i]), (trans[i - 1] - trans[i]));
        }

        // Find the node that generates the smallest removal error
        int smallestErrorIdx;
        float smallestError;

        smallestErrorIdx = startRemovalIdx;
        smallestError    = nodeUnderError[smallestErrorIdx];
        i = startRemovalIdx + 1;

        [unroll]for ( ; i < removalCandidateCount; ++i) {
            [flatten]if (nodeUnderError[i] < smallestError) {
                smallestError = nodeUnderError[i];
                smallestErrorIdx = i;
            } 
        }

        // Remove that node..
        [unroll]for (i = startRemovalIdx; i < AOIT_NODE_COUNT; ++i) {
            [flatten]if (smallestErrorIdx <= i) {
                depth[i] = depth[i + 1];
            }
        }
        [unroll]for (i = startRemovalIdx - 1; i < AOIT_NODE_COUNT; ++i) {
            [flatten]if (smallestErrorIdx - 1 <= i) {
                trans[i] = trans[i + 1];
            }
        }
    }
    
    // Pack AOIT data
    [unroll] for (i = 0; i < AOIT_RT_COUNT; ++i) {
        [unroll] for (j = 0; j < 4; ++j) {
            aoitData.depth[i][j] = depth[4 * i + j];
            aoitData.trans[i][j] = trans[4 * i + j];                    
        }
    }    
}

#endif // H_AOIT
Original article: https://www.cnblogs.com/DeanWang/p/7070118.html