Shaders for Shadows
Shader code for rendering shadows of translucent occluders, soft shadows and single scattering with moment shadow maps.
 All Classes Files Functions Variables Pages
ParticipatingMediaUtility.fx
Go to the documentation of this file.
1 
8 float3 RectifiedToCartesian(float Distance,float Inclination,float Azimuth){
9  float AzimuthSine,AzimuthCosine;
10  sincos(Azimuth,AzimuthSine,AzimuthCosine);
11  float InclinationSine,InclinationCosine;
12  sincos(Inclination,InclinationSine,InclinationCosine);
13  return Distance*float3(AzimuthCosine,AzimuthSine,InclinationCosine/InclinationSine);
14 }
15 
16 
25 void CartesianToRectified(out float OutDistance,out float OutInclination,out float OutAzimuth,float3 CartesianCoordinates){
26  const float PI=3.1415926535897932384626433832795f;
27  OutDistance=length(CartesianCoordinates.xy);
28  OutInclination=atan2(OutDistance,CartesianCoordinates.z);
29  OutAzimuth=atan2(CartesianCoordinates.y,CartesianCoordinates.x);
30  OutAzimuth=(OutAzimuth<0.0f)?(OutAzimuth+2.0f*PI):OutAzimuth;
31 }
32 
33 
40 void GetSparseRepresentation4Moments(out float2 OutDepth,out float OutWeight1,float4 OptimizedMoments){
41  // Get the first three moments
42  float3 b;
43  b.xz=mul(OptimizedMoments.xz-0.5f,float2x2(-1.0f/3.0f,-0.75f,sqrt(3.0f),0.75f*sqrt(3.0f)));
44  b.y=mul(OptimizedMoments.yw,float2x1(0.125f,1.0f));
45  // Determine the locations of the Dirac-delta pulses
46  float3 Polynomial;
47  float Variance=mad(-b.x,b.x,b.y);
48  Polynomial.z=Variance;
49  Polynomial.y=mad(b.x,b.y,-b.z);
50  Polynomial.x=dot(-b.xy,Polynomial.yz);
51  float Scaling=1.0f/Polynomial[2];
52  float p=Polynomial[1]*Scaling;
53  float q=Polynomial[0]*Scaling;
54  float D=p*p*0.25f-q;
55  float r=sqrt(D);
56  OutDepth=float2(-0.5f*p-r,-0.5f*p+r);
57  // Get the weight
58  OutWeight1=(b.x-OutDepth[0])/(OutDepth[1]-OutDepth[0]);
59  // Negative variance may occur due to quantization errors. This is easily and
60  // appropriately handled using a single Dirac-delta at the mean
61  bool SingleDiracDelta=(Variance<=1.0e-6f);
62  OutDepth=SingleDiracDelta?float2(b.x,0.0f):OutDepth;
63  OutWeight1=SingleDiracDelta?0.0f:OutWeight1;
64 }
65 
66 
77 float EstimateIntegralFrom4Moments(float IntervalEnd,float4 OptimizedMoments,float MomentBias,float OverestimationWeight){
78  float4 _4Moments;
79  _4Moments.xz=mul(OptimizedMoments.xz-0.5f,float2x2(-1.0f/3.0f,-0.75f,sqrt(3.0f),0.75f*sqrt(3.0f)));
80  _4Moments.yw=mul(OptimizedMoments.yw,float2x2(0.125f,-0.125f,1.0f,1.0f));
81 
82  // Bias input data to avoid artifacts
83  const float4 BiasVector=float4(0.0f,0.628f,0.0f,0.628f);
84  float4 b=lerp(_4Moments,BiasVector,MomentBias);
85  float3 z;
86  z[0]=IntervalEnd;
87 
88  // Compute a Cholesky factorization of the Hankel matrix B storing only non-
89  // trivial entries or related products
90  float L21D11=mad(-b[0],b[1],b[2]);
91  float D11=mad(-b[0],b[0], b[1]);
92  float SquaredDepthVariance=mad(-b[1],b[1], b[3]);
93  float D22D11=dot(float2(SquaredDepthVariance,-L21D11),float2(D11,L21D11));
94  float InvD11=1.0f/D11;
95  float L21=L21D11*InvD11;
96 
97  // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T
98  float3 c=float3(1.0f,z[0],z[0]*z[0]);
99  // Forward substitution to solve L*c1=bz
100  c[1]-=b.x;
101  c[2]-=b.y+L21*c[1];
102  // Scaling to solve D*c2=c1
103  c[1]*=InvD11;
104  c[2]*=D11/D22D11;
105  // Backward substitution to solve L^T*c3=c2
106  c[1]-=L21*c[2];
107  c[0]-=dot(c.yz,b.xy);
108  // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions
109  // z[1] and z[2]
110  float InvC2=1.0f/c[2];
111  float p=c[1]*InvC2;
112  float q=c[0]*InvC2;
113  float D=(p*p*0.25f)-q;
114  float r=sqrt(D);
115  z[1]=-p*0.5f-r;
116  z[2]=-p*0.5f+r;
117  // Compute the shadow intensity by summing the appropriate weights
118  float3 Weight;
119  Weight[0]=(z[1]*z[2]-b[0]*(z[1]+z[2])+b[1])/((z[0]-z[1])*(z[0]-z[2]));
120  Weight[1]=(z[0]*z[2]-b[0]*(z[0]+z[2])+b[1])/((z[2]-z[1])*(z[0]-z[1]));
121  Weight[2]=1.0f-Weight[0]-Weight[1];
122  float IntegralLowerBound=
123  (z[2]<z[0])?(Weight[1]+Weight[2]):(
124  (z[1]<z[0])?Weight[1]:0.0f);
125  float IntegralUpperBound=saturate(IntegralLowerBound+Weight[0]);
126  IntegralLowerBound=saturate(IntegralLowerBound);
127  return lerp(IntegralLowerBound,IntegralUpperBound,OverestimationWeight);
128 }
129 
138 float3 GetRoots(float4 Coefficient){
139  // Since we do not want to have a homogenous representation of the roots in the
140  // end, we will need to divide by the leading coefficient. To save some other
141  // multiplications, we may just as well do this right now.
142  Coefficient.xyz/=Coefficient.w;
143  // Formulae are more handy if the coefficients in the middle are divided by
144  // three
145  Coefficient.yz/=3.0f;
146  // Compute the Hessian and the discrimant
147  float3 Delta=float3(
148  mad(-Coefficient.z,Coefficient.z,Coefficient.y),
149  mad(-Coefficient.y,Coefficient.z,Coefficient.x),
150  dot(float2(Coefficient.z,-Coefficient.y),Coefficient.xy)
151  );
152  float Discriminant=dot(float2(4.0f*Delta.x,-Delta.y),Delta.zy);
153  // Compute coefficients of the depressed cubic (third is zero, fourth is one)
154  float2 Depressed=float2(
155  mad(-2.0f*Coefficient.z,Delta.x,Delta.y),
156  Delta.x
157  );
158  // Take the cubic root of a normalized complex number
159  float Theta=atan2(sqrt(Discriminant),-Depressed.x)/3.0f;
160  float2 CubicRoot;
161  sincos(Theta,CubicRoot.y,CubicRoot.x);
162  // Compute the three roots, scale appropriately and revert the depression
163  // transform
164  float3 Root=float3(
165  CubicRoot.x,
166  dot(float2(-0.5f,-0.5f*sqrt(3.0f)),CubicRoot),
167  dot(float2(-0.5f, 0.5f*sqrt(3.0f)),CubicRoot)
168  );
169  Root=mad(2.0f*sqrt(-Depressed.y),Root,-Coefficient.z);
170  return Root;
171 }
172 
175 float EstimateIntegralFrom6Moments(float IntervalEnd,float2 OptimizedMoments0,float4 OptimizedMoments1,float MomentBias,float OverestimationWeight){
176  // Dequantize the moments
177  const float3x3 QuantizationMatrixOdd=float3x3(
178  -0.02877789192f, 0.09995235706f, 0.25893353755f,
179  0.47635550422f, 0.84532580931f, 0.90779616657f,
180  1.55242808973f, 1.05472570761f, 0.83327335647f);
181  const float3x3 QuantizationMatrixEven=float3x3(
182  0.00001253044f,-0.24998746956f,-0.37498825271f,
183  0.16668494186f, 0.16668494186f, 0.21876713299f,
184  0.86602540579f, 0.86602540579f, 0.81189881793f);
185  float3 OptimizedMomentsOdd=float3(OptimizedMoments0,OptimizedMoments1.x)-0.5f;
186  float3 OptimizedMomentsEven=OptimizedMoments1.yzw;
187  OptimizedMomentsEven.z-=0.018888946f;
188  float3 MomentsOdd=mul(OptimizedMomentsOdd,QuantizationMatrixOdd);
189  float3 MomentsEven=mul(OptimizedMomentsEven,QuantizationMatrixEven);
190  float b[6]={MomentsOdd.x,MomentsEven.x,MomentsOdd.y,MomentsEven.y,MomentsOdd.z,MomentsEven.z};
191  // Bias input data to avoid artifacts
192  const float pBiasVector[6]={0.0f,0.5566f,0.0f,0.489f,0.0f,0.47869382f};
193  [unroll] for(int i=0;i!=6;++i){
194  b[i]=lerp(b[i],pBiasVector[i],MomentBias);
195  }
196  float4 z;
197  z[0]=IntervalEnd;
198 
199  // Compute a Cholesky factorization of the Hankel matrix B storing only non-
200  // trivial entries or related products
201  float InvD11=1.0f/mad(-b[0],b[0],b[1]);
202  float L21D11=mad(-b[0],b[1],b[2]);
203  float L21=L21D11*InvD11;
204  float D22=mad(-L21D11,L21,mad(-b[1],b[1],b[3]));
205  float L31D11=mad(-b[0],b[2],b[3]);
206  float L31=L31D11*InvD11;
207  float InvD22=1.0f/D22;
208  float L32D22=mad(-L21D11,L31,mad(-b[1],b[2],b[4]));
209  float L32=L32D22*InvD22;
210  float D33=mad(-b[2],b[2],b[5])-dot(float2(L31D11,L32D22),float2(L31,L32));
211  float InvD33=1.0f/D33;
212 
213  // Construct the polynomial whose roots have to be points of support of the
214  // canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T
215  float4 c;
216  c[0]=1.0f;
217  c[1]=z[0];
218  c[2]=c[1]*z[0];
219  c[3]=c[2]*z[0];
220  // Forward substitution to solve L*c1=bz
221  c[1]-=b[0];
222  c[2]-=mad(L21,c[1],b[1]);
223  c[3]-=b[2]+dot(float2(L31,L32),c.yz);
224  // Scaling to solve D*c2=c1
225  c.yzw*=float3(InvD11,InvD22,InvD33);
226  // Backward substitution to solve L^T*c3=c2
227  c[2]-=L32*c[3];
228  c[1]-=dot(float2(L21,L31),c.zw);
229  c[0]-=dot(float3(b[0],b[1],b[2]),c.yzw);
230 
231  // Solve the cubic equation c[0]+c[1]*z+c[2]*z^2+c[3]*z^3 to obtain solutions
232  // z[1], z[2] and z[3]
233  z.yzw=GetRoots(c);
234 
235  // Determine how the weights at these roots contribute to the end result
236  float4 WeightFactor;
237  WeightFactor[0]=OverestimationWeight;
238  WeightFactor.yzw=(z.yzw>z.xxx)?float3(0.0f,0.0f,0.0f):float3(1.0f,1.0f,1.0f);
239  // Construct an interpolation polynomial matching these factors at the Roots
240  float f0=WeightFactor[0];
241  float f1=WeightFactor[1];
242  float f2=WeightFactor[2];
243  float f3=WeightFactor[3];
244  float f01=(f1-f0)/(z[1]-z[0]);
245  float f12=(f2-f1)/(z[2]-z[1]);
246  float f23=(f3-f2)/(z[3]-z[2]);
247  float f012=(f12-f01)/(z[2]-z[0]);
248  float f123=(f23-f12)/(z[3]-z[1]);
249  float f0123=(f123-f012)/(z[3]-z[0]);
250  // Convert from the Newton basis to the canonical basis of polynomials
251  float4 InterpolationPolynomial;
252  // f012+f0123*(z-z2)
253  InterpolationPolynomial[0]=mad(-f0123,z[2],f012);
254  InterpolationPolynomial[1]=f0123;
255  // *(z-z1) +f01
256  InterpolationPolynomial[2]=InterpolationPolynomial[1];
257  InterpolationPolynomial[1]=mad(InterpolationPolynomial[1],-z[1],InterpolationPolynomial[0]);
258  InterpolationPolynomial[0]=mad(InterpolationPolynomial[0],-z[1],f01);
259  // *(z-z0) +f0
260  InterpolationPolynomial[3]=InterpolationPolynomial[2];
261  InterpolationPolynomial[2]=mad(InterpolationPolynomial[2],-z[0],InterpolationPolynomial[1]);
262  InterpolationPolynomial[1]=mad(InterpolationPolynomial[1],-z[0],InterpolationPolynomial[0]);
263  InterpolationPolynomial[0]=mad(InterpolationPolynomial[0],-z[0],f0);
264 
265  // Now the end result happens to be the dot product of the first four moments
266  // and the interpolation polynomial
267  return saturate(dot(InterpolationPolynomial,float4(1.0f,b[0],b[1],b[2])));
268 }
269 
270 
277 float ComputeTexelTransmittance(uint ShadowMapWidth,float ExtinctionCoefficient,float2 DistanceMinMax,float2 InclinationMinMax){
278  float RaySegmentLength=DistanceMinMax.y/(sin(0.5f*(InclinationMinMax.x+InclinationMinMax.y))*float(ShadowMapWidth));
279  return exp(-ExtinctionCoefficient*RaySegmentLength);
280 }
281 
282 
306 void PrepareShadowMapRowResampling(out float3 OutTextureSpaceRayStart,out float3 OutTextureSpaceRayOffset,out float2 OutInclinationToDepthTransform,out float OutOffsetLength,out float OutInitialOffsetLength,out float OutTexelTransmittance,uint2 OutputShadowMapResolution,uint iShadowMapRow,float ExtinctionCoefficient,float4x4 RectificationToShadowMapProjectionSpace,float2 DistanceMinMax,float2 InclinationMinMax,float2 AzimuthMinMax){
307  // Construct the reference ray in the epipolar slice
308  float RayAzimuth=lerp(AzimuthMinMax.x,AzimuthMinMax.y,float(iShadowMapRow)/float(OutputShadowMapResolution.y));
309  float ReferenceInclination=dot(InclinationMinMax,float2(0.5f,0.5f));
310  float4 RectificationSpaceRayDirection=float4(RectifiedToCartesian(1.0f,ReferenceInclination,RayAzimuth),0.0f);
311  float4 ProjectionSpaceRayDirection=mul(RectificationSpaceRayDirection,RectificationToShadowMapProjectionSpace);
312  float4 ProjectionSpaceRayStart=mul(float4(0.0f,0.0f,0.0f,1.0f),RectificationToShadowMapProjectionSpace);
313  // Convert to a coordinate system where shadow map texture coordinates arise
314  // directly to save one multiply-add within the loop
315  float3 TextureSpaceRayDirection=ProjectionSpaceRayDirection.xyw*float3(0.5f,-0.5f,1.0f)+ProjectionSpaceRayDirection.www*float3(0.5f,0.5f,0.0f);
316  // Also prepare coefficients for conversion of inclinations to depth
317  OutInclinationToDepthTransform.x=1.0f/(InclinationMinMax.y-InclinationMinMax.x);
318  OutInclinationToDepthTransform.y=-InclinationMinMax.x*OutInclinationToDepthTransform.x;
319  OutInclinationToDepthTransform*=2.0f;
320  OutInclinationToDepthTransform.y-=1.0f;
321  // Compute the texel transmittance
322  OutTexelTransmittance=ComputeTexelTransmittance(OutputShadowMapResolution.x,ExtinctionCoefficient,DistanceMinMax,InclinationMinMax);
323  // Compute planar distances
324  OutOffsetLength=DistanceMinMax.y/float(OutputShadowMapResolution.x);
325  OutInitialOffsetLength=0.5f*OutOffsetLength;
326  // Compute the offset length and the final offset
327  OutTextureSpaceRayOffset=TextureSpaceRayDirection*OutOffsetLength;
328  OutTextureSpaceRayStart=ProjectionSpaceRayStart.xyw*float3(0.5f,-0.5f,1.0f)+ProjectionSpaceRayStart.www*float3(0.5f,0.5f,0.0f);
329  OutTextureSpaceRayStart+=0.5f*OutTextureSpaceRayOffset;
330 }
331 
332 
344 float ConvertShadowMapDepthNonLinearRectification(float ShadowMapDepth,float2 ShadowMapToRectificationDepthTransform,float2 InclinationToDepthTransform,float TotalOffsetLength){
345  float RectificationSpaceDepth=mad(ShadowMapDepth,ShadowMapToRectificationDepthTransform.x,ShadowMapToRectificationDepthTransform.y);
346  float SampleInclination=atan2(TotalOffsetLength,RectificationSpaceDepth);
347  return clamp(mad(SampleInclination,InclinationToDepthTransform.x,InclinationToDepthTransform.y),-1.0f,1.0f);
348 }
349 
350 
352 void Convert4MomentCanonicalToOptimized(out float4 OutQuantizedMoments0,float4 CanonicalMoments0){
353  OutQuantizedMoments0.xz=mul(CanonicalMoments0.xz,float2x2(1.5f,sqrt(3.0f)*0.5f,-2.0f,-sqrt(3.0f)*2.0f/9.0f))+0.5f;
354  OutQuantizedMoments0.yw=mul(CanonicalMoments0.yw,float2x2(4.0f,0.5f,-4.0f,0.5f));
355 }
356 
357 
362 void ComputeMomentVector4MomentsOptimized(out float4 OutQuantizedMoments0,float Depth){
363  float SquaredDepth=Depth*Depth;
364  Convert4MomentCanonicalToOptimized(OutQuantizedMoments0,float4(Depth,SquaredDepth,SquaredDepth*Depth,SquaredDepth*SquaredDepth));
365 }
366 
367 
369 void Convert6MomentCanonicalToOptimized(out float2 OutQuantizedMoments0,out float4 OutQuantizedMoments1,float2 CanonicalMoments0,float4 CanonicalMoments1){
370  float3 MomentsOdd=float3(CanonicalMoments0.x,CanonicalMoments1.x,CanonicalMoments1.z);
371  float3 MomentsEven=float3(CanonicalMoments0.y,CanonicalMoments1.y,CanonicalMoments1.w);
372  // Apply the affine transform
373  const float3x3 QuantizationMatrixOdd=float3x3(
374  2.5f,-1.87499864450f, 1.26583039016f,
375  -10.0f, 4.20757543111f,-1.47644882902f,
376  8.0f,-1.83257678661f, 0.71061660238f);
377  const float3x3 QuantizationMatrixEven=float3x3(
378  4.0f, 9.0f,-0.57759806484f,
379  -4.0f,-24.0f, 4.61936647543f,
380  0.0f, 16.0f,-3.07953906655f);
381  float3 OptimizedMomentsOdd=mul(MomentsOdd,QuantizationMatrixOdd)+0.5f;
382  float3 OptimizedMomentsEven=mul(MomentsEven,QuantizationMatrixEven);
383  OptimizedMomentsEven.z+=0.018888946f;
384  OutQuantizedMoments0=OptimizedMomentsOdd.xy;
385  OutQuantizedMoments1=float4(OptimizedMomentsOdd.z,OptimizedMomentsEven);
386 }
387 
388 
393 void ComputeMomentVector6MomentsOptimized(out float2 OutQuantizedMoments0,out float4 OutQuantizedMoments1,float Depth){
394  // Generate the moments
395  float SquaredDepth=Depth*Depth;
396  float CubedDepth=Depth*SquaredDepth;
397  float2 CanonicalMoments0=float2(Depth,SquaredDepth);
398  float4 CanonicalMoments1=float4(CubedDepth,SquaredDepth*SquaredDepth,CubedDepth*SquaredDepth,CubedDepth*CubedDepth);
399  Convert6MomentCanonicalToOptimized(OutQuantizedMoments0,OutQuantizedMoments1,CanonicalMoments0,CanonicalMoments1);
400 }
401 
402 
403 #define TEXEL_TYPE float2
404 
417 void ApplyTransmittanceWeightedPrefixSum(inout RWTexture2D<TEXEL_TYPE> OutFilteredShadowMap,Texture2D<TEXEL_TYPE> FilterableShadowMap,uint iThreadX,uint iThreadY,float TexelTransmittance){
418  const uint ThreadGroupWidth=8;
419  uint Width,Height;
420  OutFilteredShadowMap.GetDimensions(Width,Height);
421  uint DividedWidth=Width/ThreadGroupWidth;
422  // Prepare running sums and products for the loop
423  float Weight=1.0f;
424  float TotalWeight=0.0f;
425  TEXEL_TYPE PrefixSum;
426  PrefixSum=0.0f;
427  // Iterate over the texture block by block
428  [loop] for(uint x=0;x<DividedWidth;++x){
429  uint BlockX=x*ThreadGroupWidth;
430  TEXEL_TYPE StoredPrefixSum=PrefixSum;
431  float StoredTotalWeight=TotalWeight;
432  [unroll] for(uint i=0;i!=ThreadGroupWidth;++i){
433  // We let each thread load all texture samples for its row. This means
434  // that we have ThreadGroupWidth as many texture reads as we would need
435  // to have but parallelism is increased by the same factor and
436  // benchmarks show that it is still very fast thanks to texture cache.
437  // In fact, the observed speed is better than caching via group-shared
438  // memory.
439  PrefixSum+=Weight*FilterableShadowMap.Load(uint3(BlockX+i,iThreadY,0));
440  TotalWeight+=Weight;
441  Weight*=TexelTransmittance;
442  bool StoreResult=(i==iThreadX);
443  StoredPrefixSum=StoreResult?PrefixSum:StoredPrefixSum;
444  StoredTotalWeight=StoreResult?TotalWeight:StoredTotalWeight;
445  }
446  OutFilteredShadowMap[uint2(BlockX+iThreadX,iThreadY)]=StoredPrefixSum/StoredTotalWeight;
447  }
448 }
449 
450 
451 #define TEXEL_TYPE float3
452 void ApplyTransmittanceWeightedPrefixSum(inout RWTexture2D<TEXEL_TYPE> OutFilteredShadowMap,Texture2D<TEXEL_TYPE> FilterableShadowMap,uint iThreadX,uint iThreadY,float TexelTransmittance){
453  const uint ThreadGroupWidth=8;
454  uint Width,Height;
455  OutFilteredShadowMap.GetDimensions(Width,Height);
456  uint DividedWidth=Width/ThreadGroupWidth;
457  // Prepare running sums and products for the loop
458  float Weight=1.0f;
459  float TotalWeight=0.0f;
460  TEXEL_TYPE PrefixSum;
461  PrefixSum=0.0f;
462  // Different threads handle different textures
463  [loop] for(uint x=0;x<DividedWidth;++x){
464  uint BlockX=x*ThreadGroupWidth;
465  TEXEL_TYPE StoredPrefixSum=PrefixSum;
466  float StoredTotalWeight=TotalWeight;
467  [unroll] for(uint i=0;i!=ThreadGroupWidth;++i){
468  // We let each thread load all texture samples for its row. This means
469  // that we have ThreadGroupWidth as many texture reads as we would need
470  // to have but parallelism is increased by the same factor and
471  // benchmarks show that it is still very fast thanks to texture cache.
472  // In fact, the observed speed is better than caching via group-shared
473  // memory.
474  PrefixSum+=Weight*FilterableShadowMap.Load(uint3(BlockX+i,iThreadY,0));
475  TotalWeight+=Weight;
476  Weight*=TexelTransmittance;
477  bool StoreResult=(i==iThreadX);
478  StoredPrefixSum=StoreResult?PrefixSum:StoredPrefixSum;
479  StoredTotalWeight=StoreResult?TotalWeight:StoredTotalWeight;
480  }
481  OutFilteredShadowMap[uint2(BlockX+iThreadX,iThreadY)]=StoredPrefixSum/StoredTotalWeight;
482  }
483 }
484 
485 
486 #define TEXEL_TYPE float4
487 void ApplyTransmittanceWeightedPrefixSum(inout RWTexture2D<TEXEL_TYPE> OutFilteredShadowMap,Texture2D<TEXEL_TYPE> FilterableShadowMap,uint iThreadX,uint iThreadY,float TexelTransmittance){
488  const uint ThreadGroupWidth=8;
489  uint Width,Height;
490  OutFilteredShadowMap.GetDimensions(Width,Height);
491  uint DividedWidth=Width/ThreadGroupWidth;
492  // Prepare running sums and products for the loop
493  float Weight=1.0f;
494  float TotalWeight=0.0f;
495  TEXEL_TYPE PrefixSum;
496  PrefixSum=0.0f;
497  // Different threads handle different textures
498  [loop] for(uint x=0;x<DividedWidth;++x){
499  uint BlockX=x*ThreadGroupWidth;
500  TEXEL_TYPE StoredPrefixSum=PrefixSum;
501  float StoredTotalWeight=TotalWeight;
502  [unroll] for(uint i=0;i!=ThreadGroupWidth;++i){
503  // We let each thread load all texture samples for its row. This means
504  // that we have ThreadGroupWidth as many texture reads as we would need
505  // to have but parallelism is increased by the same factor and
506  // benchmarks show that it is still very fast thanks to texture cache.
507  // In fact, the observed speed is better than caching via group-shared
508  // memory.
509  PrefixSum+=Weight*FilterableShadowMap.Load(uint3(BlockX+i,iThreadY,0));
510  TotalWeight+=Weight;
511  Weight*=TexelTransmittance;
512  bool StoreResult=(i==iThreadX);
513  StoredPrefixSum=StoreResult?PrefixSum:StoredPrefixSum;
514  StoredTotalWeight=StoreResult?TotalWeight:StoredTotalWeight;
515  }
516  OutFilteredShadowMap[uint2(BlockX+iThreadX,iThreadY)]=StoredPrefixSum/StoredTotalWeight;
517  }
518 }
void CartesianToRectified(out float OutDistance, out float OutInclination, out float OutAzimuth, float3 CartesianCoordinates)
void ComputeMomentVector6MomentsOptimized(out float2 OutQuantizedMoments0, out float4 OutQuantizedMoments1, float Depth)
void PrepareShadowMapRowResampling(out float3 OutTextureSpaceRayStart, out float3 OutTextureSpaceRayOffset, out float2 OutInclinationToDepthTransform, out float OutOffsetLength, out float OutInitialOffsetLength, out float OutTexelTransmittance, uint2 OutputShadowMapResolution, uint iShadowMapRow, float ExtinctionCoefficient, float4x4 RectificationToShadowMapProjectionSpace, float2 DistanceMinMax, float2 InclinationMinMax, float2 AzimuthMinMax)
void ApplyTransmittanceWeightedPrefixSum(inout RWTexture2D< TEXEL_TYPE > OutFilteredShadowMap, Texture2D< TEXEL_TYPE > FilterableShadowMap, uint iThreadX, uint iThreadY, float TexelTransmittance)
float3 GetRoots(float4 Coefficient)
float3 RectifiedToCartesian(float Distance, float Inclination, float Azimuth)
float ComputeTexelTransmittance(uint ShadowMapWidth, float ExtinctionCoefficient, float2 DistanceMinMax, float2 InclinationMinMax)
void Convert4MomentCanonicalToOptimized(out float4 OutQuantizedMoments0, float4 CanonicalMoments0)
void Convert6MomentCanonicalToOptimized(out float2 OutQuantizedMoments0, out float4 OutQuantizedMoments1, float2 CanonicalMoments0, float4 CanonicalMoments1)
float ConvertShadowMapDepthNonLinearRectification(float ShadowMapDepth, float2 ShadowMapToRectificationDepthTransform, float2 InclinationToDepthTransform, float TotalOffsetLength)
void ComputeMomentVector4MomentsOptimized(out float4 OutQuantizedMoments0, float Depth)
float EstimateIntegralFrom6Moments(float IntervalEnd, float2 OptimizedMoments0, float4 OptimizedMoments1, float MomentBias, float OverestimationWeight)
float EstimateIntegralFrom4Moments(float IntervalEnd, float4 OptimizedMoments, float MomentBias, float OverestimationWeight)
float2 CubicRoot(float2 Z)
void GetSparseRepresentation4Moments(out float2 OutDepth, out float OutWeight1, float4 OptimizedMoments)