10 float2
Dot(float3x2 LHS,float3x2 RHS){
12 dot(transpose(LHS)[0],transpose(RHS)[0])+dot(transpose(LHS)[1],transpose(RHS)[1]),
13 dot(transpose(LHS)[0],transpose(RHS)[1])-dot(transpose(LHS)[1],transpose(RHS)[0]));
15 float2
Dot(float4x2 LHS,float4x2 RHS){
17 dot(transpose(LHS)[0],transpose(RHS)[0])+dot(transpose(LHS)[1],transpose(RHS)[1]),
18 dot(transpose(LHS)[0],transpose(RHS)[1])-dot(transpose(LHS)[1],transpose(RHS)[0]));
20 float2
Dot(float2 LHS[5],float2 RHS[5]){
21 float2 Result=float2(0.0f,0.0f);
22 [unroll]
for(
int i=0;i!=5;++i){
36 float3x2
Multiply(float3x3 MatrixRealPart,float3x3 MatrixImaginaryPart,float3x2 RHSVector){
38 Result._m00_m10_m20=mul(MatrixRealPart,RHSVector._m00_m10_m20)-mul(MatrixImaginaryPart,RHSVector._m01_m11_m21);
39 Result._m01_m11_m21=mul(MatrixRealPart,RHSVector._m01_m11_m21)+mul(MatrixImaginaryPart,RHSVector._m00_m10_m20);
42 float4x2
Multiply(float4x4 MatrixRealPart,float4x4 MatrixImaginaryPart,float4x2 RHSVector){
44 Result._m00_m10_m20_m30=mul(MatrixRealPart,RHSVector._m00_m10_m20_m30)-mul(MatrixImaginaryPart,RHSVector._m01_m11_m21_m31);
45 Result._m01_m11_m21_m31=mul(MatrixRealPart,RHSVector._m01_m11_m21_m31)+mul(MatrixImaginaryPart,RHSVector._m00_m10_m20_m30);
48 void Multiply(out float2 OutProduct[5],
float MatrixRealPart[5][5],
float MatrixImaginaryPart[5][5],float2 RHSVector[5]){
49 [unroll]
for(
int i=0;i!=5;++i){
50 OutProduct[i]=float2(0.0f,0.0f);
51 [unroll]
for(
int j=0;j!=5;++j){
52 OutProduct[i]+=
Multiply(float2(MatrixRealPart[i][j],MatrixImaginaryPart[i][j]),RHSVector[j]);
66 float2 p=
Multiply(pCoefficient[1],aInv);
67 float2 q=
Multiply(pCoefficient[0],aInv);
68 float2 Discriminant=0.25f*
Square(p)-q;
81 float2 bb=
Square(pCoefficient[2]);
82 float2 ac=
Multiply(pCoefficient[1],pCoefficient[3]);
83 float2 Delta0=bb-3.0f*ac;
84 float2 bbb=
Multiply(bb,pCoefficient[2]);
85 float2 abc=
Multiply(ac,pCoefficient[2]);
86 float2 aad=
Multiply(
Multiply(pCoefficient[3],pCoefficient[3]),pCoefficient[0]);
87 float2 Delta1=2.0f*bbb-9.0f*abc+27.0f*aad;
90 float2 Delta0OverC=
Divide(Delta0,C);
92 pRoot[0]=-(1.0f/3.0f)*
Multiply(aInv,pCoefficient[2]+C+Delta0OverC);
93 pRoot[1]=-(1.0f/3.0f)*
Multiply(aInv,pCoefficient[2]+
Multiply(float2(-1.0f, sqrt(3.0f))/2.0f,C)+
Multiply(float2(-1.0f,-sqrt(3.0f))/2.0f,Delta0OverC));
94 pRoot[2]=-(1.0f/3.0f)*
Multiply(aInv,pCoefficient[2]+
Multiply(float2(-1.0f,-sqrt(3.0f))/2.0f,C)+
Multiply(float2(-1.0f, sqrt(3.0f))/2.0f,Delta0OverC));
102 float2 cc=
Square(pCoefficient[2]);
103 float2 bd=
Multiply(pCoefficient[1],pCoefficient[3]);
104 float2 ae=
Multiply(pCoefficient[0],pCoefficient[4]);
105 float2 Delta0=cc-3.0f*bd+12.0f*ae;
107 float2 ccc=
Multiply(cc,pCoefficient[2]);
108 float2 bcd=
Multiply(bd,pCoefficient[2]);
109 float2 bb=
Square(pCoefficient[3]);
110 float2 bbe=
Multiply(bb,pCoefficient[0]);
112 float2 ace=
Multiply(ae,pCoefficient[2]);
113 float2 Delta1=2.0f*ccc-9.0f*bcd+27.0f*bbe+27.0f*add-72.0f*ace;
117 float2 ac=
Multiply(pCoefficient[2],pCoefficient[4]);
118 float2 aa=
Square(pCoefficient[4]);
119 float2 p=
Divide(8.0f*ac-3.0f*bb,8.0f*aa);
120 float2 aaa=
Multiply(aa,pCoefficient[4]);
121 float2 aad=
Multiply(aa,pCoefficient[1]);
122 float2 abc=
Multiply(pCoefficient[2],
Multiply(pCoefficient[3],pCoefficient[4]));
123 float2 bbb=
Multiply(bb,pCoefficient[3]);
124 float2 q=
Divide(bbb-4.0f*abc+8.0f*aad,8.0f*aaa);
129 float2 bOver4a=0.25f*
Multiply(pCoefficient[3],aInv);
130 float2 SS4Plus2p=4.0f*
Square(S)+2.0f*p;
131 float2 qOverS=
Divide(q,S);
132 float2 FirstSquareRoot=
SquareRoot(-SS4Plus2p+qOverS);
133 float2 SecondSquareRoot=
SquareRoot(-SS4Plus2p-qOverS);
136 pRoot[0]=-bOver4a-S+0.5f*FirstSquareRoot;
137 pRoot[1]=-bOver4a-S-0.5f*FirstSquareRoot;
138 pRoot[2]=-bOver4a+S+0.5f*SecondSquareRoot;
139 pRoot[3]=-bOver4a+S-0.5f*SecondSquareRoot;
148 float2 w=float2(-0.5f*q,sqrt(-(0.25f*q*q+p*p*p/27.0f)));
150 float2 pThirdRoot[3];
152 pThirdRoot[1]=
Multiply(pThirdRoot[0],float2(-0.5f,0.5f*sqrt(3.0f)));
153 pThirdRoot[2]=
Multiply(pThirdRoot[0],float2(-0.5f,-0.5f*sqrt(3.0f)));
156 [unroll]
for(
int i=0;i!=3;++i){
157 pRoot[i]=pThirdRoot[i].x-p*(1.0f/3.0f)*
Reciprocal(pThirdRoot[i]).x;
169 void GetToeplitzInverse(out float3x3 OutToeplitzInverseReal,out float3x3 OutToeplitzInverseImaginary,float2 pTrigonometricMoment[2]){
170 float2 b1=pTrigonometricMoment[0];
171 float2 b2=pTrigonometricMoment[1];
172 float m00=1.0f-dot(b1,b1);
174 float m11=1.0f-dot(b2,b2);
176 OutToeplitzInverseReal=float3x3(
181 OutToeplitzInverseImaginary=float3x3(
188 float Scaling=dot(OutToeplitzInverseReal[0],float3(1.0f,pTrigonometricMoment[0].x,pTrigonometricMoment[1].x))-dot(OutToeplitzInverseImaginary[0],float3(0.0f,pTrigonometricMoment[0].y,pTrigonometricMoment[1].y));
190 OutToeplitzInverseReal/=Scaling;
191 OutToeplitzInverseImaginary/=Scaling;
200 float3x2
SolveToeplitzSystem(float2 TrigonometricMoment1,float2 TrigonometricMoment2,float2 RHS1,float2 RHS2){
201 float2 b1=TrigonometricMoment1;
202 float2 b2=TrigonometricMoment2;
204 float D11=1.0f-dot(b1,b1);
205 float InvD11=1.0f/D11;
207 float D22=1.0f-dot(b2,b2)-dot(L21,L21)*D11;
210 c[0]=float2(1.0f,0.0f);
229 return 1.0f-(2.0f*dot(pTrigonometricMoment[0],pTrigonometricMoment[0])+dot(pTrigonometricMoment[1],pTrigonometricMoment[1]))+2.0f*
Multiply(
Square(pTrigonometricMoment[0]),
Conjugate(pTrigonometricMoment[1])).x;
float2 Conjugate(float2 Z)
void GetToeplitzInverse(out float3x3 OutToeplitzInverseReal, out float3x3 OutToeplitzInverseImaginary, float2 pTrigonometricMoment[2])
float2 Divide(float2 Numerator, float2 Denominator)
float2 Dot(float3x2 LHS, float3x2 RHS)
float2 Multiply(float2 LHS, float2 RHS)
float3x2 SolveToeplitzSystem(float2 TrigonometricMoment1, float2 TrigonometricMoment2, float2 RHS1, float2 RHS2)
float2 Reciprocal(float2 Z)
float2 CubicRoot(float2 Z)
float2 SquareRoot(float2 Z)
float GetDeterminant(float2 pTrigonometricMoment[2])