|
@@ -87,6 +87,414 @@
|
|
|
]]>
|
|
|
</Shader>
|
|
|
</ShaderOp>
|
|
|
+
|
|
|
+ <ShaderOp Name="Derivatives" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
|
|
|
+ <RootSignature>
|
|
|
+ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
|
|
|
+ DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
|
|
|
+ StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
|
|
|
+ </RootSignature>
|
|
|
+ <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
|
|
|
+ { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="T0" Dimension="Texture2D" Width="4" Height="4" InitialResourceState="COPY_DEST" Init="FromBytes" Format="R32_FLOAT">
|
|
|
+ {.125f, .25f, .5f, 1.0f},
|
|
|
+ {2.0f, 4.0f, 16.0f, 32.0f},
|
|
|
+ {32.0f, 64.0f, 128.0f, 256.0f},
|
|
|
+ {256.0f, 512.0f, 1024.0f, 2048.0f}
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="RTarget" Dimension="TEXTURE2D" Width="32" Height="32" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
|
|
|
+ <Resource Name="U0" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U1" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U2" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+
|
|
|
+ <RootValues>
|
|
|
+ <RootValue HeapName="ResHeap" />
|
|
|
+ </RootValues>
|
|
|
+ <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
|
|
|
+ <Descriptor Name='T0' Kind='SRV' ResName='T0' />
|
|
|
+ <Descriptor Name='U0' Kind='UAV' ResName='U0'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U1' Kind='UAV' ResName='U1'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U2' Kind='UAV' ResName='U2'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ </DescriptorHeap>
|
|
|
+ <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
|
|
|
+ <Descriptor Name="RTarget" Kind="RTV"/>
|
|
|
+ </DescriptorHeap>
|
|
|
+
|
|
|
+ <InputElements>
|
|
|
+ <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
|
|
|
+ <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
|
|
|
+ </InputElements>
|
|
|
+ <RenderTargets>
|
|
|
+ <RenderTarget Name="RTarget"/>
|
|
|
+ </RenderTargets>
|
|
|
+ <Shader Name="CS" Target="cs_6_6" EntryPoint="CSMain" Text="@PS"/>
|
|
|
+ <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
|
|
|
+ <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/>
|
|
|
+ <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS"/>
|
|
|
+ <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
|
|
|
+ <![CDATA[
|
|
|
+ struct PSInput {
|
|
|
+ float4 position : SV_POSITION;
|
|
|
+ float2 uv : TEXCOORD;
|
|
|
+ };
|
|
|
+ Texture2D<float> g_tex : register(t0);
|
|
|
+ RWStructuredBuffer<float4> g_bufMain : register(u0);
|
|
|
+ RWStructuredBuffer<float4> g_bufMesh : register(u1);
|
|
|
+ RWStructuredBuffer<float4> g_bufAmp : register(u2);
|
|
|
+
|
|
|
+ // Loads one texel of g_tex and returns its screen-space derivatives packed as
|
|
|
+ // (ddx_fine, ddy_fine, ddx_coarse, ddy_coarse).
|
|
|
+ float4 DerivTest(int2 uv) {
|
|
|
+ // Texel coordinate is uv % 4 on each axis; the third component passed to Load is 0.
|
|
|
+ int3 offset = int3(uv%4, 0);
|
|
|
+ float val = g_tex.Load(offset);
|
|
|
+ return float4(ddx_fine(val), ddy_fine(val), ddx_coarse(val), ddy_coarse(val));
|
|
|
+ }
|
|
|
+
|
|
|
+ // Translate a group index into an (x, y) texcoord within a 4x4 block.
|
|
|
+ int2 ConvertGroupIdx(uint groupIdx) {
|
|
|
+   // x is assembled from index bits 0 and 2, y from index bits 1 and 3.
|
|
|
+   uint texX = ((groupIdx & 0x4) >> 1) + (groupIdx & 0x1);
|
|
|
+   uint texY = ((groupIdx & 0x8) >> 2) + ((groupIdx & 0x2) >> 1);
|
|
|
+   return int2(texX, texY);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Convert group index into uv texcoords and return derivatives test result
|
|
|
+ float4 DerivTest(uint groupIdx) {
|
|
|
+ return DerivTest(ConvertGroupIdx(groupIdx));
|
|
|
+ }
|
|
|
+
|
|
|
+ PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ struct Payload {
|
|
|
+ uint nothing;
|
|
|
+ };
|
|
|
+
|
|
|
+ static float4 g_Verts[6] = {
|
|
|
+ { -1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, -1.0f, 0.0f, 1.0f }};
|
|
|
+
|
|
|
+ static float2 g_UV[6] = {
|
|
|
+ { 0.0f, 0.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 1.0f, 1.0f }};
|
|
|
+
|
|
|
+ [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
|
|
|
+ void ASMain(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ g_bufAmp[ix] = DerivTest(ix);
|
|
|
+ payload.nothing = 0;
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   // Mesh shader entry point: emits the static two-triangle quad from
|
|
|
+   // g_Verts/g_UV and stores this thread's DerivTest result in g_bufMesh[ix].
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group size comes from the MESHDISPATCH* macros and is not tied to the
|
|
|
+   // 6 declared vertices, so only threads 0-5 may write a vertex.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Threads 0 and 1 each emit one triangle: (0,1,2) and (3,4,5).
|
|
|
+   // The previous `if (ix % 3)` test selected the wrong threads and produced
|
|
|
+   // vertex indices past the end of the 6-vertex array.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3 * ix, 3 * ix + 1, 3 * ix + 2);
|
|
|
+   // Kept outside the guards so every thread in the group evaluates the test.
|
|
|
+   g_bufMesh[ix] = DerivTest(ix);
|
|
|
+ }
|
|
|
+ // Pixel shader entry point: derives a group-index-style value from the
|
|
|
+ // interpolated texcoord, runs DerivTest on it, stores the result in
|
|
|
+ // g_bufMain[ix] and also returns it as the render target value.
|
|
|
+ float4 PSMain(PSInput input) : SV_TARGET {
|
|
|
+ // Convert from texcoords into a groupIndex equivalent
|
|
|
+ int width = DISPATCHX;
|
|
|
+ int height = DISPATCHY;
|
|
|
+ int2 uv = int2(input.uv.x*width, input.uv.y*height);
|
|
|
+ // The first two terms select a 4x4 block (16 entries per block). The low four
|
|
|
+ // bits interleave the in-block coordinates: bit0 = x bit0, bit1 = y bit0,
|
|
|
+ // bit2 = x bit1, bit3 = y bit1, which is the layout ConvertGroupIdx decodes.
|
|
|
+ uint ix = ((uv.y/4)*(width/4))*16 + (uv.x/4)*16 + (((uv.x & 0x2) << 1) | (uv.x & 0x1) | ((uv.y & 0x2) << 2) | ((uv.y & 0x1) << 1));
|
|
|
+
|
|
|
+ float4 res = DerivTest(ix);
|
|
|
+ g_bufMain[ix] = res;
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
|
|
|
+ void CSMain(uint ix : SV_GroupIndex) {
|
|
|
+ g_bufMain[ix] = DerivTest(ix);
|
|
|
+ }
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <ShaderOp Name="QuadRead" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
|
|
|
+ <RootSignature>
|
|
|
+ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
|
|
|
+ DescriptorTable(UAV(u0), UAV(u1), UAV(u2))
|
|
|
+ </RootSignature>
|
|
|
+ <Resource Name="U0" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U1" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U2" Dimension="BUFFER" Width="16384"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+
|
|
|
+ <RootValues>
|
|
|
+ <RootValue HeapName="ResHeap" />
|
|
|
+ </RootValues>
|
|
|
+ <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
|
|
|
+ <Descriptor Name='U0' Kind='UAV' ResName='U0'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U1' Kind='UAV' ResName='U1'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U2' Kind='UAV' ResName='U2'
|
|
|
+ NumElements="1024" StructureByteStride="16" />
|
|
|
+ </DescriptorHeap>
|
|
|
+
|
|
|
+ <InputElements>
|
|
|
+ <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
|
|
|
+ <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
|
|
|
+ </InputElements>
|
|
|
+ <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain" Text="@PS"/>
|
|
|
+ <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
|
|
|
+ <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/>
|
|
|
+ <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
|
|
|
+ <![CDATA[
|
|
|
+ struct PSInput {
|
|
|
+ float4 position : SV_POSITION;
|
|
|
+ };
|
|
|
+ RWStructuredBuffer<int4> g_bufMain : register(u0);
|
|
|
+ RWStructuredBuffer<int4> g_bufMesh : register(u1);
|
|
|
+ RWStructuredBuffer<int4> g_bufAmp : register(u2);
|
|
|
+
|
|
|
+ // Reads ix back through each quad intrinsic and packs the four results as
|
|
|
+ // (QuadReadLaneAt with lane ix & 0x3, QuadReadAcrossX, QuadReadAcrossY,
|
|
|
+ // QuadReadAcrossDiagonal).
|
|
|
+ // Note: the value is built as int4 while the declared return type is uint4.
|
|
|
+ uint4 QuadReadTest(uint ix) {
|
|
|
+ return int4(QuadReadLaneAt(ix, ix & 0x3), QuadReadAcrossX(ix),
|
|
|
+ QuadReadAcrossY(ix), QuadReadAcrossDiagonal(ix));
|
|
|
+ }
|
|
|
+
|
|
|
+ struct Payload {
|
|
|
+ uint nothing;
|
|
|
+ };
|
|
|
+
|
|
|
+ [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
|
|
|
+ void ASMain(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ g_bufAmp[ix] = QuadReadTest(ix);
|
|
|
+ payload.nothing = 0;
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ static float4 g_Verts[6] = {
|
|
|
+ { -1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, -1.0f, 0.0f, 1.0f }};
|
|
|
+
|
|
|
+ [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   // Mesh shader entry point: emits the static two-triangle quad from g_Verts
|
|
|
+   // and stores this thread's QuadReadTest result in g_bufMesh[ix].
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group size comes from the MESHDISPATCH* macros and is not tied to the
|
|
|
+   // 6 declared vertices, so only threads 0-5 may write a vertex.
|
|
|
+   if (ix < 6)
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+   // Threads 0 and 1 each emit one triangle: (0,1,2) and (3,4,5).
|
|
|
+   // The previous `if (ix % 3)` test selected the wrong threads and produced
|
|
|
+   // vertex indices past the end of the 6-vertex array.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3 * ix, 3 * ix + 1, 3 * ix + 2);
|
|
|
+   // Kept outside the guards so every thread in the group evaluates the test.
|
|
|
+   g_bufMesh[ix] = QuadReadTest(ix);
|
|
|
+ }
|
|
|
+
|
|
|
+ void PSMain(PSInput input) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
|
|
|
+ void CSMain(uint ix : SV_GroupIndex) {
|
|
|
+ g_bufMain[ix] = QuadReadTest(ix);
|
|
|
+ }
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <ShaderOp Name="ComputeSample" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
|
|
|
+ <RootSignature>
|
|
|
+ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
|
|
|
+ DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
|
|
|
+ StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
|
|
|
+ </RootSignature>
|
|
|
+ <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
|
|
|
+ { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="T0" Dimension="Texture2D" Width="64" Height="64" MipLevels="7" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_FLOAT" />
|
|
|
+ <Resource Name="RTarget" Dimension="TEXTURE2D" Width="8" Height="8" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
|
|
|
+ <Resource Name="U0" Dimension="BUFFER" Width="1920"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U1" Dimension="BUFFER" Width="1920"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U2" Dimension="BUFFER" Width="1920"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+
|
|
|
+ <RootValues>
|
|
|
+ <RootValue HeapName="ResHeap" />
|
|
|
+ </RootValues>
|
|
|
+ <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
|
|
|
+ <Descriptor Name='T0' Kind='SRV' ResName='T0' />
|
|
|
+ <Descriptor Name='U0' Kind='UAV' ResName='U0'
|
|
|
+ NumElements="64" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U1' Kind='UAV' ResName='U1'
|
|
|
+ NumElements="64" StructureByteStride="16" />
|
|
|
+ <Descriptor Name='U2' Kind='UAV' ResName='U2'
|
|
|
+ NumElements="64" StructureByteStride="16" />
|
|
|
+ </DescriptorHeap>
|
|
|
+ <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
|
|
|
+ <Descriptor Name="RTarget" Kind="RTV"/>
|
|
|
+ </DescriptorHeap>
|
|
|
+
|
|
|
+ <InputElements>
|
|
|
+ <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
|
|
|
+ <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
|
|
|
+ </InputElements>
|
|
|
+ <RenderTargets>
|
|
|
+ <RenderTarget Name="RTarget"/>
|
|
|
+ </RenderTargets>
|
|
|
+ <Shader Name="CS" Target="cs_6_6" EntryPoint="CSMain" Text="@PS"/>
|
|
|
+ <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
|
|
|
+ <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/>
|
|
|
+ <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS"/>
|
|
|
+ <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
|
|
|
+ <![CDATA[
|
|
|
+ struct PSInput {
|
|
|
+ float4 position : SV_POSITION;
|
|
|
+ float2 uv : TEXCOORD;
|
|
|
+ };
|
|
|
+
|
|
|
+ Texture2D<float> g_tex : register(t0);
|
|
|
+ RWStructuredBuffer<float4> g_bufMain : register(u0);
|
|
|
+ RWStructuredBuffer<float4> g_bufMesh : register(u1);
|
|
|
+ RWStructuredBuffer<float4> g_bufAmp : register(u2);
|
|
|
+
|
|
|
+ PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ SamplerState g_samp : register(s0);
|
|
|
+
|
|
|
+ uint4 DerivTest(uint ix, float left, float right, float top, float bot) {
|
|
|
+ uint iy = ix>>1;
|
|
|
+ return uint4(g_tex.CalculateLevelOfDetail(g_samp, float2(left, 0.5)) * (~ix&1) +
|
|
|
+ g_tex.CalculateLevelOfDetail(g_samp, float2(right, 0.5)) * (ix&1),
|
|
|
+ g_tex.Sample(g_samp, float2(left, 0.5)) * (~ix&1) +
|
|
|
+ g_tex.Sample(g_samp, float2(right, 0.5)) * (ix&1),
|
|
|
+ g_tex.CalculateLevelOfDetail(g_samp, float2(0.5, top)) * (~iy&1) +
|
|
|
+ g_tex.CalculateLevelOfDetail(g_samp, float2(0.5, bot)) * (iy&1),
|
|
|
+ g_tex.Sample(g_samp, float2(0.5, top)) * (~iy&1) +
|
|
|
+ g_tex.Sample(g_samp, float2(0.5, bot)) * (iy&1));
|
|
|
+ }
|
|
|
+
|
|
|
+ // To avoid conditionals, two samples are performed: one for the left and one for the right.
|
|
|
+ // Each is multiplied by a 0/1 step factor so that only the relevant one contributes.
|
|
|
+ uint4 DerivTest(uint ix) {
|
|
|
+ uint iy = ix>>1;
|
|
|
+ return DerivTest(ix, ((ix^1)/64.0)*(ix&1), (ix/64.0)*(ix&1),
|
|
|
+ ((ix^2)/64.0)*(iy&1), (ix/64.0)*(iy&1));
|
|
|
+ }
|
|
|
+
|
|
|
+ static float4 g_Verts[6] = {
|
|
|
+ { -1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, -1.0f, 0.0f, 1.0f }};
|
|
|
+
|
|
|
+ static float2 g_UV[6] = {
|
|
|
+ { 0.0f, 0.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 1.0f, 1.0f }};
|
|
|
+
|
|
|
+ struct Payload {
|
|
|
+ uint nothing;
|
|
|
+ };
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 1)]
|
|
|
+ void ASMain(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ g_bufAmp[ix] = DerivTest(ix);
|
|
|
+ payload.nothing = 0;
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 1)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   // Mesh shader entry point: emits the static two-triangle quad from
|
|
|
+   // g_Verts/g_UV and stores this thread's DerivTest result in g_bufMesh[ix].
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group has 8*8 = 64 threads but only 6 vertices are declared, so only
|
|
|
+   // threads 0-5 may write a vertex; the rest would index out of bounds.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Threads 0 and 1 each emit one triangle: (0,1,2) and (3,4,5).
|
|
|
+   // The previous `if (ix % 3)` test selected the wrong threads and produced
|
|
|
+   // vertex indices past the end of the 6-vertex array.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3 * ix, 3 * ix + 1, 3 * ix + 2);
|
|
|
+   // Kept outside the guards so every thread in the group evaluates the test.
|
|
|
+   g_bufMesh[ix] = DerivTest(ix);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Pixel shader entry point: maps the texcoord to an index, stores the
|
|
|
+ // DerivTest result for that index in g_bufMain, and returns 1.
|
|
|
+ float4 PSMain(PSInput input) : SV_TARGET {
|
|
|
+ // Row-major index: (uv.y scaled by 8) * 8 + (uv.x scaled by 8).
|
|
|
+ int ix = int(input.uv.y * 8) * 8 + int(input.uv.x * 8);
|
|
|
+ // Contort the linear index into quad order by rotating relevant middle bits
|
|
|
+ // Bit 3 moves down to bit 1 and bits 1-2 move up to bits 2-3; all other bits are kept.
|
|
|
+ ix = (ix&~0xE)|((ix&0x8)>>2)|((ix&0x6)<<1);
|
|
|
+ g_bufMain[ix] = DerivTest(ix);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ [NumThreads(8, 8, 1)]
|
|
|
+ void CSMain(uint ix : SV_GroupIndex) {
|
|
|
+ g_bufMain[ix] = DerivTest(ix);
|
|
|
+ }
|
|
|
+
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
<ShaderOp Name="OOB" PS="PS" VS="VS">
|
|
|
<RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
|
|
|
<Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">
|
|
@@ -880,7 +1288,855 @@
|
|
|
</Shader>
|
|
|
</ShaderOp>
|
|
|
|
|
|
- <!--
|
|
|
+ <ShaderOp Name="WaveSizeTest" CS="CS">
|
|
|
+ <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
|
|
|
+ <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="512" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
|
|
|
+ <RootValues>
|
|
|
+ <RootValue Index="0" ResName="UAVBuffer0" />
|
|
|
+ </RootValues>
|
|
|
+ <Shader Name="CS" Target="cs_6_6">
|
|
|
+ <![CDATA[// Shader source code will be set at runtime]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <ShaderOp Name="PackUnpackOp" CS="CS" DispatchX="1" DispatchY="1">
|
|
|
+ <RootSignature>RootFlags(0), UAV(u0), UAV(u1), UAV(u2)</RootSignature>
|
|
|
+ <Resource Name="g_bufIn" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="false" />
|
|
|
+ <Resource Name="g_bufOutPacked" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
|
|
|
+ <Resource Name="g_bufOutPackedUnpacked" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
|
|
|
+ <RootValues>
|
|
|
+ <RootValue Index="0" ResName="g_bufIn" />
|
|
|
+ <RootValue Index="1" ResName="g_bufOutPacked" />
|
|
|
+ <RootValue Index="2" ResName="g_bufOutPackedUnpacked" />
|
|
|
+ </RootValues>
|
|
|
+ <Shader Name="CS" Target="cs_6_0">
|
|
|
+ <![CDATA[
|
|
|
+ void main(uint GI : SV_GroupIndex) {};
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <!-- For explanations of the atomics tests, see comments in and around VerifyAtomicResults in ExecutionTest.cpp -->
|
|
|
+ <ShaderOp Name="Atomics" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
|
|
|
+ <RootSignature>
|
|
|
+ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
|
|
|
+ DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), UAV(u6), UAV(u7), UAV(u8), UAV(u9), UAV(u10), UAV(u11), UAV(u12), UAV(u13), UAV(u14), UAV(u15), UAV(u16), UAV(u17)),
|
|
|
+ StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
|
|
|
+ </RootSignature>
|
|
|
+ <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
|
|
|
+ { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
|
|
|
+ <!-- Raw buffers -->
|
|
|
+ <Resource Name="U0" Dimension="BUFFER" Width="576"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true" >
|
|
|
+ {
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, -1I, -1I, 0I, 0I, -1I, -1I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
|
|
|
+ }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U1" Dimension="BUFFER" Width="9216"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U2" Dimension="BUFFER" Width="256"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true">
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U3" Dimension="BUFFER" Width="1024"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <!-- 32-bit typed resources -->
|
|
|
+ <Resource Name="U4" Dimension="BUFFER" Width="256" Format="R32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true" >
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U5" Dimension="BUFFER" Width="256" Format="R32_SINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true">
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U6" Dimension="BUFFER" Width="1024" Format="R32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U7" Dimension="TEXTURE1D" Width="16" Format="R32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true" >
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U8" Dimension="TEXTURE1D" Width="16" Format="R32_SINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true">
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U9" Dimension="TEXTURE1D" Width="128" Format="R32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <!-- groupshared output buffers -->
|
|
|
+ <Resource Name="U10" Dimension="BUFFER" Width="256"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U11" Dimension="BUFFER" Width="1024"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <!-- 64-bit typed resources -->
|
|
|
+ <Resource Name="U12" Dimension="BUFFER" Width="256" Format="R32G32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true" >
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U13" Dimension="BUFFER" Width="256" Format="R32G32_SINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true">
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U14" Dimension="BUFFER" Width="1024" Format="R32G32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U15" Dimension="TEXTURE1D" Width="16" Format="R32G32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true" >
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U16" Dimension="TEXTURE1D" Width="16" Format="R32G32_SINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="FromBytes" ReadBack="true">
|
|
|
+ { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="U17" Dimension="TEXTURE1D" Width="128" Format="R32G32_UINT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <RootValues>
|
|
|
+ <RootValue HeapName="ResHeap" />
|
|
|
+ </RootValues>
|
|
|
+ <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
|
|
|
+ <!-- Raw buffers -->
|
|
|
+ <Descriptor Name="U0" Kind="UAV" ResName="U0"
|
|
|
+ NumElements="8" StructureByteStride="72" />
|
|
|
+ <Descriptor Name="U1" Kind="UAV" ResName="U1"
|
|
|
+ NumElements="128" StructureByteStride="72" />
|
|
|
+ <Descriptor Name="U2" Kind="UAV" ResName="U2"
|
|
|
+ NumElements="16" StructureByteStride="8" />
|
|
|
+ <Descriptor Name="U3" Kind="UAV" ResName="U3"
|
|
|
+ NumElements="128" StructureByteStride="8" />
|
|
|
+ <!-- 32-bit typed resources -->
|
|
|
+ <Descriptor Name="U4" Kind="UAV" ResName="U4" Dimension="BUFFER"
|
|
|
+ NumElements="16" Format="R32_UINT" />
|
|
|
+ <Descriptor Name="U5" Kind="UAV" ResName="U5" Dimension="BUFFER"
|
|
|
+ NumElements="16" Format="R32_UINT" />
|
|
|
+ <Descriptor Name="U6" Kind="UAV" ResName="U6" Dimension="BUFFER"
|
|
|
+ NumElements="128" Format="R32_UINT" />
|
|
|
+ <Descriptor Name="U7" Kind="UAV" ResName="U7" Dimension="TEXTURE1D"
|
|
|
+ NumElements="16" Format="R32_UINT" />
|
|
|
+ <Descriptor Name="U8" Kind="UAV" ResName="U8" Dimension="TEXTURE1D"
|
|
|
+ NumElements="16" Format="R32_UINT" />
|
|
|
+ <Descriptor Name="U9" Kind="UAV" ResName="U9" Dimension="TEXTURE1D"
|
|
|
+ NumElements="128" Format="R32_UINT" />
|
|
|
+ <!-- groupshared output buffers -->
|
|
|
+ <Descriptor Name="U10" Kind="UAV" ResName="U10" Dimension="BUFFER"
|
|
|
+ NumElements="8" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U11" Kind="UAV" ResName="U11" Dimension="BUFFER"
|
|
|
+ NumElements="64" Format="R32G32_UINT" />
|
|
|
+ <!-- 64-bit typed resources -->
|
|
|
+ <Descriptor Name="U12" Kind="UAV" ResName="U12" Dimension="BUFFER"
|
|
|
+ NumElements="16" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U13" Kind="UAV" ResName="U13" Dimension="BUFFER"
|
|
|
+ NumElements="16" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U14" Kind="UAV" ResName="U14" Dimension="BUFFER"
|
|
|
+ NumElements="128" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U15" Kind="UAV" ResName="U15" Dimension="TEXTURE1D"
|
|
|
+ NumElements="16" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U16" Kind="UAV" ResName="U16" Dimension="TEXTURE1D"
|
|
|
+ NumElements="16" Format="R32G32_UINT" />
|
|
|
+ <Descriptor Name="U17" Kind="UAV" ResName="U17" Dimension="TEXTURE1D"
|
|
|
+ NumElements="128" Format="R32G32_UINT" />
|
|
|
+ </DescriptorHeap>
|
|
|
+ <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
|
|
|
+ <Descriptor Name="RTarget" Kind="RTV"/>
|
|
|
+ </DescriptorHeap>
|
|
|
+
|
|
|
+ <InputElements>
|
|
|
+ <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
|
|
|
+ <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
|
|
|
+ </InputElements>
|
|
|
+ <RenderTargets>
|
|
|
+ <RenderTarget Name="RTarget"/>
|
|
|
+ </RenderTargets>
|
|
|
+ <Shader Name="PS64" Target="ps_6_6" EntryPoint="PSMain64" Text="@CS"/>
|
|
|
+ <Shader Name="AS64" Target="as_6_6" EntryPoint="ASMain64" Text="@CS"/>
|
|
|
+ <Shader Name="MS64" Target="ms_6_6" EntryPoint="MSMain64" Text="@CS"/>
|
|
|
+ <Shader Name="VS64" Target="vs_6_6" EntryPoint="VSMain64" Text="@CS"/>
|
|
|
+ <Shader Name="CS64" Target="cs_6_6" EntryPoint="CSMain64" Text="@CS"/>
|
|
|
+ <Shader Name="PSTY64" Target="ps_6_6" EntryPoint="PSMainTyped64" Text="@CS"/>
|
|
|
+ <Shader Name="ASTY64" Target="as_6_6" EntryPoint="ASMainTyped64" Text="@CS"/>
|
|
|
+ <Shader Name="MSTY64" Target="ms_6_6" EntryPoint="MSMainTyped64" Text="@CS"/>
|
|
|
+ <Shader Name="VSTY64" Target="vs_6_6" EntryPoint="VSMainTyped64" Text="@CS"/>
|
|
|
+ <Shader Name="CSTY64" Target="cs_6_6" EntryPoint="CSMainTyped64" Text="@CS"/>
|
|
|
+ <Shader Name="ASSH64" Target="as_6_6" EntryPoint="ASMainShared64" Text="@CS"/>
|
|
|
+ <Shader Name="MSSH64" Target="ms_6_6" EntryPoint="MSMainShared64" Text="@CS"/>
|
|
|
+ <Shader Name="CSSH64" Target="cs_6_6" EntryPoint="CSMainShared64" Text="@CS"/>
|
|
|
+ <Shader Name="AS" Target="as_6_5" EntryPoint="ASMain" Text="@CS"/>
|
|
|
+ <Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain" Text="@CS"/>
|
|
|
+ <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@CS"/>
|
|
|
+ <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain" Text="@CS"/>
|
|
|
+ <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain">
|
|
|
+ <![CDATA[
|
|
|
+ struct PSInput {
|
|
|
+ float4 position : SV_POSITION;
|
|
|
+ float2 uv : TEXCOORD;
|
|
|
+ };
|
|
|
+ struct AtomicStuff {
|
|
|
+ float2 prepad[3];
|
|
|
+ uint uintEl[4];
|
|
|
+ int4 sintEl;
|
|
|
+ struct useless {
|
|
|
+ uint3 unused;
|
|
|
+ } postpad;
|
|
|
+ float last;
|
|
|
+ };
|
|
|
+ struct Atomic64Stuff {
|
|
|
+ float2 prepad[3];
|
|
|
+ uint64_t uintEl[2];
|
|
|
+ int64_t2 sintEl;
|
|
|
+ struct useless {
|
|
|
+ uint3 unused;
|
|
|
+ } postpad;
|
|
|
+ float last;
|
|
|
+ };
|
|
|
+ RWStructuredBuffer<AtomicStuff> g_structBuf : register(u0);
|
|
|
+ RWStructuredBuffer<AtomicStuff> g_strXchgBuf : register(u1);
|
|
|
+
|
|
|
+ RWByteAddressBuffer g_rawBuf : register(u2);
|
|
|
+ RWByteAddressBuffer g_rawXchgBuf : register(u3);
|
|
|
+
|
|
|
+ RWBuffer<uint> g_uintBuf : register(u4);
|
|
|
+ RWBuffer<int> g_sintBuf : register(u5);
|
|
|
+ RWBuffer<int> g_xchgBuf : register(u6);
|
|
|
+
|
|
|
+ RWTexture1D<uint> g_utexBuf : register(u7);
|
|
|
+ RWTexture1D<int> g_stexBuf : register(u8);
|
|
|
+ RWTexture1D<int> g_xtexBuf : register(u9);
|
|
|
+
|
|
|
+ RWBuffer<uint2> g_shareBuf : register(u10);
|
|
|
+ RWBuffer<uint2> g_shareXchgBuf : register(u11);
|
|
|
+
|
|
|
+ groupshared uint g_uintShare[12];
|
|
|
+ groupshared int g_sintShare[6];
|
|
|
+ groupshared uint g_xchgShare[128];
|
|
|
+
|
|
|
+ RWStructuredBuffer<Atomic64Stuff> g_struct64Buf : register(u0);
|
|
|
+ RWStructuredBuffer<Atomic64Stuff> g_strXchg64Buf : register(u1);
|
|
|
+
|
|
|
+ RWByteAddressBuffer g_raw64Buf : register(u2);
|
|
|
+ RWByteAddressBuffer g_rawXchg64Buf : register(u3);
|
|
|
+
|
|
|
+ RWBuffer<uint64_t> g_uint64Buf : register(u12);
|
|
|
+ RWBuffer<int64_t> g_sint64Buf : register(u13);
|
|
|
+ RWBuffer<int64_t> g_xchg64Buf : register(u14);
|
|
|
+
|
|
|
+ RWTexture1D<uint64_t> g_utex64Buf : register(u15);
|
|
|
+ RWTexture1D<int64_t> g_stex64Buf : register(u16);
|
|
|
+ RWTexture1D<int64_t> g_xtex64Buf : register(u17);
|
|
|
+
|
|
|
+ RWBuffer<uint64_t> g_share64Buf : register(u10);
|
|
|
+ RWBuffer<uint64_t> g_shareXchg64Buf : register(u11);
|
|
|
+
|
|
|
+ groupshared uint64_t g_uint64Share[6];
|
|
|
+ groupshared int64_t g_sint64Share[3];
|
|
|
+ groupshared uint64_t g_xchg64Share[64];
|
|
|
+
|
|
|
+ #define VEC_CALL(op, uav, ix, val) op(uav[ix*stride], val);
|
|
|
+
|
|
|
+ #define USTRUCT_CALL(op, uav, ix, val) op(uav[ix].uintEl[stride], val);
|
|
|
+ #define SSTRUCT_CALL(op, uav, ix, val) op(uav[ix].sintEl.z, val);
|
|
|
+ #define SSTRUCT64_CALL(op, uav, ix, val) op(uav[ix].sintEl.y, val);
|
|
|
+
|
|
|
+ #define URAW_CALL(op, uav, ix, val) uav.op(8*ix, val);
|
|
|
+ #define SRAW_CALL(op, uav, ix, val) uav.op(8*(5+ix), val); // signed at end. raw buffers don't need separate buffers
|
|
|
+
|
|
|
+ #define OP_TEST(ucall, scall, uuav, suav) \
|
|
|
+ ucall(InterlockedAdd, uuav, 0, addVal); \
|
|
|
+ scall(InterlockedMin, suav, 1, sminMaxVal); \
|
|
|
+ scall(InterlockedMax, suav, 2, sminMaxVal); \
|
|
|
+ ucall(InterlockedMin, uuav, 1, uminMaxVal); \
|
|
|
+ ucall(InterlockedMax, uuav, 2, uminMaxVal); \
|
|
|
+ ucall(InterlockedAnd, uuav, 3, ~value); \
|
|
|
+ ucall(InterlockedOr, uuav, 4, value); \
|
|
|
+ ucall(InterlockedXor, uuav, 5, xorVal);
|
|
|
+
|
|
|
+ #define VEC_CALL3(op, uav, ix, cmp, val) op(uav[(ix)*stride], cmp, val)
|
|
|
+ #define VEC_CALL4(op, uav, ix, cmp, val, o) op(uav[(ix)*stride], cmp, val, o)
|
|
|
+
|
|
|
+ #define STRUCT_CALL3(op, uav, ix, cmp, val) op(uav[ix].uintEl[stride], cmp, val)
|
|
|
+ #define STRUCT_CALL4(op, uav, ix, cmp, val, o) op(uav[ix].uintEl[stride], cmp, val, o)
|
|
|
+
|
|
|
+ #define RAW_CALL3(op, uav, ix, cmp, val) uav.op(8*(ix), cmp, val)
|
|
|
+ #define RAW_CALL4(op, uav, ix, cmp, val, o) uav.op(8*(ix), cmp, val, o)
|
|
|
+
|
|
|
+ // Threads contend for each slot: the first one to find the slot zero claims it with CompareStore; that winner then completes the CompareExchange and performs the final Exchange
|
|
|
+ #define XCHG_TEST(call3, call4, uav) \
|
|
|
+ call3(InterlockedCompareStore, uav, (ix/3)%64, 0, xchgVal - 2); \
|
|
|
+ call4(InterlockedCompareExchange, uav, (ix/3)%64, xchgVal - 2, xchgVal - 1, output); \
|
|
|
+ if (output == xchgVal - 2) { call3(InterlockedExchange, uav, (ix/3)%64, xchgVal, output);}
|
|
|
+
|
|
|
+ void AtomicTest(uint ix, uint bitSize) { // runs OP_TEST and XCHG_TEST on structured, raw, typed-buffer and texture UAVs; the locals below are read by name inside those macros
|
|
|
+ uint stride = 2; // used by the *_CALL macros: index multiplier in VEC_CALL*, uintEl[] index in the struct macros
|
|
|
+ uint value = (ix) | ((ix) << (bitSize/2)); // ix placed in both the low and the high half of the word
|
|
|
+ uint addVal = ix; // 32 bits isn't enough room to duplicate upper and lower
|
|
|
+ uint uminMaxVal = ~value*(~value&1) + value*(value&1); // value when value is odd, ~value when it is even
|
|
|
+ int sminMaxVal = ~value*(~value&1) + value*(value&1); // same expression as uminMaxVal, stored as signed
|
|
|
+ uint xorVal = 1 << (ix%(bitSize-1)); // single bit at position ix % (bitSize-1)
|
|
|
+ // make higher bits differ while lower bits match
|
|
|
+ uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64); // low part equals the slot index (ix/3)%64 used by XCHG_TEST
|
|
|
+ uint output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ // structured
|
|
|
+ OP_TEST(USTRUCT_CALL, SSTRUCT_CALL, g_structBuf, g_structBuf)
|
|
|
+ XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchgBuf)
|
|
|
+
|
|
|
+ // raw
|
|
|
+ OP_TEST(URAW_CALL, SRAW_CALL, g_rawBuf, g_rawBuf) // signed and unsigned share one buffer; SRAW_CALL offsets its slots by 5
|
|
|
+ XCHG_TEST(RAW_CALL3, RAW_CALL4, g_rawXchgBuf)
|
|
|
+
|
|
|
+ // typed buffer
|
|
|
+ OP_TEST(VEC_CALL, VEC_CALL, g_uintBuf, g_sintBuf)
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgBuf)
|
|
|
+
|
|
|
+ // texture
|
|
|
+ OP_TEST(VEC_CALL, VEC_CALL, g_utexBuf, g_stexBuf)
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtexBuf)
|
|
|
+ }
|
|
|
+
|
|
|
+ void AtomicRaw64Test(uint ix, uint64_t bitSize) { // 64-bit atomics on structured and raw buffers; the locals below are read by name inside the macros
|
|
|
+ uint64_t lix = ix; // widen ix so the shifts below are done in 64 bits
|
|
|
+ uint stride = 1; // used by the struct macros as the uintEl[] index
|
|
|
+ uint64_t value = (lix) | ((lix) << (bitSize/2)); // ix placed in both the low and the high half of the word
|
|
|
+ uint64_t addVal = value;
|
|
|
+ uint64_t uminMaxVal = ~value*(~value&1) + value*(value&1); // value when value is odd, ~value when it is even
|
|
|
+ int64_t sminMaxVal = ~value*(~value&1) + value*(value&1); // same expression as uminMaxVal, stored as signed
|
|
|
+ uint64_t xorVal = 1ULL << (lix%(bitSize-1)); // single bit at position lix % (bitSize-1)
|
|
|
+ // make higher bits differ while lower bits match
|
|
|
+ uint64_t xchgVal = (lix << (bitSize/2)) | ((lix/3)%64); // low part equals the slot index (ix/3)%64
|
|
|
+ uint64_t output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ OP_TEST(USTRUCT_CALL, SSTRUCT64_CALL, g_struct64Buf, g_struct64Buf)
|
|
|
+ XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchg64Buf)
|
|
|
+
|
|
|
+ // 64-bit ByteAddressBuffer atomics are a special case: they use the separately named *64 methods, so the calls are written out here instead of going through OP_TEST/XCHG_TEST
|
|
|
+ URAW_CALL(InterlockedAdd64, g_raw64Buf, 0, addVal);
|
|
|
+ SRAW_CALL(InterlockedMin64, g_raw64Buf, 1, sminMaxVal);
|
|
|
+ SRAW_CALL(InterlockedMax64, g_raw64Buf, 2, sminMaxVal);
|
|
|
+ URAW_CALL(InterlockedMin64, g_raw64Buf, 1, uminMaxVal);
|
|
|
+ URAW_CALL(InterlockedMax64, g_raw64Buf, 2, uminMaxVal);
|
|
|
+ URAW_CALL(InterlockedAnd64, g_raw64Buf, 3, ~value);
|
|
|
+ URAW_CALL(InterlockedOr64, g_raw64Buf, 4, value);
|
|
|
+ URAW_CALL(InterlockedXor64, g_raw64Buf, 5, xorVal);
|
|
|
+
|
|
|
+ RAW_CALL3(InterlockedCompareStore64, g_rawXchg64Buf, (ix/3)%64, 0, xchgVal - 2); // same three-step sequence as XCHG_TEST, using the *64 methods
|
|
|
+ RAW_CALL4(InterlockedCompareExchange64, g_rawXchg64Buf, (ix/3)%64, xchgVal - 2, xchgVal - 1, output);
|
|
|
+ if (output == xchgVal - 2) { RAW_CALL3(InterlockedExchange64, g_rawXchg64Buf, (ix/3)%64, xchgVal, output);}
|
|
|
+ }
|
|
|
+
|
|
|
+ void AtomicTyped64Test(uint ix, uint64_t bitSize) { // 64-bit atomics on typed buffers and textures; the locals below are read by name inside the macros
|
|
|
+ uint64_t lix = ix; // widen ix so the shifts below are done in 64 bits
|
|
|
+ uint stride = 1; // index multiplier used by VEC_CALL and VEC_CALL3/4
|
|
|
+ uint64_t value = (lix) | ((lix) << (bitSize/2)); // ix placed in both the low and the high half of the word
|
|
|
+ uint64_t addVal = value;
|
|
|
+ uint64_t uminMaxVal = ~value*(~value&1) + value*(value&1); // value when value is odd, ~value when it is even
|
|
|
+ int64_t sminMaxVal = ~value*(~value&1) + value*(value&1); // same expression as uminMaxVal, stored as signed
|
|
|
+ uint64_t xorVal = 1ULL << (lix%(bitSize-1)); // single bit at position lix % (bitSize-1)
|
|
|
+ // make higher bits differ while lower bits match
|
|
|
+ uint64_t xchgVal = (lix << (bitSize/2)) | ((lix/3)%64); // low part equals the slot index (ix/3)%64
|
|
|
+ uint64_t output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ OP_TEST(VEC_CALL, VEC_CALL, g_uint64Buf, g_sint64Buf) // typed buffers
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchg64Buf)
|
|
|
+
|
|
|
+ OP_TEST(VEC_CALL, VEC_CALL, g_utex64Buf, g_stex64Buf) // 1D textures
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtex64Buf)
|
|
|
+ }
|
|
|
+
|
|
|
+ void AtomicGroupSharedTest(uint ix, uint bitSize) { // atomics on groupshared arrays, then copies the results into g_shareBuf / g_shareXchgBuf
|
|
|
+ uint stride = 1; // index multiplier used by VEC_CALL and VEC_CALL3/4
|
|
|
+ uint value = (ix) | ((ix) << (bitSize/2)); // ix placed in both the low and the high half of the word
|
|
|
+ uint addVal = ix; // 32 bits isn't enough room to duplicate upper and lower
|
|
|
+ uint uminMaxVal = ~value*(~value&1) + value*(value&1); // value when value is odd, ~value when it is even
|
|
|
+ int sminMaxVal = ~value*(~value&1) + value*(value&1); // same expression as uminMaxVal, stored as signed
|
|
|
+ uint xorVal = 1 << (ix%(bitSize-1)); // single bit at position ix % (bitSize-1)
|
|
|
+ uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64); // low part equals the slot index (ix/3)%64
|
|
|
+ uint output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ uint uIx = ix%(6*stride); // unsigned slot this thread zeroes and later copies out
|
|
|
+ uint sIx = ix%(3*stride); // signed slot this thread zeroes
|
|
|
+
|
|
|
+ // Zero-init shared memory
|
|
|
+ g_uintShare[uIx] = 0;
|
|
|
+ g_sintShare[sIx] = 0;
|
|
|
+ g_xchgShare[ix%64] = 0;
|
|
|
+
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ InterlockedCompareStore(g_uintShare[stride], 0, 99999999); // seed the unsigned Min slot (OP_TEST index 1) with a non-zero value
|
|
|
+ InterlockedCompareStore(g_uintShare[3*stride], 0, -1); // seed the And slot (OP_TEST index 3) with all bits set
|
|
|
+ InterlockedCompareStore(g_sintShare[stride], 0, 99999999); // seed the signed Min slot (OP_TEST index 1) with a non-zero value
|
|
|
+
|
|
|
+ OP_TEST(VEC_CALL, VEC_CALL, g_uintShare, g_sintShare)
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)
|
|
|
+
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ g_shareBuf[uIx].x = g_uintShare[uIx];
|
|
|
+ g_shareBuf[6 + sIx].x = g_sintShare[sIx + 1]; // NOTE(review): reads indices 1..3, but index 3 is neither zeroed nor written in this function - confirm intended
|
|
|
+
|
|
|
+ g_shareXchgBuf[(ix/3)%64].x = g_xchgShare[(ix/3)%64];
|
|
|
+ }
|
|
|
+
|
|
|
+ // Exercises 64-bit atomics on the groupshared arrays and then copies the
|
|
|
+ // results into g_share64Buf / g_shareXchg64Buf. ix selects the slots and the
|
|
|
+ // operand values; bitSize is the operand width in bits. The locals declared
|
|
|
+ // first are read by name inside OP_TEST, XCHG_TEST and the VEC_CALL macros.
|
|
|
+ void AtomicGroupShared64Test(uint ix, uint64_t bitSize) {
|
|
|
+   uint64_t lix = ix; // widen ix so the shifts below are done in 64 bits
|
|
|
+   uint stride = 1; // index multiplier used by VEC_CALL and VEC_CALL3/4
|
|
|
+   uint64_t value = (lix) | ((lix) << (bitSize/2));
|
|
|
+   uint64_t addVal = value;
|
|
|
+   uint64_t uminMaxVal = ~value*(~value&1) + value*(value&1); // value when odd, ~value when even
|
|
|
+   int64_t sminMaxVal = ~value*(~value&1) + value*(value&1);
|
|
|
+   uint64_t xorVal = 1ULL << (lix%(bitSize-1));
|
|
|
+   uint64_t xchgVal = (lix << (bitSize/2)) | ((lix/3)%64);
|
|
|
+   uint64_t output = 0;
|
|
|
+
|
|
|
+   uint uIx = ix%(6*stride);
|
|
|
+   uint sIx = ix%(3*stride);
|
|
|
+
|
|
|
+   // Zero-init shared memory
|
|
|
+   g_uint64Share[uIx] = 0;
|
|
|
+   g_sint64Share[sIx] = 0;
|
|
|
+   g_xchg64Share[ix%64] = 0;
|
|
|
+
|
|
|
+   GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+   // Seed the unsigned Min slot, the And slot and the signed Min slot with non-zero values
|
|
|
+   InterlockedCompareStore(g_uint64Share[stride], 0, 99999999ULL | (99999999ULL << (bitSize/2)));
|
|
|
+   InterlockedCompareStore(g_uint64Share[3*stride], 0, ~0ULL);
|
|
|
+   InterlockedCompareStore(g_sint64Share[stride], 0, 99999999ULL | (99999999ULL << (bitSize/2)));
|
|
|
+
|
|
|
+   OP_TEST(VEC_CALL, VEC_CALL, g_uint64Share, g_sint64Share)
|
|
|
+   XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchg64Share)
|
|
|
+
|
|
|
+   GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+   g_share64Buf[uIx] = g_uint64Share[uIx];
|
|
|
+   // g_sint64Share holds 3 elements and sIx + 1 reaches 3 when sIx == 2, so
|
|
|
+   // skip that out-of-range read; slots 6 and 7 are written as before.
|
|
|
+   if (sIx + 1 < 3)
|
|
|
+     g_share64Buf[sIx + 6] = g_sint64Share[sIx + 1];
|
|
|
+
|
|
|
+   g_shareXchg64Buf[(ix/3)%64] = g_xchg64Share[(ix/3)%64];
|
|
|
+ }
|
|
|
+
|
|
|
+ struct Payload {
|
|
|
+ uint nothing;
|
|
|
+ };
|
|
|
+
|
|
|
+ static float4 g_Verts[6] = {
|
|
|
+ { -1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, -1.0f, 0.0f, 1.0f }};
|
|
|
+
|
|
|
+ static float2 g_UV[6] = {
|
|
|
+ { 0.0f, 0.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 1.0f, 1.0f }};
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ void ASMain(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ payload.nothing = 0;
|
|
|
+ AtomicTest(64*64 + 8*8 + ix, 32);
|
|
|
+ AtomicGroupSharedTest(64*64 + 8*8 + ix, 32);
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Mesh shader entry point: emits the two-triangle quad described by g_Verts /
|
|
|
+ // g_UV and runs the 32-bit atomic tests once per thread.
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group has 128 threads but only 6 vertices exist, so only the threads
|
|
|
+   // whose index is in range may write a vertex.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Triangle t uses vertices 3t..3t+2, which is how g_Verts is laid out.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3*ix, 3*ix + 1, 3*ix + 2);
|
|
|
+   // Every thread runs the tests; AtomicGroupSharedTest contains group barriers.
|
|
|
+   AtomicTest(64*64 + ix, 32);
|
|
|
+   AtomicGroupSharedTest(64*64 + ix, 32);
|
|
|
+ }
|
|
|
+
|
|
|
+ PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ AtomicTest(64*64 + ix, 32);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ float4 PSMain(PSInput input) : SV_TARGET {
|
|
|
+ uint ix = uint(input.uv.y*64)*64 + input.uv.x*64;
|
|
|
+ AtomicTest(ix, 32);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(32, 32, 1)]
|
|
|
+ void CSMain(uint ix : SV_GroupIndex) {
|
|
|
+ AtomicTest(ix, 32);
|
|
|
+ AtomicGroupSharedTest(ix, 32);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ void ASMain64(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ payload.nothing = 0;
|
|
|
+ AtomicRaw64Test(64*64 + 8*8 + ix, 64);
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Mesh shader entry point: emits the two-triangle quad described by g_Verts /
|
|
|
+ // g_UV and runs the 64-bit structured/raw atomic test once per thread.
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain64(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group has 128 threads but only 6 vertices exist, so only the threads
|
|
|
+   // whose index is in range may write a vertex.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Triangle t uses vertices 3t..3t+2, which is how g_Verts is laid out.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3*ix, 3*ix + 1, 3*ix + 2);
|
|
|
+   AtomicRaw64Test(64*64 + ix, 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ PSInput VSMain64(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ AtomicRaw64Test(64*64 + ix, 64);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ float4 PSMain64(PSInput input) : SV_TARGET {
|
|
|
+ uint ix = uint(input.uv.y*64)*64 + input.uv.x*64;
|
|
|
+ AtomicRaw64Test(ix, 64);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(32, 32, 1)]
|
|
|
+ void CSMain64(uint ix : SV_GroupIndex) {
|
|
|
+ AtomicRaw64Test(ix, 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ void ASMainTyped64(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ payload.nothing = 0;
|
|
|
+ AtomicTyped64Test(64*64 + 8*8 + ix, 64);
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Mesh shader entry point: emits the two-triangle quad described by g_Verts /
|
|
|
+ // g_UV and runs the 64-bit typed-resource atomic test once per thread.
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMainTyped64(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group has 128 threads but only 6 vertices exist, so only the threads
|
|
|
+   // whose index is in range may write a vertex.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Triangle t uses vertices 3t..3t+2, which is how g_Verts is laid out.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3*ix, 3*ix + 1, 3*ix + 2);
|
|
|
+   AtomicTyped64Test(64*64 + ix, 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ PSInput VSMainTyped64(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ AtomicTyped64Test(64*64 + ix, 64);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ float4 PSMainTyped64(PSInput input) : SV_TARGET {
|
|
|
+ uint ix = uint(input.uv.y*64)*64 + input.uv.x*64;
|
|
|
+ AtomicTyped64Test(ix, 64);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(32, 32, 1)]
|
|
|
+ void CSMainTyped64(uint ix : SV_GroupIndex) {
|
|
|
+ AtomicTyped64Test(ix, 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ void ASMainShared64(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ payload.nothing = 0;
|
|
|
+ AtomicGroupShared64Test(64*64 + 8*8 + ix, 64);
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Mesh shader entry point: emits the two-triangle quad described by g_Verts /
|
|
|
+ // g_UV and runs the 64-bit groupshared atomic test once per thread.
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMainShared64(
|
|
|
+   uint ix : SV_GroupIndex,
|
|
|
+   in payload Payload payload,
|
|
|
+   out vertices PSInput verts[6],
|
|
|
+   out indices uint3 tris[2]) {
|
|
|
+   SetMeshOutputCounts(6, 2);
|
|
|
+   // The group has 128 threads but only 6 vertices exist, so only the threads
|
|
|
+   // whose index is in range may write a vertex.
|
|
|
+   if (ix < 6) {
|
|
|
+     verts[ix].position = g_Verts[ix];
|
|
|
+     verts[ix].uv = g_UV[ix];
|
|
|
+   }
|
|
|
+   // Triangle t uses vertices 3t..3t+2, which is how g_Verts is laid out.
|
|
|
+   if (ix < 2)
|
|
|
+     tris[ix] = uint3(3*ix, 3*ix + 1, 3*ix + 2);
|
|
|
+   // Every thread runs the test; it contains group barriers.
|
|
|
+   AtomicGroupShared64Test(64*64 + ix, 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(32, 32, 1)]
|
|
|
+ void CSMainShared64(uint ix : SV_GroupIndex) {
|
|
|
+ AtomicGroupShared64Test(ix, 64);
|
|
|
+ }
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <ShaderOp Name="FloatAtomics" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
|
|
|
+ <RootSignature>
|
|
|
+ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
|
|
|
+ DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4)),
|
|
|
+ StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
|
|
|
+ </RootSignature>
|
|
|
+ <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
|
|
|
+ { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+
|
|
|
+ { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
|
|
|
+ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
|
|
|
+ { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
|
|
|
+ </Resource>
|
|
|
+ <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
|
|
|
+ <Resource Name="U0" Dimension="BUFFER" Width="2816"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U1" Dimension="BUFFER" Width="256"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U2" Dimension="BUFFER" Width="256"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U3" Dimension="TEXTURE1D" Width="64" Format="R32_FLOAT"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <Resource Name="U4" Dimension="BUFFER" Width="256"
|
|
|
+ Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
|
|
|
+ Init="Zero" ReadBack="true" />
|
|
|
+ <RootValues>
|
|
|
+ <RootValue HeapName="ResHeap" />
|
|
|
+ </RootValues>
|
|
|
+ <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
|
|
|
+ <Descriptor Name="U0" Kind="UAV" ResName="U0"
|
|
|
+ NumElements="64" StructureByteStride="44" />
|
|
|
+ <Descriptor Name="U1" Kind="UAV" ResName="U1"
|
|
|
+ NumElements="64" StructureByteStride="4" />
|
|
|
+ <Descriptor Name="U2" Kind="UAV" ResName="U2"
|
|
|
+ NumElements="64" StructureByteStride="4" />
|
|
|
+ <Descriptor Name="U3" Kind="UAV" ResName="U3" Dimension="TEXTURE1D"
|
|
|
+ NumElements="64" StructureByteStride="4" />
|
|
|
+ <Descriptor Name="U4" Kind="UAV" ResName="U4"
|
|
|
+ NumElements="64" StructureByteStride="4" />
|
|
|
+ </DescriptorHeap>
|
|
|
+ <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
|
|
|
+ <Descriptor Name="RTarget" Kind="RTV"/>
|
|
|
+ </DescriptorHeap>
|
|
|
+
|
|
|
+ <InputElements>
|
|
|
+ <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
|
|
|
+ <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
|
|
|
+ </InputElements>
|
|
|
+ <RenderTargets>
|
|
|
+ <RenderTarget Name="RTarget"/>
|
|
|
+ </RenderTargets>
|
|
|
+ <Shader Name="AS" Target="as_6_5" EntryPoint="ASMain" Text="@CS"/>
|
|
|
+ <Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain" Text="@CS"/>
|
|
|
+ <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@CS"/>
|
|
|
+ <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain" Text="@CS"/>
|
|
|
+ <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain">
|
|
|
+ <![CDATA[
|
|
|
+ struct PSInput {
|
|
|
+ float4 position : SV_POSITION;
|
|
|
+ float2 uv : TEXCOORD;
|
|
|
+ };
|
|
|
+ struct AtomicStuff {
|
|
|
+ float2 prepad[3];
|
|
|
+ float fltEl[2];
|
|
|
+ struct useless {
|
|
|
+ uint3 unused;
|
|
|
+ } postpad;
|
|
|
+ };
|
|
|
+
|
|
|
+ RWStructuredBuffer<AtomicStuff> g_strXchgBuf : register(u0);
|
|
|
+ RWByteAddressBuffer g_rawXchgBuf : register(u1);
|
|
|
+ RWBuffer<float> g_xchgBuf : register(u2);
|
|
|
+ RWTexture1D<float> g_xtexBuf : register(u3);
|
|
|
+ RWBuffer<float> g_shareXchgBuf : register(u4);
|
|
|
+
|
|
|
+ groupshared float g_xchgShare[1024];
|
|
|
+
|
|
|
+ #define VEC_CALL3(op, uav, ix, cmp, val) op(uav[(ix)], cmp, val)
|
|
|
+ #define VEC_CALL4(op, uav, ix, cmp, val, o) op(uav[(ix)], cmp, val, o)
|
|
|
+
|
|
|
+ #define STRUCT_CALL3(op, uav, ix, cmp, val) op(uav[ix].fltEl[1], cmp, val)
|
|
|
+ #define STRUCT_CALL4(op, uav, ix, cmp, val, o) op(uav[ix].fltEl[1], cmp, val, o)
|
|
|
+
|
|
|
+ #define RAW_CALL3(op, uav, ix, cmp, val) uav.op(4*(ix), cmp, val)
|
|
|
+ #define RAW_CALL4(op, uav, ix, cmp, val, o) uav.op(4*(ix), cmp, val, o)
|
|
|
+
|
|
|
+ // Threads contend for each slot: the first one to find the slot zero claims it with CompareStore; that winner then completes the CompareExchange and performs the final Exchange
|
|
|
+ #define XCHG_TEST(call3, call4, uav) \
|
|
|
+ call3(InterlockedCompareStoreFloatBitwise, uav, (ix/3)%63 + 1, 0, xchgVal - 2); \
|
|
|
+ call4(InterlockedCompareExchangeFloatBitwise, uav, (ix/3)%63 + 1, xchgVal - 2, xchgVal - 1, output); \
|
|
|
+ if (output == xchgVal - 2) { call3(InterlockedExchange, uav, (ix/3)%63 + 1, xchgVal, output);}
|
|
|
+
|
|
|
+ void AtomicTest(uint ix) { // float compare/exchange atomics on typed-buffer, structured, texture and raw UAVs
|
|
|
+ float xchgVal = ix; // read by name inside XCHG_TEST
|
|
|
+ float output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgBuf)
|
|
|
+ XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchgBuf)
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtexBuf)
|
|
|
+
|
|
|
+ // Special case for ByteAddressBuffers
|
|
|
+ RAW_CALL3(InterlockedCompareStoreFloatBitwise, g_rawXchgBuf, (ix/3)%63 + 1, 0, xchgVal - 2); // same sequence as XCHG_TEST, except the last step uses InterlockedExchangeFloat
|
|
|
+ RAW_CALL4(InterlockedCompareExchangeFloatBitwise, g_rawXchgBuf, (ix/3)%63 + 1, xchgVal - 2, xchgVal - 1, output);
|
|
|
+ if (output == xchgVal - 2) { RAW_CALL3(InterlockedExchangeFloat, g_rawXchgBuf, (ix/3)%63 + 1, xchgVal, output);}
|
|
|
+
|
|
|
+ // Check NaN corner case
|
|
|
+ InterlockedCompareExchangeFloatBitwise(g_xchgBuf[0], 0, sqrt(-1), output); // slot 0 is free for this: the exchange sequences above only touch slots 1..63
|
|
|
+ if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgBuf[0], sqrt(-1), 0.123); // only the thread that saw 0 tries to replace the stored sqrt(-1) with 0.123
|
|
|
+
|
|
|
+ InterlockedCompareExchangeFloatBitwise(g_strXchgBuf[0].fltEl[1], 0, sqrt(-1), output);
|
|
|
+ if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_strXchgBuf[0].fltEl[1], sqrt(-1), 0.123);
|
|
|
+
|
|
|
+ g_rawXchgBuf.InterlockedCompareExchangeFloatBitwise(0, 0, sqrt(-1), output);
|
|
|
+ if (output == 0.0) g_rawXchgBuf.InterlockedCompareStoreFloatBitwise(0, sqrt(-1), 0.123);
|
|
|
+
|
|
|
+ InterlockedCompareExchangeFloatBitwise(g_xtexBuf[0], 0, sqrt(-1), output);
|
|
|
+ if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xtexBuf[0], sqrt(-1), 0.123);
|
|
|
+ }
|
|
|
+
|
|
|
+ void AtomicGroupSharedTest(uint ix) { // float compare/exchange atomics on groupshared memory, then copies the results into g_shareXchgBuf
|
|
|
+ float xchgVal = ix; // read by name inside XCHG_TEST
|
|
|
+ float output = 0; // receives the original value from the exchange operations
|
|
|
+
|
|
|
+ g_xchgShare[ix%64] = 0; // zero the 64 slots that are used below and copied out at the end
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)
|
|
|
+
|
|
|
+ InterlockedCompareExchangeFloatBitwise(g_xchgShare[0], 0, sqrt(-1), output); // NaN corner case on slot 0, which XCHG_TEST (slots 1..63) does not touch
|
|
|
+ if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgShare[0], sqrt(-1), 0.123);
|
|
|
+
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ g_shareXchgBuf[ix%64] = g_xchgShare[ix%64];
|
|
|
+ }
|
|
|
+
|
|
|
+ struct Payload {
|
|
|
+ uint nothing;
|
|
|
+ };
|
|
|
+
|
|
|
+ static float4 g_Verts[6] = {
|
|
|
+ { -1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { -1.0f, -1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, 1.0f, 0.0f, 1.0f },
|
|
|
+ { 1.0f, -1.0f, 0.0f, 1.0f }};
|
|
|
+
|
|
|
+ static float2 g_UV[6] = {
|
|
|
+ { 0.0f, 0.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+
|
|
|
+ { 0.0f, 1.0f },
|
|
|
+ { 1.0f, 0.0f },
|
|
|
+ { 1.0f, 1.0f }};
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ void ASMain(uint ix : SV_GroupIndex) {
|
|
|
+ Payload payload;
|
|
|
+ payload.nothing = 0;
|
|
|
+ AtomicTest(ix);
|
|
|
+ AtomicGroupSharedTest(ix);
|
|
|
+ DispatchMesh(1, 1, 1, payload);
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(8, 8, 2)]
|
|
|
+ [OutputTopology("triangle")]
|
|
|
+ void MSMain(
|
|
|
+ uint ix : SV_GroupIndex,
|
|
|
+ in payload Payload payload,
|
|
|
+ out vertices PSInput verts[6],
|
|
|
+ out indices uint3 tris[2]) {
|
|
|
+ SetMeshOutputCounts(6, 2);
|
|
|
+ verts[ix].position = g_Verts[ix];
|
|
|
+ verts[ix].uv = g_UV[ix];
|
|
|
+ if (ix % 3)
|
|
|
+ tris[ix / 3] = uint3(ix, ix + 1, ix + 2);
|
|
|
+ AtomicTest(ix);
|
|
|
+ AtomicGroupSharedTest(ix);
|
|
|
+ }
|
|
|
+
|
|
|
+ PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
|
|
|
+ PSInput result;
|
|
|
+ result.position = float4(position, 1.0);
|
|
|
+ result.uv = uv;
|
|
|
+ AtomicTest(64*64 + ix);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ float4 PSMain(PSInput input) : SV_TARGET {
|
|
|
+ uint ix = uint(input.uv.y*64)*64 + input.uv.x*64;
|
|
|
+ AtomicTest(ix);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ [NumThreads(32, 32, 1)]
|
|
|
+ void CSMain(uint ix : SV_GroupIndex) {
|
|
|
+ AtomicTest(ix);
|
|
|
+ AtomicGroupSharedTest(ix);
|
|
|
+ }
|
|
|
+ ]]>
|
|
|
+ </Shader>
|
|
|
+ </ShaderOp>
|
|
|
+
|
|
|
+ <!--
|
|
|
TODO: Dynamically index into tables
|
|
|
-->
|
|
|
</ShaderOpSet>
|