Explorar el Código

Fix AS/MS/PS interaction for AtomicsFloatTest (#3447)

The AS/MS shader stages were overwriting each other's groupshared output,
and were not using the expected per-thread index when offsetting into
buffers and groupshared memory. Instead, follow the pattern of
AtomicsTest and use the payload data to convey groupshared data between
AS and MS, and use unique ranges of per-thread indices for each stage.
Justin Holewinski hace 4 años
padre
commit
f3c899bdde
Se ha modificado 1 fichero con 35 adiciones y 11 borrados
  1. 35 11
      tools/clang/test/HLSL/ShaderOpArith.xml

+ 35 - 11
tools/clang/test/HLSL/ShaderOpArith.xml

@@ -2148,25 +2148,27 @@
           if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xtexBuf[0], sqrt(-1), 0.123);
           if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xtexBuf[0], sqrt(-1), 0.123);
         }
         }
 
 
+        void InitSharedMem(uint ix) {
+          // Zero-init shared memory
+          g_xchgShare[ix%64] = 0;
+          GroupMemoryBarrierWithGroupSync();
+        }
+
         void AtomicGroupSharedTest(uint ix) {
         void AtomicGroupSharedTest(uint ix) {
           float xchgVal = ix;
           float xchgVal = ix;
           float output = 0;
           float output = 0;
 
 
-          g_xchgShare[ix%64] = 0;
-          GroupMemoryBarrierWithGroupSync();
-
           XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)
           XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)
 
 
           InterlockedCompareExchangeFloatBitwise(g_xchgShare[0], 0, sqrt(-1), output);
           InterlockedCompareExchangeFloatBitwise(g_xchgShare[0], 0, sqrt(-1), output);
           if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgShare[0], sqrt(-1), 0.123);
           if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgShare[0], sqrt(-1), 0.123);
 
 
           GroupMemoryBarrierWithGroupSync();
           GroupMemoryBarrierWithGroupSync();
-
-          g_shareXchgBuf[ix%64] = g_xchgShare[ix%64];
         }
         }
 
 
+        // Payloads are used to transport AS test results to MS where they are finalized
         struct Payload {
         struct Payload {
-          uint nothing;
+          float xchg[64];
         };
         };
 
 
         static float4 g_Verts[6] = {
         static float4 g_Verts[6] = {
@@ -2187,12 +2189,19 @@
           { 1.0f, 0.0f },
           { 1.0f, 0.0f },
           { 1.0f, 1.0f }};
           { 1.0f, 1.0f }};
 
 
+        groupshared Payload payload;
+
         [NumThreads(8, 8, 2)]
         [NumThreads(8, 8, 2)]
         void ASMain(uint ix : SV_GroupIndex) {
         void ASMain(uint ix : SV_GroupIndex) {
-          Payload payload;
-          payload.nothing = 0;
-          AtomicTest(ix);
+          AtomicTest(64*64 + 8*8*2 + ix);
+
+          InitSharedMem(ix);
           AtomicGroupSharedTest(ix);
           AtomicGroupSharedTest(ix);
+
+          // Copy AS test results to payload and ultimately to MS
+          // More threads than results are possible,
+          // so indices will result in duplicate copies
+          payload.xchg[ix%64] = g_xchgShare[ix%64];
           DispatchMesh(1, 1, 1, payload);
           DispatchMesh(1, 1, 1, payload);
         }
         }
 
 
@@ -2208,8 +2217,19 @@
             verts[ix%6].position = g_Verts[ix%6];
             verts[ix%6].position = g_Verts[ix%6];
             verts[ix%6].uv = g_UV[ix%6];
             verts[ix%6].uv = g_UV[ix%6];
             tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
             tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
-            AtomicTest(ix);
-            AtomicGroupSharedTest(ix);
+
+            AtomicTest(64*64 + ix);
+
+            // Load AS test results from payload
+            // More threads than results are possible,
+            // so indices will result in duplicate copies
+            g_xchgShare[ix%64] = payload.xchg[ix%64];
+            GroupMemoryBarrierWithGroupSync();
+
+            AtomicGroupSharedTest(8*8*2 + ix);
+
+            // Copy final AS + MS results to output UAVs
+            g_shareXchgBuf[ix%64] = g_xchgShare[ix%64];
         }
         }
 
 
         PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
         PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
@@ -2229,7 +2249,11 @@
         [NumThreads(32, 32, 1)]
         [NumThreads(32, 32, 1)]
         void CSMain(uint ix : SV_GroupIndex) {
         void CSMain(uint ix : SV_GroupIndex) {
           AtomicTest(ix);
           AtomicTest(ix);
+
+          InitSharedMem(ix);
           AtomicGroupSharedTest(ix);
           AtomicGroupSharedTest(ix);
+
+          g_shareXchgBuf[ix%64] = g_xchgShare[ix%64];
         }
         }
       ]]>
       ]]>
     </Shader>
     </Shader>