Explorar el Código

Merge branch '19h1-fixes' into 19h1-fixes-on-master

Tex Riddell hace 6 años
padre
commit
5e060dec2f

+ 14 - 19
tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl

@@ -2,51 +2,46 @@
 
 // CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
 // CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
-// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 4
 
 // CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1)
 // CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
-// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
+// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
 
-// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_25:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
 // CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
-// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_27:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
 // CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
 
 // CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
 // CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
 
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 undef, i32 undef, i8 3)
 
-float4 rows[4];
+float2 rows[2];
 
-void set_row(inout float4 row, uint i) {
+void set_row(inout float2 row, uint i) {
   row = rows[i];
 }
 
 class Obj {
-  float4x4 mat;
+  float2x2 mat;
   void set() {
     set_row(mat[0], 0);
     set_row(mat[1], 1);
-    set_row(mat[2], 2);
-    set_row(mat[3], 3);
   }
 };
 
 RWByteAddressBuffer RWBuf;
-groupshared Obj obj[2];
-int objIndex;
+groupshared Obj obj;
 
-[numthreads(4, 1, 1)]
+[numthreads(2, 1, 1)]
 void main(uint3 groupThreadID: SV_GroupThreadID) {
   if (groupThreadID.x == 0) {
-    obj[0].set();
-    obj[objIndex].set();
+    obj.set();
   }
   GroupMemoryBarrierWithGroupSync();
-  float4 row = obj[0].mat[groupThreadID.x];
-  row *= obj[objIndex].mat[groupThreadID.x];
-  uint addr = groupThreadID.x * 4;
-  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
+  float2 row = obj.mat[groupThreadID.x];
+  uint addr = groupThreadID.x * 8;
+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
 }

+ 13 - 15
tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl

@@ -4,45 +4,43 @@
 // CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
 // CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
 
-// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
-// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 4
+// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
 
-// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
-// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16
-// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_25:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
+// CHECK: %[[_27:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
 // CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
 
 // CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
 // CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
 
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 undef, i32 undef, i8 3)
 
-float4 rows[4];
+float2 rows[2];
 
-void set_row(inout float4 row, uint i) {
+void set_row(inout float2 row, uint i) {
   row = rows[i];
 }
 
 class Obj {
-  float4x4 mat;
+  float2x2 mat;
   void set() {
     set_row(mat[0], 0);
     set_row(mat[1], 1);
-    set_row(mat[2], 2);
-    set_row(mat[3], 3);
   }
 };
 
 RWByteAddressBuffer RWBuf;
 groupshared Obj obj;
 
-[numthreads(4, 1, 1)]
+[numthreads(2, 1, 1)]
 void main(uint3 groupThreadID: SV_GroupThreadID) {
   if (groupThreadID.x == 0) {
     obj.set();
   }
   GroupMemoryBarrierWithGroupSync();
-  float4 row = obj.mat[groupThreadID.x];
-  uint addr = groupThreadID.x * 4;
-  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
+  float2 row = obj.mat[groupThreadID.x];
+  uint addr = groupThreadID.x * 8;
+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
 }

+ 45 - 0
tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript2.hlsl

@@ -0,0 +1,45 @@
+// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s
+
+// Make sure non-const gep/addrspace cast in codegen is translated properly
+
+// CHECK: @[[obj:[^,]+]] = addrspace(3) global [8 x float] undef
+
+// CHECK: %[[_6:[^ ]+]] = getelementptr [8 x float], [8 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: store float %{{.+}}, float addrspace(3)* %[[_6]], align 16
+
+// Skip next three stores to get to loads
+// CHECK: store
+// CHECK: store
+// CHECK: store
+
+// CHECK: %[[_23:[^ ]+]] = getelementptr [8 x float], [8 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %{{.+}} = load float, float addrspace(3)* %[[_23]], align 8
+
+
+float4 rows[2];
+
+void set_row(inout float2 row, uint i) {
+  row = rows[i];
+}
+
+class Obj {
+  float2x2 mat;
+  void set() {
+    set_row(mat[0], 0);
+    set_row(mat[1], 1);
+  }
+};
+
+RWByteAddressBuffer RWBuf;
+
+// Dynamic index array to generate non-const gep/addrspace cast
+groupshared Obj obj[2];
+
+[numthreads(2, 1, 1)]
+void main(uint3 groupThreadID: SV_GroupThreadID) {
+  obj[groupThreadID.x].set();
+  GroupMemoryBarrierWithGroupSync();
+  float2 row = obj[1 - groupThreadID.x].mat[groupThreadID.x];
+  uint addr = groupThreadID.x * 8;
+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
+}