hace 6 años · 5e060dec2f
--- a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl
@@ -2,51 +2,46 @@
 
				 
			
 
				 // CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
			
 
				 // CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
			
 
				-// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
			
 
				+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 4
			
 
				 
			
 
				 // CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1)
			
 
				 // CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
			
 
				-// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
			
 
				+// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
			
 
				 
			
 
				-// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: %[[_25:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				 // CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
			
 
				-// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: %[[_27:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				 // CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
			
 
				 
			
 
				 // CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
			
 
				 // CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
			
 
				 
			
 
				-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
			
 
				+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 undef, i32 undef, i8 3)
			
 
				 
			
 
				-float4 rows[4];
			
 
				+float2 rows[2];
			
 
				 
			
 
				-void set_row(inout float4 row, uint i) {
			
 
				+void set_row(inout float2 row, uint i) {
			
 
				   row = rows[i];
			
 
				 }
			
 
				 
			
 
				 class Obj {
			
 
				-  float4x4 mat;
			
 
				+  float2x2 mat;
			
 
				   void set() {
			
 
				     set_row(mat[0], 0);
			
 
				     set_row(mat[1], 1);
			
 
				-    set_row(mat[2], 2);
			
 
				-    set_row(mat[3], 3);
			
 
				   }
			
 
				 };
			
 
				 
			
 
				 RWByteAddressBuffer RWBuf;
			
 
				-groupshared Obj obj[2];
			
 
				-int objIndex;
			
 
				+groupshared Obj obj;
			
 
				 
			
 
				-[numthreads(4, 1, 1)]
			
 
				+[numthreads(2, 1, 1)]
			
 
				 void main(uint3 groupThreadID: SV_GroupThreadID) {
			
 
				   if (groupThreadID.x == 0) {
			
 
				-    obj[0].set();
			
 
				-    obj[objIndex].set();
			
 
				+    obj.set();
			
 
				   }
			
 
				   GroupMemoryBarrierWithGroupSync();
			
 
				-  float4 row = obj[0].mat[groupThreadID.x];
			
 
				-  row *= obj[objIndex].mat[groupThreadID.x];
			
 
				-  uint addr = groupThreadID.x * 4;
			
 
				-  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
			
 
				+  float2 row = obj.mat[groupThreadID.x];
			
 
				+  uint addr = groupThreadID.x * 8;
			
 
				+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
			
 
				 }
			
--- a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl
@@ -4,45 +4,43 @@
 
				 // CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
			
 
				 // CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
			
 
				 
			
 
				-// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
			
 
				-// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
			
 
				+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 4
			
 
				+// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
			
 
				 
			
 
				-// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				-// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16
			
 
				-// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: %[[_25:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
			
 
				+// CHECK: %[[_27:[^ ]+]] = getelementptr [4 x float], [4 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				 // CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
			
 
				 
			
 
				 // CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
			
 
				 // CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
			
 
				 
			
 
				-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
			
 
				+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 undef, i32 undef, i8 3)
			
 
				 
			
 
				-float4 rows[4];
			
 
				+float2 rows[2];
			
 
				 
			
 
				-void set_row(inout float4 row, uint i) {
			
 
				+void set_row(inout float2 row, uint i) {
			
 
				   row = rows[i];
			
 
				 }
			
 
				 
			
 
				 class Obj {
			
 
				-  float4x4 mat;
			
 
				+  float2x2 mat;
			
 
				   void set() {
			
 
				     set_row(mat[0], 0);
			
 
				     set_row(mat[1], 1);
			
 
				-    set_row(mat[2], 2);
			
 
				-    set_row(mat[3], 3);
			
 
				   }
			
 
				 };
			
 
				 
			
 
				 RWByteAddressBuffer RWBuf;
			
 
				 groupshared Obj obj;
			
 
				 
			
 
				-[numthreads(4, 1, 1)]
			
 
				+[numthreads(2, 1, 1)]
			
 
				 void main(uint3 groupThreadID: SV_GroupThreadID) {
			
 
				   if (groupThreadID.x == 0) {
			
 
				     obj.set();
			
 
				   }
			
 
				   GroupMemoryBarrierWithGroupSync();
			
 
				-  float4 row = obj.mat[groupThreadID.x];
			
 
				-  uint addr = groupThreadID.x * 4;
			
 
				-  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
			
 
				+  float2 row = obj.mat[groupThreadID.x];
			
 
				+  uint addr = groupThreadID.x * 8;
			
 
				+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
			
 
				 }
			
--- a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript2.hlsl
@@ -0,0 +1,45 @@
 
				+// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s
			
 
				+
			
 
				+// Make sure non-const gep/addrspace cast in codegen is translated properly
			
 
				+
			
 
				+// CHECK: @[[obj:[^,]+]] = addrspace(3) global [8 x float] undef
			
 
				+
			
 
				+// CHECK: %[[_6:[^ ]+]] = getelementptr [8 x float], [8 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: store float %{{.+}}, float addrspace(3)* %[[_6]], align 16
			
 
				+
			
 
				+// Skip next three stores to get to loads
			
 
				+// CHECK: store
			
 
				+// CHECK: store
			
 
				+// CHECK: store
			
 
				+
			
 
				+// CHECK: %[[_23:[^ ]+]] = getelementptr [8 x float], [8 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
			
 
				+// CHECK: %{{.+}} = load float, float addrspace(3)* %[[_23]], align 8
			
 
				+
			
 
				+
			
 
				+float4 rows[2];
			
 
				+
			
 
				+void set_row(inout float2 row, uint i) {
			
 
				+  row = rows[i];
			
 
				+}
			
 
				+
			
 
				+class Obj {
			
 
				+  float2x2 mat;
			
 
				+  void set() {
			
 
				+    set_row(mat[0], 0);
			
 
				+    set_row(mat[1], 1);
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+RWByteAddressBuffer RWBuf;
			
 
				+
			
 
				+// Dynamic index array to generate non-const gep/addrspace cast
			
 
				+groupshared Obj obj[2];
			
 
				+
			
 
				+[numthreads(2, 1, 1)]
			
 
				+void main(uint3 groupThreadID: SV_GroupThreadID) {
			
 
				+  obj[groupThreadID.x].set();
			
 
				+  GroupMemoryBarrierWithGroupSync();
			
 
				+  float2 row = obj[1 - groupThreadID.x].mat[groupThreadID.x];
			
 
				+  uint addr = groupThreadID.x * 8;
			
 
				+  RWBuf.Store2(addr, uint2(asuint(row.x), asuint(row.y)));
			
 
				+}