Jelajahi Sumber

Consider ranges when counting UAVs for 64UAV (#3470)

The 64UAV flag is meant to indicate that 9 or more UAVs are in use.
Previously, each UAV was considered as one regardless of range. By
adding up the ranges, we get a more accurate determination.

Only use the new counting when using the latest validator, to maintain
compatibility with older validator versions.

Fixes: #2964
Greg Roth 4 tahun lalu
induk
melakukan
b89f06578f

+ 13 - 5
lib/DXIL/DxilModule.cpp

@@ -316,16 +316,14 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
 
 
   const ShaderModel *SM = GetShaderModel();
   const ShaderModel *SM = GetShaderModel();
 
 
-  unsigned NumUAVs = m_UAVs.size();
+  unsigned NumUAVs = 0;
   const unsigned kSmallUAVCount = 8;
   const unsigned kSmallUAVCount = 8;
-  if (NumUAVs > kSmallUAVCount)
-    Flags.Set64UAVs(true);
-  if (NumUAVs && !(SM->IsCS() || SM->IsPS()))
-    Flags.SetUAVsAtEveryStage(true);
 
 
   bool hasRawAndStructuredBuffer = false;
   bool hasRawAndStructuredBuffer = false;
 
 
   for (auto &UAV : m_UAVs) {
   for (auto &UAV : m_UAVs) {
+    unsigned uavSize = UAV->GetRangeSize();
+    NumUAVs += uavSize > 8U? 9U: uavSize; // avoid overflow
     if (UAV->IsROV())
     if (UAV->IsROV())
       Flags.SetROVs(true);
       Flags.SetROVs(true);
     switch (UAV->GetKind()) {
     switch (UAV->GetKind()) {
@@ -338,6 +336,16 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
       break;
       break;
     }
     }
   }
   }
+  // Maintain earlier erroneous counting of UAVs for compatibility
+  if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 6) < 0)
+    Flags.Set64UAVs(m_UAVs.size() > kSmallUAVCount);
+  else
+    Flags.Set64UAVs(NumUAVs > kSmallUAVCount);
+
+  if (NumUAVs && !(SM->IsCS() || SM->IsPS()))
+    Flags.SetUAVsAtEveryStage(true);
+
+
   for (auto &SRV : m_SRVs) {
   for (auto &SRV : m_SRVs) {
     switch (SRV->GetKind()) {
     switch (SRV->GetKind()) {
     case DXIL::ResourceKind::RawBuffer:
     case DXIL::ResourceKind::RawBuffer:

+ 23 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/Buffer/uav64.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
+
+// Verify that 9 UAVs will set the 64Uav shader flag
+
+// CHECK: Note: shader requires additional functionality:
+// CHECK: 64 UAV slots
+// CHECK: @main
+
+RWBuffer<float> output : register(u0);
+RWStructuredBuffer<float> g_buf1 : register(u1);
+RWStructuredBuffer<float> g_buf2 : register(u2);
+RWStructuredBuffer<float> g_buf3 : register(u3);
+RWStructuredBuffer<float> g_buf4 : register(u4);
+RWStructuredBuffer<float> g_buf5 : register(u5);
+RWStructuredBuffer<float> g_buf6 : register(u6);
+RWStructuredBuffer<float> g_buf7 : register(u7);
+RWStructuredBuffer<float> g_buf8 : register(u8);
+
+[numthreads(8,8,1)]
+void main(uint GI : SV_GroupIndex) {
+    output[GI] = g_buf1[GI] + g_buf2[GI] + g_buf3[GI] + g_buf4[GI] +
+                 g_buf5[GI] + g_buf6[GI] + g_buf7[GI] + g_buf8[GI];
+}

+ 16 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/Buffer/uavArray64.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T cs_6_0 %s | FileCheck %s
+
+// Verify that 9 UAVs (one buffer plus an 8-element array) will still add the 64Uav shader flag
+
+// CHECK: Note: shader requires additional functionality:
+// CHECK: 64 UAV slots
+// CHECK: @main
+
+RWBuffer<float> output : register(u0);
+RWStructuredBuffer<float> g_buf[8] : register(u1);
+
+[numthreads(8,8,1)]
+void main(uint GI : SV_GroupIndex) {
+    output[GI] = g_buf[1][GI] + g_buf[2][GI] + g_buf[3][GI] + g_buf[4][GI] +
+                 g_buf[5][GI] + g_buf[6][GI] + g_buf[7][GI];
+}

+ 15 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/Buffer/uavUnboundArray64.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T cs_6_0 %s | FileCheck %s
+
+// Verify that an unbounded array will add the 64Uav shader flag
+
+// CHECK: Note: shader requires additional functionality:
+// CHECK: 64 UAV slots
+// CHECK: @main
+
+RWBuffer<float> output : register(u0);
+RWStructuredBuffer<float> g_buf[] : register(u1);
+
+[numthreads(8,8,1)]
+void main(uint GI : SV_GroupIndex) {
+    output[GI] = g_buf[1][GI] + g_buf[2][GI] + g_buf[3][GI] + g_buf[4][GI];
+}