Переглянути джерело

PIX pixel-hit instrumentation: Re-emit type system, fix several issues with the UAV (#635)

* Add SV_Position index parameter, various minor tweaks to UAV setup

* Convert to raw buffer

* fix up unit tests, add a few names to variables

* CR feedback: check for pre-existing type; typo
Jeff Noyle 8 років тому
батько
коміт
f169e6f238

+ 4 - 3
lib/HLSL/DxcOptimizer.cpp

@@ -88,8 +88,8 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
     initializeDxilExpandTrigIntrinsicsPass(Registry);
-    initializeDxilForceEarlyZPass(Registry);
     initializeDxilFinalizeModulePass(Registry);
+    initializeDxilForceEarlyZPass(Registry);
     initializeDxilGenerationPassPass(Registry);
     initializeDxilLegalizeEvalOperationsPass(Registry);
     initializeDxilLegalizeResourceUsePassPass(Registry);
@@ -175,7 +175,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR AlwaysInlinerArgs[] = { "InsertLifetime", "InlineThreshold" };
   static const LPCSTR ArgPromotionArgs[] = { "maxElements" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" };
-  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "num-pixels" };
+  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "sv-position-index", "num-pixels" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" };
@@ -244,7 +244,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR AlwaysInlinerArgs[] = { "Insert @llvm.lifetime intrinsics", "Insert @llvm.lifetime intrinsics" };
   static const LPCSTR ArgPromotionArgs[] = { "None" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" };
-  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None" };
+  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" };
@@ -365,6 +365,7 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("sample-profile-max-propagate-iterations")
     ||  S.equals("sroa-random-shuffle-slices")
     ||  S.equals("sroa-strict-inbounds")
+    ||  S.equals("sv-position-index")
     ||  S.equals("unlikely-branch-weight")
     ||  S.equals("unroll-allow-partial")
     ||  S.equals("unroll-count")

+ 62 - 53
lib/HLSL/DxilAddPixelHitInstrumentation.cpp

@@ -41,6 +41,7 @@ class DxilAddPixelHitInstrumentation : public ModulePass {
   bool AddPixelCost = false;
   int RTWidth = 1024;
   int NumPixels = 128;
+  int SVPositionIndex = -1;
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -70,6 +71,10 @@ void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O)
     {
       AddPixelCost = atoi(option.second.data()) != 0;
     }
+    else if (0 == option.first.compare("sv-position-index"))
+    {
+      SVPositionIndex = atoi(option.second.data());
+    }
   }
 }
 
@@ -101,7 +106,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
   // If not present, we add it.
   if ( SV_Position == InputElements.end() ) {
     auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
-    SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, 0, 0);
+    SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, SVPositionIndex == -1 ? 0 : SVPositionIndex, 0);
     SVPosition->AppendSemanticIndex(0);
     SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
     SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
@@ -116,10 +121,54 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
   auto EntryPointFunction = DM.GetEntryFunction();
 
   auto & EntryBlock = EntryPointFunction->getEntryBlock();
-  bool HaveInsertedUAV = false;
 
   CallInst *HandleForUAV;
+  {
+    IRBuilder<> Builder(DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
+    
+    unsigned int UAVResourceHandle = static_cast<unsigned int>(DM.GetUAVs().size());
+
+    // Set up a UAV with structure of a single int
+    SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
+    llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "class.RWStructuredBuffer");
+    std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
+    pUAV->SetGlobalName("PIX_CountUAVName");
+    pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
+    pUAV->SetID(UAVResourceHandle);
+    pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
+    pUAV->SetSampleCount(1);
+    pUAV->SetGloballyCoherent(false);
+    pUAV->SetHasCounter(false);
+    pUAV->SetCompType(CompType::getI32());
+    pUAV->SetLowerBound(0);
+    pUAV->SetRangeSize(1);
+    pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
+
+    auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
+    if (pAnnotation == nullptr)
+    {
+      pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
+      pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
+      pAnnotation->GetFieldAnnotation(0).SetCompType(hlsl::DXIL::ComponentType::I32);
+      pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
+    }
+
+    ID = DM.AddUAV(std::move(pUAV));
 
+    assert(ID == UAVResourceHandle);
+
+    // Create handle for the newly-added UAV
+    Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
+    Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+    Constant* UAVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
+    Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
+    Constant* IndexArg = HlslOP->GetU32Const(0); // 
+    Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
+    HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
+    { CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
+
+    DM.ReEmitDxilResources();
+  }
   // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only
   // points from which the shader can exit (except for a pixel-kill?)
   auto & Instructions = EntryBlock.getInstList();
@@ -134,42 +183,6 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
         // Start adding instructions right before the Ret:
         IRBuilder<> Builder(ThisInstruction);
 
-        if (!HaveInsertedUAV) {
-
-          // Set up a UAV with structure of a single int
-          SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
-          llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_CountUAV_Type");
-          std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-          pUAV->SetGlobalName("PIX_CountUAVName");
-          pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-          pUAV->SetID(0);
-          pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
-          pUAV->SetSampleCount(1);
-          pUAV->SetGloballyCoherent(false);
-          pUAV->SetHasCounter(false);
-          pUAV->SetCompType(CompType::getI32());
-          pUAV->SetLowerBound(0);
-          pUAV->SetRangeSize(1);
-          pUAV->SetKind(DXIL::ResourceKind::StructuredBuffer);
-          pUAV->SetElementStride(4);
-
-          ID = DM.AddUAV(std::move(pUAV));
-
-          // Create handle for the newly-added UAV
-          Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-          Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-          Constant* UAVVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
-          Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
-          Constant* IndexArg = HlslOP->GetU32Const(0); // 
-          Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
-          HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
-            { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
-
-          DM.ReEmitDxilResources();
-
-          HaveInsertedUAV = true;
-        }
-
         // ------------------------------------------------------------------------------------------------------------
         // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered
         // ------------------------------------------------------------------------------------------------------------
@@ -180,8 +193,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
         Constant* One32Arg = HlslOP->GetU32Const(1);
         Constant* One8Arg = HlslOP->GetI8Const(1);
         UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
-        Constant* NumPixelsArg = HlslOP->GetU32Const(NumPixels);
-        Constant* NumPixelsMinusOneArg = HlslOP->GetU32Const(NumPixels-1);
+        Constant* NumPixelsByteOffsetArg = HlslOP->GetU32Const(NumPixels * 4);
 
         // Step 1: Convert SV_POSITION to UINT          
         Value * XAsInt;
@@ -200,15 +212,12 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
         }
 
         // Step 2: Calculate pixel index
-        Value * ClampedIndex;
+        Value * Index;
         {
           Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth);
-          auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg);
-          auto Index = Builder.CreateAdd(XAsInt, YOffset);
-
-          // Step 3: Clamp to size of UAV to prevent TDR if something goes wrong
-          auto CompareToLimit = Builder.CreateICmpUGT(Index, NumPixelsMinusOneArg);
-          ClampedIndex = Builder.CreateSelect(CompareToLimit, NumPixelsMinusOneArg, Index, "Clamped");
+          auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg, "YOffset");
+          auto Elementoffset = Builder.CreateAdd(XAsInt, YOffset, "ElementOffset");
+          Index = Builder.CreateMul(Elementoffset, HlslOP->GetU32Const(4), "ByteIndex");
         }
 
         // Insert the UAV increment instruction:
@@ -220,9 +229,9 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
             AtomicBinOpcode,// i32, ; opcode
             HandleForUAV,   // %dx.types.Handle, ; resource handle
             AtomicAdd,      // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
-            ClampedIndex,   // i32, ; coordinate c0: index in elements
-            Zero32Arg,      // i32, ; coordinate c1: byte offset into element
-            Zero32Arg,      // i32, ; coordinate c2 (unused)
+            Index,          // i32, ; coordinate c0: byte offset
+            UndefArg,       // i32, ; coordinate c1 (unused)
+            UndefArg,       // i32, ; coordinate c2 (unused)
             One32Arg        // i32); increment value
           }, "UAVIncResult");
         }
@@ -249,7 +258,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
           }
 
           // Step 2: Update write position ("Index") to second half of the UAV 
-          auto OffsetIndex = Builder.CreateAdd(ClampedIndex, NumPixelsArg);
+          auto OffsetIndex = Builder.CreateAdd(Index, NumPixelsByteOffsetArg, "OffsetByteIndex");
 
           // Step 3: Increment UAV value by the weight
           (void)Builder.CreateCall(AtomicOpFunc,{
@@ -257,8 +266,8 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
             HandleForUAV,   // %dx.types.Handle, ; resource handle
             AtomicAdd,      // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
             OffsetIndex,    // i32, ; coordinate c0: index in elements
-            Zero32Arg,      // i32, ; coordinate c1: byte offset into element
-            Zero32Arg,      // i32, ; coordinate c2 (unused)
+            UndefArg,       // i32, ; coordinate c1 (unused)
+            UndefArg,       // i32, ; coordinate c2 (unused)
             Weight          // i32); increment value
           }, "UAVIncResult2");
         }

+ 5 - 2
lib/HLSL/DxilMetadataHelper.cpp

@@ -720,9 +720,12 @@ void DxilMDHelper::EmitDxilTypeSystem(DxilTypeSystem &TypeSystem, vector<GlobalV
     MDFuncVals.push_back(pMD);
   }
 
+  NamedMDNode *pDxilTypeAnnotationsMD = m_pModule->getNamedMetadata(kDxilTypeSystemMDName);
+  if (pDxilTypeAnnotationsMD != nullptr) {
+    m_pModule->eraseNamedMetadata(pDxilTypeAnnotationsMD);
+  }
+
   if (MDVals.size() > 1) {
-    NamedMDNode *pDxilTypeAnnotationsMD = m_pModule->getNamedMetadata(kDxilTypeSystemMDName);
-    IFTBOOL(pDxilTypeAnnotationsMD == nullptr, DXC_E_INCORRECT_DXIL_METADATA);
     pDxilTypeAnnotationsMD = m_pModule->getOrInsertNamedMetadata(kDxilTypeSystemMDName);
 
     pDxilTypeAnnotationsMD->addOperand(MDNode::get(m_Ctx, MDVals));

+ 1 - 0
lib/HLSL/DxilModule.cpp

@@ -1358,6 +1358,7 @@ MDTuple *DxilModule::EmitDxilResources() {
 void DxilModule::ReEmitDxilResources() {
   MDTuple *pNewResource = EmitDxilResources();
   m_pMDHelper->UpdateDxilResources(pNewResource);
+  m_pMDHelper->EmitDxilTypeSystem(GetTypeSystem(), m_LLVMUsed);
   const llvm::NamedMDNode *pEntries = m_pMDHelper->GetDxilEntryPoints();
   IFTBOOL(pEntries->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
 

+ 1 - 3
tools/clang/test/HLSL/pix/forceEarlyZ.hlsl

@@ -1,9 +1,7 @@
 // RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-force-early-z | %FileCheck %s
 
-// Just check that the last line (which contains global flags) has the "8" meaning force-early-z:
+// Just check that an appropriately-formed line (which contains global flags) has the "8" meaning force-early-z:
 // CHECK: !{i32 0, i64 8}
-// Check there are no more entries (i.e. the above really was the last line)
-// CHECK-NOT: !{
 
 [RootSignature("")]
 float4 main() : SV_Target {

+ 1 - 5
tools/clang/test/HLSL/pix/pixelCounter.hlsl

@@ -11,13 +11,9 @@
 // Calculation of offset:
 // CHECK: = mul i32 %YIndex, 16
 // CHECK: = add i32 %XIndex,
-// CHECK: = icmp ugt i32
-
-// Clamp to UAV size:
-// CHECK: %Clamped = select i1 
 
 // Check the write to the UAV was emitted:
-// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %ByteIndex, i32 undef, i32 undef, i32 1)
 
 float4 main(float4 pos : SV_Position) : SV_Target {
   return pos;

+ 3 - 3
tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl

@@ -1,13 +1,13 @@
 // RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,add-pixel-cost=1 | %FileCheck %s
 
 // Check the write to the UAV was emitted:
-// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %ByteIndex, i32 undef, i32 undef, i32 1)
 
 // Check for pixel cost instructions:
 // CHECK: %WeightStruct = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %PIX_CountUAV_Handle, i32 128, i32 0)
 // CHECK: %Weight = extractvalue %dx.types.ResRet.i32 %WeightStruct, 0
-// CHECK: add i32 %Clamped, 64
-// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32
+// CHECK: %OffsetByteIndex = add i32 %ByteIndex, 256
+// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %OffsetByteIndex, i32 undef, i32 undef, i32 %Weight)
 
 
 

+ 2 - 4
tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl

@@ -1,13 +1,11 @@
 // RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,force-early-z=1 | %FileCheck %s
 
 // Check the write to the UAV was emitted:
-// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %ByteIndex, i32 undef, i32 undef, i32 1)
 
-// Early z flag value is 8. The flags are stored in the last entry in the entry function description record. See:
+// Early z flag value is 8. The flags are stored in an entry in the entry function description record. See:
 // https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-properties-and-capabilities
 // CHECK: !{i32 0, i64 8}
-// Make sure it's the last entry:
-// CHECK-NOT: !{
 
 float4 main(float4 pos : SV_Position) : SV_Target {
   return pos;

+ 1 - 0
utils/hct/hctdb.py

@@ -1272,6 +1272,7 @@ class db_dxil(object):
             {'n':'force-early-z','t':'int','c':1},
             {'n':'add-pixel-cost','t':'int','c':1},
             {'n':'rt-width','t':'int','c':1},
+            {'n':'sv-position-index','t':'int','c':1},
             {'n':'num-pixels','t':'int','c':1}])
         add_pass('hlsl-dxil-constantColor', 'DxilOutputColorBecomesConstant', 'DXIL Constant Color Mod', [
             {'n':'mod-mode','t':'int','c':1},