瀏覽代碼

Implement IsHelperLane() (#3382)

Tex Riddell 4 年之前
父節點
當前提交
7c9e487afd

+ 1 - 0
docs/DXIL.rst

@@ -2317,6 +2317,7 @@ ID  Name                                                  Description
 218 CreateHandleFromHeap                                  create resource handle from heap
 219 Unpack4x8                                             unpacks 4 8-bit signed or unsigned values into int32 or int16 vector
 220 Pack4x8                                               packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits
+221 IsHelperLane                                          returns true on helper lanes in pixel shaders
 === ===================================================== =======================================================================================================================================================================================================================
 
 

+ 10 - 4
include/dxc/DXIL/DxilConstants.h

@@ -465,6 +465,9 @@ namespace DXIL {
     // Graphics shader
     ViewID = 138, // returns the view index
   
+    // Helper Lanes
+    IsHelperLane = 221, // returns true on helper lanes in pixel shaders
+  
     // Hull shader
     OutputControlPointID = 107, // OutputControlPointID
     StorePatchConstant = 106, // StorePatchConstant
@@ -713,9 +716,9 @@ namespace DXIL {
     NumOpCodes_Dxil_1_3 = 162,
     NumOpCodes_Dxil_1_4 = 165,
     NumOpCodes_Dxil_1_5 = 216,
-    NumOpCodes_Dxil_1_6 = 221,
+    NumOpCodes_Dxil_1_6 = 222,
   
-    NumOpCodes = 221 // exclusive last value of enumeration
+    NumOpCodes = 222 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -794,6 +797,9 @@ namespace DXIL {
     // Graphics shader
     ViewID,
   
+    // Helper Lanes
+    IsHelperLane,
+  
     // Hull shader
     OutputControlPointID,
     StorePatchConstant,
@@ -975,9 +981,9 @@ namespace DXIL {
     NumOpClasses_Dxil_1_3 = 118,
     NumOpClasses_Dxil_1_4 = 120,
     NumOpClasses_Dxil_1_5 = 143,
-    NumOpClasses_Dxil_1_6 = 148,
+    NumOpClasses_Dxil_1_6 = 149,
   
-    NumOpClasses = 148 // exclusive last value of enumeration
+    NumOpClasses = 149 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 

+ 18 - 0
include/dxc/DXIL/DxilInstructions.h

@@ -7148,5 +7148,23 @@ struct DxilInst_Pack4x8 {
   llvm::Value *get_w() const { return Instr->getOperand(5); }
   void set_w(llvm::Value *val) { Instr->setOperand(5, val); }
 };
+
+/// Typed wrapper for a call to the DXIL IsHelperLane op (opcode 221), which returns true on helper lanes in pixel shaders
+struct DxilInst_IsHelperLane {
+  llvm::Instruction *Instr; // the instruction being wrapped; stored exactly as passed to the constructor
+  // Construction and identification: wrap any instruction, then use operator bool() to ask whether it is an IsHelperLane call
+  DxilInst_IsHelperLane(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IsHelperLane);
+  }
+  // Validation support: the op is always allowed, and a valid call carries exactly one argument operand
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; presumably callers test operator bool() first - confirm
+    return true;
+  }
+  // Metadata: this op reports that it does not require uniform inputs
+  bool requiresUniformInputs() const { return false; }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 1 - 0
include/dxc/HlslIntrinsicOp.h

@@ -59,6 +59,7 @@ import hctdb_instrhelp
   IOP_InterlockedMin,
   IOP_InterlockedOr,
   IOP_InterlockedXor,
+  IOP_IsHelperLane,
   IOP_NonUniformResourceIndex,
   IOP_ObjectRayDirection,
   IOP_ObjectRayOrigin,

+ 7 - 0
lib/Analysis/DxilConstantFolding.cpp

@@ -530,6 +530,8 @@ static Constant *ConstantFoldIntIntrinsic(OP::OpCode opcode, Type *Ty, const Dxi
 
     return ConstantFoldQuaternaryIntInstrinsic(opcode, Ty, Op1, Op2, Op3, Op4);
   }
+  case OP::OpCodeClass::IsHelperLane:
+    return ConstantInt::get(Ty, (uint64_t)0);
   }
 
   return nullptr;
@@ -588,6 +590,11 @@ bool hlsl::CanConstantFoldCallTo(const Function *F) {
     case OP::OpCodeClass::Dot3:
     case OP::OpCodeClass::Dot4:
       return true;
+    case OP::OpCodeClass::IsHelperLane: {
+      const hlsl::ShaderModel *pSM =
+          F->getParent()->GetDxilModule().GetShaderModel();
+      return !pSM->IsPS() && !pSM->IsLib();
+    }
     }
   }
 

+ 9 - 2
lib/DXIL/DxilOperations.cpp

@@ -401,6 +401,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Packing intrinsics                                                                                                      void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
   {  OC::Pack4x8,                 "Pack4x8",                  OCC::Pack4x8,                  "pack4x8",                   { false, false, false, false, false, false,  true,  true, false, false, false}, Attribute::ReadNone, },
+
+  // Helper Lanes                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
+  {  OC::IsHelperLane,            "IsHelperLane",             OCC::IsHelperLane,             "isHelperLane",              { false, false, false, false,  true, false, false, false, false, false, false}, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
@@ -839,8 +842,8 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
     return;
   }
   // Instructions: AnnotateHandle=216, CreateHandleFromBinding=217,
-  // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220
-  if ((216 <= op && op <= 220)) {
+  // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220, IsHelperLane=221
+  if ((216 <= op && op <= 221)) {
     major = 6;  minor = 6;
     return;
   }
@@ -1427,6 +1430,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Packing intrinsics
   case OpCode::Pack4x8:                A(pI32);     A(pI32); A(pI8);  A(pETy); A(pETy); A(pETy); A(pETy); break;
+
+    // Helper Lanes
+  case OpCode::IsHelperLane:           A(pI1);      A(pI32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -1684,6 +1690,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque:
   case OpCode::RayQuery_CandidateTriangleFrontFace:
   case OpCode::RayQuery_CommittedTriangleFrontFace:
+  case OpCode::IsHelperLane:
     return IntegerType::get(Ctx, 1);
   case OpCode::CBufferLoadLegacy:
   case OpCode::Sample:

+ 2 - 2
lib/HLSL/DxilValidation.cpp

@@ -968,8 +968,8 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
     return (major > 6 || (major == 6 && minor >= 5))
         && (SK == DXIL::ShaderKind::Mesh);
   // Instructions: AnnotateHandle=216, CreateHandleFromBinding=217,
-  // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220
-  if ((216 <= op && op <= 220))
+  // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220, IsHelperLane=221
+  if ((216 <= op && op <= 221))
     return (major > 6 || (major == 6 && minor >= 6));
   return true;
   // VALOPCODESM-TEXT:END

+ 1 - 0
lib/HLSL/HLOperationLower.cpp

@@ -5340,6 +5340,7 @@ IntrinsicLower gLowerTable[] = {
     {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation, DXIL::OpCode::IsHelperLane},
     {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
     {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},

文件差異過大導致無法顯示
+ 148 - 143
tools/clang/lib/Sema/gen_intrin_main_tables_15.h


+ 206 - 0
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/helper/IsHelperLane.hlsl

@@ -0,0 +1,206 @@
+// RUN: %dxc -E vs -T vs_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E gs -T gs_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E hs -T hs_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ds -T ds_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ps -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E cs -T cs_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E as -T as_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ms -T ms_6_6 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E vs -T vs_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E gs -T gs_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E hs -T hs_6_6 -Od %s | FileCheck %s -check-prefixes=CHECKHS
+// RUN: %dxc -E ds -T ds_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E cs -T cs_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E as -T as_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -E ms -T ms_6_6 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_6 %s | FileCheck %s -check-prefixes=CHECKLIB
+// RUN: %dxc -T lib_6_6 -fcgl %s | FileCheck %s -check-prefixes=CHECKHLLIB
+
+// Exactly one call
+// CHECK: define void @{{.*}}()
+// CHECK: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECK-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
+
+// Exactly two calls for HS and PC func
+// CHECKHS: define void @{{.*}}()
+// CHECKHS: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKHS: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKHS-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
+
+// Translated to constant zero, so no call:
+// CHECKCONST: define void @{{.*}}()
+// CHECKCONST-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
+
+// No calls simplified for lib target.
+// 10 for: vs, gs, hs + pc, ds, ps, cs, as, ms, and exported testfn
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
+// CHECKLIB-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
+
+// One HL call from each function
+// 18 functions for HL lib due to entry cloning
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id:.*]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+// CHECKHLLIB-NOT: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
+
+float4 a;
+
+/// Vertex Shader
+
+[shader("vertex")]
+float4 vs(): OUT
+{
+  float4 result = a + IsHelperLane();
+  return result;
+}
+
+/// Geometry Shader
+
+struct PosStruct {
+  float4 pos : SV_Position;
+};
+
+[shader("geometry")]
+[maxvertexcount(1)]
+void gs(triangle float4 array[3] : SV_Position,
+        inout PointStream<PosStruct> OutputStream)
+{
+  float4 result = a + IsHelperLane();
+  PosStruct output;
+  output.pos = result;
+  OutputStream.Append(output);
+  OutputStream.RestartStrip();
+}
+
+/// Hull Shader and Patch Constant function
+
+struct PCStruct
+{
+  float Edges[3]  : SV_TessFactor;
+  float Inside : SV_InsideTessFactor;
+  float4 test : TEST;
+};
+
+PCStruct pc(InputPatch<PosStruct, 3> ip,
+            OutputPatch<PosStruct, 3> op,
+            uint PatchID : SV_PrimitiveID)
+{
+  float4 result = a + IsHelperLane();
+  PCStruct a;
+  a.Edges[0] = ip[0].pos.w * result.x;
+  a.Edges[1] = ip[0].pos.w * result.y;
+  a.Edges[2] = ip[0].pos.w * result.z;
+  a.Inside = ip[0].pos.w * result.w;
+  return a;
+}
+
+[shader("hull")]
+[domain("tri")]
+[partitioning("fractional_odd")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(3)]
+[patchconstantfunc("pc")]
+PosStruct hs(InputPatch<PosStruct, 3> p,
+             uint i : SV_OutputControlPointID)
+{
+  float4 result = a + IsHelperLane();
+  PosStruct output;
+  output.pos = p[i].pos * result;
+  return output;
+}
+
+/// Domain Shader
+
+// domain shader that actually outputs the triangle vertices
+[shader("domain")]
+[domain("tri")]
+PosStruct ds(const float3 bary : SV_DomainLocation,
+             const OutputPatch<PosStruct, 3> patch)
+{
+  float4 result = a + IsHelperLane();
+  PosStruct v;
+  v.pos = patch[0].pos * result;
+  return v;
+}
+
+/// Pixel Shader
+
+[shader("pixel")]
+float4 ps(): SV_Target
+{
+  float4 result = a + IsHelperLane();
+  return ddx(result);
+}
+
+/// Compute Shader
+
+RWStructuredBuffer<float4> SB;
+
+[shader("compute")]
+[numthreads(14,12,3)]
+void cs(uint gidx : SV_GroupIndex)
+{
+  float4 result = a + IsHelperLane();
+  SB[gidx] = ddx(result);
+}
+
+/// Amplification Shader
+
+groupshared PosStruct pld;
+
+[shader("amplification")]
+[numthreads(1, 1, 1)]
+void as()
+{
+  float4 result = a + IsHelperLane();
+  pld.pos = result;
+  DispatchMesh(1, 1, 1, pld);
+}
+
+/// Mesh Shader
+
+[shader("mesh")]
+[numthreads(3, 1, 1)]
+[outputtopology("triangle")]
+void ms(out indices uint3 primIndices[1],
+        out vertices PosStruct verts[3],
+        in uint tig : SV_GroupIndex)
+{
+  float4 result = a + IsHelperLane();
+  SetMeshOutputCounts(3, 1);
+  if (tig == 0)
+    primIndices[0] = uint3(0,1,2);
+  verts[tig].pos = result;
+}
+
+/// Exported function
+export
+float4 testfn()
+{
+  float4 result = a + IsHelperLane();
+  return result;
+}

+ 2 - 1
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -1288,7 +1288,8 @@ static const char *OpCodeSignatures[] = {
   "(bind,index,nonUniformIndex)",  // CreateHandleFromBinding
   "(index,samplerHeap,nonUniformIndex)",  // CreateHandleFromHeap
   "(unpackMode,pk)",  // Unpack4x8
-  "(packMode,x,y,z,w)"  // Pack4x8
+  "(packMode,x,y,z,w)",  // Pack4x8
+  "()"  // IsHelperLane
 };
 // OPCODE-SIGS:END
 

+ 3 - 0
utils/hct/gen_intrin_main.txt

@@ -356,6 +356,9 @@ void [[]] SetMeshOutputCounts(in uint numVertices, in uint numPrimitives);
 // Amplification shader intrinsics:
 void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in uint threadGroupCountZ, in udt meshPayload);
 
+// Return true if the current lane is a helper lane
+bool [[ro]] IsHelperLane();
+
 // HL Op for allocating ray query object that default constructor uses
 uint [[hidden]] AllocateRayQuery(in uint flags);
 

+ 8 - 1
utils/hct/hctdb.py

@@ -449,6 +449,9 @@ class db_dxil(object):
         for i in "Pack4x8".split(","):
             self.name_idx[i].category = "Packing intrinsics"
             self.name_idx[i].shader_model = 6,6
+        for i in "IsHelperLane".split(","):
+            self.name_idx[i].category = "Helper Lanes"
+            self.name_idx[i].shader_model = 6,6
 
     def populate_llvm_instructions(self):
         # Add instructions that map to LLVM instructions.
@@ -1858,9 +1861,13 @@ class db_dxil(object):
             db_dxil_param(6, "$o", "w", "the fourth component of the vector")])
         next_op_idx += 1
 
+        self.add_dxil_op("IsHelperLane", next_op_idx, "IsHelperLane", "returns true on helper lanes in pixel shaders", "1", "ro", [
+            db_dxil_param(0, "i1", "", "result")])
+        next_op_idx += 1
+
         # End of DXIL 1.6 opcodes.
         self.set_op_count_for_version(1, 6, next_op_idx)
-        assert next_op_idx == 221, "221 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
+        assert next_op_idx == 222, "222 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()

部分文件因文件數量過多而無法顯示