Browse Source

Barycentric init (#250)

This change is the initial step to support Barycentric coordinates for shader model 6.1 / DXIL 1.1
 - Add GetBarycentrics, GetAttributeAtVertex intrinsics and their corresponding dxil ops
 - Lowering intrinsics to dxil ops
 - Adding codegen tests for these operations
Young Kim 8 years ago
parent
commit
108b897f40

+ 82 - 1
docs/DXIL.rst

@@ -1947,7 +1947,7 @@ ID  Name                           Description
 42  UMul_                          multiply of 32-bit operands to produce the correct full 64-bit result.
 43  UDiv_                          unsigned divide of the 32-bit operand src0 by the 32-bit operand src1.
 44  UAddc_                         unsigned add of 32-bit operand with the carry
-45  USubb_                         returns the USubb of the input values
+45  USubb_                         unsigned subtract of 32-bit operands with the borrow
 46  FMad_                          floating point multiply & add
 47  Fma_                           fused multiply-add
 48  IMad_                          Signed integer multiply & add
@@ -2039,6 +2039,11 @@ ID  Name                           Description
 134 LegacyDoubleToUInt32_          legacy function to convert double to uint32
 135 WaveAllBitCount_               returns the count of bits set to 1 across the wave
 136 WavePrefixBitCount_            returns the count of bits set to 1 on prior lanes
+137 Barycentrics_                  return weights at a current location.
+138 BarycentricsCentroid_          return weights at centroid location.
+139 BarycentricsSampleIndex_       return weights at the location of the sample specified by index
+140 BarycentricsSnapped_           return weights at the location specified in the pixel's 16x16 sample grid
+141 AttributeAtVertex_             returns the values of the attributes at the vertex.
 === ============================== =================================================================================================================
 
 
@@ -2075,6 +2080,82 @@ Atan
 
 Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2
 
+AttributeAtVertex
+~~~~~~~~~~~~~~~~~
+
+Returns the values of the attributes at the specified vertex. VertexID ranges from 0 to 2.
+
+Barycentrics
+~~~~~~~~~~~~
+
+weight = Barycentrics(VertexID)
+
+Returns the barycentric weight for the vertex selected by VertexID. All 3 triangle weights are available even though only two are strictly necessary.
+VertexID ranges from 0 to 2.
+
+The 3 values returned are NOT guaranteed to add up to floating-point 1.0 exactly.
+If it is desired for the pixel shader to receive weights with this property, it can reconstruct the third coordinate by subtracting the sum of the other two from 1.0.
+Also note that individual barycentric weights may take on arbitrarily large or arbitrarily small values, and are not constrained to be within the [0, 1] range
+– this may happen for screen-space (non-perspective-correct) barycentric interpolants, screenspace quad primitives, or external triangles.
+For triangle primitives, all 3 weights will typically contain non-zero values, but for line primitives the third barycentric weight (myBaryWeights.z) is guaranteed to be exactly 0.0.
+
+BarycentricsCentroid
+~~~~~~~~~~~~~~~~~~~~
+
+weight = BarycentricsCentroid(VertexID)
+
+Equivalent to Barycentrics but returns barycentric weights at the centroid.
+
+BarycentricsSampleIndex
+~~~~~~~~~~~~~~~~~~~~~~~
+
+weight = BarycentricsSampleIndex(VertexID, sampleIndex)
+
+Equivalent to Barycentrics but returns barycentric weights at the location of the sample specified by sampleIndex.
+
+BarycentricsSnapped
+~~~~~~~~~~~~~~~~~~~
+
+weight = BarycentricsSnapped(VertexID, offsetX, offsetY)
+
+Equivalent to Barycentrics but returns barycentric weights at a position given by a 2D offset from the pixel center on a 16x16 grid. Only the low 4 bits of each offset component (offsetX, offsetY) are used, interpreted as a signed value in 1/16-pixel steps as listed below.
+
++----------+----------------+
+|  4 bits  |     offset     |
++----------+----------------+
+|   1000   | -0.5f (-8/16)  |
++----------+----------------+
+|   1001   | -0.4375f(-7/16)|
++----------+----------------+
+|   1010   | -0.375f (-6/16)|
++----------+----------------+
+|   1011   | -0.3125f(-5/16)|
++----------+----------------+
+|   1100   | -0.25f (-4/16) |
++----------+----------------+
+|   1101   | -0.1875f(-3/16)|
++----------+----------------+
+|   1110   | -0.125f (-2/16)|
++----------+----------------+
+|   1111   | -0.0625f(-1/16)|
++----------+----------------+
+|   0000   | 0.0f (0/16)    |
++----------+----------------+
+|   0001   | 0.0625f (1/16) |
++----------+----------------+
+|   0010   | 0.125f (2/16)  |
++----------+----------------+
+|   0011   | 0.1875f (3/16) |
++----------+----------------+
+|   0100   | 0.25f (4/16)   |
++----------+----------------+
+|   0101   | 0.3125f (5/16) |
++----------+----------------+
+|   0110   | 0.375f (6/16)  |
++----------+----------------+
+|   0111   | 0.4375f (7/16) |
++----------+----------------+
+
 Bfi
 ~~~
 

+ 13 - 3
include/dxc/HLSL/DxilConstants.h

@@ -270,7 +270,7 @@ namespace DXIL {
   
     // Binary uint with carry or borrow
     UAddc = 44, // unsigned add of 32-bit operand with the carry
-    USubb = 45, // returns the USubb of the input values
+    USubb = 45, // unsigned subtract of 32-bit operands with the borrow
   
     // Bitcasts with different sizes
     BitcastF16toI16 = 125, // bitcast between different sizes
@@ -324,6 +324,11 @@ namespace DXIL {
     CycleCounterLegacy = 109, // CycleCounterLegacy
   
     // Pixel shader
+    AttributeAtVertex = 141, // returns the values of the attributes at the vertex.
+    Barycentrics = 137, // return weights at a current location.
+    BarycentricsCentroid = 138, // return weights at centroid location.
+    BarycentricsSampleIndex = 139, // return weights at the location of the sample specified by index
+    BarycentricsSnapped = 140, // return weights at the location specified in the pixel's 16x16 sample grid
     CalculateLOD = 81, // calculates the level of detail
     Coverage = 91, // returns the coverage mask input in a pixel shader
     DerivCoarseX = 83, // computes the rate of change per stamp in x direction.
@@ -444,7 +449,7 @@ namespace DXIL {
     WaveReadLaneAt = 117, // returns the value from the specified lane
     WaveReadLaneFirst = 118, // returns the value from the first lane
   
-    NumOpCodes = 137 // exclusive last value of enumeration
+    NumOpCodes = 142 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -516,6 +521,11 @@ namespace DXIL {
     CycleCounterLegacy,
   
     // Pixel shader
+    AttributeAtVertex,
+    Barycentrics,
+    BarycentricsCentroid,
+    BarycentricsSampleIndex,
+    BarycentricsSnapped,
     CalculateLOD,
     Coverage,
     Discard,
@@ -595,7 +605,7 @@ namespace DXIL {
     WaveReadLaneAt,
     WaveReadLaneFirst,
   
-    NumOpClasses = 93 // exclusive last value of enumeration
+    NumOpClasses = 98 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 

+ 97 - 1
include/dxc/HLSL/DxilInstructions.h

@@ -1518,7 +1518,7 @@ struct DxilInst_UAddc {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 
-/// This instruction returns the USubb of the input values
+/// This instruction unsigned subtract of 32-bit operands with the borrow
 struct DxilInst_USubb {
   const llvm::Instruction *Instr;
   // Construction and identification
@@ -3347,5 +3347,101 @@ struct DxilInst_WavePrefixBitCount {
   // Accessors
   llvm::Value *get_value() const { return Instr->getOperand(1); }
 };
+
+/// Thin view over an instruction treated as a DXIL Barycentrics call, which returns a barycentric weight at the current location.
+struct DxilInst_Barycentrics {
+  const llvm::Instruction *Instr; // wrapped instruction; stored as given, with no lifetime management in this struct
+  // Construction and identification: wrapping never fails, so test the wrapper as a bool before using it
+  DxilInst_Barycentrics(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true when IsDxilOpFuncCallInst accepts Instr for the Barycentrics opcode
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Barycentrics);
+  }
+  // Validation support
+  bool isAllowed() const { return true; } // unconditionally true in this helper
+  bool isArgumentListValid() const { // valid only with exactly 2 call arguments (opcode, VertexID)
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; call only after operator bool() succeeded
+    return true;
+  }
+  // Accessors (operand 0 is the opcode constant, so named operands start at index 1)
+  llvm::Value *get_VertexID() const { return Instr->getOperand(1); }
+};
+
+/// Thin view over an instruction treated as a DXIL BarycentricsCentroid call, which returns a barycentric weight at the centroid location.
+struct DxilInst_BarycentricsCentroid {
+  const llvm::Instruction *Instr; // wrapped instruction; stored as given, with no lifetime management in this struct
+  // Construction and identification: wrapping never fails, so test the wrapper as a bool before using it
+  DxilInst_BarycentricsCentroid(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true when IsDxilOpFuncCallInst accepts Instr for the BarycentricsCentroid opcode
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarycentricsCentroid);
+  }
+  // Validation support
+  bool isAllowed() const { return true; } // unconditionally true in this helper
+  bool isArgumentListValid() const { // valid only with exactly 2 call arguments (opcode, VertexID)
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; call only after operator bool() succeeded
+    return true;
+  }
+  // Accessors (operand 0 is the opcode constant, so named operands start at index 1)
+  llvm::Value *get_VertexID() const { return Instr->getOperand(1); }
+};
+
+/// Thin view over an instruction treated as a DXIL BarycentricsSampleIndex call, which returns a barycentric weight at the location of the sample specified by index.
+struct DxilInst_BarycentricsSampleIndex {
+  const llvm::Instruction *Instr; // wrapped instruction; stored as given, with no lifetime management in this struct
+  // Construction and identification: wrapping never fails, so test the wrapper as a bool before using it
+  DxilInst_BarycentricsSampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true when IsDxilOpFuncCallInst accepts Instr for the BarycentricsSampleIndex opcode
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarycentricsSampleIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; } // unconditionally true in this helper
+  bool isArgumentListValid() const { // valid only with exactly 3 call arguments (opcode, VertexID, sampleIndex)
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; call only after operator bool() succeeded
+    return true;
+  }
+  // Accessors (operand 0 is the opcode constant, so named operands start at index 1)
+  llvm::Value *get_VertexID() const { return Instr->getOperand(1); }
+  llvm::Value *get_sampleIndex() const { return Instr->getOperand(2); }
+};
+
+/// Thin view over an instruction treated as a DXIL BarycentricsSnapped call, which returns a barycentric weight at a location in the pixel's 16x16 sample grid.
+struct DxilInst_BarycentricsSnapped {
+  const llvm::Instruction *Instr; // wrapped instruction; stored as given, with no lifetime management in this struct
+  // Construction and identification: wrapping never fails, so test the wrapper as a bool before using it
+  DxilInst_BarycentricsSnapped(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true when IsDxilOpFuncCallInst accepts Instr for the BarycentricsSnapped opcode
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarycentricsSnapped);
+  }
+  // Validation support
+  bool isAllowed() const { return true; } // unconditionally true in this helper
+  bool isArgumentListValid() const { // valid only with exactly 4 call arguments (opcode, VertexID, offsetX, offsetY)
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; call only after operator bool() succeeded
+    return true;
+  }
+  // Accessors (operand 0 is the opcode constant, so named operands start at index 1)
+  llvm::Value *get_VertexID() const { return Instr->getOperand(1); }
+  llvm::Value *get_offsetX() const { return Instr->getOperand(2); }
+  llvm::Value *get_offsetY() const { return Instr->getOperand(3); }
+};
+
+/// Thin view over an instruction treated as a DXIL AttributeAtVertex call, which returns the values of the attributes at the vertex.
+struct DxilInst_AttributeAtVertex {
+  const llvm::Instruction *Instr; // wrapped instruction; stored as given, with no lifetime management in this struct
+  // Construction and identification: wrapping never fails, so test the wrapper as a bool before using it
+  DxilInst_AttributeAtVertex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true when IsDxilOpFuncCallInst accepts Instr for the AttributeAtVertex opcode
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AttributeAtVertex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; } // unconditionally true in this helper
+  bool isArgumentListValid() const { // valid only with exactly 5 call arguments (opcode plus the four operands below)
+    if (5 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false; // NOTE(review): dyn_cast result is dereferenced unchecked; call only after operator bool() succeeded
+    return true;
+  }
+  // Accessors (operand 0 is the opcode constant, so named operands start at index 1)
+  llvm::Value *get_inputSigId() const { return Instr->getOperand(1); }
+  llvm::Value *get_inputRowIndex() const { return Instr->getOperand(2); }
+  llvm::Value *get_inputColIndex() const { return Instr->getOperand(3); }
+  llvm::Value *get_VertexID() const { return Instr->getOperand(4); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 5 - 0
include/dxc/HlslIntrinsicOp.h

@@ -31,6 +31,11 @@ import hctdb_instrhelp
   IOP_EvaluateAttributeAtSample,
   IOP_EvaluateAttributeCentroid,
   IOP_EvaluateAttributeSnapped,
+  IOP_GetAttributeAtVertex,
+  IOP_GetBarycentrics,
+  IOP_GetBarycentricsAtSample,
+  IOP_GetBarycentricsCentroid,
+  IOP_GetBarycentricsSnapped,
   IOP_GetRenderTargetSampleCount,
   IOP_GetRenderTargetSamplePosition,
   IOP_GroupMemoryBarrier,

+ 18 - 0
lib/HLSL/DxilOperations.cpp

@@ -238,6 +238,13 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::None,     },
+
+  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
+  {  OC::Barycentrics,            "Barycentrics",             OCC::Barycentrics,             "barycentrics",               false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BarycentricsCentroid,    "BarycentricsCentroid",     OCC::BarycentricsCentroid,     "barycentricsCentroid",       false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BarycentricsSampleIndex, "BarycentricsSampleIndex",  OCC::BarycentricsSampleIndex,  "barycentricsSampleIndex",    false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BarycentricsSnapped,     "BarycentricsSnapped",      OCC::BarycentricsSnapped,      "barycentricsSnapped",        false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::AttributeAtVertex,       "AttributeAtVertex",        OCC::AttributeAtVertex,        "attributeAtVertex",          false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
 };
 // OPCODE-OLOADS:END
 
@@ -687,6 +694,13 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
     // Wave
   case OpCode::WaveAllBitCount:        A(pI32);     A(pI32); A(pI1);  break;
   case OpCode::WavePrefixBitCount:     A(pI32);     A(pI32); A(pI1);  break;
+
+    // Pixel shader
+  case OpCode::Barycentrics:           A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::BarycentricsCentroid:   A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::BarycentricsSampleIndex:A(pF32);     A(pI32); A(pI8);  A(pI32); break;
+  case OpCode::BarycentricsSnapped:    A(pF32);     A(pI32); A(pI8);  A(pI32); A(pI32); break;
+  case OpCode::AttributeAtVertex:      A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI8);  break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -820,6 +834,10 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
     return IntegerType::get(m_Ctx, 32);
   case OpCode::CalculateLOD:
   case OpCode::DomainLocation:
+  case OpCode::Barycentrics:
+  case OpCode::BarycentricsCentroid:
+  case OpCode::BarycentricsSampleIndex:
+  case OpCode::BarycentricsSnapped:
     return Type::getFloatTy(m_Ctx);
   case OpCode::MakeDouble:
   case OpCode::SplitDouble:

+ 4 - 2
lib/HLSL/DxilValidation.cpp

@@ -577,8 +577,10 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // RenderTargetGetSamplePosition=76, RenderTargetGetSampleCount=77,
   // CalculateLOD=81, Discard=82, DerivCoarseX=83, DerivCoarseY=84,
   // DerivFineX=85, DerivFineY=86, EvalSnapped=87, EvalSampleIndex=88,
-  // EvalCentroid=89, SampleIndex=90, Coverage=91, InnerCoverage=92
-  if (60 <= op && op <= 61 || op == 64 || 76 <= op && op <= 77 || 81 <= op && op <= 92)
+  // EvalCentroid=89, SampleIndex=90, Coverage=91, InnerCoverage=92,
+  // Barycentrics=137, BarycentricsCentroid=138, BarycentricsSampleIndex=139,
+  // BarycentricsSnapped=140, AttributeAtVertex=141
+  if (60 <= op && op <= 61 || op == 64 || 76 <= op && op <= 77 || 81 <= op && op <= 92 || 137 <= op && op <= 141)
     return pSM->IsPS();
   return true;
   // VALOPCODESM-TEXT:END

+ 107 - 0
lib/HLSL/HLOperationLower.cpp

@@ -742,6 +742,108 @@ Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   return result;
 }
 
+// Barycentrics intrinsics: lowers an argument-less barycentrics HL call (Barycentrics or BarycentricsCentroid) into three scalar DXIL calls, one per VertexID 0..2, gathered into a vector of CI's type.
+Value *TranslateBarycentrics(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
+                             HLOperationLowerHelper &helper,
+                             HLObjectOperationLowerHelper *pObjHelper,
+                             bool &Translated) { // IOP, pObjHelper and Translated are not used in this body
+  DXASSERT(op == OP::OpCode::Barycentrics ||
+           op == OP::OpCode::BarycentricsCentroid,
+           "Wrong opcode to translate");
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  Function *evalFunc = hlslOP->GetOpFunc(op, Type::getFloatTy(CI->getContext())); // float overload of the DXIL op
+  Value *opArg = hlslOP->GetU32Const((unsigned)op); // the opcode travels as the first call argument
+
+  Value *result = UndefValue::get(CI->getType()); // start from undef; lanes are filled in the loop
+  IRBuilder<> Builder(CI); // new instructions are inserted before CI
+  for (unsigned i = 0; i < 3; ++i) { // one weight per triangle vertex
+    Value *Elt = Builder.CreateCall(evalFunc, { opArg, hlslOP->GetI8Const(i) }); // VertexID is passed as an i8 constant
+    result = Builder.CreateInsertElement(result, Elt, i);
+  }
+  return result; // CI itself is left in place here; replacing it is presumably the caller's job - confirm
+}
+
+Value *TranslateBarycentricsSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, // lowers the per-sample barycentrics HL call into three BarycentricsSampleIndex DXIL calls (VertexID 0..2)
+  HLOperationLowerHelper &helper,
+  HLObjectOperationLowerHelper *pObjHelper,
+  bool &Translated) { // IOP, pObjHelper and Translated are not used in this body
+  DXASSERT(op == OP::OpCode::BarycentricsSampleIndex, "Wrong opcode to translate");
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  Function *evalFunc = hlslOP->GetOpFunc(op, Type::getFloatTy(CI->getContext())); // float overload of the DXIL op
+  Value *opArg = hlslOP->GetU32Const((unsigned)op); // the opcode travels as the first call argument
+  Value *sampleIndex = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx); // the HL call's source operand, forwarded unchanged
+
+  Value *result = UndefValue::get(CI->getType()); // start from undef; lanes are filled in the loop
+  IRBuilder<> Builder(CI); // new instructions are inserted before CI
+  for (unsigned i = 0; i < 3; ++i) { // one weight per triangle vertex
+    Value *Elt = Builder.CreateCall(evalFunc, { opArg, hlslOP->GetI8Const(i), sampleIndex });
+    result = Builder.CreateInsertElement(result, Elt, i);
+  }
+  return result; // vector of the three weights, in VertexID order
+}
+
+Value *TranslateBarycentricsSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, // lowers the snapped barycentrics HL call into three BarycentricsSnapped DXIL calls (VertexID 0..2)
+  HLOperationLowerHelper &helper,
+  HLObjectOperationLowerHelper *pObjHelper,
+  bool &Translated) { // IOP, pObjHelper and Translated are not used in this body
+  DXASSERT(op == OP::OpCode::BarycentricsSnapped, "Wrong opcode to translate");
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  Function *evalFunc = hlslOP->GetOpFunc(op, Type::getFloatTy(CI->getContext())); // float overload of the DXIL op
+  Value *opArg = hlslOP->GetU32Const((unsigned)op); // the opcode travels as the first call argument
+  Value *offsets = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx); // the HL call's source operand: a vector holding the X and Y offsets
+
+  Value *result = UndefValue::get(CI->getType()); // start from undef; lanes are filled in the loop
+  IRBuilder<> Builder(CI); // new instructions are inserted before CI
+  Value *offsetX = Builder.CreateExtractElement(offsets, (uint64_t)0); // cast keeps the literal 0 from being read as a null pointer overload
+  Value *offsetY = Builder.CreateExtractElement(offsets, 1);
+  for (unsigned i = 0; i < 3; ++i) { // one weight per triangle vertex
+    Value *Elt = Builder.CreateCall(evalFunc, { opArg, hlslOP->GetI8Const(i), offsetX, offsetY }); // the DXIL op takes the offset as two scalars
+    result = Builder.CreateInsertElement(result, Elt, i);
+  }
+  return result; // vector of the three weights, in VertexID order
+}
+
+// Lowers GetAttributeAtVertex(value, VertexID) into one AttributeAtVertex DXIL call per input
+// load feeding value. Emits an error and returns undef unless VertexID is a constant in 0..2.
+Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
+  HLOperationLowerHelper &helper,
+  HLObjectOperationLowerHelper *pObjHelper,
+  bool &Translated) {
+  DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  IRBuilder<> Builder(CI);
+  Type *Ty = CI->getType();
+  Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
+  Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
+  // Range-check the original operand, not its i8 truncation: a constant such as 256
+  // would wrap to 0 when narrowed and be wrongly accepted as a valid VertexID.
+  ConstantInt *vertexConst = dyn_cast<ConstantInt>(vertexIdx);
+  if (!vertexConst || vertexConst->getZExtValue() > 2) {
+    CI->getContext().emitError(CI, "VertexID at GetAttributeAtVertex can only range from 0 to 2");
+    return UndefValue::get(Ty);
+  }
+  Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext())); // the DXIL op takes VertexID as i8
+
+  std::vector<CallInst*> loadList;
+  Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
+
+  unsigned size = loadList.size();
+  Value *opArg = hlslOP->GetU32Const((unsigned)op); // the opcode travels as the first call argument
+  Function *evalFunc = hlslOP->GetOpFunc(op, Ty->getScalarType()); // overload chosen by the scalar element type
+  Value *result = UndefValue::get(Ty);
+  for (unsigned i = 0; i < size; ++i) {
+    CallInst *loadInput = loadList[size - 1 - i]; // walk the collected loads back to front
+    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
+    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
+    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
+    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx,  vertexI8Idx });
+    result = Builder.CreateInsertElement(result, Elt, i);
+  }
+  if (shufMask) // a mask is returned when the gathered lanes must be reordered to match val
+    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
+  return result;
+}
+
 Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                              HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
@@ -4022,6 +4124,11 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
     {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
+    {IntrinsicOp::IOP_GetBarycentrics, TranslateBarycentrics, DXIL::OpCode::Barycentrics},
+    {IntrinsicOp::IOP_GetBarycentricsAtSample, TranslateBarycentricsSample, DXIL::OpCode::BarycentricsSampleIndex},
+    {IntrinsicOp::IOP_GetBarycentricsCentroid, TranslateBarycentrics, DXIL::OpCode::BarycentricsCentroid},
+    {IntrinsicOp::IOP_GetBarycentricsSnapped, TranslateBarycentricsSnapped, DXIL::OpCode::BarycentricsSnapped},
     {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
     {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},

File diff suppressed because it is too large
+ 157 - 128
tools/clang/lib/Sema/gen_intrin_main_tables_15.h


+ 1 - 1
tools/clang/test/CodeGenHLSL/abs1.hlsl

@@ -2,7 +2,7 @@
 
 // CHECK: main
 // After lowering, these would turn into multiple abs calls rather than a 4 x float
-// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 59,
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 64,
 
 float4 main(float4 a : A) : SV_TARGET {
   return abs(a*a.yxxx);

+ 23 - 0
tools/clang/test/CodeGenHLSL/attributeAtVertex.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 0, i32 0, i8 0, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 0, i32 0, i8 1, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 0, i32 0, i8 2, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 0, i32 0, i8 3, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 1, i32 0, i8 0, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 1, i32 0, i8 1, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 1, i32 0, i8 2, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 1, i32 0, i8 3, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 2, i32 0, i8 0, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 2, i32 0, i8 1, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 2, i32 0, i8 2, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 141, i32 2, i32 0, i8 3, i8 2)
+
+float4 main(float4 a : A, float4 b : B, float4 c : C) : SV_Target // pixel shader entry: reads each of the three inputs at a different vertex
+{
+  float4 a0 = GetAttributeAtVertex(a, 0); // input A at VertexID 0
+  float4 b1 = GetAttributeAtVertex(b, 1); // input B at VertexID 1
+  float4 c2 = GetAttributeAtVertex(c, 2); // input C at VertexID 2
+
+  return a0 + b1 + c2; // all three results feed the output
+}

+ 25 - 0
tools/clang/test/CodeGenHLSL/barycentrics.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: call float @dx.op.barycentrics.f32
+// CHECK: call float @dx.op.barycentricsSampleIndex.f32
+// CHECK: call float @dx.op.barycentricsCentroid.f32
+// CHECK: call float @dx.op.barycentricsSnapped.f32
+
+float4 main(float4 a : A) : SV_Target // pixel shader entry: exercises all four barycentrics intrinsics; input a is not read
+{
+  float4 vcolor0 = float4(1,0,0,1); // constant colour weighted by the first barycentric coordinate
+  float4 vcolor1 = float4(0,1,0,1); // constant colour weighted by the second barycentric coordinate
+  float4 vcolor2 = float4(0,0,0,1); // NOTE(review): only alpha is non-zero here; possibly meant float4(0,0,1,1) - confirm
+
+  float3 bary = GetBarycentrics(); // weights at the current location
+  float3 barySample = GetBarycentricsAtSample(0); // weights at sample index 0
+  float3 baryCentroid = GetBarycentricsCentroid(); // weights at the centroid
+  float3 barySnapped = GetBarycentricsSnapped(uint2(4,10)); // weights at a snapped offset from the pixel center
+
+  float4 baryColor = bary.x * vcolor0 + bary.y * vcolor1 + bary.z * vcolor2; // blend the three colours with each weight set
+  float4 barySampleColor = barySample.x * vcolor0 + barySample.y * vcolor1 + barySample.z * vcolor2;
+  float4 baryCentroidColor = baryCentroid.x * vcolor0 + baryCentroid.y * vcolor1 + baryCentroid.z * vcolor2;
+  float4 barySnappedColor = barySnapped.x * vcolor0 + barySnapped.y * vcolor1 + barySnapped.z * vcolor2;
+
+  return (baryColor + barySampleColor + baryCentroidColor + barySnappedColor) / 4; // average, so every intrinsic result reaches the output
+}

+ 6 - 1
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -1589,7 +1589,12 @@ static const char *OpCodeSignatures[] = {
   "(value)",  // LegacyDoubleToSInt32
   "(value)",  // LegacyDoubleToUInt32
   "(value)",  // WaveAllBitCount
-  "(value)"  // WavePrefixBitCount
+  "(value)",  // WavePrefixBitCount
+  "(VertexID)",  // Barycentrics
+  "(VertexID)",  // BarycentricsCentroid
+  "(VertexID,sampleIndex)",  // BarycentricsSampleIndex
+  "(VertexID,offsetX,offsetY)",  // BarycentricsSnapped
+  "(inputSigId,inputRowIndex,inputColIndex,VertexID)"  // AttributeAtVertex
 };
 // OPCODE-SIGS:END
 

+ 10 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -315,6 +315,8 @@ public:
   TEST_METHOD(CodeGenAsUint)
   TEST_METHOD(CodeGenAsUint2)
   TEST_METHOD(CodeGenAtomic)
+  TEST_METHOD(CodeGenAttributeAtVertex)
+  TEST_METHOD(CodeGenBarycentrics)
   TEST_METHOD(CodeGenBinary1)
   TEST_METHOD(CodeGenBoolComb)
   TEST_METHOD(CodeGenBoolSvTarget)
@@ -2169,6 +2171,14 @@ TEST_F(CompilerTest, CodeGenAtomic) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\atomic.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenAttributeAtVertex) { // declared with TEST_METHOD(CodeGenAttributeAtVertex) in the fixture class
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\attributeAtVertex.hlsl"); // presumably compiles the shader and applies its embedded FileCheck patterns - confirm against CodeGenTestCheck
+}
+
+TEST_F(CompilerTest, CodeGenBarycentrics) { // declared with TEST_METHOD(CodeGenBarycentrics) in the fixture class
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\barycentrics.hlsl"); // presumably compiles the shader and applies its embedded FileCheck patterns - confirm against CodeGenTestCheck
+}
+
 TEST_F(CompilerTest, CodeGenBinary1) {
   CodeGenTest(L"..\\CodeGenHLSL\\binary1.hlsl");
 }

+ 2 - 2
tools/clang/unittests/HLSL/ExtensionTest.cpp

@@ -731,11 +731,11 @@ TEST_F(ExtensionTest, UnsignedOpcodeIsUnchanged) {
 
   // - opcode is unchanged when it matches an hlsl intrinsic with
   //   an unsigned version.
-  // Note that 113 is the current opcode for IOP_min. If that opcode
+  // Note that 118 is the current opcode for IOP_min. If that opcode
   // changes the test will need to be updated to reflect the new opcode.
   VERIFY_IS_TRUE(
     disassembly.npos !=
-    disassembly.find("call i32 @test_unsigned(i32 113, "));
+    disassembly.find("call i32 @test_unsigned(i32 118, "));
 }
 
 TEST_F(ExtensionTest, ResourceExtensionIntrinsic) {

+ 5 - 0
utils/hct/gen_intrin_main.txt

@@ -121,6 +121,11 @@ $type1 [[rn]] dst(in numeric<4> a, in $type1 b);
 $type1 [[rn]] EvaluateAttributeAtSample(in numeric<> value, in uint index);
 $type1 [[rn]] EvaluateAttributeCentroid(in numeric<> value);
 $type1 [[rn]] EvaluateAttributeSnapped(in numeric<> value, in int<2> offset);
+$type1 [[rn]] GetAttributeAtVertex(in numeric<> value, in uint VertexID);
+float<3> [[rn]] GetBarycentrics();
+float<3> [[rn]] GetBarycentricsCentroid();
+float<3> [[rn]] GetBarycentricsAtSample(in uint index);
+float<3> [[rn]] GetBarycentricsSnapped(in int<2> offset);
 $type1 [[rn]] exp(in float_like<> x);
 $type1 [[rn]] exp2(in float_like<> x);
 float<> [[rn]] f16tof32(in uint<> x);

+ 29 - 3
utils/hct/hctdb.py

@@ -240,7 +240,7 @@ class db_dxil(object):
             self.name_idx[i].category = "Resources - gather"
         for i in "AtomicBinOp,AtomicCompareExchange,Barrier".split(","):
             self.name_idx[i].category = "Synchronization"
-        for i in "CalculateLOD,Discard,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,EvalSnapped,EvalSampleIndex,EvalCentroid,SampleIndex,Coverage,InnerCoverage".split(","):
+        for i in "CalculateLOD,Discard,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,EvalSnapped,EvalSampleIndex,EvalCentroid,SampleIndex,Coverage,InnerCoverage,Barycentrics,BarycentricsCentroid,BarycentricsSampleIndex,BarycentricsSnapped,AttributeAtVertex".split(","):
             self.name_idx[i].category = "Pixel shader"
             self.name_idx[i].shader_models = "p"
         for i in "ThreadId,GroupId,ThreadIdInGroup,FlattenedThreadIdInGroup".split(","):
@@ -1036,8 +1036,34 @@ class db_dxil(object):
             db_dxil_param(0, "i32", "", "operation result"),
             db_dxil_param(2, "i1", "value", "input value")])
         next_op_idx += 1
-
-        assert next_op_idx == 137, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
+        self.add_dxil_op("Barycentrics", next_op_idx, "Barycentrics", "return weights at a current location.", "f", "rn", [
+            db_dxil_param(0, "f", "", "result"),
+            db_dxil_param(2, "i8", "VertexID", "Vertex Index")])
+        next_op_idx += 1
+        self.add_dxil_op("BarycentricsCentroid", next_op_idx, "BarycentricsCentroid", "return weights at centroid location.", "f", "rn", [
+            db_dxil_param(0, "f", "", "result"),
+            db_dxil_param(2, "i8", "VertexID", "Vertex Index")])
+        next_op_idx += 1
+        self.add_dxil_op("BarycentricsSampleIndex", next_op_idx, "BarycentricsSampleIndex", "return weights at the location of the sample specified by index", "f", "rn", [
+            db_dxil_param(0, "f", "", "result"),
+            db_dxil_param(2, "i8", "VertexID", "Vertex Index"),
+            db_dxil_param(3, "i32", "sampleIndex", "sample index")])
+        next_op_idx += 1
+        self.add_dxil_op("BarycentricsSnapped", next_op_idx, "BarycentricsSnapped", "return weights at the location specified in the pixel's 16x16 sample grid", "f", "rn", [
+            db_dxil_param(0, "f", "", "result"),
+            db_dxil_param(2, "i8", "VertexID", "Vertex Index"),
+            db_dxil_param(3, "i32", "offsetX", "2D offset from the pixel center using a 16x16 grid"),
+            db_dxil_param(4, "i32", "offsetY", "2D offset from the pixel center using a 16x16 grid")])
+        next_op_idx += 1
+        self.add_dxil_op("AttributeAtVertex", next_op_idx, "AttributeAtVertex", "returns the values of the attributes at the vertex.", "hf", "rn", [
+            db_dxil_param(0, "$o", "", "result"),
+            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
+            db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
+            db_dxil_param(4, "i8", "inputColIndex", "column index of an input attribute"),
+            db_dxil_param(5, "i8", "VertexID", "Vertex Index")
+        ])
+        next_op_idx += 1
+        assert next_op_idx == 142, "next operation index is %d rather than 142 and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()

+ 71 - 0
utils/hct/hctdb_inst_docs.txt

@@ -584,3 +584,74 @@ Either of destHI or destLO may be specified as NULL instead of specifying a regi
 * Inst: USubb - unsigned subtract of 32-bit operands with the borrow
 
 dest0, dest1 = USubb(src0, src1)
+
+* Inst: Barycentrics - return weights at a current location.
+
+weight = Barycentrics(VertexID)
+
+Returns the barycentric weight for the vertex selected by VertexID. All 3 triangle weights are available even though only two are strictly necessary.
+VertexID ranges from 0 to 2.
+
+The 3 values returned are NOT guaranteed to add up to floating-point 1.0 exactly.
+If it is desired for the pixel shader to receive weights with this property, it can reconstruct the third coordinate by subtracting the sum of the other two from 1.0.
+Also note that individual barycentric weights may take on arbitrarily large or arbitrarily small values, and are not constrained to be within the [0, 1] range
+– this may happen for screen-space (non-perspective-correct) barycentric interpolants, screenspace quad primitives, or external triangles.
+For triangle primitives, all 3 weights will typically contain non-zero values, but for line primitives the third barycentric weight (myBaryWeights.z) is guaranteed to be exactly 0.0.
+
+* Inst: BarycentricsCentroid - return weights at centroid location.
+
+weight = BarycentricsCentroid(VertexID)
+
+Equivalent to Barycentrics but returns barycentric weights at the centroid.
+
+* Inst: BarycentricsSampleIndex - return weights at the location of the sample specified by index
+
+weight = BarycentricsSampleIndex(VertexID, sampleIndex)
+
+Equivalent to Barycentrics but returns barycentric weights at the location of the sample specified by sampleIndex.
+
+* Inst: BarycentricsSnapped - return weights at the location specified in the pixel's 16x16 sample grid
+
+weight = BarycentricsSnapped(VertexID, offsetX, offsetY)
+
+Equivalent to Barycentrics but returns barycentric weights at a position given by a 2D offset from the pixel center on a 16x16 grid. Only the low 4 bits of each offset component (offsetX, offsetY) are used, interpreted as a signed value in 1/16-pixel steps as listed below.
+
++----------+----------------+
+|  4 bits  |     offset     |
++----------+----------------+
+|   1000   | -0.5f (-8/16)  |
++----------+----------------+
+|   1001   | -0.4375f(-7/16)|
++----------+----------------+
+|   1010   | -0.375f (-6/16)|
++----------+----------------+
+|   1011   | -0.3125f(-5/16)|
++----------+----------------+
+|   1100   | -0.25f (-4/16) |
++----------+----------------+
+|   1101   | -0.1875f(-3/16)|
++----------+----------------+
+|   1110   | -0.125f (-2/16)|
++----------+----------------+
+|   1111   | -0.0625f(-1/16)|
++----------+----------------+
+|   0000   | 0.0f (0/16)    |
++----------+----------------+
+|   0001   | 0.0625f (1/16) |
++----------+----------------+
+|   0010   | 0.125f (2/16)  |
++----------+----------------+
+|   0011   | 0.1875f (3/16) |
++----------+----------------+
+|   0100   | 0.25f (4/16)   |
++----------+----------------+
+|   0101   | 0.3125f (5/16) |
++----------+----------------+
+|   0110   | 0.375f (6/16)  |
++----------+----------------+
+|   0111   | 0.4375f (7/16) |
++----------+----------------+
+
+* Inst: AttributeAtVertex - returns the values of the attributes at the vertex.
+
+Returns the values of the attributes at the specified vertex. VertexID ranges from 0 to 2.

Some files were not shown because too many files changed in this diff