2
0
Эх сурвалжийг харах

[spirv] Translate intrinsic dst function. (#801)

Ehsan 8 жил өмнө
parent
commit
576a805117

+ 3 - 0
docs/SPIR-V.rst

@@ -1446,6 +1446,9 @@ extended instruction mapping, so they are handled with additional steps:
 - ``D3DCOLORtoUBYTE4``: Converts a floating-point, 4D vector set by a D3DCOLOR to a UBYTE4.
 - ``D3DCOLORtoUBYTE4``: Converts a floating-point, 4D vector set by a D3DCOLOR to a UBYTE4.
   This is achieved by performing ``int4(input.zyxw * 255.002)`` using SPIR-V ``OpVectorShuffle``,
   This is achieved by performing ``int4(input.zyxw * 255.002)`` using SPIR-V ``OpVectorShuffle``,
   ``OpVectorTimesScalar``, and ``OpConvertFToS``, respectively.
   ``OpVectorTimesScalar``, and ``OpConvertFToS``, respectively.
+- ``dst``: Calculates a distance vector. The resulting vector, ``dest``, is defined component-wise as
+  ``dest.x = 1.0``, ``dest.y = src0.y * src1.y``, ``dest.z = src0.z``, and ``dest.w = src1.w``.
+  Uses SPIR-V ``OpCompositeExtract`` and ``OpFMul``.
 
 
 Using SPIR-V opcode
 Using SPIR-V opcode
 ~~~~~~~~~~~~~~~~~~~
 ~~~~~~~~~~~~~~~~~~~

+ 22 - 0
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -4127,6 +4127,8 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   case hlsl::IntrinsicOp::IOP_clip: {
   case hlsl::IntrinsicOp::IOP_clip: {
     return processIntrinsicClip(callExpr);
     return processIntrinsicClip(callExpr);
   }
   }
+  case hlsl::IntrinsicOp::IOP_dst:
+    return processIntrinsicDst(callExpr);
   case hlsl::IntrinsicOp::IOP_clamp:
   case hlsl::IntrinsicOp::IOP_clamp:
   case hlsl::IntrinsicOp::IOP_uclamp:
   case hlsl::IntrinsicOp::IOP_uclamp:
     return processIntrinsicClamp(callExpr);
     return processIntrinsicClamp(callExpr);
@@ -4568,6 +4570,26 @@ uint32_t SPIRVEmitter::processIntrinsicFrexp(const CallExpr *callExpr) {
   return 0;
   return 0;
 }
 }
 
 
+uint32_t SPIRVEmitter::processIntrinsicDst(const CallExpr *callExpr) {
+  // Translates the HLSL intrinsic: float4 dst(float4 src0, float4 src1).
+  // The result is assembled component by component from the two arguments:
+  //   result.x = 1;          result.y = src0.y * src1.y;
+  //   result.z = src0.z;     result.w = src1.w;
+  // Returns the value produced by createCompositeConstruct for that vector.
+  const auto floatId = theBuilder.getFloat32Type(); // type used for every extracted scalar
+  const auto arg0Id = doExpr(callExpr->getArg(0)); // src0
+  const auto arg1Id = doExpr(callExpr->getArg(1)); // src1
+  const auto arg0y = theBuilder.createCompositeExtract(floatId, arg0Id, {1}); // src0.y
+  const auto arg1y = theBuilder.createCompositeExtract(floatId, arg1Id, {1}); // src1.y
+  const auto arg0z = theBuilder.createCompositeExtract(floatId, arg0Id, {2}); // src0.z
+  const auto arg1w = theBuilder.createCompositeExtract(floatId, arg1Id, {3}); // src1.w
+  const auto arg0yMularg1y =
+      theBuilder.createBinaryOp(spv::Op::OpFMul, floatId, arg0y, arg1y); // src0.y * src1.y
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(callExpr->getType()), // result type taken from the call itself
+      {theBuilder.getConstantFloat32(1.0), arg0yMularg1y, arg0z, arg1w}); // (1, y, z, w)
+}
+
 uint32_t SPIRVEmitter::processIntrinsicClip(const CallExpr *callExpr) {
 uint32_t SPIRVEmitter::processIntrinsicClip(const CallExpr *callExpr) {
   // Discards the current pixel if the specified value is less than zero.
   // Discards the current pixel if the specified value is less than zero.
   // TODO: If the argument can be const folded and evaluated, we could
   // TODO: If the argument can be const folded and evaluated, we could

+ 3 - 0
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -261,6 +261,9 @@ private:
   /// specified value is less than zero.
   /// specified value is less than zero.
   uint32_t processIntrinsicClip(const CallExpr *);
   uint32_t processIntrinsicClip(const CallExpr *);
 
 
+  /// Processes the 'dst' intrinsic function: (1, src0.y * src1.y, src0.z, src1.w).
+  uint32_t processIntrinsicDst(const CallExpr *);
+
   /// Processes the 'clamp' intrinsic function.
   /// Processes the 'clamp' intrinsic function.
   uint32_t processIntrinsicClamp(const CallExpr *);
   uint32_t processIntrinsicClamp(const CallExpr *);
 
 

+ 25 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.dst.hlsl

@@ -0,0 +1,25 @@
+// Run: %dxc -T vs_6_0 -E main
+
+// According to the HLSL reference:
+// https://msdn.microsoft.com/en-us/library/windows/desktop/bb219790(v=vs.85).aspx
+//
+// dest dst(float4 src0, float4 src1)
+//
+// dest.x = 1;
+// dest.y = src0.y * src1.y;
+// dest.z = src0.z;
+// dest.w = src1.w;
+
+void main() {
+  float4 src0, src1; // declared without initializers; only the emitted instructions are verified
+  // The directives below pin the exact instruction sequence expected for the dst() call.
+// CHECK:         [[src0:%\d+]] = OpLoad %v4float %src0
+// CHECK-NEXT:    [[src1:%\d+]] = OpLoad %v4float %src1
+// CHECK-NEXT:   [[src0y:%\d+]] = OpCompositeExtract %float [[src0]] 1
+// CHECK-NEXT:   [[src1y:%\d+]] = OpCompositeExtract %float [[src1]] 1
+// CHECK-NEXT:   [[src0z:%\d+]] = OpCompositeExtract %float [[src0]] 2
+// CHECK-NEXT:   [[src1w:%\d+]] = OpCompositeExtract %float [[src1]] 3
+// CHECK-NEXT: [[resultY:%\d+]] = OpFMul %float [[src0y]] [[src1y]]
+// CHECK-NEXT:         {{%\d+}} = OpCompositeConstruct %v4float %float_1 [[resultY]] [[src0z]] [[src1w]]
+  float4 result = dst(src0, src1);
+}

+ 1 - 0
tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp

@@ -660,6 +660,7 @@ TEST_F(FileTest, IntrinsicsDdyFine) { runFileTest("intrinsics.ddy-fine.hlsl"); }
 TEST_F(FileTest, IntrinsicsDeterminant) {
 TEST_F(FileTest, IntrinsicsDeterminant) {
   runFileTest("intrinsics.determinant.hlsl");
   runFileTest("intrinsics.determinant.hlsl");
 }
 }
+TEST_F(FileTest, IntrinsicsDst) { runFileTest("intrinsics.dst.hlsl"); } // runs CodeGenSPIRV/intrinsics.dst.hlsl
 TEST_F(FileTest, IntrinsicsExp) { runFileTest("intrinsics.exp.hlsl"); }
 TEST_F(FileTest, IntrinsicsExp) { runFileTest("intrinsics.exp.hlsl"); }
 TEST_F(FileTest, IntrinsicsExp2) { runFileTest("intrinsics.exp2.hlsl"); }
 TEST_F(FileTest, IntrinsicsExp2) { runFileTest("intrinsics.exp2.hlsl"); }
 TEST_F(FileTest, IntrinsicsF16ToF32) {
 TEST_F(FileTest, IntrinsicsF16ToF32) {