8 年之前 · a579b3eaef
--- a/docs/SPIR-V.rst
+++ b/docs/SPIR-V.rst
@@ -1452,6 +1452,8 @@ HLSL Intrinsic Function   GLSL Extended Instruction
 
				 ``determinant``         ``Determinant``
			
 
				 ``exp``                 ``Exp``
			
 
				 ``exp2``                ``exp2``
			
 
				+``f16tof32``            ``UnpackHalf2x16``
			
 
				+``f32tof16``            ``PackHalf2x16``
			
 
				 ``faceforward``         ``FaceForward``
			
 
				 ``firstbithigh``        ``FindSMsb`` / ``FindUMsb``
			
 
				 ``firstbitlow``         ``FindILsb``
			
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
@@ -4060,7 +4060,7 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
				   case hlsl::IntrinsicOp::IOP_texCUBEgrad:
			
 
				   case hlsl::IntrinsicOp::IOP_texCUBElod:
			
 
				   case hlsl::IntrinsicOp::IOP_texCUBEproj: {
			
 
				-    emitError("deprecated intrinsic %0 function will not be not supported",
			
 
				+    emitError("deprecated %0 intrinsic function will not be supported",
			
 
				               callExpr->getExprLoc())
			
 
				         << callee->getName();
			
 
				     return 0;
			
@@ -4113,6 +4113,10 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
				   case hlsl::IntrinsicOp::IOP_log10: {
			
 
				     return processIntrinsicLog10(callExpr);
			
 
				   }
			
 
				+  case hlsl::IntrinsicOp::IOP_f16tof32:
			
 
				+    return processIntrinsicF16ToF32(callExpr);
			
 
				+  case hlsl::IntrinsicOp::IOP_f32tof16:
			
 
				+    return processIntrinsicF32ToF16(callExpr);
			
 
				     INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
			
 
				     INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
			
 
				     INTRINSIC_SPIRV_OP_WITH_CAP_CASE(ddx_coarse, DPdxCoarse, false,
			
@@ -4238,8 +4242,7 @@ SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
 
				   };
			
 
				 
			
 
				   const auto writeToOutputArg = [&baseType, dest, this](
			
 
				-                                    uint32_t toWrite, const CallExpr *callExpr,
			
 
				-                                    uint32_t outputArgIndex) {
			
 
				+      uint32_t toWrite, const CallExpr *callExpr, uint32_t outputArgIndex) {
			
 
				     const auto outputArg = callExpr->getArg(outputArgIndex);
			
 
				     const auto outputArgType = outputArg->getType();
			
 
				     if (baseType != outputArgType)
			
@@ -5153,6 +5156,80 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
 
				                    arg->getExprLoc());
			
 
				 }
			
 
				 
			
 
				+uint32_t SPIRVEmitter::processIntrinsicF16ToF32(const CallExpr *callExpr) {
			
 
				+  // f16tof32() takes in (vector of) uint and returns (vector of) float.
			
 
				+  // The frontend should guarantee that by inserting implicit casts.
			
 
				+  const uint32_t glsl = theBuilder.getGLSLExtInstSet();
			
 
				+  const uint32_t f32TypeId = theBuilder.getFloat32Type();
			
 
				+  const uint32_t u32TypeId = theBuilder.getUint32Type();
			
 
				+  const uint32_t v2f32TypeId = theBuilder.getVecType(f32TypeId, 2);
			
 
				+
			
 
				+  const auto *arg = callExpr->getArg(0);
			
 
				+  const uint32_t argId = doExpr(arg);
			
 
				+
			
 
				+  uint32_t elemCount = {};
			
 
				+
			
 
				+  if (TypeTranslator::isVectorType(arg->getType(), nullptr, &elemCount)) {
			
 
				+    // The input is a vector. We need to handle each element separately.
			
 
				+    llvm::SmallVector<uint32_t, 4> elements;
			
 
				+
			
 
				+    for (uint32_t i = 0; i < elemCount; ++i) {
			
 
				+      const uint32_t srcElem =
			
 
				+          theBuilder.createCompositeExtract(u32TypeId, argId, {i});
			
 
				+      const uint32_t convert = theBuilder.createExtInst(
			
 
				+          v2f32TypeId, glsl, GLSLstd450::GLSLstd450UnpackHalf2x16, srcElem);
			
 
				+      elements.push_back(
			
 
				+          theBuilder.createCompositeExtract(f32TypeId, convert, {0}));
			
 
				+    }
			
 
				+    return theBuilder.createCompositeConstruct(
			
 
				+        theBuilder.getVecType(f32TypeId, elemCount), elements);
			
 
				+  }
			
 
				+
			
 
				+  const uint32_t convert = theBuilder.createExtInst(
			
 
				+      v2f32TypeId, glsl, GLSLstd450::GLSLstd450UnpackHalf2x16, argId);
			
 
				+  // f16tof32() converts the float16 stored in the low-half of the uint to
			
 
				+  // a float. So just need to return the first component.
			
 
				+  return theBuilder.createCompositeExtract(f32TypeId, convert, {0});
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processIntrinsicF32ToF16(const CallExpr *callExpr) {
			
 
				+  // f32tof16() takes in (vector of) float and returns (vector of) uint.
			
 
				+  // The frontend should guarantee that by inserting implicit casts.
			
 
				+  const uint32_t glsl = theBuilder.getGLSLExtInstSet();
			
 
				+  const uint32_t f32TypeId = theBuilder.getFloat32Type();
			
 
				+  const uint32_t u32TypeId = theBuilder.getUint32Type();
			
 
				+  const uint32_t v2f32TypeId = theBuilder.getVecType(f32TypeId, 2);
			
 
				+  const uint32_t zero = theBuilder.getConstantFloat32(0);
			
 
				+
			
 
				+  const auto *arg = callExpr->getArg(0);
			
 
				+  const uint32_t argId = doExpr(arg);
			
 
				+  uint32_t elemCount = {};
			
 
				+
			
 
				+  if (TypeTranslator::isVectorType(arg->getType(), nullptr, &elemCount)) {
			
 
				+    // The input is a vector. We need to handle each element separately.
			
 
				+    llvm::SmallVector<uint32_t, 4> elements;
			
 
				+
			
 
				+    for (uint32_t i = 0; i < elemCount; ++i) {
			
 
				+      const uint32_t srcElem =
			
 
				+          theBuilder.createCompositeExtract(f32TypeId, argId, {i});
			
 
				+      const uint32_t srcVec =
			
 
				+          theBuilder.createCompositeConstruct(v2f32TypeId, {srcElem, zero});
			
 
				+
			
 
				+      elements.push_back(theBuilder.createExtInst(
			
 
				+          u32TypeId, glsl, GLSLstd450::GLSLstd450PackHalf2x16, srcVec));
			
 
				+    }
			
 
				+    return theBuilder.createCompositeConstruct(
			
 
				+        theBuilder.getVecType(u32TypeId, elemCount), elements);
			
 
				+  }
			
 
				+
			
 
				+  // f32tof16() stores the float into the low-half of the uint. So we need
			
 
				+  // to supply another zero to take the other half.
			
 
				+  const uint32_t srcVec =
			
 
				+      theBuilder.createCompositeConstruct(v2f32TypeId, {argId, zero});
			
 
				+  return theBuilder.createExtInst(u32TypeId, glsl,
			
 
				+                                  GLSLstd450::GLSLstd450PackHalf2x16, srcVec);
			
 
				+}
			
 
				+
			
 
				 uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
			
 
				     const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
			
 
				   const uint32_t returnType = typeTranslator.translateType(callExpr->getType());
			
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.h
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.h
@@ -306,6 +306,11 @@ private:
 
				   /// casting from float to integer is therefore performed by this method.
			
 
				   uint32_t processIntrinsicFloatSign(const CallExpr *);
			
 
				 
			
 
				+  /// Processes the 'f16tof32' intrinsic function.
			
 
				+  uint32_t processIntrinsicF16ToF32(const CallExpr *);
			
 
				+  /// Processes the 'f32tof16' intrinsic function.
			
 
				+  uint32_t processIntrinsicF32ToF16(const CallExpr *);
			
 
				+
			
 
				   /// Processes the given intrinsic function call using the given GLSL
			
 
				   /// extended instruction. If the given instruction cannot operate on matrices,
			
 
				   /// it performs the instruction on each row of the matrix and uses composite
			
@@ -642,8 +647,7 @@ private:
 
				   /// \brief Wrapper method to create an error message and report it
			
 
				   /// in the diagnostic engine associated with this consumer.
			
 
				   template <unsigned N>
			
 
				-  DiagnosticBuilder emitError(const char (&message)[N],
			
 
				-                              SourceLocation loc) {
			
 
				+  DiagnosticBuilder emitError(const char (&message)[N], SourceLocation loc) {
			
 
				     const auto diagId =
			
 
				         diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
			
 
				     return diags.Report(loc, diagId);
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.f16tof32.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.f16tof32.hlsl
@@ -0,0 +1,66 @@
 
				+// Run: %dxc -T vs_6_0 -E main
			
 
				+
			
 
				+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
			
 
				+
			
 
				+void main(uint      a : A,     uint2  b : B,     uint3  c : C,     uint4  d : D,
			
 
				+          out float m : M, out float2 n : N, out float3 o : O, out float4 p : P) {
			
 
				+// CHECK:        [[a:%\d+]] = OpLoad %uint %a
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[a]]
			
 
				+// CHECK-NEXT:   [[m:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+// CHECK-NEXT:                OpStore %m [[m]]
			
 
				+    m = f16tof32(a);
			
 
				+
			
 
				+// CHECK:        [[b:%\d+]] = OpLoad %v2uint %b
			
 
				+
			
 
				+// CHECK-NEXT:  [[b0:%\d+]] = OpCompositeExtract %uint [[b]] 0
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[b0]]
			
 
				+// CHECK-NEXT:  [[n0:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[b1:%\d+]] = OpCompositeExtract %uint [[b]] 1
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[b1]]
			
 
				+// CHECK-NEXT:  [[n1:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:   [[n:%\d+]] = OpCompositeConstruct %v2float [[n0]] [[n1]]
			
 
				+// CHECK-NEXT:                OpStore %n [[n]]
			
 
				+    n = f16tof32(b);
			
 
				+
			
 
				+// CHECK-NEXT:   [[c:%\d+]] = OpLoad %v3uint %c
			
 
				+
			
 
				+// CHECK-NEXT:  [[c0:%\d+]] = OpCompositeExtract %uint [[c]] 0
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c0]]
			
 
				+// CHECK-NEXT:  [[o0:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[c1:%\d+]] = OpCompositeExtract %uint [[c]] 1
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c1]]
			
 
				+// CHECK-NEXT:  [[o1:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[c2:%\d+]] = OpCompositeExtract %uint [[c]] 2
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c2]]
			
 
				+// CHECK-NEXT:  [[o2:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:   [[o:%\d+]] = OpCompositeConstruct %v3float [[o0]] [[o1]] [[o2]]
			
 
				+// CHECK-NEXT:                OpStore %o [[o]]
			
 
				+    o = f16tof32(c);
			
 
				+
			
 
				+// CHECK-NEXT:   [[d:%\d+]] = OpLoad %v4uint %d
			
 
				+
			
 
				+// CHECK-NEXT:  [[d0:%\d+]] = OpCompositeExtract %uint [[d]] 0
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d0]]
			
 
				+// CHECK-NEXT:  [[p0:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[d1:%\d+]] = OpCompositeExtract %uint [[d]] 1
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d1]]
			
 
				+// CHECK-NEXT:  [[p1:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[d2:%\d+]] = OpCompositeExtract %uint [[d]] 2
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d2]]
			
 
				+// CHECK-NEXT:  [[p2:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:  [[d3:%\d+]] = OpCompositeExtract %uint [[d]] 3
			
 
				+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d3]]
			
 
				+// CHECK-NEXT:  [[p3:%\d+]] = OpCompositeExtract %float [[cov]] 0
			
 
				+
			
 
				+// CHECK-NEXT:   [[p:%\d+]] = OpCompositeConstruct %v4float [[p0]] [[p1]] [[p2]] [[p3]]
			
 
				+// CHECK-NEXT:                OpStore %p [[p]]
			
 
				+    p = f16tof32(d);
			
 
				+}
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.f32tof16.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.f32tof16.hlsl
@@ -0,0 +1,66 @@
 
				+// Run: %dxc -T vs_6_0 -E main
			
 
				+
			
 
				+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
			
 
				+
			
 
				+void main(out uint  a : A, out uint2  b : B, out uint3  c : C, out uint4  d : D,
			
 
				+              float m : M,     float2 n : N,     float3 o : O,     float4 p : P) {
			
 
				+// CHECK:        [[m:%\d+]] = OpLoad %float %m
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[m]] %float_0
			
 
				+// CHECK-NEXT:   [[a:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+// CHECK-NEXT:                OpStore %a [[a]]
			
 
				+    a = f32tof16(m);
			
 
				+
			
 
				+// CHECK-NEXT:   [[n:%\d+]] = OpLoad %v2float %n
			
 
				+
			
 
				+// CHECK-NEXT:  [[n0:%\d+]] = OpCompositeExtract %float [[n]] 0
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[n0]] %float_0
			
 
				+// CHECK-NEXT:  [[b0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[n1:%\d+]] = OpCompositeExtract %float [[n]] 1
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[n1]] %float_0
			
 
				+// CHECK-NEXT:  [[b1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:   [[b:%\d+]] = OpCompositeConstruct %v2uint [[b0]] [[b1]]
			
 
				+// CHECK-NEXT:                OpStore %b [[b]]
			
 
				+    b = f32tof16(n);
			
 
				+
			
 
				+// CHECK-NEXT:   [[o:%\d+]] = OpLoad %v3float %o
			
 
				+
			
 
				+// CHECK-NEXT:  [[o0:%\d+]] = OpCompositeExtract %float [[o]] 0
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o0]] %float_0
			
 
				+// CHECK-NEXT:  [[c0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[o1:%\d+]] = OpCompositeExtract %float [[o]] 1
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o1]] %float_0
			
 
				+// CHECK-NEXT:  [[c1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[o2:%\d+]] = OpCompositeExtract %float [[o]] 2
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o2]] %float_0
			
 
				+// CHECK-NEXT:  [[c2:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:   [[c:%\d+]] = OpCompositeConstruct %v3uint [[c0]] [[c1]] [[c2]]
			
 
				+// CHECK-NEXT:                OpStore %c [[c]]
			
 
				+    c = f32tof16(o);
			
 
				+
			
 
				+// CHECK-NEXT:   [[p:%\d+]] = OpLoad %v4float %p
			
 
				+
			
 
				+// CHECK-NEXT:  [[p0:%\d+]] = OpCompositeExtract %float [[p]] 0
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p0]] %float_0
			
 
				+// CHECK-NEXT:  [[d0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[p1:%\d+]] = OpCompositeExtract %float [[p]] 1
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p1]] %float_0
			
 
				+// CHECK-NEXT:  [[d1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[p2:%\d+]] = OpCompositeExtract %float [[p]] 2
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p2]] %float_0
			
 
				+// CHECK-NEXT:  [[d2:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:  [[p3:%\d+]] = OpCompositeExtract %float [[p]] 3
			
 
				+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p3]] %float_0
			
 
				+// CHECK-NEXT:  [[d3:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
			
 
				+
			
 
				+// CHECK-NEXT:   [[d:%\d+]] = OpCompositeConstruct %v4uint [[d0]] [[d1]] [[d2]] [[d3]]
			
 
				+// CHECK-NEXT:                OpStore %d [[d]]
			
 
				+    d = f32tof16(p);
			
 
				+}
			
--- a/tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp
+++ b/tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp
@@ -614,6 +614,12 @@ TEST_F(FileTest, IntrinsicsDeterminant) {
 
				 }
			
 
				 TEST_F(FileTest, IntrinsicsExp) { runFileTest("intrinsics.exp.hlsl"); }
			
 
				 TEST_F(FileTest, IntrinsicsExp2) { runFileTest("intrinsics.exp2.hlsl"); }
			
 
				+TEST_F(FileTest, IntrinsicsF16ToF32) {
			
 
				+  runFileTest("intrinsics.f16tof32.hlsl");
			
 
				+}
			
 
				+TEST_F(FileTest, IntrinsicsF32ToF16) {
			
 
				+  runFileTest("intrinsics.f32tof16.hlsl");
			
 
				+}
			
 
				 TEST_F(FileTest, IntrinsicsFaceForward) {
			
 
				   runFileTest("intrinsics.faceforward.hlsl");
			
 
				 }