Browse Source

[spirv] Add support for f16tof32() and f32tof16() (#786)

Fixes https://github.com/Microsoft/DirectXShaderCompiler/issues/785
Lei Zhang 7 years ago
parent
commit
a579b3eaef

+ 2 - 0
docs/SPIR-V.rst

@@ -1452,6 +1452,8 @@ HLSL Intrinsic Function   GLSL Extended Instruction
 ``determinant``         ``Determinant``
 ``exp``                 ``Exp``
 ``exp2``                ``Exp2``
+``f16tof32``            ``UnpackHalf2x16``
+``f32tof16``            ``PackHalf2x16``
 ``faceforward``         ``FaceForward``
 ``firstbithigh``        ``FindSMsb`` / ``FindUMsb``
 ``firstbitlow``         ``FindILsb``

+ 80 - 3
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -4060,7 +4060,7 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   case hlsl::IntrinsicOp::IOP_texCUBEgrad:
   case hlsl::IntrinsicOp::IOP_texCUBElod:
   case hlsl::IntrinsicOp::IOP_texCUBEproj: {
-    emitError("deprecated intrinsic %0 function will not be not supported",
+    emitError("deprecated %0 intrinsic function will not be supported",
               callExpr->getExprLoc())
         << callee->getName();
     return 0;
@@ -4113,6 +4113,10 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   case hlsl::IntrinsicOp::IOP_log10: {
     return processIntrinsicLog10(callExpr);
   }
+  case hlsl::IntrinsicOp::IOP_f16tof32:
+    return processIntrinsicF16ToF32(callExpr);
+  case hlsl::IntrinsicOp::IOP_f32tof16:
+    return processIntrinsicF32ToF16(callExpr);
     INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
     INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
     INTRINSIC_SPIRV_OP_WITH_CAP_CASE(ddx_coarse, DPdxCoarse, false,
@@ -4238,8 +4242,7 @@ SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
   };
 
   const auto writeToOutputArg = [&baseType, dest, this](
-                                    uint32_t toWrite, const CallExpr *callExpr,
-                                    uint32_t outputArgIndex) {
+      uint32_t toWrite, const CallExpr *callExpr, uint32_t outputArgIndex) {
     const auto outputArg = callExpr->getArg(outputArgIndex);
     const auto outputArgType = outputArg->getType();
     if (baseType != outputArgType)
@@ -5153,6 +5156,80 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
                    arg->getExprLoc());
 }
 
+uint32_t SPIRVEmitter::processIntrinsicF16ToF32(const CallExpr *callExpr) {
+  // f16tof32() takes in (vector of) uint and returns (vector of) float.
+  // The frontend should guarantee that by inserting implicit casts.
+  const uint32_t glsl = theBuilder.getGLSLExtInstSet();
+  const uint32_t f32TypeId = theBuilder.getFloat32Type();
+  const uint32_t u32TypeId = theBuilder.getUint32Type();
+  const uint32_t v2f32TypeId = theBuilder.getVecType(f32TypeId, 2);
+
+  const auto *arg = callExpr->getArg(0);
+  const uint32_t argId = doExpr(arg);
+
+  uint32_t elemCount = {};
+
+  if (TypeTranslator::isVectorType(arg->getType(), nullptr, &elemCount)) {
+    // Vector input: UnpackHalf2x16 is fed one uint element at a time.
+    llvm::SmallVector<uint32_t, 4> elements;
+
+    for (uint32_t i = 0; i < elemCount; ++i) {
+      const uint32_t srcElem =
+          theBuilder.createCompositeExtract(u32TypeId, argId, {i});
+      const uint32_t convert = theBuilder.createExtInst(
+          v2f32TypeId, glsl, GLSLstd450::GLSLstd450UnpackHalf2x16, srcElem);
+      elements.push_back(
+          theBuilder.createCompositeExtract(f32TypeId, convert, {0}));
+    }
+    return theBuilder.createCompositeConstruct(
+        theBuilder.getVecType(f32TypeId, elemCount), elements);
+  }
+
+  const uint32_t convert = theBuilder.createExtInst(
+      v2f32TypeId, glsl, GLSLstd450::GLSLstd450UnpackHalf2x16, argId);
+  // f16tof32() converts the float16 stored in the low half of the uint to
+  // a float, so only the first component of the unpacked pair is returned.
+  return theBuilder.createCompositeExtract(f32TypeId, convert, {0});
+}
+
+uint32_t SPIRVEmitter::processIntrinsicF32ToF16(const CallExpr *callExpr) {
+  // f32tof16() takes in (vector of) float and returns (vector of) uint.
+  // The frontend should guarantee that by inserting implicit casts.
+  const uint32_t glsl = theBuilder.getGLSLExtInstSet();
+  const uint32_t f32TypeId = theBuilder.getFloat32Type();
+  const uint32_t u32TypeId = theBuilder.getUint32Type();
+  const uint32_t v2f32TypeId = theBuilder.getVecType(f32TypeId, 2);
+  const uint32_t zero = theBuilder.getConstantFloat32(0);
+
+  const auto *arg = callExpr->getArg(0);
+  const uint32_t argId = doExpr(arg);
+  uint32_t elemCount = {};
+
+  if (TypeTranslator::isVectorType(arg->getType(), nullptr, &elemCount)) {
+    // Vector input: each element is paired with zero and packed separately.
+    llvm::SmallVector<uint32_t, 4> elements;
+
+    for (uint32_t i = 0; i < elemCount; ++i) {
+      const uint32_t srcElem =
+          theBuilder.createCompositeExtract(f32TypeId, argId, {i});
+      const uint32_t srcVec =
+          theBuilder.createCompositeConstruct(v2f32TypeId, {srcElem, zero});
+
+      elements.push_back(theBuilder.createExtInst(
+          u32TypeId, glsl, GLSLstd450::GLSLstd450PackHalf2x16, srcVec));
+    }
+    return theBuilder.createCompositeConstruct(
+        theBuilder.getVecType(u32TypeId, elemCount), elements);
+  }
+
+  // f32tof16() stores the float into the low half of the uint. So we need
+  // to supply a zero as the second component to fill the other half.
+  const uint32_t srcVec =
+      theBuilder.createCompositeConstruct(v2f32TypeId, {argId, zero});
+  return theBuilder.createExtInst(u32TypeId, glsl,
+                                  GLSLstd450::GLSLstd450PackHalf2x16, srcVec);
+}
+
 uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
     const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
   const uint32_t returnType = typeTranslator.translateType(callExpr->getType());

+ 6 - 2
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -306,6 +306,11 @@ private:
   /// casting from float to integer is therefore performed by this method.
   uint32_t processIntrinsicFloatSign(const CallExpr *);
 
+  /// Processes the 'f16tof32' intrinsic function.
+  uint32_t processIntrinsicF16ToF32(const CallExpr *);
+  /// Processes the 'f32tof16' intrinsic function.
+  uint32_t processIntrinsicF32ToF16(const CallExpr *);
+
   /// Processes the given intrinsic function call using the given GLSL
   /// extended instruction. If the given instruction cannot operate on matrices,
   /// it performs the instruction on each row of the matrix and uses composite
@@ -642,8 +647,7 @@ private:
   /// \brief Wrapper method to create an error message and report it
   /// in the diagnostic engine associated with this consumer.
   template <unsigned N>
-  DiagnosticBuilder emitError(const char (&message)[N],
-                              SourceLocation loc) {
+  DiagnosticBuilder emitError(const char (&message)[N], SourceLocation loc) {
     const auto diagId =
         diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
     return diags.Report(loc, diagId);

+ 66 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.f16tof32.hlsl

@@ -0,0 +1,66 @@
+// Run: %dxc -T vs_6_0 -E main
+
+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
+
+void main(uint      a : A,     uint2  b : B,     uint3  c : C,     uint4  d : D,
+          out float m : M, out float2 n : N, out float3 o : O, out float4 p : P) {
+// CHECK:        [[a:%\d+]] = OpLoad %uint %a
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[a]]
+// CHECK-NEXT:   [[m:%\d+]] = OpCompositeExtract %float [[cov]] 0
+// CHECK-NEXT:                OpStore %m [[m]]
+    m = f16tof32(a);
+
+// CHECK:        [[b:%\d+]] = OpLoad %v2uint %b
+
+// CHECK-NEXT:  [[b0:%\d+]] = OpCompositeExtract %uint [[b]] 0
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[b0]]
+// CHECK-NEXT:  [[n0:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[b1:%\d+]] = OpCompositeExtract %uint [[b]] 1
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[b1]]
+// CHECK-NEXT:  [[n1:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:   [[n:%\d+]] = OpCompositeConstruct %v2float [[n0]] [[n1]]
+// CHECK-NEXT:                OpStore %n [[n]]
+    n = f16tof32(b);
+
+// CHECK-NEXT:   [[c:%\d+]] = OpLoad %v3uint %c
+
+// CHECK-NEXT:  [[c0:%\d+]] = OpCompositeExtract %uint [[c]] 0
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c0]]
+// CHECK-NEXT:  [[o0:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[c1:%\d+]] = OpCompositeExtract %uint [[c]] 1
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c1]]
+// CHECK-NEXT:  [[o1:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[c2:%\d+]] = OpCompositeExtract %uint [[c]] 2
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[c2]]
+// CHECK-NEXT:  [[o2:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:   [[o:%\d+]] = OpCompositeConstruct %v3float [[o0]] [[o1]] [[o2]]
+// CHECK-NEXT:                OpStore %o [[o]]
+    o = f16tof32(c);
+
+// CHECK-NEXT:   [[d:%\d+]] = OpLoad %v4uint %d
+
+// CHECK-NEXT:  [[d0:%\d+]] = OpCompositeExtract %uint [[d]] 0
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d0]]
+// CHECK-NEXT:  [[p0:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[d1:%\d+]] = OpCompositeExtract %uint [[d]] 1
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d1]]
+// CHECK-NEXT:  [[p1:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[d2:%\d+]] = OpCompositeExtract %uint [[d]] 2
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d2]]
+// CHECK-NEXT:  [[p2:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:  [[d3:%\d+]] = OpCompositeExtract %uint [[d]] 3
+// CHECK-NEXT: [[cov:%\d+]] = OpExtInst %v2float [[glsl]] UnpackHalf2x16 [[d3]]
+// CHECK-NEXT:  [[p3:%\d+]] = OpCompositeExtract %float [[cov]] 0
+
+// CHECK-NEXT:   [[p:%\d+]] = OpCompositeConstruct %v4float [[p0]] [[p1]] [[p2]] [[p3]]
+// CHECK-NEXT:                OpStore %p [[p]]
+    p = f16tof32(d);
+}

+ 66 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.f32tof16.hlsl

@@ -0,0 +1,66 @@
+// Run: %dxc -T vs_6_0 -E main
+
+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
+
+void main(out uint  a : A, out uint2  b : B, out uint3  c : C, out uint4  d : D,
+              float m : M,     float2 n : N,     float3 o : O,     float4 p : P) {
+// CHECK:        [[m:%\d+]] = OpLoad %float %m
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[m]] %float_0
+// CHECK-NEXT:   [[a:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+// CHECK-NEXT:                OpStore %a [[a]]
+    a = f32tof16(m);
+
+// CHECK-NEXT:   [[n:%\d+]] = OpLoad %v2float %n
+
+// CHECK-NEXT:  [[n0:%\d+]] = OpCompositeExtract %float [[n]] 0
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[n0]] %float_0
+// CHECK-NEXT:  [[b0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[n1:%\d+]] = OpCompositeExtract %float [[n]] 1
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[n1]] %float_0
+// CHECK-NEXT:  [[b1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:   [[b:%\d+]] = OpCompositeConstruct %v2uint [[b0]] [[b1]]
+// CHECK-NEXT:                OpStore %b [[b]]
+    b = f32tof16(n);
+
+// CHECK-NEXT:   [[o:%\d+]] = OpLoad %v3float %o
+
+// CHECK-NEXT:  [[o0:%\d+]] = OpCompositeExtract %float [[o]] 0
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o0]] %float_0
+// CHECK-NEXT:  [[c0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[o1:%\d+]] = OpCompositeExtract %float [[o]] 1
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o1]] %float_0
+// CHECK-NEXT:  [[c1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[o2:%\d+]] = OpCompositeExtract %float [[o]] 2
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[o2]] %float_0
+// CHECK-NEXT:  [[c2:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:   [[c:%\d+]] = OpCompositeConstruct %v3uint [[c0]] [[c1]] [[c2]]
+// CHECK-NEXT:                OpStore %c [[c]]
+    c = f32tof16(o);
+
+// CHECK-NEXT:   [[p:%\d+]] = OpLoad %v4float %p
+
+// CHECK-NEXT:  [[p0:%\d+]] = OpCompositeExtract %float [[p]] 0
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p0]] %float_0
+// CHECK-NEXT:  [[d0:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[p1:%\d+]] = OpCompositeExtract %float [[p]] 1
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p1]] %float_0
+// CHECK-NEXT:  [[d1:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[p2:%\d+]] = OpCompositeExtract %float [[p]] 2
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p2]] %float_0
+// CHECK-NEXT:  [[d2:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:  [[p3:%\d+]] = OpCompositeExtract %float [[p]] 3
+// CHECK-NEXT: [[vec:%\d+]] = OpCompositeConstruct %v2float [[p3]] %float_0
+// CHECK-NEXT:  [[d3:%\d+]] = OpExtInst %uint [[glsl]] PackHalf2x16 [[vec]]
+
+// CHECK-NEXT:   [[d:%\d+]] = OpCompositeConstruct %v4uint [[d0]] [[d1]] [[d2]] [[d3]]
+// CHECK-NEXT:                OpStore %d [[d]]
+    d = f32tof16(p);
+}

+ 6 - 0
tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp

@@ -614,6 +614,12 @@ TEST_F(FileTest, IntrinsicsDeterminant) {
 }
 TEST_F(FileTest, IntrinsicsExp) { runFileTest("intrinsics.exp.hlsl"); }
 TEST_F(FileTest, IntrinsicsExp2) { runFileTest("intrinsics.exp2.hlsl"); }
+TEST_F(FileTest, IntrinsicsF16ToF32) {
+  runFileTest("intrinsics.f16tof32.hlsl");
+}
+TEST_F(FileTest, IntrinsicsF32ToF16) {
+  runFileTest("intrinsics.f32tof16.hlsl");
+}
 TEST_F(FileTest, IntrinsicsFaceForward) {
   runFileTest("intrinsics.faceforward.hlsl");
 }