Prechádzať zdrojové kódy

hlsl, spirv: allow use of ffinite-math-only (#4955)

* hlsl, spirv: allow use of ffinite-math-only

This option should match GCC's and Clang's behavior:

From the GCC documentation:
```
Allow optimizations for floating-point arithmetic that assume that arguments and results are not NaNs or +-Infs.

This option is not turned on by any -O option since it can result in incorrect output for programs that depend on an exact implementation of IEEE or ISO rules/specifications for math functions. It may, however, yield faster code for programs that do not require the guarantees of these specifications.

The default is -fno-finite-math-only.
```

This commit allows the flag to be used again, and makes SPIR-V min/max
intrinsics use FMin/FMax instead of NMin/NMax when enabled.

Fixes #4954

Signed-off-by: Nathan Gauër <[email protected]>
Co-authored-by: Laura Hermanns <[email protected]>
Nathan Gauër 2 rokov pred
rodič
commit
40d5e1fb9d

+ 9 - 2
docs/SPIR-V.rst

@@ -2409,8 +2409,8 @@ HLSL Intrinsic Function   GLSL Extended Instruction
 ``log10``               ``Log2`` (scaled by ``1/log2(10)``)
 ``log2``                ``Log2``
 ``mad``                 ``Fma``
-``max``                 ``SMax``/``UMax``/``NMax``
-``min``                 ``SMin``/``UMin``/``NMin``
+``max``                 ``SMax``/``UMax``/``NMax``/``FMax``
+``min``                 ``SMin``/``UMin``/``NMin``/``FMin``
 ``modf``                ``ModfStruct``
 ``normalize``           ``Normalize``
 ``pow``                 ``Pow``
@@ -2431,6 +2431,13 @@ HLSL Intrinsic Function   GLSL Extended Instruction
 ``trunc``               ``Trunc``
 ======================= ===================================
 
+Note on NMax, NMin, FMax & FMin:
+
+This compiler supports the ``-ffinite-math-only`` option, which allows
+assuming non-NaN parameters to some operations. ``min`` & ``max`` intrinsics
+will by default generate ``NMin`` & ``NMax`` instructions, but if this option
+is enabled, ``FMin`` & ``FMax`` can be generated instead.
+
 Synchronization intrinsics
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

+ 4 - 2
include/dxc/Support/HLSLOptions.td

@@ -143,8 +143,10 @@ def fno_associative_math : Flag<["-"], "fno-associative-math">, Group<hlsloptz_G
 //  Flag<["-"], "freciprocal-math">, Group<hlsloptz_Group>, Flags<[CoreOption]>,
 //  HelpText<"Allow division operations to be reassociated">;
 def fno_reciprocal_math : Flag<["-"], "fno-reciprocal-math">, Group<hlsloptz_Group>;
-def ffinite_math_only : Flag<["-"], "ffinite-math-only">, Group<hlsloptz_Group>, Flags<[CoreOption]>;
-def fno_finite_math_only : Flag<["-"], "fno-finite-math-only">, Group<hlsloptz_Group>;
+def ffinite_math_only: Flag<["-"], "ffinite-math-only">, Group<hlsloptz_Group>, Flags<[CoreOption]>,
+  HelpText<"Allow optimizations for floating-point arithmetic that assume that arguments and results are not NaNs or +-Infs.">;
+def fno_finite_math_only: Flag<["-"], "fno-finite-math-only">, Group<hlsloptz_Group>, Flags<[CoreOption]>,
+  HelpText<"Disallow optimizations for floating-point arithmetic that assume that arguments and results are not NaNs or +-Infs.">;
 def fsigned_zeros : Flag<["-"], "fsigned-zeros">, Group<hlsloptz_Group>;
 //def fno_signed_zeros :
 //  Flag<["-"], "fno-signed-zeros">, Group<hlsloptz_Group>, Flags<[CoreOption]>,

+ 1 - 0
include/dxc/Support/SPIRVOptions.h

@@ -49,6 +49,7 @@ struct SpirvCodeGenOptions {
   bool defaultRowMajor;
   bool disableValidation;
   bool enable16BitTypes;
+  bool finiteMathOnly;
   bool enableReflect;
   bool invertY; // Additive inverse
   bool invertW; // Multiplicative inverse

+ 2 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -979,6 +979,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.SpirvOptions.fixFuncCallArguments =
       Args.hasFlag(OPT_fspv_fix_func_call_arguments, OPT_INVALID, false);
   opts.SpirvOptions.autoShiftBindings = Args.hasFlag(OPT_fvk_auto_shift_bindings, OPT_INVALID, false);
+  opts.SpirvOptions.finiteMathOnly =
+      Args.hasFlag(OPT_ffinite_math_only, OPT_fno_finite_math_only, false);
 
   if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) ||
       !handleVkShiftArgs(Args, OPT_fvk_t_shift, "t", &opts.SpirvOptions.tShift, errors) ||

+ 20 - 2
tools/clang/lib/SPIRV/SpirvEmitter.cpp

@@ -8568,6 +8568,26 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
     const Expr *falseExpr = callExpr->getArg(2);
     retVal = doConditional(callExpr, cond, falseExpr, trueExpr);
     break;
+  }
+  case hlsl::IntrinsicOp::IOP_min: {
+    glslOpcode =
+        isFloatType  ? (spirvOptions.finiteMathOnly ? GLSLstd450::GLSLstd450FMin
+                                                    : GLSLstd450::GLSLstd450NMin)
+        : isSintType ? GLSLstd450::GLSLstd450SMin
+                     : GLSLstd450::GLSLstd450UMin;
+    retVal = processIntrinsicUsingGLSLInst(callExpr, glslOpcode, true, srcLoc,
+                                           srcRange);
+    break;
+  }
+  case hlsl::IntrinsicOp::IOP_max: {
+    glslOpcode =
+        isFloatType  ? (spirvOptions.finiteMathOnly ? GLSLstd450::GLSLstd450FMax
+                                                    : GLSLstd450::GLSLstd450NMax)
+        : isSintType ? GLSLstd450::GLSLstd450SMax
+                     : GLSLstd450::GLSLstd450UMax;
+    retVal = processIntrinsicUsingGLSLInst(callExpr, glslOpcode, true, srcLoc,
+                                           srcRange);
+    break;
   }
     INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
     INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false);
@@ -8610,9 +8630,7 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
     INTRINSIC_OP_CASE(lerp, FMix, true);
     INTRINSIC_OP_CASE(log, Log, true);
     INTRINSIC_OP_CASE(log2, Log2, true);
-    INTRINSIC_OP_CASE_SINT_UINT_FLOAT(max, SMax, UMax, NMax, true);
     INTRINSIC_OP_CASE(umax, UMax, true);
-    INTRINSIC_OP_CASE_SINT_UINT_FLOAT(min, SMin, UMin, NMin, true);
     INTRINSIC_OP_CASE(umin, UMin, true);
     INTRINSIC_OP_CASE(normalize, Normalize, false);
     INTRINSIC_OP_CASE(pow, Pow, true);

+ 64 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.max.finitemathonly.hlsl

@@ -0,0 +1,64 @@
+// RUN: %dxc -ffinite-math-only -T vs_6_0 -E main
+
+// According to HLSL reference on denormals for 'max', if one of the
+// values is NaN, the other will be given as the result. If both values
+// are NaN, the result will be NaN.
+// However, ffinite-math-only allows us to optimize code by assuming input
+// values will not be NaN, allowing us to choose simpler implementations.
+
+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
+
+void main() {
+  float result;
+  float2 result2;
+  float3 result3;
+  float4 result4;
+  float2x3 result2x3;
+  int3 iresult3;
+  uint3 uresult3;
+
+// CHECK: {{%\d+}} = OpExtInst %v3int [[glsl]] SMax {{%\d+}} {{%\d+}}
+  int3 i1,i2;
+  iresult3 = max(i1,i2);
+
+// CHECK: {{%\d+}} = OpExtInst %v3uint [[glsl]] UMax {{%\d+}} {{%\d+}}
+  uint3 j1,j2;
+  uresult3 = max(j1,j2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float a1,a2;
+  result = max(a1,a2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float1 b1,b2;
+  result = max(b1,b2);
+
+// CHECK: {{%\d+}} = OpExtInst %v3float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float3 c1,c2;
+  result3 = max(c1,c2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float1x1 d1,d2;
+  result = max(d1,d2);
+
+// CHECK: {{%\d+}} = OpExtInst %v2float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float1x2 e1,e2;
+  result2 = max(e1,e2);
+
+// CHECK: {{%\d+}} = OpExtInst %v4float [[glsl]] FMax {{%\d+}} {{%\d+}}
+  float4x1 f1,f2;
+  result4 = max(f1,f2);
+
+// CHECK:      [[g1:%\d+]] = OpLoad %mat2v3float %g1
+// CHECK-NEXT: [[g2:%\d+]] = OpLoad %mat2v3float %g2
+// CHECK-NEXT: [[g1_row0:%\d+]] = OpCompositeExtract %v3float [[g1]] 0
+// CHECK-NEXT: [[g2_row0:%\d+]] = OpCompositeExtract %v3float [[g2]] 0
+// CHECK-NEXT: [[result_row0:%\d+]] = OpExtInst %v3float [[glsl]] FMax [[g1_row0]] [[g2_row0]]
+// CHECK-NEXT: [[g1_row1:%\d+]] = OpCompositeExtract %v3float [[g1]] 1
+// CHECK-NEXT: [[g2_row1:%\d+]] = OpCompositeExtract %v3float [[g2]] 1
+// CHECK-NEXT: [[result_row1:%\d+]] = OpExtInst %v3float [[glsl]] FMax [[g1_row1]] [[g2_row1]]
+// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3float [[result_row0]] [[result_row1]]
+  float2x3 g1,g2;
+  result2x3 = max(g1,g2);
+}
+

+ 63 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.min.finitemathonly.hlsl

@@ -0,0 +1,63 @@
+// RUN: %dxc -ffinite-math-only -T vs_6_0 -E main
+
+// According to HLSL reference on denormals for 'min', if one of the
+// values is NaN, the other will be given as the result. If both values
+// are NaN, the result will be NaN.
+// However, ffinite-math-only allows us to optimize code by assuming input
+// values will not be NaN, allowing us to choose simpler implementations.
+
+// CHECK:      [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
+
+void main() {
+  float result;
+  float2 result2;
+  float3 result3;
+  float4 result4;
+  float2x3 result2x3;
+  int3 iresult3;
+  uint3 uresult3;
+
+// CHECK: {{%\d+}} = OpExtInst %v3int [[glsl]] SMin {{%\d+}} {{%\d+}}
+  int3 i1,i2;
+  iresult3 = min(i1,i2);
+
+// CHECK: {{%\d+}} = OpExtInst %v3uint [[glsl]] UMin {{%\d+}} {{%\d+}}
+  uint3 j1,j2;
+  uresult3 = min(j1,j2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float a1,a2;
+  result = min(a1,a2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float1 b1,b2;
+  result = min(b1,b2);
+
+// CHECK: {{%\d+}} = OpExtInst %v3float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float3 c1,c2;
+  result3 = min(c1,c2);
+
+// CHECK: {{%\d+}} = OpExtInst %float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float1x1 d1,d2;
+  result = min(d1,d2);
+
+// CHECK: {{%\d+}} = OpExtInst %v2float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float1x2 e1,e2;
+  result2 = min(e1,e2);
+
+// CHECK: {{%\d+}} = OpExtInst %v4float [[glsl]] FMin {{%\d+}} {{%\d+}}
+  float4x1 f1,f2;
+  result4 = min(f1,f2);
+
+// CHECK:      [[g1:%\d+]] = OpLoad %mat2v3float %g1
+// CHECK-NEXT: [[g2:%\d+]] = OpLoad %mat2v3float %g2
+// CHECK-NEXT: [[g1_row0:%\d+]] = OpCompositeExtract %v3float [[g1]] 0
+// CHECK-NEXT: [[g2_row0:%\d+]] = OpCompositeExtract %v3float [[g2]] 0
+// CHECK-NEXT: [[result_row0:%\d+]] = OpExtInst %v3float [[glsl]] FMin [[g1_row0]] [[g2_row0]]
+// CHECK-NEXT: [[g1_row1:%\d+]] = OpCompositeExtract %v3float [[g1]] 1
+// CHECK-NEXT: [[g2_row1:%\d+]] = OpCompositeExtract %v3float [[g2]] 1
+// CHECK-NEXT: [[result_row1:%\d+]] = OpExtInst %v3float [[glsl]] FMin [[g1_row1]] [[g2_row1]]
+// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3float [[result_row0]] [[result_row1]]
+  float2x3 g1,g2;
+  result2x3 = min(g1,g2);
+}

+ 6 - 0
tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

@@ -1294,6 +1294,9 @@ TEST_F(FileTest, IntrinsicsLog) { runFileTest("intrinsics.log.hlsl"); }
 TEST_F(FileTest, IntrinsicsLog10) { runFileTest("intrinsics.log10.hlsl"); }
 TEST_F(FileTest, IntrinsicsLog2) { runFileTest("intrinsics.log2.hlsl"); }
 TEST_F(FileTest, IntrinsicsMin) { runFileTest("intrinsics.min.hlsl"); }
+TEST_F(FileTest, IntrinsicsMinFiniteMathOnly) {
+  runFileTest("intrinsics.min.finitemathonly.hlsl");
+}
 TEST_F(FileTest, IntrinsicsLit) { runFileTest("intrinsics.lit.hlsl"); }
 TEST_F(FileTest, IntrinsicsModf) { runFileTest("intrinsics.modf.hlsl"); }
 TEST_F(FileTest, IntrinsicsModfWithSwizzling) {
@@ -1302,6 +1305,9 @@ TEST_F(FileTest, IntrinsicsModfWithSwizzling) {
 TEST_F(FileTest, IntrinsicsMad) { runFileTest("intrinsics.mad.hlsl"); }
 TEST_F(FileTest, IntrinsicsUMad) { runFileTest("intrinsics.umad.hlsl"); }
 TEST_F(FileTest, IntrinsicsMax) { runFileTest("intrinsics.max.hlsl"); }
+TEST_F(FileTest, IntrinsicsMaxFiniteMathOnly) {
+  runFileTest("intrinsics.max.finitemathonly.hlsl");
+}
 TEST_F(FileTest, IntrinsicsMsad4) { runFileTest("intrinsics.msad4.hlsl"); }
 TEST_F(FileTest, IntrinsicsNormalize) {
   runFileTest("intrinsics.normalize.hlsl");