Browse Source

Use nearest even rounding mode for compile-time constants (#3036)

Vishal Sharma 5 years ago
parent
commit
b6b6df7896

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -3381,7 +3381,7 @@ RValue CGMSHLSLRuntime::EmitHLSLBuiltinCallExpr(CodeGenFunction &CGF,
           StringRef intrinsicGroup;
           hlsl::GetIntrinsicOp(FD, intrinsicOpcode, intrinsicGroup);
           IntrinsicOp opcode = static_cast<IntrinsicOp>(intrinsicOpcode);
-          if (Value *Result = TryEvalIntrinsic(CI, opcode)) {
+          if (Value *Result = TryEvalIntrinsic(CI, opcode, CGM.getLangOpts().HLSLVersion)) {
             RV = RValue::get(Result);
           }
         }

+ 18 - 2
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -43,6 +43,7 @@
 
 #include <vector>
 #include <memory>
+#include <fenv.h>
 
 #include "CGHLSLMSHelper.h"
 
@@ -1400,7 +1401,7 @@ void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
 
 namespace CGHLSLMSHelper {
 
-Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
+Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp, unsigned hlslVersion) {
   switch (intriOp) {
   case IntrinsicOp::IOP_tan: {
     return EvalUnaryIntrinsic(CI, tanf, tan);
@@ -1527,7 +1528,22 @@ Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
     return EvalUnaryIntrinsic(CI, floorf, floor);
   } break;
   case IntrinsicOp::IOP_round: {
-    return EvalUnaryIntrinsic(CI, roundf, round);
+    // The round intrinsic can behave differently for constant and runtime evaluation.
+    // E.g., for round(0.5): constant evaluation results in 1 (round half away from zero),
+    // while runtime evaluation results in 0 (round half to nearest even).
+    //
+    // For back compat, DXC preserves that behavior for language versions 2016 or below.
+    // For newer language versions, DXC always uses nearest-even rounding for the round()
+    // intrinsic, in both constant and runtime evaluation.
+    if (hlslVersion <= 2016) {
+      return EvalUnaryIntrinsic(CI, roundf, round);
+    } else {
+      auto roundingMode = fegetround();
+      fesetround(FE_TONEAREST);
+      Value *result = EvalUnaryIntrinsic(CI, nearbyintf, nearbyint);
+      fesetround(roundingMode);
+      return result;
+    }
   } break;
   case IntrinsicOp::IOP_trunc: {
     return EvalUnaryIntrinsic(CI, truncf, trunc);

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMSHelper.h

@@ -188,7 +188,7 @@ void StructurizeMultiRet(llvm::Module &M,
                          bool bWaveEnabledStage,
                          llvm::SmallVector<llvm::BranchInst *, 16> &DxBreaks);
 
-llvm::Value *TryEvalIntrinsic(llvm::CallInst *CI, hlsl::IntrinsicOp intriOp);
+llvm::Value *TryEvalIntrinsic(llvm::CallInst *CI, hlsl::IntrinsicOp intriOp, unsigned hlslVersion);
 void SimpleTransformForHLDXIR(llvm::Module *pM);
 void ExtensionCodeGen(hlsl::HLModule &HLM, clang::CodeGen::CodeGenModule &CGM);
 } // namespace CGHLSLMSHelper

+ 78 - 0
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/rounding/Round_ne_const.hlsl

@@ -0,0 +1,78 @@
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=0.5  %s | %FileCheck -check-prefix=FLT_RND_1 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-0.5  %s | %FileCheck -check-prefix=FLT_RND_2 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.5  %s | %FileCheck -check-prefix=FLT_RND_3 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-1.5  %s | %FileCheck -check-prefix=FLT_RND_4 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.6  %s | %FileCheck -check-prefix=FLT_RND_5 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.3  %s | %FileCheck -check-prefix=FLT_RND_6 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=0.5 -HV 2016 %s | %FileCheck -check-prefix=FLT_RND_7 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-0.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_8 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_9 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-1.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_10 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.6 -HV 2016  %s | %FileCheck -check-prefix=FLT_RND_11 %s
+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.3 -HV 2016  %s | %FileCheck -check-prefix=FLT_RND_12 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=0.5  %s | %FileCheck -check-prefix=DBL_RND_1 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-0.5  %s | %FileCheck -check-prefix=DBL_RND_2 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.5  %s | %FileCheck -check-prefix=DBL_RND_3 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-1.5  %s | %FileCheck -check-prefix=DBL_RND_4 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.6  %s | %FileCheck -check-prefix=DBL_RND_5 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.3  %s | %FileCheck -check-prefix=DBL_RND_6 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=0.5 -HV 2016 %s | %FileCheck -check-prefix=DBL_RND_7 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-0.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_8 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_9 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-1.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_10 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.6 -HV 2016  %s | %FileCheck -check-prefix=DBL_RND_11 %s
+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.3 -HV 2016  %s | %FileCheck -check-prefix=DBL_RND_12 %s
+
+// The round intrinsic can behave differently for constant and runtime evaluation.
+// E.g., for round(0.5): constant evaluation results in 1 (round half away from zero),
+// while runtime evaluation results in 0 (round half to nearest even).
+//
+// For back compat, DXC preserves that behavior for language versions 2016 or below.
+// For newer language versions, DXC always uses nearest-even rounding for the round()
+// intrinsic, in both constant and runtime evaluation.
+
+
+// FLT_RND_1: call void @dx.op.storeOutput{{.*}} float 0.000000e+00
+// FLT_RND_2: call void @dx.op.storeOutput{{.*}} float -0.000000e+00
+// FLT_RND_3: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// FLT_RND_4: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
+// FLT_RND_5: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// FLT_RND_6: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+// FLT_RND_7: select i1 %{{.*}}, float 0.000000e+00, float 1.000000e+00
+// FLT_RND_8: select i1 %{{.*}}, float -0.000000e+00, float -1.000000e+00
+// FLT_RND_9: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// FLT_RND_10: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
+// FLT_RND_11: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// FLT_RND_12: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+// DBL_RND_1: call void @dx.op.storeOutput{{.*}} float 0.000000e+00
+// DBL_RND_2: call void @dx.op.storeOutput{{.*}} float -0.000000e+00
+// DBL_RND_3: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// DBL_RND_4: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
+// DBL_RND_5: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// DBL_RND_6: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+// DBL_RND_7: select i1 %{{.*}}, float 0.000000e+00, float 1.000000e+00
+// DBL_RND_8: select i1 %{{.*}}, float -0.000000e+00, float -1.000000e+00
+// DBL_RND_9: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// DBL_RND_10: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
+// DBL_RND_11: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
+// DBL_RND_12: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+float fr(float f : INPUT) : OUTPUT {
+  if (f == VAL)
+    return round(f);
+  else
+    return round(VAL);
+}
+
+RWStructuredBuffer<double> buf;
+
+float dr() : OUTPUT {
+  double d = buf[0];
+  if (d == VAL)
+    return round(d);
+  else
+    return round(VAL);
+}