5 years ago · b6b6df7896
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@@ -3381,7 +3381,7 @@ RValue CGMSHLSLRuntime::EmitHLSLBuiltinCallExpr(CodeGenFunction &CGF,
 
				           StringRef intrinsicGroup;
			
 
				           hlsl::GetIntrinsicOp(FD, intrinsicOpcode, intrinsicGroup);
			
 
				           IntrinsicOp opcode = static_cast<IntrinsicOp>(intrinsicOpcode);
			
 
				-          if (Value *Result = TryEvalIntrinsic(CI, opcode)) {
			
 
				+          if (Value *Result = TryEvalIntrinsic(CI, opcode, CGM.getLangOpts().HLSLVersion)) {
			
 
				             RV = RValue::get(Result);
			
 
				           }
			
 
				         }
			
--- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp
@@ -43,6 +43,7 @@
 
				 
			
 
				 #include <vector>
			
 
				 #include <memory>
			
 
				+#include <fenv.h>
			
 
				 
			
 
				 #include "CGHLSLMSHelper.h"
			
 
				 
			
@@ -1400,7 +1401,7 @@ void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
 
				 
			
 
				 namespace CGHLSLMSHelper {
			
 
				 
			
 
				-Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
			
 
				+Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp, unsigned hlslVersion) {
			
 
				   switch (intriOp) {
			
 
				   case IntrinsicOp::IOP_tan: {
			
 
				     return EvalUnaryIntrinsic(CI, tanf, tan);
			
@@ -1527,7 +1528,22 @@ Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
 
				     return EvalUnaryIntrinsic(CI, floorf, floor);
			
 
				   } break;
			
 
				   case IntrinsicOp::IOP_round: {
			
 
				-    return EvalUnaryIntrinsic(CI, roundf, round);
			
 
				+    // round intrinsic could exhibit different behaviour for constant and runtime evaluations.
			
 
				+    // E.g., for round(0.5): constant evaluation results in 1 (away from zero rounding), 
			
 
				+    // while runtime evaluation results in 0 (nearest even rounding).
			
 
				+    // 
			
 
				+    // For back compat, DXC still preserves the above behavior for language versions 2016 or below.
			
 
				+    // However, for newer language versions, DXC now always uses nearest even for round() intrinsic in all
			
 
				+    // cases.
			
 
				+    if (hlslVersion <= 2016) {
			
 
				+      return EvalUnaryIntrinsic(CI, roundf, round);
			
 
				+    } else {
			
 
				+      auto roundingMode = fegetround();
			
 
				+      fesetround(FE_TONEAREST);
			
 
				+      Value *result = EvalUnaryIntrinsic(CI, nearbyintf, nearbyint);
			
 
				+      fesetround(roundingMode);
			
 
				+      return result;
			
 
				+    }
			
 
				   } break;
			
 
				   case IntrinsicOp::IOP_trunc: {
			
 
				     return EvalUnaryIntrinsic(CI, truncf, trunc);
			
--- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h
+++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h
@@ -188,7 +188,7 @@ void StructurizeMultiRet(llvm::Module &M,
 
				                          bool bWaveEnabledStage,
			
 
				                          llvm::SmallVector<llvm::BranchInst *, 16> &DxBreaks);
			
 
				 
			
 
				-llvm::Value *TryEvalIntrinsic(llvm::CallInst *CI, hlsl::IntrinsicOp intriOp);
			
 
				+llvm::Value *TryEvalIntrinsic(llvm::CallInst *CI, hlsl::IntrinsicOp intriOp, unsigned hlslVersion);
			
 
				 void SimpleTransformForHLDXIR(llvm::Module *pM);
			
 
				 void ExtensionCodeGen(hlsl::HLModule &HLM, clang::CodeGen::CodeGenModule &CGM);
			
 
				 } // namespace CGHLSLMSHelper
			
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/rounding/Round_ne_const.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/rounding/Round_ne_const.hlsl
@@ -0,0 +1,78 @@
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=0.5  %s | %FileCheck -check-prefix=FLT_RND_1 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-0.5  %s | %FileCheck -check-prefix=FLT_RND_2 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.5  %s | %FileCheck -check-prefix=FLT_RND_3 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-1.5  %s | %FileCheck -check-prefix=FLT_RND_4 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.6  %s | %FileCheck -check-prefix=FLT_RND_5 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.3  %s | %FileCheck -check-prefix=FLT_RND_6 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=0.5 -HV 2016 %s | %FileCheck -check-prefix=FLT_RND_7 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-0.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_8 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_9 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=-1.5 -HV 2016 %s  | %FileCheck -check-prefix=FLT_RND_10 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.6 -HV 2016  %s | %FileCheck -check-prefix=FLT_RND_11 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E fr -DVAL=1.3 -HV 2016  %s | %FileCheck -check-prefix=FLT_RND_12 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=0.5  %s | %FileCheck -check-prefix=DBL_RND_1 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-0.5  %s | %FileCheck -check-prefix=DBL_RND_2 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.5  %s | %FileCheck -check-prefix=DBL_RND_3 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-1.5  %s | %FileCheck -check-prefix=DBL_RND_4 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.6  %s | %FileCheck -check-prefix=DBL_RND_5 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.3  %s | %FileCheck -check-prefix=DBL_RND_6 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=0.5 -HV 2016 %s | %FileCheck -check-prefix=DBL_RND_7 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-0.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_8 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_9 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=-1.5 -HV 2016 %s  | %FileCheck -check-prefix=DBL_RND_10 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.6 -HV 2016  %s | %FileCheck -check-prefix=DBL_RND_11 %s
			
 
				+// RUN: %dxc -T vs_6_0 -E dr -DVAL=1.3 -HV 2016  %s | %FileCheck -check-prefix=DBL_RND_12 %s
			
 
				+
			
 
				+// round intrinsic could exhibit different behaviour for constant and runtime evaluations.
			
 
				+// E.g., for round(0.5): constant evaluation results in 1 (away from zero rounding), 
			
 
				+// while runtime evaluation results in 0 (nearest even rounding).
			
 
				+// 
			
 
				+// For back compat, DXC still preserves the above behavior for language versions 2016 or below.
			
 
				+// However, for newer language versions, DXC now always uses nearest even for round() intrinsic in all
			
 
				+// cases.
			
 
				+
			
 
				+
			
 
				+// FLT_RND_1: call void @dx.op.storeOutput{{.*}} float 0.000000e+00
			
 
				+// FLT_RND_2: call void @dx.op.storeOutput{{.*}} float -0.000000e+00
			
 
				+// FLT_RND_3: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// FLT_RND_4: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
			
 
				+// FLT_RND_5: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// FLT_RND_6: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
			
 
				+
			
 
				+// FLT_RND_7: select i1 %{{.*}}, float 0.000000e+00, float 1.000000e+00
			
 
				+// FLT_RND_8: select i1 %{{.*}}, float -0.000000e+00, float -1.000000e+00
			
 
				+// FLT_RND_9: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// FLT_RND_10: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
			
 
				+// FLT_RND_11: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// FLT_RND_12: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
			
 
				+
			
 
				+// DBL_RND_1: call void @dx.op.storeOutput{{.*}} float 0.000000e+00
			
 
				+// DBL_RND_2: call void @dx.op.storeOutput{{.*}} float -0.000000e+00
			
 
				+// DBL_RND_3: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// DBL_RND_4: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
			
 
				+// DBL_RND_5: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// DBL_RND_6: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
			
 
				+
			
 
				+// DBL_RND_7: select i1 %{{.*}}, float 0.000000e+00, float 1.000000e+00
			
 
				+// DBL_RND_8: select i1 %{{.*}}, float -0.000000e+00, float -1.000000e+00
			
 
				+// DBL_RND_9: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// DBL_RND_10: call void @dx.op.storeOutput{{.*}} float -2.000000e+00
			
 
				+// DBL_RND_11: call void @dx.op.storeOutput{{.*}} float 2.000000e+00
			
 
				+// DBL_RND_12: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
			
 
				+
			
 
				+float fr(float f : INPUT) : OUTPUT { // float entry point (-E fr); VAL comes from -DVAL on the RUN lines
			
 
				+  if (f == VAL) // runtime input compared against the constant under test
			
 
				+    return round(f); // round() applied to a runtime value
			
 
				+  else
			
 
				+    return round(VAL); // round() applied to a compile-time constant
			
 
				+}
			
 
				+
			
 
				+RWStructuredBuffer<double> buf;
			
 
				+
			
 
				+float dr() : OUTPUT { // double entry point (-E dr); VAL comes from -DVAL on the RUN lines
			
 
				+  double d = buf[0]; // runtime double value read from the structured buffer
			
 
				+  if (d == VAL) // runtime value compared against the constant under test
			
 
				+    return round(d); // round() applied to a runtime double
			
 
				+  else
			
 
				+    return round(VAL); // round() applied to a compile-time constant
			
 
				+}