8 years ago · 489147d88c
--- a/include/dxc/HLSL/DxilGenerationPass.h
+++ b/include/dxc/HLSL/DxilGenerationPass.h
@@ -43,6 +43,7 @@ ModulePass *createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCode
 
				 ModulePass *createHLEmitMetadataPass();
			
 
				 ModulePass *createHLEnsureMetadataPass();
			
 
				 ModulePass *createDxilEmitMetadataPass();
			
 
				+FunctionPass *createDxilExpandTrigIntrinsicsPass();
			
 
				 ModulePass *createDxilLoadMetadataPass();
			
 
				 ModulePass *createDxilPrecisePropagatePass();
			
 
				 FunctionPass *createDxilLegalizeResourceUsePass();
			
@@ -57,6 +58,7 @@ void initializeDxilGenerationPassPass(llvm::PassRegistry&);
 
				 void initializeHLEnsureMetadataPass(llvm::PassRegistry&);
			
 
				 void initializeHLEmitMetadataPass(llvm::PassRegistry&);
			
 
				 void initializeDxilEmitMetadataPass(llvm::PassRegistry&);
			
 
				+void initializeDxilExpandTrigIntrinsicsPass(llvm::PassRegistry&);
			
 
				 void initializeDxilLoadMetadataPass(llvm::PassRegistry&);
			
 
				 void initializeDxilPrecisePropagatePassPass(llvm::PassRegistry&);
			
 
				 void initializeDxilLegalizeResourceUsePassPass(llvm::PassRegistry&);
			
--- a/lib/HLSL/CMakeLists.txt
+++ b/lib/HLSL/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMHLSL
 
				   DxilContainerAssembler.cpp
			
 
				   DxilContainerReflection.cpp
			
 
				   DxilEliminateOutputDynamicIndexing.cpp
			
 
				+  DxilExpandTrigIntrinsics.cpp
			
 
				   DxilGenerationPass.cpp
			
 
				   DxilInterpolationMode.cpp
			
 
				   DxilLegalizeSampleOffsetPass.cpp
			
--- a/lib/HLSL/DxcOptimizer.cpp
+++ b/lib/HLSL/DxcOptimizer.cpp
@@ -85,6 +85,7 @@ HRESULT SetupRegistryPassForHLSL() {
 
				     initializeDxilCondenseResourcesPass(Registry);
			
 
				     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
			
 
				     initializeDxilEmitMetadataPass(Registry);
			
 
				+    initializeDxilExpandTrigIntrinsicsPass(Registry);
			
 
				     initializeDxilGenerationPassPass(Registry);
			
 
				     initializeDxilLegalizeEvalOperationsPass(Registry);
			
 
				     initializeDxilLegalizeResourceUsePassPass(Registry);
			
--- a/lib/HLSL/DxilExpandTrigIntrinsics.cpp
+++ b/lib/HLSL/DxilExpandTrigIntrinsics.cpp
@@ -0,0 +1,519 @@
 
				+///////////////////////////////////////////////////////////////////////////////
			
 
				+//                                                                           //
			
 
				+// DxilExpandTrigIntrinsics.cpp                                              //
			
 
				+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
			
 
				+// This file is distributed under the University of Illinois Open Source     //
			
 
				+// License. See LICENSE.TXT for details.                                     //
			
 
				+//                                                                           //
			
 
				+// Expand trigonometric intrinsics to a sequence of dxil instructions.       //
			
 
				+// ========================================================================= //
			
 
				+//
			
 
				+// We provide expansions to approximate several trigonometric functions that
			
 
				+// typically do not have native instructions in hardware. The details of each
			
 
				+// expansion are given below, but typically the expansion occurs in three steps:
			
 
				+// 
			
 
				+//     1. Perform range reduction (if necessary) to reduce input range
			
 
				+//        to a value that works with the approximation.
			
 
				+//     2. Compute an approximation to the function (typically by evaluating 
			
 
				+//        a polynomial).
			
 
				+//     3. Perform range expansion (if necessary) to map the result back to
			
 
				+//        the original range.
			
 
				+// 
			
 
				+// For example, say we are expanding f(x) using an approximation to f, call it
			
 
				+// f*(x). And assume that f* only works for positive inputs, but we know that
			
 
				+// f(-x) = -f(x). Then the expansion would be
			
 
				+// 
			
 
				+//     1. a = abs(x)
			
 
				+//     2. v = f*(a)
			
 
				+//     3. e = x < 0 ? -v : v
			
 
				+// 
			
 
				+// where e contains the final expanded result.
			
 
				+// 
			
 
				+// References
			
 
				+// ---------------------------------------------------------------------------
			
 
				+// [HMF] Handbook of Mathematical Formulas by Abramowitz and Stegun, 1964
			
 
				+// [ADC] Approximations for Digital Computers by Hastings, 1955
			
 
				+// [WIK] Wikipedia, 2017
			
 
				+// 
			
 
				+// The approximation functions mostly come from [ADC]. The approximations
			
 
				+// are also referenced in [HMF], but they give original credit to [ADC].
			
 
				+// 
			
 
				+///////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+#include "dxc/HLSL/DxilGenerationPass.h"
			
 
				+#include "dxc/HLSL/DxilOperations.h"
			
 
				+#include "dxc/HLSL/DxilSignatureElement.h"
			
 
				+#include "dxc/HLSL/DxilModule.h"
			
 
				+#include "dxc/Support/Global.h"
			
 
				+#include "dxc/HLSL/DxilInstructions.h"
			
 
				+
			
 
				+#include "llvm/IR/Module.h"
			
 
				+#include "llvm/Pass.h"
			
 
				+#include "llvm/IR/IRBuilder.h"
			
 
				+#include "llvm/IR/InstIterator.h"
			
 
				+#include "llvm/ADT/MapVector.h"
			
 
				+
			
 
				+#include <cmath>
			
 
				+#include <utility>
			
 
				+
			
 
				+using namespace llvm;
			
 
				+using namespace hlsl;
			
 
				+
			
 
				+namespace {
				+
				+// Function pass that replaces calls to the Acos, Asin, Atan, Hcos, Hsin and
				+// Htan dxil intrinsics with expansions built from simpler instructions.
				+class DxilExpandTrigIntrinsics : public FunctionPass {
				+public:
				+  static char ID; // Pass identification, replacement for typeid
				+  explicit DxilExpandTrigIntrinsics() : FunctionPass(ID) {}
				+
				+  const char *getPassName() const override {
				+    return "DXIL expand trig intrinsics";
				+  }
				+
				+  bool runOnFunction(Function &F) override;
				+
				+private:
				+  // Calls selected for expansion, in the order they were discovered.
				+  using IntrinsicList = std::vector<CallInst *>;
				+
				+  // Discovery and driver helpers.
				+  IntrinsicList findTrigFunctionsToExpand(Function &F);
				+  CallInst *isExpandableTrigIntrinsicCall(Instruction *I);
				+  bool expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist);
				+  void prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic);
				+
				+  // Expansion implementations. Each returns the value that replaces the
				+  // original intrinsic call.
				+  Value *expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM);
				+  Value *expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM);
				+  Value *expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM);
				+  Value *expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM);
				+  Value *expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM);
				+  Value *expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM);
				+};
				+
				+// Math constants.
				+// Values taken from https://msdn.microsoft.com/en-us/library/4hwaceh6.aspx.
				+// Replicated here because they are not part of standard C++.
				+namespace math {
				+  constexpr double PI    = 3.14159265358979323846;
				+  constexpr double PI_2  = 1.57079632679489661923;
				+  constexpr double LOG2E = 1.44269504088896340736;
				+}
				+
				+}
			
 
				+
			
 
				+
			
 
				+// Pass entry point: gathers the expandable trig intrinsic calls in F and
				+// hands them to the expander. The expander's result is returned as the
				+// "function was modified" flag.
				+bool DxilExpandTrigIntrinsics::runOnFunction(Function &F) {
				+  Module *parent = F.getParent();
				+  DxilModule &dxilModule = parent->GetOrCreateDxilModule();
				+  const IntrinsicList worklist = findTrigFunctionsToExpand(F);
				+  return expandTrigIntrinsics(dxilModule, worklist);
				+}
			
 
				+
			
 
				+// Returns I as a CallInst when it is a dxil op call whose opcode is one of
				+// the six intrinsics this pass expands; returns nullptr otherwise.
				+CallInst *DxilExpandTrigIntrinsics::isExpandableTrigIntrinsicCall(Instruction *I) {
				+  if (!OP::IsDxilOpFuncCallInst(I))
				+    return nullptr;
				+
				+  const OP::OpCode opcode = OP::GetDxilOpFuncCallInst(I);
				+  const bool expandable = opcode == OP::OpCode::Acos ||
				+                          opcode == OP::OpCode::Asin ||
				+                          opcode == OP::OpCode::Atan ||
				+                          opcode == OP::OpCode::Hcos ||
				+                          opcode == OP::OpCode::Hsin ||
				+                          opcode == OP::OpCode::Htan;
				+  if (!expandable)
				+    return nullptr;
				+
				+  return cast<CallInst>(I);
				+}
			
 
				+
			
 
				+// Walks every instruction of F and collects, in iteration order, the calls
				+// accepted by isExpandableTrigIntrinsicCall.
				+DxilExpandTrigIntrinsics::IntrinsicList DxilExpandTrigIntrinsics::findTrigFunctionsToExpand(Function &F) {
				+  IntrinsicList found;
				+  for (inst_iterator it = inst_begin(F), end = inst_end(F); it != end; ++it) {
				+    CallInst *candidate = isExpandableTrigIntrinsicCall(&*it);
				+    if (!candidate)
				+      continue;
				+    found.push_back(candidate);
				+  }
				+  return found;
				+}
			
 
				+
			
 
				+// A builder counts as "precise" when it carries no fast-math flags at all.
				+static bool isPreciseBuilder(IRBuilder<> &builder) {
				+  FastMathFlags current = builder.getFastMathFlags();
				+  const bool hasFastMath = current.any();
				+  return !hasFastMath;
				+}
			
 
				+
			
 
				+// Configures the builder's fast-math flags: cleared flags for a precise
				+// expansion, unsafe-algebra flags otherwise.
				+static void setPreciseBuilder(IRBuilder<> &builder, bool precise) {
				+  FastMathFlags fmf;
				+  if (!precise) {
				+    fmf.setUnsafeAlgebra();
				+  } else {
				+    fmf.clear();
				+  }
				+  builder.SetFastMathFlags(fmf);
				+}
			
 
				+
			
 
				+// Positions the builder immediately before the intrinsic call and sets its
				+// fast-math flags from whether the dxil module reports the call as precise.
				+void DxilExpandTrigIntrinsics::prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic) {
				+  Module *owner = intrinsic->getModule();
				+  DxilModule &dxilModule = owner->GetOrCreateDxilModule();
				+  builder.SetInsertPoint(intrinsic);
				+
				+  const bool precise = dxilModule.IsPrecise(intrinsic);
				+  setPreciseBuilder(builder, precise);
				+}
			
 
				+  
			
 
				+// Expands every intrinsic call in the worklist.
				+//
				+// For each call the builder is positioned before it, the expansion matching
				+// its opcode is emitted, all uses of the call are redirected to the expansion
				+// and the call is erased.
				+//
				+// Returns true when at least one call was replaced.
				+bool DxilExpandTrigIntrinsics::expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist) {
				+  bool changed = false;
				+  IRBuilder<> builder(DM.GetCtx());
				+  for (CallInst *intrinsic : worklist) {
				+    prepareBuilderToExpandIntrinsic(builder, intrinsic);
				+
				+    Value *expansion = nullptr;
				+    switch (OP::GetDxilOpFuncCallInst(intrinsic)) {
				+    case OP::OpCode::Acos: expansion = expandACos(builder, intrinsic, DM); break;
				+    case OP::OpCode::Asin: expansion = expandASin(builder, intrinsic, DM); break;
				+    case OP::OpCode::Atan: expansion = expandATan(builder, intrinsic, DM); break;
				+    case OP::OpCode::Hcos: expansion = expandHCos(builder, intrinsic, DM); break;
				+    case OP::OpCode::Hsin: expansion = expandHSin(builder, intrinsic, DM); break;
				+    case OP::OpCode::Htan: expansion = expandHTan(builder, intrinsic, DM); break;
				+    default:
				+      assert(false && "unexpected intrinsic");
				+      break;
				+    }
				+
				+    assert(expansion);
				+    // When asserts are compiled out, an unexpected opcode would otherwise
				+    // reach replaceAllUsesWith with a null value. Leave such a call in
				+    // place instead of corrupting the IR.
				+    if (!expansion)
				+      continue;
				+
				+    intrinsic->replaceAllUsesWith(expansion);
				+    intrinsic->eraseFromParent();
				+    changed = true;
				+  }
				+
				+  return changed;
				+}
			
 
				+
			
 
				+// Helper
				+// Emits a call to the dxil op function for `opcode`, overloaded on the type
				+// of X, with arguments (opcode as an i32 constant, X). When the builder is
				+// precise the new call is marked precise as well.
				+//
				+static Value *emitUnaryFloat(IRBuilder<> &builder, Value *X, OP *dxOp, OP::OpCode opcode, StringRef name) {
				+  Function *opFunc = dxOp->GetOpFunc(opcode, X->getType());
				+  Value *opcodeArg = dxOp->GetI32Const(static_cast<int>(opcode));
				+  Value *callArgs[] = { opcodeArg, X };
				+
				+  CallInst *call = builder.CreateCall(opFunc, callArgs, name);
				+  if (!isPreciseBuilder(builder))
				+    return call;
				+
				+  DxilMDHelper::MarkPrecise(call);
				+  return call;
				+}
			
 
				+
			
 
				+// Helper
				+// Emits the unary dxil op call for OpCode::FAbs applied to X.
				+//
				+static Value *emitFAbs(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
				+  const OP::OpCode absOpcode = OP::OpCode::FAbs;
				+  return emitUnaryFloat(builder, X, dxOp, absOpcode, name);
				+}
			
 
				+
			
 
				+// Helper
				+// Emits the unary dxil op call for OpCode::Sqrt applied to X.
				+//
				+static Value *emitSqrt(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
				+  const OP::OpCode sqrtOpcode = OP::OpCode::Sqrt;
				+  return emitUnaryFloat(builder, X, dxOp, sqrtOpcode, name);
				+}
			
 
				+
			
 
				+// Helper
				+// return sqrt(1 - X) * psi*(X)
				+//
				+// The polynomial is evaluated with Horner's method:
				+//
				+// psi*(X) = a0 + a1x + a2x^2 + a3x^3
				+//         = a0 + x(a1 + a2x + a3x^2)
				+//         = a0 + x(a1 + x(a2 + a3x))
				+//
				+// Instructions are emitted in this order: the subtraction, the sqrt call,
				+// three multiply/add pairs for the polynomial, and the final product.
				+//
				+static Value *emitSqrt1mXtimesPsiX(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
				+  Type *Ty = X->getType();
				+
				+  // sqrt(1-x)
				+  Value *oneMinusX = builder.CreateFSub(ConstantFP::get(Ty, 1.0), X, name);
				+  Value *root = emitSqrt(builder, oneMinusX, dxOp, name);
				+
				+  // psi*(x): start from a3 and fold in a2, a1, a0 from the inside out.
				+  const double lowerCoefficients[] = { 0.0742610, -0.2121144, 1.5707288 };
				+  Value *psi = ConstantFP::get(Ty, -0.0187293);
				+  for (const double coefficient : lowerCoefficients) {
				+    psi = builder.CreateFMul(X, psi, name);
				+    psi = builder.CreateFAdd(psi, ConstantFP::get(Ty, coefficient), name);
				+  }
				+
				+  // sqrt(1-x) * psi*(x)
				+  return builder.CreateFMul(root, psi, name);
				+}
			
 
				+
			
 
				+// Helper
				+// return e^x, e^-x
				+//
				+// The dxil Exp function computes 2^x rather than e^x, so the argument is
				+// rescaled first using the change-of-base identity [HMF(p69)]
				+//
				+//  e^x = 2^{x * log_2(e)}
				+//
				+// The first element of the returned pair is e^x, the second is e^-x.
				+//
				+static std::pair<Value *, Value *> emitExEmx(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
				+  Type *Ty = X->getType();
				+
				+  // x * log2(e), then 2^(that)
				+  Value *scaled = builder.CreateFMul(X, ConstantFP::get(Ty, math::LOG2E), name);
				+  Value *expPos = emitUnaryFloat(builder, scaled, dxOp, OP::OpCode::Exp, name);
				+
				+  // Negate the scaled argument as (0 - scaled), then 2^(that)
				+  Value *negScaled = builder.CreateFSub(ConstantFP::get(Ty, 0.0), scaled, name);
				+  Value *expNeg = emitUnaryFloat(builder, negScaled, dxOp, OP::OpCode::Exp, name);
				+
				+  return std::pair<Value *, Value *>(expPos, expNeg);
				+}
			
 
				+
			
 
				+// Asin
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// Function
			
 
				+//    arcsin X = pi/2  - sqrt(1 - X) * psi(X)
			
 
				+//
			
 
				+// Range
			
 
				+//    0 <= X <= 1
			
 
				+//
			
 
				+// Approximation
			
 
				+//    Psi*(X) = a0 + a1x + a2x^2 + a3x^3
			
 
				+//      a0 =  1.5707288
			
 
				+//      a1 = -0.2121144
			
 
				+//      a2 =  0.0742610
			
 
				+//      a3 = -0.0187293
			
 
				+// 
			
 
				+// The domain of the approximation is 0 <=x <= 1, but the domain of asin is
			
 
				+// -1 <= x <= 1. So we need to perform a range reduction to [0,1] before
			
 
				+// computing the approximation. 
			
 
				+// 
			
 
				+// We use the following identity from [HMF(p80),WIK] for range reduction
			
 
				+// 
			
 
				+// 	asin(-x) = -asin(x)
			
 
				+// 
			
 
				+// We take the absolute value of x, compute asin(x) using the approximation
			
 
				+// and then negate the value if x < 0.
			
 
				+//
			
 
				+// In [HMF] the authors claim an error, e, of |e| <= 5e-5, but the error graph
			
 
				+// in [ADC] looks like the error can be larger than that for some inputs.
			
 
				+// 
			
 
				+Value *DxilExpandTrigIntrinsics::expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM) {
				+  assert(asin);
				+  const StringRef name = "asin.x";
				+  Value *X = asin.get_value();
				+  Type *Ty = X->getType();
				+  Value *halfPi = ConstantFP::get(Ty, math::PI_2);
				+  Value *zero = ConstantFP::get(Ty, 0.0);
				+
				+  // Reduce to [0, 1] by working on |x|.
				+  Value *magnitude = emitFAbs(builder, X, DM.GetOP(), name);
				+
				+  // asin(|x|) = pi/2 - sqrt(1 - |x|) * psi*(|x|), plus its negation.
				+  Value *rootTimesPsi = emitSqrt1mXtimesPsiX(builder, magnitude, DM.GetOP(), name);
				+  Value *positiveResult = builder.CreateFSub(halfPi, rootTimesPsi, name);
				+  Value *negativeResult = builder.CreateFSub(zero, positiveResult, name);
				+
				+  // Expand back to [-1, 1]: pick the negated value when x < 0.
				+  Value *isNegative = builder.CreateFCmp(CmpInst::FCMP_ULT, X, zero, name);
				+  return builder.CreateSelect(isNegative, negativeResult, positiveResult, name);
				+}
			
 
				+
			
 
				+
			
 
				+// Acos
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// The acos expansion uses the following identity [WIK] so that we can use the
			
 
				+// same approximation psi*(x) that we use for asin.
			
 
				+// 
			
 
				+// 	acos(x) = pi/2 - asin(x)
			
 
				+// 
			
 
				+// Substituting the equation for asin(x) we get
			
 
				+// 
			
 
				+// 	acos(x) = pi/2 - asin(x)
			
 
				+// 	        = pi/2 - (pi/2 - sqrt(1-x)*psi(x))
			
 
				+// 	        = sqrt(1-x)*psi(x)
			
 
				+// 
			
 
				+// We use the following identity from [HMF(p80),WIK] for range reduction
			
 
				+// 
			
 
				+// 	acos(-x) = pi - acos(x)
			
 
				+//               = pi - sqrt(1-x)*psi(x)
			
 
				+//
			
 
				+// We take the absolute value of x, compute acos(x) using the approximation
			
 
				+// and then subtract from pi if x < 0.
			
 
				+//
			
 
				+Value *DxilExpandTrigIntrinsics::expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM) {
				+  assert(acos);
				+  const StringRef name = "acos.x";
				+  Value *X = acos.get_value();
				+  Type *Ty = X->getType();
				+  Value *pi = ConstantFP::get(Ty, math::PI);
				+  Value *zero = ConstantFP::get(Ty, 0.0);
				+
				+  // Reduce to [0, 1] by working on |x|.
				+  Value *magnitude = emitFAbs(builder, X, DM.GetOP(), name);
				+
				+  // acos(|x|) = sqrt(1 - |x|) * psi*(|x|); acos(-|x|) = pi - acos(|x|).
				+  Value *positiveResult = emitSqrt1mXtimesPsiX(builder, magnitude, DM.GetOP(), name);
				+  Value *negativeResult = builder.CreateFSub(pi, positiveResult, name);
				+
				+  // Expand back to [-1, 1]: pick the reflected value when x < 0.
				+  Value *isNegative = builder.CreateFCmp(CmpInst::FCMP_ULT, X, zero, name);
				+  return builder.CreateSelect(isNegative, negativeResult, positiveResult, name);
				+}
			
 
				+
			
 
				+// Atan
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// Function
			
 
				+//    arctan X
			
 
				+//
			
 
				+// Range
			
 
				+//    -1 <= X <= 1
			
 
				+//
			
 
				+// Approximation
			
 
				+//    arctan*(x) = c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9
			
 
				+//      c1 =  0.9998660
			
 
				+//      c3 = -0.3302995
			
 
				+//      c5 =  0.1801410
			
 
				+//      c7 = -0.0851330
			
 
				+//      c9 =  0.0208351
			
 
				+// 	
			
 
				+// The polynomial is evaluated using Horner's method to efficiently compute the
			
 
				+// value
			
 
				+// 
			
 
				+// 	  c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9 
			
 
				+// 	= x(c1 + c3x^2 + c5x^4 + c7x^6 + c9x^8)
			
 
				+// 	= x(c1 + x^2(c3 + c5x^2 + c7x^4 + c9x^6))
			
 
				+// 	= x(c1 + x^2(c3 + x^2(c5 + c7x^2 + c9x^4)))
			
 
				+// 	= x(c1 + x^2(c3 + x^2(c5 + x^2(c7 + c9x^2))))
			
 
				+// 	
			
 
				+// The range reduction is a little more complicated for atan because the
			
 
				+// domain of atan is [-inf, inf], but the domain of the approximation is only
			
 
				+// [-1, 1]. We use the following identities for range reduction from
			
 
				+// [HMF(p80),WIK]
			
 
				+// 	
			
 
				+// 	arctan(-x) = -arctan(x)
			
 
				+//      arctan(x)   = pi/2 - arctan(1/x) if x > 0
			
 
				+// 
			
 
				+// The first identity allows us to only work with positive numbers. The second
			
 
				+// identity allows us to reduce the range to [0,1]. We first convert the value
			
 
				+// to positive by taking abs(x). Then if x > 1 we compute arctan(1/x).
			
 
				+// 
			
 
				+// To expand the range, if x > 1 we subtract the computed value from
			
 
				+// pi/2 and if x is negative then negate the final value.
			
 
				+//
			
 
				+Value *DxilExpandTrigIntrinsics::expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM) {
				+  assert(atan);
				+  const StringRef name = "atan.x";
				+  Value *X = atan.get_value();
				+  Type *Ty = X->getType();
				+  Value *halfPi = ConstantFP::get(Ty, math::PI_2);
				+  Value *one = ConstantFP::get(Ty, 1.0);
				+  Value *zero = ConstantFP::get(Ty, 0.0);
				+
				+  // Range reduction to [0, inf]: work on |x|.
				+  Value *magnitude = emitFAbs(builder, X, DM.GetOP(), name);
				+
				+  // Range reduction to [0, 1]: use 1/|x| when |x| > 1.
				+  Value *isAboveOne = builder.CreateFCmp(CmpInst::FCMP_UGT, magnitude, one, name);
				+  Value *reciprocal = builder.CreateFDiv(one, magnitude, name);
				+  Value *reduced = builder.CreateSelect(isAboveOne, reciprocal, magnitude, name);
				+
				+  // Approximation by Horner's method in t = reduced^2:
				+  //   reduced * (c1 + t(c3 + t(c5 + t(c7 + t*c9))))
				+  // The innermost product is emitted as (t, c9); the later products are
				+  // emitted as (accumulator, t).
				+  Value *squared = builder.CreateFMul(reduced, reduced, name);
				+  Value *poly = builder.CreateFMul(squared, ConstantFP::get(Ty, 0.0208351), name);
				+  poly = builder.CreateFAdd(poly, ConstantFP::get(Ty, -0.0851330), name);
				+  const double outerCoefficients[] = { 0.1801410, -0.3302995, 0.9998660 };
				+  for (const double coefficient : outerCoefficients) {
				+    poly = builder.CreateFMul(poly, squared, name);
				+    poly = builder.CreateFAdd(poly, ConstantFP::get(Ty, coefficient), name);
				+  }
				+  poly = builder.CreateFMul(reduced, poly, name);
				+
				+  // Range expansion to [0, inf]: pi/2 - value when the reciprocal was used.
				+  Value *complement = builder.CreateFSub(halfPi, poly, name);
				+  Value *unsignedResult = builder.CreateSelect(isAboveOne, complement, poly, name);
				+
				+  // Range expansion to [-inf, inf]: negate when x < 0.
				+  Value *negated = builder.CreateFSub(zero, unsignedResult, name);
				+  Value *isNegative = builder.CreateFCmp(CmpInst::FCMP_ULT, X, zero, name);
				+  return builder.CreateSelect(isNegative, negated, unsignedResult, name);
				+}
			
 
				+
			
 
				+// Hcos
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// We use the following identity for computing hcos(x) from [HMF(p83)]
			
 
				+// 	
			
 
				+//    cosh(x) = (e^x + e^-x) / 2
			
 
				+// 
			
 
				+// No range reduction is needed.
			
 
				+//
			
 
				+Value *DxilExpandTrigIntrinsics::expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM) {
				+  assert(hcos);
				+  const StringRef name = "hcos.x";
				+  Value *X = hcos.get_value();
				+  Value *two = ConstantFP::get(X->getType(), 2.0);
				+
				+  // first = e^x, second = e^-x
				+  const std::pair<Value *, Value *> exps = emitExEmx(builder, X, DM.GetOP(), name);
				+
				+  // (e^x + e^-x) / 2
				+  Value *sum = builder.CreateFAdd(exps.first, exps.second, name);
				+  return builder.CreateFDiv(sum, two, name);
				+}
			
 
				+
			
 
				+// Hsin
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// We use the following identity for computing hsin(x) from [HMF(p83)]
			
 
				+//
			
 
				+//    sinh(x) = (e^x - e^-x) / 2
			
 
				+//
			
 
				+// No range reduction is needed.
			
 
				+//
			
 
				+Value *DxilExpandTrigIntrinsics::expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM) {
				+  assert(hsin);
				+  const StringRef name = "hsin.x";
				+  Value *X = hsin.get_value();
				+  Value *two = ConstantFP::get(X->getType(), 2.0);
				+
				+  // first = e^x, second = e^-x
				+  const std::pair<Value *, Value *> exps = emitExEmx(builder, X, DM.GetOP(), name);
				+
				+  // (e^x - e^-x) / 2
				+  Value *difference = builder.CreateFSub(exps.first, exps.second, name);
				+  return builder.CreateFDiv(difference, two, name);
				+}
			
 
				+
			
 
				+// Htan
			
 
				+// ----------------------------------------------------------------------------
			
 
				+// We use the following identity for computing htan(x) from [HMF(p83)]
			
 
				+//
			
 
				+//    tanh(x) = (e^x - e^-x) / (e^x + e^-x)
			
 
				+//
			
 
				+// No range reduction is needed.
			
 
				+//
			
 
				+Value *DxilExpandTrigIntrinsics::expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM) {
				+  assert(htan);
				+  const StringRef name = "htan.x";
				+  Value *X = htan.get_value();
				+
				+  // first = e^x, second = e^-x
				+  const std::pair<Value *, Value *> exps = emitExEmx(builder, X, DM.GetOP(), name);
				+
				+  // (e^x - e^-x) / (e^x + e^-x); the numerator is emitted first.
				+  Value *numerator = builder.CreateFSub(exps.first, exps.second, name);
				+  Value *denominator = builder.CreateFAdd(exps.first, exps.second, name);
				+  return builder.CreateFDiv(numerator, denominator, name);
				+}
			
 
				+
			
 
				+// Storage for the pass ID that the constructor hands to FunctionPass.
			
 
				+char DxilExpandTrigIntrinsics::ID = 0;
			
 
				+
			
 
				+// Factory declared in DxilGenerationPass.h: returns a newly allocated
			
 
				+// instance of the pass.
			
 
				+// NOTE(review): ownership presumably passes to the caller / pass manager;
			
 
				+// confirm against the callers.
			
 
				+FunctionPass *llvm::createDxilExpandTrigIntrinsicsPass() {
			
 
				+  return new DxilExpandTrigIntrinsics();
			
 
				+}
			
 
				+
			
 
				+// Registers the pass under the argument name
			
 
				+// "hlsl-dxil-expand-trig-intrinsics", the option the expand_trig tests
			
 
				+// pass to opt.
			
 
				+INITIALIZE_PASS(DxilExpandTrigIntrinsics,
			
 
				+                "hlsl-dxil-expand-trig-intrinsics",
			
 
				+                "DXIL expand trig intrinsics", false, false)
			
--- a/tools/clang/test/HLSL/expand_trig/acos.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/acos.hlsl
@@ -0,0 +1,27 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = call float @dx.op.unary.f32(i32 6, float [[X]]
			
 
				+
			
 
				+// CHECK: [[r1:%.*]]  = fsub fast float 1.000000e+00, [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = call float @dx.op.unary.f32(i32 24, float [[r1]]
			
 
				+
			
 
				+// CHECK: [[r3a:%.*]] = fmul fast float [[r0]], 0xBF932DC600000000
			
 
				+// CHECK: [[r3b:%.*]] = fadd fast float [[r3a]], 0x3FB302C4E0000000
			
 
				+// CHECK: [[r3c:%.*]] = fmul fast float [[r0]], [[r3b]]
			
 
				+// CHECK: [[r3d:%.*]] = fadd fast float [[r3c]], 0xBFCB269080000000
			
 
				+// CHECK: [[r3e:%.*]] = fmul fast float [[r0]], [[r3d]]
			
 
				+// CHECK: [[r3f:%.*]] = fadd fast float [[r3e]], 0x3FF921B480000000
			
 
				+// CHECK: [[r4:%.*]]  = fmul fast float [[r2]], [[r3f]]
			
 
				+
			
 
				+// CHECK: [[r5:%.*]]  = fsub fast float 0x400921FB60000000, [[r4]]
			
 
				+
			
 
				+// CHECK: [[b0:%.*]]  = fcmp fast ult float [[X]], 0.000000e+00
			
 
				+// CHECK: select i1 [[b0]], float [[r5]], float [[r4]]
			
 
				+
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 15
			
 
				+
			
 
				+// Pixel shader entry point: returns acos of the float input, giving the
			
 
				+// expand-trig pass one acos call to rewrite.
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return acos(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/acos_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/acos_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here, full check is done for float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xHA4CB
			
 
				+
			
 
				+
			
 
				+// Pixel shader entry point: min16float variant of the acos test, giving
			
 
				+// the expand-trig pass a half-precision acos call to rewrite.
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return acos(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/asin.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/asin.hlsl
@@ -0,0 +1,28 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = call float @dx.op.unary.f32(i32 6, float [[X]]
			
 
				+
			
 
				+// CHECK: [[r1:%.*]]  = fsub fast float 1.000000e+00, [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = call float @dx.op.unary.f32(i32 24, float [[r1]]
			
 
				+
			
 
				+// CHECK: [[r3a:%.*]] = fmul fast float [[r0]], 0xBF932DC600000000
			
 
				+// CHECK: [[r3b:%.*]] = fadd fast float [[r3a]], 0x3FB302C4E0000000
			
 
				+// CHECK: [[r3c:%.*]] = fmul fast float [[r0]], [[r3b]]
			
 
				+// CHECK: [[r3d:%.*]] = fadd fast float [[r3c]], 0xBFCB269080000000
			
 
				+// CHECK: [[r3e:%.*]] = fmul fast float [[r0]], [[r3d]]
			
 
				+// CHECK: [[r3f:%.*]] = fadd fast float [[r3e]], 0x3FF921B480000000
			
 
				+// CHECK: [[r4:%.*]]  = fmul fast float [[r2]], [[r3f]]
			
 
				+
			
 
				+// CHECK: [[r5:%.*]]  = fsub fast float 0x3FF921FB60000000, [[r4]]
			
 
				+// CHECK: [[r6:%.*]]  = fsub fast float 0.000000e+00, [[r5]]
			
 
				+
			
 
				+// CHECK: [[b0:%.*]]  = fcmp fast ult float [[X]], 0.000000e+00
			
 
				+// CHECK: select i1 [[b0]], float [[r6]], float [[r5]]
			
 
				+
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 16
			
 
				+
			
 
				+// Pixel shader entry point: returns asin of the float input, giving the
			
 
				+// expand-trig pass one asin call to rewrite.
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return asin(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/asin_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/asin_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here, full check is done for float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xHA4CB
			
 
				+
			
 
				+
			
 
				+// Pixel shader entry point: min16float variant of the asin test, giving
			
 
				+// the expand-trig pass a half-precision asin call to rewrite.
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return asin(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/atan.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/atan.hlsl
@@ -0,0 +1,35 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = call float @dx.op.unary.f32(i32 6, float [[X]]
			
 
				+
			
 
				+// CHECK: [[b0:%.*]]  = fcmp fast ugt float [[r0]], 1.000000e+00
			
 
				+// CHECK: [[r1:%.*]]  = fdiv fast float 1.000000e+00, [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = select i1 [[b0]], float [[r1]], float [[r0]]
			
 
				+
			
 
				+// CHECK: [[r3:%.*]]  = fmul fast float [[r2]],  [[r2]]
			
 
				+// CHECK: [[r4a:%.*]] = fmul fast float [[r3]],  0x3F9555CBE0000000
			
 
				+// CHECK: [[r4b:%.*]] = fadd fast float [[r4a]], 0xBFB5CB46C0000000 
			
 
				+// CHECK: [[r4c:%.*]] = fmul fast float [[r4b]], [[r3]]
			
 
				+// CHECK: [[r4d:%.*]] = fadd fast float [[r4c]], 0x3FC70EDC40000000
			
 
				+// CHECK: [[r4e:%.*]] = fmul fast float [[r4d]], [[r3]]
			
 
				+// CHECK: [[r4f:%.*]] = fadd fast float [[r4e]], 0xBFD523A080000000
			
 
				+// CHECK: [[r4g:%.*]] = fmul fast float [[r4f]], [[r3]]
			
 
				+// CHECK: [[r4h:%.*]] = fadd fast float [[r4g]], 0x3FEFFEE700000000
			
 
				+// CHECK: [[r4:%.*]]  = fmul fast float [[r2]],  [[r4h]]
			
 
				+
			
 
				+// CHECK: [[r5:%.*]]  = fsub fast float 0x3FF921FB60000000, [[r4]]
			
 
				+// CHECK: [[r6:%.*]]  = select i1 [[b0]], float [[r5]], float [[r4]]
			
 
				+
			
 
				+// CHECK: [[r7:%.*]]  = fsub fast float 0.000000e+00, [[r6]]
			
 
				+
			
 
				+// CHECK: [[b1:%.*]]  = fcmp fast ult float [[X]], 0.000000e+00
			
 
				+// CHECK: select i1 [[b1]], float [[r7]], float [[r6]]
			
 
				+
			
 
				+
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 17
			
 
				+
			
 
				+// Pixel shader entry point: returns atan of the float input, giving the
			
 
				+// expand-trig pass one atan call to rewrite.
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return atan(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/atan_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/atan_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here; the full check is done for the float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xH2555
			
 
				+
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return atan(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/hcos.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/hcos.hlsl
@@ -0,0 +1,16 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+// Expected expansion: cosh(x) = (exp2(r0) + exp2(-r0)) / 2, where r0 = x * log2(e) and unary opcode 21 is Exp.
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = fmul fast float [[X]], 0x3FF7154760000000
			
 
				+// CHECK: [[r1:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = fsub fast float 0.000000e+00, [[r0]]
			
 
				+// CHECK: [[r3:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r2]]
			
 
				+// CHECK: [[r4:%.*]]  = fadd fast float [[r1]], [[r3]]
			
 
				+// CHECK: fdiv fast float [[r4]], 2.000000e+00
			
 
				+// The original Hcos call (unary opcode 18) must not survive the pass.
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 18
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return cosh(x);
			
 
				+}
--- a/tools/clang/test/HLSL/expand_trig/hcos_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/hcos_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here; the full check is done for the float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xH3DC5
			
 
				+
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return cosh(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/hsin.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/hsin.hlsl
@@ -0,0 +1,16 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+// Expected expansion: sinh(x) = (exp2(r0) - exp2(-r0)) / 2, where r0 = x * log2(e) and unary opcode 21 is Exp.
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = fmul fast float [[X]], 0x3FF7154760000000
			
 
				+// CHECK: [[r1:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = fsub fast float 0.000000e+00, [[r0]]
			
 
				+// CHECK: [[r3:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r2]]
			
 
				+// CHECK: [[r4:%.*]]  = fsub fast float [[r1]], [[r3]]
			
 
				+// CHECK: fdiv fast float [[r4]], 2.000000e+00
			
 
				+// The original Hsin call (unary opcode 19, not Hcos = 18) must not survive the pass.
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 19
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return sinh(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/hsin_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/hsin_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here; the full check is done for the float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xH3DC5
			
 
				+
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return sinh(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/htan.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/htan.hlsl
@@ -0,0 +1,17 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+// Expected expansion: tanh(x) = (exp2(r0) - exp2(-r0)) / (exp2(r0) + exp2(-r0)), where r0 = x * log2(e) and unary opcode 21 is Exp.
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = fmul fast float [[X]], 0x3FF7154760000000
			
 
				+// CHECK: [[r1:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r0]]
			
 
				+// CHECK: [[r2:%.*]]  = fsub fast float 0.000000e+00, [[r0]]
			
 
				+// CHECK: [[r3:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r2]]
			
 
				+// CHECK: [[r4:%.*]]  = fsub fast float [[r1]], [[r3]]
			
 
				+// CHECK: [[r5:%.*]]  = fadd fast float [[r1]], [[r3]]
			
 
				+// CHECK: fdiv fast float [[r4]], [[r5]]
			
 
				+// The original Htan call (unary opcode 20, not Hcos = 18) must not survive the pass.
			
 
				+// CHECK-NOT: call float @dx.op.unary.f32(i32 20
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A) : SV_Target {
			
 
				+    return tanh(x);
			
 
				+}
--- a/tools/clang/test/HLSL/expand_trig/htan_h.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/htan_h.hlsl
@@ -0,0 +1,12 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure the expansion works for half.
			
 
				+// Only checking for minimal expansion here; the full check is done for the float case.
			
 
				+
			
 
				+// CHECK: fmul fast half %{{.*}}, 0xH3DC5
			
 
				+
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+min16float main(min16float x : A) : SV_Target {
			
 
				+    return tanh(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/keep_precise.0.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/keep_precise.0.hlsl
@@ -0,0 +1,19 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure that when the call is precise we do not use fast math flags
			
 
				+// on the floating point instructions and add precise metadata to the
			
 
				+// generated dxil calls.
			
 
				+
			
 
				+// CHECK: [[X:%.*]]   = call float @dx.op.loadInput.f32(i32 4
			
 
				+// CHECK: [[r0:%.*]]  = fmul float [[X]], 0x3FF7154760000000
			
 
				+// CHECK: [[r1:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r0]]), !dx.precise
			
 
				+// CHECK: [[r2:%.*]]  = fsub float 0.000000e+00, [[r0]]
			
 
				+// CHECK: [[r3:%.*]]  = call float @dx.op.unary.f32(i32 21, float [[r2]]), !dx.precise
			
 
				+// CHECK: [[r4:%.*]]  = fsub float [[r1]], [[r3]]
			
 
				+// CHECK: [[r5:%.*]]  = fadd float [[r1]], [[r3]]
			
 
				+// CHECK: fdiv float [[r4]], [[r5]]
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+precise float main(float x : A) : SV_Target {
			
 
				+    return tanh(x);
			
 
				+}
			
--- a/tools/clang/test/HLSL/expand_trig/keep_precise.1.hlsl
+++ b/tools/clang/test/HLSL/expand_trig/keep_precise.1.hlsl
@@ -0,0 +1,30 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-expand-trig-intrinsics | %FileCheck %s
			
 
				+
			
 
				+// Make sure precise->non-precise->precise transition is handled properly.
			
 
				+
			
 
				+// A: precise tanh(x) - no fast-math flags, both dx.op.unary calls tagged !dx.precise
			
 
				+// CHECK: fmul float {{.*}}, 0x3FF7154760000000
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}}), !dx.precise
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}}), !dx.precise
			
 
				+
			
 
				+// B: non-precise tanh(y) - fast-math flags present, no !dx.precise on the dx.op.unary calls
			
 
				+// CHECK: fmul fast float {{.*}}, 0x3FF7154760000000
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}})
			
 
				+// CHECK-NOT: !dx.precise
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}})
			
 
				+// CHECK-NOT: !dx.precise
			
 
				+
			
 
				+// C: precise tanh(z) - precise handling must be back in effect after the non-precise B
			
 
				+// CHECK: fmul float {{.*}}, 0x3FF7154760000000
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}}), !dx.precise
			
 
				+// CHECK: call float @dx.op.unary.f32(i32 21, float {{.*}}), !dx.precise
			
 
				+
			
 
				+// CHECK: ret
			
 
				+
			
 
				+[RootSignature("")]
			
 
				+float main(float x : A, float y : B, float z : C) : SV_Target {
			
 
				+    precise float a = tanh(x);
			
 
				+            float b = tanh(y);
			
 
				+    precise float c = tanh(z);
			
 
				+    return a + b + c;
			
 
				+}
			
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -441,6 +441,7 @@ public:
 
				   TEST_METHOD(CodeGenEvalMatMember)
			
 
				   TEST_METHOD(CodeGenEvalPos)
			
 
				   TEST_METHOD(CodeGenExternRes)
			
 
				+  TEST_METHOD(CodeGenExpandTrig)
			
 
				   TEST_METHOD(CodeGenFloatCast)
			
 
				   TEST_METHOD(CodeGenFloatToBool)
			
 
				   TEST_METHOD(CodeGenFirstbitHi)
			
@@ -2518,6 +2519,23 @@ TEST_F(CompilerTest, CodeGenExternRes) {
 
				   CodeGenTestCheck(L"..\\CodeGenHLSL\\extern_res.hlsl");
			
 
				 }
			
 
				 
			
 
				+TEST_F(CompilerTest, CodeGenExpandTrig) { // Passes each expand_trig test file (float and half variants, plus the two precise cases) to CodeGenTestCheck.
			
 
				+  CodeGenTestCheck(L"expand_trig\\acos.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\acos_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\asin.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\asin_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\atan.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\atan_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\hcos.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\hcos_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\hsin.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\hsin_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\htan.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\htan_h.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\keep_precise.0.hlsl");
			
 
				+  CodeGenTestCheck(L"expand_trig\\keep_precise.1.hlsl");
			
 
				+}
			
 
				+
			
 
				 TEST_F(CompilerTest, CodeGenFloatCast) {
			
 
				   CodeGenTestCheck(L"..\\CodeGenHLSL\\float_cast.hlsl");
			
 
				 }
			
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -1264,6 +1264,7 @@ class db_dxil(object):
 
				         add_pass('hlsl-dxil-eliminate-output-dynamic', 'DxilEliminateOutputDynamicIndexing', 'DXIL eliminate ouptut dynamic indexing', [])
			
 
				         add_pass('hlsl-dxilemit', 'DxilEmitMetadata', 'HLSL DXIL Metadata Emit', [])
			
 
				         add_pass('hlsl-dxilload', 'DxilLoadMetadata', 'HLSL DXIL Metadata Load', [])
			
 
				+        add_pass('hlsl-dxil-expand-trig', 'DxilExpandTrigIntrinsics', 'DXIL expand trig intrinsics', [])
			
 
				         add_pass('hlsl-hca', 'HoistConstantArray', 'HLSL constant array hoisting', [])
			
 
				         add_pass('ipsccp', 'IPSCCP', 'Interprocedural Sparse Conditional Constant Propagation', [])
			
 
				         add_pass('globalopt', 'GlobalOpt', 'Global Variable Optimizer', [])