123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543 |
- ///////////////////////////////////////////////////////////////////////////////
- // //
- // DxilExpandTrigIntrinsics.cpp //
- // Copyright (C) Microsoft Corporation. All rights reserved. //
- // This file is distributed under the University of Illinois Open Source //
- // License. See LICENSE.TXT for details. //
- // //
- // Expand trigonmetric intrinsics to a sequence of dxil instructions. //
- // ========================================================================= //
- //
- // We provide expansions to approximate several trigonmetric functions that
- // typically do not have native instructions in hardware. The details of each
- // expansion is given below, but typically the exansion occurs in three steps
- //
- // 1. Perform range reduction (if necessary) to reduce input range
- // to a value that works with the approximation.
- // 2. Compute an approximation to the function (typically by evaluating
- // a polynomial).
- // 3. Perform range expansion (if necessary) to map the result back to
- // the original range.
- //
- // For example, say we are expanding f(x) using an approximation to f, call it
- // f*(x). And assume that f* only works for positive inputs, but we know that
- // f(-x) = -f(x).Then the expansion would be
- //
- // 1. a = abs(x)
- // 2. v = f*(a)
- // 3. e = x < 0 ? -v : v
- //
- // where e contains the final expanded result.
- //
- // References
- // ---------------------------------------------------------------------------
- // [HMF] Handbook of Mathematical Formulas by Abramowitz and Stegun, 1964
- // [ADC] Approximations for Digital Computers by Hastings, 1955
- // [WIK] Wikipedia, 2017
- //
- // The approximation functions mostly come from [ADC]. The approximations
- // are also referenced in [HMF], but they give original credit to [ADC].
- //
- ///////////////////////////////////////////////////////////////////////////////
- #include "dxc/HLSL/DxilGenerationPass.h"
- #include "dxc/DXIL/DxilOperations.h"
- #include "dxc/DXIL/DxilSignatureElement.h"
- #include "dxc/DXIL/DxilModule.h"
- #include "dxc/Support/Global.h"
- #include "dxc/DXIL/DxilInstructions.h"
- #include "llvm/IR/Module.h"
- #include "llvm/Pass.h"
- #include "llvm/IR/IRBuilder.h"
- #include "llvm/IR/InstIterator.h"
- #include "llvm/ADT/MapVector.h"
- #include <cmath>
- #include <utility>
- using namespace llvm;
- using namespace hlsl;
- namespace {
- class DxilExpandTrigIntrinsics : public FunctionPass {
- private:
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit DxilExpandTrigIntrinsics() : FunctionPass(ID) {}
- const char *getPassName() const override {
- return "DXIL expand trig intrinsics";
- }
-
- bool runOnFunction(Function &F) override;
-
- private:
- typedef std::vector<CallInst *> IntrinsicList;
- IntrinsicList findTrigFunctionsToExpand(Function &F);
- CallInst *isExpandableTrigIntrinsicCall(Instruction *I);
- bool expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist);
- FastMathFlags getFastMathFlagsForIntrinsic(CallInst *intrinsic);
- void prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic);
- // Expansion implementations.
- Value *expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM);
- Value *expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM);
- Value *expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM);
- Value *expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM);
- Value *expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM);
- Value *expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM);
- Value *expandTan(IRBuilder<> &builder, DxilInst_Tan tan, DxilModule &DM);
- };
- // Math constants.
- // Values taken from https://msdn.microsoft.com/en-us/library/4hwaceh6.aspx.
- // Replicated here because they are not part of standard C++.
- namespace math {
- constexpr double PI = 3.14159265358979323846;
- constexpr double PI_2 = 1.57079632679489661923;
- constexpr double LOG2E = 1.44269504088896340736;
- }
- }
- bool DxilExpandTrigIntrinsics::runOnFunction(Function &F) {
- DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
- IntrinsicList intrinsics = findTrigFunctionsToExpand(F);
- const bool changed = expandTrigIntrinsics(DM, intrinsics);
- return changed;
- }
- CallInst *DxilExpandTrigIntrinsics::isExpandableTrigIntrinsicCall(Instruction *I) {
- if (OP::IsDxilOpFuncCallInst(I)) {
- switch (OP::GetDxilOpFuncCallInst(I)) {
- case OP::OpCode::Acos:
- case OP::OpCode::Asin:
- case OP::OpCode::Atan:
- case OP::OpCode::Hcos:
- case OP::OpCode::Hsin:
- case OP::OpCode::Htan:
- case OP::OpCode::Tan:
- return cast<CallInst>(I);
- default: break;
- }
- }
- return nullptr;
- }
- DxilExpandTrigIntrinsics::IntrinsicList DxilExpandTrigIntrinsics::findTrigFunctionsToExpand(Function &F) {
- IntrinsicList worklist;
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (CallInst *call = isExpandableTrigIntrinsicCall(&*I))
- worklist.push_back(call);
- return worklist;
- }
- static bool isPreciseBuilder(IRBuilder<> &builder) {
- return !builder.getFastMathFlags().any();
- }
- static void setPreciseBuilder(IRBuilder<> &builder, bool precise) {
- FastMathFlags flags;
- if (precise)
- flags.clear();
- else
- flags.setUnsafeAlgebra();
- builder.SetFastMathFlags(flags);
- }
- void DxilExpandTrigIntrinsics::prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic) {
- DxilModule &DM = intrinsic->getModule()->GetOrCreateDxilModule();
- builder.SetInsertPoint(intrinsic);
- setPreciseBuilder(builder, DM.IsPrecise(intrinsic));
- }
-
- bool DxilExpandTrigIntrinsics::expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist) {
- IRBuilder<> builder(DM.GetCtx());
- for (CallInst *intrinsic: worklist) {
- Value *expansion = nullptr;
- prepareBuilderToExpandIntrinsic(builder, intrinsic);
-
- OP::OpCode opcode = OP::GetDxilOpFuncCallInst(intrinsic);
- switch (opcode) {
- case OP::OpCode::Acos: expansion = expandACos(builder, intrinsic, DM); break;
- case OP::OpCode::Asin: expansion = expandASin(builder, intrinsic, DM); break;
- case OP::OpCode::Atan: expansion = expandATan(builder, intrinsic, DM); break;
- case OP::OpCode::Hcos: expansion = expandHCos(builder, intrinsic, DM); break;
- case OP::OpCode::Hsin: expansion = expandHSin(builder, intrinsic, DM); break;
- case OP::OpCode::Htan: expansion = expandHTan(builder, intrinsic, DM); break;
- case OP::OpCode::Tan: expansion = expandTan(builder, intrinsic, DM); break;
- default:
- assert(false && "unexpected intrinsic");
- break;
- }
- assert(expansion);
- intrinsic->replaceAllUsesWith(expansion);
- intrinsic->eraseFromParent();
- }
- return !worklist.empty();
- }
- // Helper
- // return dx.op.UnaryFloat(X)
- //
- static Value *emitUnaryFloat(IRBuilder<> &builder, Value *X, OP *dxOp, OP::OpCode opcode, StringRef name) {
- Function *F = dxOp->GetOpFunc(opcode, X->getType());
- Value *Args[] = { dxOp->GetI32Const(static_cast<int>(opcode)), X };
- CallInst *Call = builder.CreateCall(F, Args, name);
- if (isPreciseBuilder(builder))
- DxilMDHelper::MarkPrecise(Call);
- return Call;
- }
- // Helper
- // return dx.op.Fabs(X)
- //
- static Value *emitFAbs(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
- return emitUnaryFloat(builder, X, dxOp, OP::OpCode::FAbs, name);
- }
- // Helper
- // return dx.op.Sqrt(X)
- //
- static Value *emitSqrt(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
- return emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sqrt, name);
- }
- // Helper
- // return sqrt(1 - X) * psi*(X)
- //
- // We compute the polynomial using Horners method to evaluate it efficently.
- //
- // psi*(X) = a0 + a1x + a2x^2 + a3x^3
- // = a0 + x(a1 + a2x + a3x^2)
- // = a0 + x(a1 + x(a2 + a3x))
- //
- static Value *emitSqrt1mXtimesPsiX(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
- Value *One = ConstantFP::get(X->getType(), 1.0);
- Value *a0 = ConstantFP::get(X->getType(), 1.5707288);
- Value *a1 = ConstantFP::get(X->getType(), -0.2121144);
- Value *a2 = ConstantFP::get(X->getType(), 0.0742610);
- Value *a3 = ConstantFP::get(X->getType(), -0.0187293);
- // sqrt(1-x)
- Value *r1 = builder.CreateFSub(One, X, name);
- Value *r2 = emitSqrt(builder, r1, dxOp, name);
- // psi*(x)
- Value *r3 = builder.CreateFMul(X, a3, name);
- r3 = builder.CreateFAdd(r3, a2, name);
- r3 = builder.CreateFMul(X, r3, name);
- r3 = builder.CreateFAdd(r3, a1, name);
- r3 = builder.CreateFMul(X, r3, name);
- r3 = builder.CreateFAdd(r3, a0, name);
- // sqrt(1-x) * psi*(x)
- Value *r4 = builder.CreateFMul(r2, r3, name);
- return r4;
- }
- // Helper
- // return e^x, e^-x
- //
- // We can use the dxil Exp function to compute the exponential. The only slight
- // wrinkle is that in dxil Exp(x) = 2^x and we need e^x. Luckily we can easily
- // change the base of the exponent using the following identity [HFM(p69)]
- //
- // e^x = 2^{x * log_2(e)}
- //
- static std::pair<Value *, Value *> emitExEmx(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
- Value *Zero = ConstantFP::get(X->getType(), 0.0);
- Value *Log2e = ConstantFP::get(X->getType(), math::LOG2E);
- Value *r0 = builder.CreateFMul(X, Log2e, name);
- Value *r1 = emitUnaryFloat(builder, r0, dxOp, OP::OpCode::Exp, name);
- Value *r2 = builder.CreateFSub(Zero, r0, name);
- Value *r3 = emitUnaryFloat(builder, r2, dxOp, OP::OpCode::Exp, name);
- return std::make_pair(r1, r3);
- }
- // Asin
- // ----------------------------------------------------------------------------
- // Function
- // arcsin X = pi/2 - sqrt(1 - X) * psi(X)
- //
- // Range
- // 0 <= X <= 1
- //
- // Approximation
- // Psi*(X) = a0 + a1x + a2x^2 + a3x^3
- // a0 = 1.5707288
- // a1 = -0.2121144
- // a2 = 0.0742610
- // a3 = -0.0187293
- //
- // The domain of the approximation is 0 <=x <= 1, but the domain of asin is
- // -1 <= x <= 1. So we need to perform a range reduction to [0,1] before
- // computing the approximation.
- //
- // We use the following identity from [HMF(p80),WIK] for range reduction
- //
- // asin(-x) = -asin(x)
- //
- // We take the absolute value of x, compute asin(x) using the approximation
- // and then negate the value if x < 0.
- //
- // In [HMF] the authors claim an error, e, of |e| <= 5e-5, but the error graph
- // in [ADC] looks like the error can be larger that that for some inputs.
- //
- Value *DxilExpandTrigIntrinsics::expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM) {
- assert(asin);
- StringRef name = "asin.x";
- Value *X = asin.get_value();
- Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2);
- Value *Zero = ConstantFP::get(X->getType(), 0.0);
-
- // Range reduction to [0, 1]
- Value *absX = emitFAbs(builder, X, DM.GetOP(), name);
- // Approximation
- Value *psiX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name);
- Value *asinX = builder.CreateFSub(PI_2, psiX, name);
- Value *asinmX = builder.CreateFSub(Zero, asinX, name);
- // Range expansion to [-1, 1]
- Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
- Value *r = builder.CreateSelect(lt0, asinmX, asinX, name);
- return r;
- }
- // Acos
- // ----------------------------------------------------------------------------
- // The acos expansion uses the following identity [WIK]. So that we can use the
- // same approximation psi*(x) that we use for asin.
- //
- // acos(x) = pi/2 - asin(x)
- //
- // Substituting the equation for asin(x) we get
- //
- // acos(x) = pi/2 - asin(x)
- // = pi/2 - (pi/2 - sqrt(1-x)*psi(x))
- // = sqrt(1-x)*psi(x)
- //
- // We use the following identity from [HMF(p80),WIK] for range reduction
- //
- // acos(-x) = pi - acos(x)
- // = pi - sqrt(1-x)*psi(x)
- //
- // We take the absolute value of x, compute acos(x) using the approximation
- // and then subtract from pi if x < 0.
- //
- Value *DxilExpandTrigIntrinsics::expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM) {
- assert(acos);
- StringRef name = "acos.x";
- Value *X = acos.get_value();
- Value *PI = ConstantFP::get(X->getType(), math::PI);
- Value *Zero = ConstantFP::get(X->getType(), 0.0);
-
- // Range reduction to [0, 1]
- Value *absX = emitFAbs(builder, X, DM.GetOP(), name);
- // Approximation
- Value *acosX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name);
- Value *acosmX = builder.CreateFSub(PI, acosX, name);
- // Range expansion to [-1, 1]
- Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
- Value *r = builder.CreateSelect(lt0, acosmX, acosX, name);
- return r;
- }
- // Atan
- // ----------------------------------------------------------------------------
- // Function
- // arctan X
- //
- // Range
- // -1 <= X <= 1
- //
- // Approximation
- // arctan*(x) = c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9
- // c1 = 0.9998660
- // c3 = -0.3302995
- // c5 = 0.1801410
- // c7 = -0.0851330
- // c9 = 0.0208351
- //
- // The polynomial is evaluated using Horner's method to efficiently compute the
- // value
- //
- // c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9
- // = x(c1 + c3x^2 + c5x^4 + c7x^6 + c9x^8)
- // = x(c1 + x^2(c3 + c5x^2 + c7x^4 + c9x^6))
- // = x(c1 + x^2(c3 + x^2(c5 + c7x^2 + c9x^4)))
- // = x(c1 + x^2(c3 + x^2(c5 + x^2(c7 + c9x^2))))
- //
- // The range reduction is a little more compilicated for atan because the
- // domain of atan is [-inf, inf], but the domain of the approximation is only
- // [-1, 1]. We use the following identities for range reduction from
- // [HMF(p80),WIK]
- //
- // arctan(-x) = -arctan(x)
- // arctan(x) = pi/2 - arctan(1/x) if x > 0
- //
- // The first identity allows us to only work with positive numbers. The second
- // identity allows us to reduce the range to [0,1]. We first convert the value
- // to positive by taking abs(x). Then if x > 1 we compute arctan(1/x).
- //
- // To expand the range we check if x > 1 then subtracted the computed value from
- // pi/2 and if x is negative then negate the final value.
- //
- Value *DxilExpandTrigIntrinsics::expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM) {
- assert(atan);
- StringRef name = "atan.x";
- Value *X = atan.get_value();
- Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2);
- Value *One = ConstantFP::get(X->getType(), 1.0);
- Value *Zero = ConstantFP::get(X->getType(), 0.0);
- Value *c1 = ConstantFP::get(X->getType(), 0.9998660);
- Value *c3 = ConstantFP::get(X->getType(), -0.3302995);
- Value *c5 = ConstantFP::get(X->getType(), 0.1801410);
- Value *c7 = ConstantFP::get(X->getType(), -0.0851330);
- Value *c9 = ConstantFP::get(X->getType(), 0.0208351);
- // Range reduction to [0, inf]
- Value *absX = emitFAbs(builder, X, DM.GetOP(), name);
- // Range reduction to [0, 1]
- Value *gt1 = builder.CreateFCmp(CmpInst::FCMP_UGT, absX, One, name);
- Value *r1 = builder.CreateFDiv(One, absX, name);
- Value *r2 = builder.CreateSelect(gt1, r1, absX, name);
- // Approximate
- Value *r3 = builder.CreateFMul(r2, r2, name);
- Value *r4 = builder.CreateFMul(r3, c9, name);
- r4 = builder.CreateFAdd(r4, c7, name);
- r4 = builder.CreateFMul(r4, r3, name);
- r4 = builder.CreateFAdd(r4, c5, name);
- r4 = builder.CreateFMul(r4, r3, name);
- r4 = builder.CreateFAdd(r4, c3, name);
- r4 = builder.CreateFMul(r4, r3, name);
- r4 = builder.CreateFAdd(r4, c1, name);
- r4 = builder.CreateFMul(r2, r4, name);
- // Range Expansion to [0, inf]
- Value *r5 = builder.CreateFSub(PI_2, r4, name);
- Value *r6 = builder.CreateSelect(gt1, r5, r4, name);
- // Range Expansion to [-inf, inf]
- Value *r7 = builder.CreateFSub(Zero, r6, name);
- Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
- Value *r = builder.CreateSelect(lt0, r7, r6, name);
- return r;
- }
- // Hcos
- // ----------------------------------------------------------------------------
- // We use the following identity for computing hcos(x) from [HMF(p83)]
- //
- // cosh(x) = (e^x + e^-x) / 2
- //
- // No range reduction is needed.
- //
- Value *DxilExpandTrigIntrinsics::expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM) {
- assert(hcos);
- StringRef name = "hcos.x";
- Value *eX, *emX;
- Value *X = hcos.get_value();
- Value *Two = ConstantFP::get(X->getType(), 2.0);
- std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
- Value *r4 = builder.CreateFAdd(eX, emX, name);
- Value *r = builder.CreateFDiv(r4, Two, name);
- return r;
- }
- // Hsin
- // ----------------------------------------------------------------------------
- // We use the following identity for computing hsin(x) from[HMF(p83)]
- //
- // sinh(x) = (e^x - e^-x) / 2
- //
- // No range reduction is needed.
- //
- Value *DxilExpandTrigIntrinsics::expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM) {
- assert(hsin);
- StringRef name = "hsin.x";
- Value *eX, *emX;
- Value *X = hsin.get_value();
- Value *Two = ConstantFP::get(X->getType(), 2.0);
- std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
- Value *r4 = builder.CreateFSub(eX, emX, name);
- Value *r = builder.CreateFDiv(r4, Two, name);
- return r;
- }
- // Htan
- // ----------------------------------------------------------------------------
- // We use the following identity for computing hsin(x) from[HMF(p83)]
- //
- // tanh(x) = (e^x - e^-x) / (e^x + e^-x)
- //
- // No range reduction is needed.
- //
- Value *DxilExpandTrigIntrinsics::expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM) {
- assert(htan);
- StringRef name = "htan.x";
- Value *eX, *emX;
- Value *X = htan.get_value();
- std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
- Value *r4 = builder.CreateFSub(eX, emX, name);
- Value *r5 = builder.CreateFAdd(eX, emX, name);
- Value *r = builder.CreateFDiv(r4, r5, name);
- return r;
- }
- // Tan
- // ----------------------------------------------------------------------------
- // We use the following identity for computing tan(x)
- //
- // tan(x) = sin(x) / cos(x)
- //
- // No range reduction is needed.
- //
- Value *DxilExpandTrigIntrinsics::expandTan(IRBuilder<> &builder,
- DxilInst_Tan tan, DxilModule &DM) {
- assert(tan);
- StringRef name = "tan.x";
- Value *X = tan.get_value();
- OP *dxOp = DM.GetOP();
- Value *sin = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sin, name);
- Value *cos = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Cos, name);
- Value *r = builder.CreateFDiv(sin, cos, name);
- return r;
- }
- char DxilExpandTrigIntrinsics::ID = 0;
- FunctionPass *llvm::createDxilExpandTrigIntrinsicsPass() {
- return new DxilExpandTrigIntrinsics();
- }
- INITIALIZE_PASS(DxilExpandTrigIntrinsics,
- "hlsl-dxil-expand-trig-intrinsics",
- "DXIL expand trig intrinsics", false, false)
|