O3DE
/
DirectXShaderCompiler
огледало од https://github.com/o3de/DirectXShaderCompiler


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
							///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// DxilExpandTrigIntrinsics.cpp                                              //
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
// This file is distributed under the University of Illinois Open Source     //
// License. See LICENSE.TXT for details.                                     //
//                                                                           //
// Expand trigonmetric intrinsics to a sequence of dxil instructions.        //
// ========================================================================= //
//
// We provide expansions to approximate several trigonmetric functions that
// typically do not have native instructions in hardware. The details of each
// expansion is given below, but typically the exansion occurs in three steps
// 
//     1. Perform range reduction (if necessary) to reduce input range
//        to a value that works with the approximation.
//     2. Compute an approximation to the function (typically by evaluating 
//        a polynomial).
//     3. Perform range expansion (if necessary) to map the result back to
//        the original range.
// 
// For example, say we are expanding f(x) using an approximation to f, call it
// f*(x). And assume that f* only works for positive inputs, but we know that
// f(-x) = -f(x).Then the expansion would be
// 
//     1. a = abs(x)
//     2. v = f*(a)
//     3. e = x < 0 ? -v : v
// 
// where e contains the final expanded result.
// 
// References
// ---------------------------------------------------------------------------
// [HMF] Handbook of Mathematical Formulas by Abramowitz and Stegun, 1964
// [ADC] Approximations for Digital Computers by Hastings, 1955
// [WIK] Wikipedia, 2017
// 
// The approximation functions mostly come from [ADC]. The approximations
// are also referenced in [HMF], but they give original credit to [ADC].
// 
///////////////////////////////////////////////////////////////////////////////

#include "dxc/HLSL/DxilGenerationPass.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilSignatureElement.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/Support/Global.h"
#include "dxc/DXIL/DxilInstructions.h"

#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/ADT/MapVector.h"

#include <cmath>
#include <utility>

using namespace llvm;
using namespace hlsl;

namespace {
class DxilExpandTrigIntrinsics : public FunctionPass {
private:

public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilExpandTrigIntrinsics() : FunctionPass(ID) {}

  const char *getPassName() const override {
    return "DXIL expand trig intrinsics";
  }
  
  bool runOnFunction(Function &F) override;
  

private:
  typedef std::vector<CallInst *> IntrinsicList;
  IntrinsicList findTrigFunctionsToExpand(Function &F);
  CallInst *isExpandableTrigIntrinsicCall(Instruction *I);
  bool expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist);
  FastMathFlags getFastMathFlagsForIntrinsic(CallInst *intrinsic);
  void prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic);

  // Expansion implementations.
  Value *expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM);
  Value *expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM);
  Value *expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM);
  Value *expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM);
  Value *expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM);
  Value *expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM);
  Value *expandTan(IRBuilder<> &builder, DxilInst_Tan tan, DxilModule &DM);
};

// Math constants.
// Values taken from https://msdn.microsoft.com/en-us/library/4hwaceh6.aspx.
// Replicated here because they are not part of standard C++.
namespace math {
  constexpr double PI    = 3.14159265358979323846;
  constexpr double PI_2  = 1.57079632679489661923;
  constexpr double LOG2E = 1.44269504088896340736;
}

}


bool DxilExpandTrigIntrinsics::runOnFunction(Function &F) {
  DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); 
  IntrinsicList intrinsics = findTrigFunctionsToExpand(F);
  const bool changed = expandTrigIntrinsics(DM, intrinsics);
  return changed;
}

CallInst *DxilExpandTrigIntrinsics::isExpandableTrigIntrinsicCall(Instruction *I) {
    if (OP::IsDxilOpFuncCallInst(I)) {
      switch (OP::GetDxilOpFuncCallInst(I)) {
      case OP::OpCode::Acos:
      case OP::OpCode::Asin:
      case OP::OpCode::Atan:
      case OP::OpCode::Hcos:
      case OP::OpCode::Hsin:
      case OP::OpCode::Htan:
      case OP::OpCode::Tan:
        return cast<CallInst>(I);
      default: break;
      }
    }
    return nullptr;
}

DxilExpandTrigIntrinsics::IntrinsicList DxilExpandTrigIntrinsics::findTrigFunctionsToExpand(Function &F) {
  IntrinsicList worklist;
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (CallInst *call = isExpandableTrigIntrinsicCall(&*I))
      worklist.push_back(call);

  return worklist;
}

static bool isPreciseBuilder(IRBuilder<> &builder) {
  return !builder.getFastMathFlags().any();
}

static void setPreciseBuilder(IRBuilder<> &builder, bool precise) {
  FastMathFlags flags;
  if (precise)
    flags.clear();
  else
    flags.setUnsafeAlgebra();
  builder.SetFastMathFlags(flags);
}

void DxilExpandTrigIntrinsics::prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, CallInst *intrinsic) {
  DxilModule &DM = intrinsic->getModule()->GetOrCreateDxilModule();
  builder.SetInsertPoint(intrinsic);
  setPreciseBuilder(builder, DM.IsPrecise(intrinsic));
}
  
bool DxilExpandTrigIntrinsics::expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist) {
  IRBuilder<> builder(DM.GetCtx());
  for (CallInst *intrinsic: worklist) {
    Value *expansion = nullptr;
    prepareBuilderToExpandIntrinsic(builder, intrinsic);
    
    OP::OpCode opcode = OP::GetDxilOpFuncCallInst(intrinsic);
    switch (opcode) {
    case OP::OpCode::Acos: expansion = expandACos(builder, intrinsic, DM); break;
    case OP::OpCode::Asin: expansion = expandASin(builder, intrinsic, DM); break;
    case OP::OpCode::Atan: expansion = expandATan(builder, intrinsic, DM); break;
    case OP::OpCode::Hcos: expansion = expandHCos(builder, intrinsic, DM); break;
    case OP::OpCode::Hsin: expansion = expandHSin(builder, intrinsic, DM); break;
    case OP::OpCode::Htan: expansion = expandHTan(builder, intrinsic, DM); break;
    case OP::OpCode::Tan: expansion = expandTan(builder, intrinsic, DM); break;
    default:
      assert(false && "unexpected intrinsic");
      break;
    }

    assert(expansion);
    intrinsic->replaceAllUsesWith(expansion);
    intrinsic->eraseFromParent();
  }

  return !worklist.empty();
}

// Helper
// return dx.op.UnaryFloat(X)
//
static Value *emitUnaryFloat(IRBuilder<> &builder, Value *X, OP *dxOp, OP::OpCode opcode, StringRef name) {
  Function *F = dxOp->GetOpFunc(opcode, X->getType());
  Value *Args[] = { dxOp->GetI32Const(static_cast<int>(opcode)), X };
  CallInst *Call = builder.CreateCall(F, Args, name);

  if (isPreciseBuilder(builder))
    DxilMDHelper::MarkPrecise(Call);
  return Call;
}

// Helper
// return dx.op.Fabs(X)
//
static Value *emitFAbs(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
  return emitUnaryFloat(builder, X, dxOp, OP::OpCode::FAbs, name);
}

// Helper
// return dx.op.Sqrt(X)
//
static Value *emitSqrt(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
  return emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sqrt, name);
}

// Helper
// return sqrt(1 - X) * psi*(X)
//
// We compute the polynomial using Horners method to evaluate it efficently.
//
// psi*(X) = a0 + a1x + a2x^2 + a3x^3
//         = a0 + x(a1 + a2x + a3x^2)
//         = a0 + x(a1 + x(a2 + a3x))
//
static Value *emitSqrt1mXtimesPsiX(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
  Value *One = ConstantFP::get(X->getType(), 1.0);
  Value *a0 = ConstantFP::get(X->getType(),  1.5707288);
  Value *a1 = ConstantFP::get(X->getType(), -0.2121144);
  Value *a2 = ConstantFP::get(X->getType(),  0.0742610);
  Value *a3 = ConstantFP::get(X->getType(), -0.0187293);


  // sqrt(1-x)
  Value *r1 = builder.CreateFSub(One, X, name);
  Value *r2 = emitSqrt(builder, r1, dxOp, name);

  // psi*(x)
  Value *r3 = builder.CreateFMul(X,  a3, name);
         r3 = builder.CreateFAdd(r3, a2, name);
         r3 = builder.CreateFMul(X,  r3, name);
         r3 = builder.CreateFAdd(r3, a1, name);
         r3 = builder.CreateFMul(X,  r3, name);
         r3 = builder.CreateFAdd(r3, a0, name);

  // sqrt(1-x) * psi*(x)
  Value *r4 = builder.CreateFMul(r2, r3,  name);
  return r4;
}

// Helper
// return e^x, e^-x
//
// We can use the dxil Exp function to compute the exponential. The only slight
// wrinkle is that in dxil Exp(x) = 2^x and we need e^x. Luckily we can easily
// change the base of the exponent using the following identity [HFM(p69)]
//
//  e^x = 2^{x * log_2(e)}
//
static std::pair<Value *, Value *> emitExEmx(IRBuilder<> &builder, Value *X, OP *dxOp, StringRef name) {
  Value *Zero  = ConstantFP::get(X->getType(), 0.0);
  Value *Log2e = ConstantFP::get(X->getType(), math::LOG2E);

  Value *r0 = builder.CreateFMul(X, Log2e, name);
  Value *r1 = emitUnaryFloat(builder, r0, dxOp, OP::OpCode::Exp, name);
  Value *r2 = builder.CreateFSub(Zero, r0, name);
  Value *r3 = emitUnaryFloat(builder, r2, dxOp, OP::OpCode::Exp, name);

  return std::make_pair(r1, r3);
}

// Asin
// ----------------------------------------------------------------------------
// Function
//    arcsin X = pi/2  - sqrt(1 - X) * psi(X)
//
// Range
//    0 <= X <= 1
//
// Approximation
//    Psi*(X) = a0 + a1x + a2x^2 + a3x^3
//      a0 =  1.5707288
//      a1 = -0.2121144
//      a2 =  0.0742610
//      a3 = -0.0187293
// 
// The domain of the approximation is 0 <=x <= 1, but the domain of asin is
// -1 <= x <= 1. So we need to perform a range reduction to [0,1] before
// computing the approximation. 
// 
// We use the following identity from [HMF(p80),WIK] for range reduction
// 
// 	asin(-x) = -asin(x)
// 
// We take the absolute value of x, compute asin(x) using the approximation
// and then negate the value if x < 0.
//
// In [HMF] the authors claim an error, e, of |e| <= 5e-5, but the error graph
// in [ADC] looks like the error can be larger that that for some inputs.
// 
Value *DxilExpandTrigIntrinsics::expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM) {
  assert(asin);
  StringRef name = "asin.x";
  Value *X = asin.get_value();
  Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2);
  Value *Zero = ConstantFP::get(X->getType(), 0.0);
  
  // Range reduction to [0, 1]
  Value *absX = emitFAbs(builder, X, DM.GetOP(), name);

  // Approximation
  Value *psiX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name);
  Value *asinX = builder.CreateFSub(PI_2, psiX, name);
  Value *asinmX = builder.CreateFSub(Zero, asinX, name);

  // Range expansion to [-1, 1]
  Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
  Value *r = builder.CreateSelect(lt0, asinmX, asinX, name);

  return r;
}


// Acos
// ----------------------------------------------------------------------------
// The acos expansion uses the following identity [WIK]. So that we can use the
// same approximation psi*(x) that we use for asin.
// 
// 	acos(x) = pi/2 - asin(x)
// 
// Substituting the equation for asin(x) we get
// 
// 	acos(x) = pi/2 - asin(x)
// 	        = pi/2 - (pi/2 - sqrt(1-x)*psi(x))
// 	        = sqrt(1-x)*psi(x)
// 
// We use the following identity from [HMF(p80),WIK] for range reduction
// 
// 	acos(-x) = pi - acos(x)
//               = pi - sqrt(1-x)*psi(x)
//
// We take the absolute value of x, compute acos(x) using the approximation
// and then subtract from pi if x < 0.
//
Value *DxilExpandTrigIntrinsics::expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM) {
  assert(acos);
  StringRef name = "acos.x";
  Value *X = acos.get_value();
  Value *PI = ConstantFP::get(X->getType(), math::PI);
  Value *Zero = ConstantFP::get(X->getType(), 0.0);
  
  // Range reduction to [0, 1]
  Value *absX = emitFAbs(builder, X, DM.GetOP(), name);

  // Approximation
  Value *acosX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name);
  Value *acosmX = builder.CreateFSub(PI, acosX, name);

  // Range expansion to [-1, 1]
  Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
  Value *r = builder.CreateSelect(lt0, acosmX, acosX, name);

  return r;
}

// Atan
// ----------------------------------------------------------------------------
// Function
//    arctan X
//
// Range
//    -1 <= X <= 1
//
// Approximation
//    arctan*(x) = c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9
//      c1 =  0.9998660
//      c3 = -0.3302995
//      c5 =  0.1801410
//      c7 = -0.0851330
//      c9 =  0.0208351
// 	
// The polynomial is evaluated using Horner's method to efficiently compute the
// value
// 
// 	  c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9 
// 	= x(c1 + c3x^2 + c5x^4 + c7x^6 + c9x^8)
// 	= x(c1 + x^2(c3 + c5x^2 + c7x^4 + c9x^6))
// 	= x(c1 + x^2(c3 + x^2(c5 + c7x^2 + c9x^4)))
// 	= x(c1 + x^2(c3 + x^2(c5 + x^2(c7 + c9x^2))))
// 	
// The range reduction is a little more compilicated for atan because the
// domain of atan is [-inf, inf], but the domain of the approximation is only
// [-1, 1]. We use the following identities for range reduction from
// [HMF(p80),WIK]
// 	
// 	arctan(-x) = -arctan(x)
//      arctan(x)   = pi/2 - arctan(1/x) if x > 0
// 
// The first identity allows us to only work with positive numbers. The second
// identity allows us to reduce the range to [0,1]. We first convert the value
// to positive by taking abs(x). Then if x > 1 we compute arctan(1/x).
// 
// To expand the range we check if x > 1 then subtracted the computed value from
// pi/2 and if x is negative then negate the final value.
//
Value *DxilExpandTrigIntrinsics::expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM) {
  assert(atan);
  StringRef name  = "atan.x";
  Value *X = atan.get_value();
  Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2);
  Value *One  = ConstantFP::get(X->getType(), 1.0);
  Value *Zero = ConstantFP::get(X->getType(), 0.0);
  Value *c1 = ConstantFP::get(X->getType(),  0.9998660);
  Value *c3 = ConstantFP::get(X->getType(), -0.3302995);
  Value *c5 = ConstantFP::get(X->getType(),  0.1801410);
  Value *c7 = ConstantFP::get(X->getType(), -0.0851330);
  Value *c9 = ConstantFP::get(X->getType(),  0.0208351);

  // Range reduction to [0, inf]
  Value *absX = emitFAbs(builder, X, DM.GetOP(), name);

  // Range reduction to [0, 1]
  Value *gt1 = builder.CreateFCmp(CmpInst::FCMP_UGT, absX, One, name);
  Value *r1 = builder.CreateFDiv(One, absX, name);
  Value *r2 = builder.CreateSelect(gt1, r1, absX, name);

  // Approximate
  Value *r3 = builder.CreateFMul(r2, r2, name);
  Value *r4 = builder.CreateFMul(r3, c9, name);
         r4 = builder.CreateFAdd(r4, c7, name);
         r4 = builder.CreateFMul(r4, r3, name);
         r4 = builder.CreateFAdd(r4, c5, name);
         r4 = builder.CreateFMul(r4, r3, name);
         r4 = builder.CreateFAdd(r4, c3, name);
         r4 = builder.CreateFMul(r4, r3, name);
         r4 = builder.CreateFAdd(r4, c1, name);
         r4 = builder.CreateFMul(r2, r4, name);

  // Range Expansion to [0, inf]
  Value *r5 = builder.CreateFSub(PI_2, r4, name);
  Value *r6 = builder.CreateSelect(gt1, r5, r4, name);

  // Range Expansion to [-inf, inf]
  Value *r7 = builder.CreateFSub(Zero, r6, name);
  Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name);
  Value *r = builder.CreateSelect(lt0, r7, r6, name);

  return r;
}

// Hcos
// ----------------------------------------------------------------------------
// We use the following identity for computing hcos(x) from [HMF(p83)]
// 	
//    cosh(x) = (e^x + e^-x) / 2
// 
// No range reduction is needed.
//
Value *DxilExpandTrigIntrinsics::expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM) {
  assert(hcos);
  StringRef name = "hcos.x";
  Value *eX, *emX;
  Value *X = hcos.get_value();
  Value *Two = ConstantFP::get(X->getType(), 2.0);

  std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
  Value *r4 = builder.CreateFAdd(eX, emX, name);
  Value *r  = builder.CreateFDiv(r4, Two, name);

  return r;
}

// Hsin
// ----------------------------------------------------------------------------
// We use the following identity for computing hsin(x) from[HMF(p83)]
//
//    sinh(x) = (e^x - e^-x) / 2
//
// No range reduction is needed.
//
Value *DxilExpandTrigIntrinsics::expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM) {
  assert(hsin);
  StringRef name = "hsin.x";
  Value *eX, *emX;
  Value *X = hsin.get_value();
  Value *Two = ConstantFP::get(X->getType(), 2.0);

  std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
  Value *r4 = builder.CreateFSub(eX, emX, name);
  Value *r  = builder.CreateFDiv(r4, Two, name);

  return r;
}

// Htan
// ----------------------------------------------------------------------------
// We use the following identity for computing hsin(x) from[HMF(p83)]
//
//    tanh(x) = (e^x - e^-x) / (e^x + e^-x)
//
// No range reduction is needed.
//
Value *DxilExpandTrigIntrinsics::expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM) {
  assert(htan);
  StringRef name = "htan.x";
  Value *eX, *emX;
  Value *X = htan.get_value();

  std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name);
  Value *r4 = builder.CreateFSub(eX, emX, name);
  Value *r5 = builder.CreateFAdd(eX, emX, name);
  Value *r  = builder.CreateFDiv(r4, r5, name);

  return r;
}

// Tan
// ----------------------------------------------------------------------------
// We use the following identity for computing tan(x)
//
//    tan(x) = sin(x) / cos(x)
//
// No range reduction is needed.
//
Value *DxilExpandTrigIntrinsics::expandTan(IRBuilder<> &builder,
                                           DxilInst_Tan tan, DxilModule &DM) {
  assert(tan);
  StringRef name = "tan.x";
  Value *X = tan.get_value();
  OP *dxOp = DM.GetOP();
  Value *sin = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sin, name);
  Value *cos = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Cos, name);
  Value *r = builder.CreateFDiv(sin, cos, name);

  return r;
}

char DxilExpandTrigIntrinsics::ID = 0;

FunctionPass *llvm::createDxilExpandTrigIntrinsicsPass() {
  return new DxilExpandTrigIntrinsics();
}

INITIALIZE_PASS(DxilExpandTrigIntrinsics,
                "hlsl-dxil-expand-trig-intrinsics",
                "DXIL expand trig intrinsics", false, false)