Browse Source

Add constant folding for dxil intrinsics (#179)

This commit adds the ability to constant fold dxil intrinsics when
all inputs are constant. We reuse the llvm constant folding
infrastructure and add special cases for calls to dxil intrinsics.
David Peixotto 8 years ago
parent
commit
ae69cf0a58
55 changed files with 1409 additions and 18 deletions
  1. 2 0
      include/dxc/HLSL/DxilOperations.h
  2. 6 0
      include/llvm/Analysis/ConstantFolding.h
  3. 41 0
      include/llvm/Analysis/DxilConstantFolding.h
  4. 1 0
      lib/Analysis/CMakeLists.txt
  5. 12 3
      lib/Analysis/ConstantFolding.cpp
  6. 553 0
      lib/Analysis/DxilConstantFolding.cpp
  7. 7 2
      lib/HLSL/DxilOperations.cpp
  8. 0 1
      tools/clang/test/CodeGenHLSL/Samples/DX11/SubD11_SmoothPS.hlsl
  9. 4 4
      tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
  10. 5 6
      tools/clang/test/CodeGenHLSL/firstbitLo.hlsl
  11. 1 1
      tools/clang/test/CodeGenHLSL/firstbitshi_const.hlsl
  12. 8 0
      tools/clang/test/HLSL/constprop/Acos.hlsl
  13. 8 0
      tools/clang/test/HLSL/constprop/Asin.hlsl
  14. 9 0
      tools/clang/test/HLSL/constprop/Atan.hlsl
  15. 9 0
      tools/clang/test/HLSL/constprop/Bfrev.hlsl
  16. 8 0
      tools/clang/test/HLSL/constprop/Cos.hlsl
  17. 9 0
      tools/clang/test/HLSL/constprop/Countbits.hlsl
  18. 9 0
      tools/clang/test/HLSL/constprop/Dot2.hlsl
  19. 9 0
      tools/clang/test/HLSL/constprop/Dot3.hlsl
  20. 9 0
      tools/clang/test/HLSL/constprop/Dot4.hlsl
  21. 9 0
      tools/clang/test/HLSL/constprop/Exp.hlsl
  22. 9 0
      tools/clang/test/HLSL/constprop/FAbs.hlsl
  23. 17 0
      tools/clang/test/HLSL/constprop/FMad.hlsl
  24. 13 0
      tools/clang/test/HLSL/constprop/FMax.hlsl
  25. 13 0
      tools/clang/test/HLSL/constprop/FMin.hlsl
  26. 37 0
      tools/clang/test/HLSL/constprop/Firstbithi.hlsl
  27. 12 0
      tools/clang/test/HLSL/constprop/Firstbitlo.hlsl
  28. 12 0
      tools/clang/test/HLSL/constprop/Fma.hlsl
  29. 8 0
      tools/clang/test/HLSL/constprop/Frc.hlsl
  30. 8 0
      tools/clang/test/HLSL/constprop/Hcos.hlsl
  31. 8 0
      tools/clang/test/HLSL/constprop/Hsin.hlsl
  32. 8 0
      tools/clang/test/HLSL/constprop/Htan.hlsl
  33. 10 0
      tools/clang/test/HLSL/constprop/IMad.hlsl
  34. 11 0
      tools/clang/test/HLSL/constprop/IMax.hlsl
  35. 11 0
      tools/clang/test/HLSL/constprop/IMin.hlsl
  36. 8 0
      tools/clang/test/HLSL/constprop/Log.hlsl
  37. 9 0
      tools/clang/test/HLSL/constprop/Round_ne.hlsl
  38. 9 0
      tools/clang/test/HLSL/constprop/Round_ni.hlsl
  39. 9 0
      tools/clang/test/HLSL/constprop/Round_pi.hlsl
  40. 9 0
      tools/clang/test/HLSL/constprop/Round_z.hlsl
  41. 8 0
      tools/clang/test/HLSL/constprop/Rsqrt.hlsl
  42. 12 0
      tools/clang/test/HLSL/constprop/Saturate_double.hlsl
  43. 12 0
      tools/clang/test/HLSL/constprop/Saturate_float.hlsl
  44. 12 0
      tools/clang/test/HLSL/constprop/Saturate_half.hlsl
  45. 8 0
      tools/clang/test/HLSL/constprop/Sin.hlsl
  46. 8 0
      tools/clang/test/HLSL/constprop/Sqrt.hlsl
  47. 8 0
      tools/clang/test/HLSL/constprop/Tan.hlsl
  48. 15 0
      tools/clang/test/HLSL/constprop/UMad.hlsl
  49. 15 0
      tools/clang/test/HLSL/constprop/UMax.hlsl
  50. 15 0
      tools/clang/test/HLSL/constprop/UMin.hlsl
  51. 95 0
      tools/clang/test/HLSL/constprop/bfi.ll
  52. 103 0
      tools/clang/test/HLSL/constprop/ibfe.ll
  53. 103 0
      tools/clang/test/HLSL/constprop/ubfe.ll
  54. 51 0
      tools/clang/unittests/HLSL/CompilerTest.cpp
  55. 4 1
      tools/clang/unittests/HLSL/FileCheckerTest.cpp

+ 2 - 0
include/dxc/HLSL/DxilOperations.h

@@ -21,6 +21,7 @@ class Value;
 class Instruction;
 class Instruction;
 };
 };
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/ADT/StringRef.h"
 
 
 #include "DxilConstants.h"
 #include "DxilConstants.h"
 #include <unordered_map>
 #include <unordered_map>
@@ -72,6 +73,7 @@ public:
   static const char *GetOpCodeClassName(OpCode OpCode);
   static const char *GetOpCodeClassName(OpCode OpCode);
   static bool IsOverloadLegal(OpCode OpCode, llvm::Type *pType);
   static bool IsOverloadLegal(OpCode OpCode, llvm::Type *pType);
   static bool CheckOpCodeTable();
   static bool CheckOpCodeTable();
+  static bool IsDxilOpFuncName(llvm::StringRef name);
   static bool IsDxilOpFunc(const llvm::Function *F);
   static bool IsDxilOpFunc(const llvm::Function *F);
   static bool IsDxilOpFuncCallInst(const llvm::Instruction *I);
   static bool IsDxilOpFuncCallInst(const llvm::Instruction *I);
   static bool IsDxilOpFuncCallInst(const llvm::Instruction *I, OpCode opcode);
   static bool IsDxilOpFuncCallInst(const llvm::Instruction *I, OpCode opcode);

+ 6 - 0
include/llvm/Analysis/ConstantFolding.h

@@ -22,6 +22,7 @@
 
 
 namespace llvm {
 namespace llvm {
   class Constant;
   class Constant;
+  class ConstantFP;
   class ConstantExpr;
   class ConstantExpr;
   class Instruction;
   class Instruction;
   class DataLayout;
   class DataLayout;
@@ -108,6 +109,11 @@ bool canConstantFoldCallTo(const Function *F);
 /// with the specified arguments, returning null if unsuccessful.
 /// with the specified arguments, returning null if unsuccessful.
 Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
 Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
                            const TargetLibraryInfo *TLI = nullptr);
                            const TargetLibraryInfo *TLI = nullptr);
+
+/// HLSL Change - make these functions external so we can call them from 
+/// DxilConstantFolding.cpp.
+Constant *ConstantFoldFP(double(__cdecl *NativeFP)(double), double V, Type *Ty);
+double getValueAsDouble(ConstantFP *Op);
 }
 }
 
 
 #endif
 #endif

+ 41 - 0
include/llvm/Analysis/DxilConstantFolding.h

@@ -0,0 +1,41 @@
+//===-- DxilConstantFolding.h - Constant folding for Dxil ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for folding dxil intrinsics into constants when
+// all operands are constants.
+//
+// We hook into the LLVM routines for constant folding so the function
+// interfaces are dictated by what llvm provides.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_HLSLCONSTANTFOLDING_H
+#define LLVM_ANALYSIS_HLSLCONSTANTFOLDING_H
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class Constant;
+  class Function;
+  class Type;
+  template<typename T>
+  class ArrayRef;
+}
+
+namespace hlsl {
+  /// ConstantFoldScalarCall - Try to constant fold the call instruction.
+  /// If successful, the constant result is returned, if not, null is returned.
+  llvm::Constant *ConstantFoldScalarCall(llvm::StringRef Name, llvm::Type *Ty, llvm::ArrayRef<llvm::Constant *> Operands);
+
+  /// CanConstantFoldCallTo - Return true if we can potentially constant
+  /// fold a call to the given function.
+  bool CanConstantFoldCallTo(const llvm::Function *F);
+}
+
+#endif

+ 1 - 0
lib/Analysis/CMakeLists.txt

@@ -25,6 +25,7 @@ add_llvm_library(LLVMAnalysis
   DivergenceAnalysis.cpp
   DivergenceAnalysis.cpp
   DomPrinter.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
   DominanceFrontier.cpp
+  DxilConstantFolding.cpp
   IVUsers.cpp
   IVUsers.cpp
   InstCount.cpp
   InstCount.cpp
   InstructionSimplify.cpp
   InstructionSimplify.cpp

+ 12 - 3
lib/Analysis/ConstantFolding.cpp

@@ -37,6 +37,8 @@
 #include <cerrno>
 #include <cerrno>
 #include <cmath>
 #include <cmath>
 
 
+#include "llvm/Analysis/DxilConstantFolding.h" // HLSL Change
+
 #ifdef HAVE_FENV_H
 #ifdef HAVE_FENV_H
 #include <fenv.h>
 #include <fenv.h>
 #endif
 #endif
@@ -1222,6 +1224,9 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
 
 
 /// Return true if it's even possible to fold a call to the specified function.
 /// Return true if it's even possible to fold a call to the specified function.
 bool llvm::canConstantFoldCallTo(const Function *F) {
 bool llvm::canConstantFoldCallTo(const Function *F) {
+  if (hlsl::CanConstantFoldCallTo(F)) // HLSL Change
+    return true;
+
   switch (F->getIntrinsicID()) {
   switch (F->getIntrinsicID()) {
   case Intrinsic::fabs:
   case Intrinsic::fabs:
   case Intrinsic::minnum:
   case Intrinsic::minnum:
@@ -1336,8 +1341,8 @@ static inline bool llvm_fenv_testexcept() {
 }
 }
 } // End namespace
 } // End namespace
 
 
-// HLSL Change: changed calling convention of NativeFP to __cdecl
-static Constant *ConstantFoldFP(double (__cdecl *NativeFP)(double), double V,
+// HLSL Change: changed calling convention of NativeFP to __cdecl and make non-static
+Constant *llvm::ConstantFoldFP(double (__cdecl *NativeFP)(double), double V,
                                 Type *Ty) {
                                 Type *Ty) {
   llvm_fenv_clearexcept();
   llvm_fenv_clearexcept();
   V = NativeFP(V);
   V = NativeFP(V);
@@ -1388,7 +1393,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
   return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
   return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
 }
 }
 
 
-static double getValueAsDouble(ConstantFP *Op) {
+// HLSL Change - make non-static.
+double llvm::getValueAsDouble(ConstantFP *Op) {
   Type *Ty = Op->getType();
   Type *Ty = Op->getType();
 
 
   if (Ty->isFloatTy())
   if (Ty->isFloatTy())
@@ -1406,6 +1412,9 @@ static double getValueAsDouble(ConstantFP *Op) {
 static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
 static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
                                         Type *Ty, ArrayRef<Constant *> Operands,
                                         Type *Ty, ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI) {
                                         const TargetLibraryInfo *TLI) {
+  if (Constant *C = hlsl::ConstantFoldScalarCall(Name, Ty, Operands)) // HLSL Change - Try hlsl constant folding first.
+    return C;
+
   if (Operands.size() == 1) {
   if (Operands.size() == 1) {
     if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
     if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
       if (IntrinsicID == Intrinsic::convert_to_fp16) {
       if (IntrinsicID == Intrinsic::convert_to_fp16) {

+ 553 - 0
lib/Analysis/DxilConstantFolding.cpp

@@ -0,0 +1,553 @@
+//===-- DxilConstantFolding.cpp - Fold dxil intrinsics into constants -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/DxilConstantFolding.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cerrno>
+#include <cmath>
+#include <algorithm>
+#include <functional>
+
+#include "dxc/HLSL/Dxil.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+// Extract the dxil opcode from a call to a dxil intrinsic.
+//
+// Returns true and stores the opcode in `out` when `Name` is a dxil op
+// function name, at least one operand is present, and the first operand is a
+// constant integer smaller than OP::OpCode::NumOpCodes. Returns false and
+// leaves `out` untouched in every other case.
+static bool GetDxilOpcode(StringRef Name, ArrayRef<Constant *> Operands, OP::OpCode &out) {
+  if (!OP::IsDxilOpFuncName(Name))
+    return false;
+  if (Operands.size() == 0)
+    return false;
+
+  // The opcode is carried by the first call operand.
+  ConstantInt *OpcodeArg = dyn_cast<ConstantInt>(Operands[0]);
+  if (!OpcodeArg)
+    return false;
+
+  const uint64_t RawOpcode = OpcodeArg->getLimitedValue();
+  if (RawOpcode >= static_cast<uint64_t>(OP::OpCode::NumOpCodes))
+    return false;
+
+  out = static_cast<OP::OpCode>(RawOpcode);
+  return true;
+}
+
+// Typedefs for passing function pointers to evaluate float constants.
+typedef double(__cdecl *NativeFPUnaryOp)(double);
+typedef std::function<APFloat::opStatus(APFloat&)> APFloatUnaryOp;
+
+/// Fold a unary floating point function using the host's native double
+/// implementation (APFloat versions of these functions do not exist yet).
+///
+/// The constant is converted with llvm::getValueAsDouble and the evaluation
+/// and conversion back to \p Ty are delegated to llvm::ConstantFoldFP, whose
+/// result is returned unchanged. Long double is not supported yet.
+static Constant *DxilConstantFoldFP(NativeFPUnaryOp NativeFP, ConstantFP *C, Type *Ty) {
+  return llvm::ConstantFoldFP(NativeFP, llvm::getValueAsDouble(C), Ty);
+}
+
+// Fold by applying an in-place APFloat operation to a copy of the constant.
+//
+// Returns the modified value as a constant when the operation reports opOK,
+// and nullptr for any other status.
+static Constant *HLSLConstantFoldAPFloat(APFloatUnaryOp NativeFP, ConstantFP *C, Type *Ty) {
+  APFloat Value = C->getValueAPF();
+  const APFloat::opStatus Status = NativeFP(Value);
+
+  if (Status == APFloat::opStatus::opOK)
+    return ConstantFP::get(Ty->getContext(), Value);
+
+  return nullptr;
+}
+
+// Fold a dxil round intrinsic by rounding the constant to an integral value
+// with the requested rounding mode.
+static Constant *HLSLConstantFoldRound(APFloat::roundingMode roundingMode, ConstantFP *C, Type *Ty) {
+  return HLSLConstantFoldAPFloat(
+      [roundingMode](APFloat &Value) { return Value.roundToIntegral(roundingMode); },
+      C, Ty);
+}
+
+namespace {
+// View over the raw operands of a dxil intrinsic call that "shifts past" the
+// leading opcode operand, so index 0 refers to the first real argument.
+// Also provides accessors that dyn_cast an operand to a constant type.
+class DxilIntrinsicOperands {
+public:
+  // RawCallOperands is the full operand list of the call, opcode included.
+  DxilIntrinsicOperands(ArrayRef<Constant *> RawCallOperands) : m_RawCallOperands(RawCallOperands) {}
+
+  // Returns the index-th argument after the opcode. The index must be
+  // smaller than Size().
+  Constant * const &operator[](size_t index) const {
+    return m_RawCallOperands[index + 1];
+  }
+
+  // Returns the index-th argument as a ConstantInt, or null if it is not one.
+  ConstantInt *GetConstantInt(size_t index) const {
+    return dyn_cast<ConstantInt>(this->operator[](index));
+  }
+
+  // Returns the index-th argument as a ConstantFP, or null if it is not one.
+  ConstantFP *GetConstantFloat(size_t index) const {
+    return dyn_cast<ConstantFP>(this->operator[](index));
+  }
+
+  // Number of arguments, not counting the opcode operand. An empty raw
+  // operand list reports zero instead of wrapping around to SIZE_MAX.
+  size_t Size() const {
+    return m_RawCallOperands.empty() ? 0 : m_RawCallOperands.size() - 1;
+  }
+private:
+  ArrayRef<Constant *> m_RawCallOperands;
+};
+}
+
+/// Return true when the constant is neither NaN nor infinite.
+///
+/// We only fold functions with finite arguments. Folding NaN and inf is
+/// likely to be aborted with an exception anyway, and some host libms
+/// have known errors raising exceptions.
+static bool IsFinite(ConstantFP *C) {
+  const APFloat &Value = C->getValueAPF();
+  return !(Value.isNaN() || Value.isInfinity());
+}
+
+// An operand is usable for folding when it is non-null and finite.
+static bool IsValidOp(ConstantFP *C) {
+  return C != nullptr && IsFinite(C);
+}
+
+// Return true when every operand in the list passes IsValidOp
+// (trivially true for an empty list).
+static bool AllValidOps(ArrayRef<ConstantFP *> Ops) {
+  for (ConstantFP *Op : Ops) {
+    if (!IsValidOp(Op))
+      return false;
+  }
+  return true;
+}
+
+// Constant fold unary floating point intrinsics.
+//
+// Most opcodes are evaluated with the host's double precision math functions
+// through DxilConstantFoldFP; the Round_* opcodes are evaluated on APFloat.
+// Returns nullptr for opcodes that are not handled here.
+static Constant *ConstantFoldUnaryFPIntrinsic(OP::OpCode opcode, Type *Ty, ConstantFP *Op) {
+  switch (opcode) {
+  default: break;
+  case OP::OpCode::FAbs: return DxilConstantFoldFP(fabs, Op, Ty);
+  case OP::OpCode::Saturate: {
+    // Clamp to the [0, 1] range.
+    NativeFPUnaryOp f = [](double x) { return std::max(std::min(x, 1.0), 0.0); };
+    return DxilConstantFoldFP(f, Op, Ty);
+  }
+  case OP::OpCode::Cos:  return DxilConstantFoldFP(cos, Op, Ty);
+  case OP::OpCode::Sin:  return DxilConstantFoldFP(sin, Op, Ty);
+  case OP::OpCode::Tan:  return DxilConstantFoldFP(tan, Op, Ty);
+  case OP::OpCode::Acos: return DxilConstantFoldFP(acos, Op, Ty);
+  case OP::OpCode::Asin: return DxilConstantFoldFP(asin, Op, Ty);
+  case OP::OpCode::Atan: return DxilConstantFoldFP(atan, Op, Ty);
+  case OP::OpCode::Hcos: return DxilConstantFoldFP(cosh, Op, Ty);
+  case OP::OpCode::Hsin: return DxilConstantFoldFP(sinh, Op, Ty);
+  case OP::OpCode::Htan: return DxilConstantFoldFP(tanh, Op, Ty);
+  // Exp and Log are base 2.
+  case OP::OpCode::Exp:  return DxilConstantFoldFP(exp2, Op, Ty);
+  case OP::OpCode::Frc: {
+    // Frc is defined as x - round_ni(x), i.e. x - floor(x), so the result is
+    // non-negative for negative inputs as well (frc(-1.25) == 0.75). Taking
+    // fabs(modf(x)) would instead yield 0.25 for that input.
+    NativeFPUnaryOp f = [](double x) { return x - floor(x); };
+    return DxilConstantFoldFP(f, Op, Ty);
+  }
+  case OP::OpCode::Log: return DxilConstantFoldFP(log2, Op, Ty);
+  case OP::OpCode::Sqrt: return DxilConstantFoldFP(sqrt, Op, Ty);
+  case OP::OpCode::Rsqrt: {
+    NativeFPUnaryOp f = [](double x) { return 1.0 / sqrt(x); };
+    return DxilConstantFoldFP(f, Op, Ty);
+  }
+  case OP::OpCode::Round_ne: return HLSLConstantFoldRound(APFloat::roundingMode::rmNearestTiesToEven, Op, Ty);
+  case OP::OpCode::Round_ni: return HLSLConstantFoldRound(APFloat::roundingMode::rmTowardNegative, Op, Ty);
+  case OP::OpCode::Round_pi: return HLSLConstantFoldRound(APFloat::roundingMode::rmTowardPositive, Op, Ty);
+  case OP::OpCode::Round_z: return HLSLConstantFoldRound(APFloat::roundingMode::rmTowardZero, Op, Ty);
+  }
+
+  return nullptr;
+}
+
+// Constant fold binary floating point intrinsics (FMax and FMin).
+// Returns nullptr for any other opcode.
+static Constant *ConstantFoldBinaryFPIntrinsic(OP::OpCode opcode, Type *Ty, ConstantFP *Op1, ConstantFP *Op2) {
+  const APFloat &LHS = Op1->getValueAPF();
+  const APFloat &RHS = Op2->getValueAPF();
+
+  if (opcode == OP::OpCode::FMax)
+    return ConstantFP::get(Ty->getContext(), maxnum(LHS, RHS));
+  if (opcode == OP::OpCode::FMin)
+    return ConstantFP::get(Ty->getContext(), minnum(LHS, RHS));
+
+  return nullptr;
+}
+
+// Constant fold ternary floating point intrinsics.
+//
+// FMad is computed as a separately rounded multiply followed by an add;
+// Fma uses a single fused multiply-add. Both round to nearest, ties to even.
+// Returns nullptr for any other opcode.
+static Constant *ConstantFoldTernaryFPIntrinsic(OP::OpCode opcode, Type *Ty, ConstantFP *Op1, ConstantFP *Op2, ConstantFP *Op3) {
+  const bool IsMad = opcode == OP::OpCode::FMad;
+  const bool IsFma = opcode == OP::OpCode::Fma;
+  if (!IsMad && !IsFma)
+    return nullptr;
+
+  const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
+  const APFloat &Multiplier = Op2->getValueAPF();
+  const APFloat &Addend = Op3->getValueAPF();
+
+  APFloat Result(Op1->getValueAPF());
+  if (IsMad) {
+    Result.multiply(Multiplier, RM);
+    Result.add(Addend, RM);
+  }
+  else {
+    Result.fusedMultiplyAdd(Multiplier, Addend, RM);
+  }
+  return ConstantFP::get(Ty->getContext(), Result);
+}
+
+// Compute the dot product of two equally sized, non-empty vectors.
+//
+// Asserts (and returns nullptr) when the sizes differ or the vectors are
+// empty. Returns nullptr when any element is null or not finite. Each
+// product and each accumulation is rounded to nearest, ties to even.
+static Constant *ComputeDot(Type *Ty, ArrayRef<ConstantFP *> A, ArrayRef<ConstantFP *> B) {
+  const size_t NumElts = A.size();
+  if (NumElts == 0 || NumElts != B.size()) {
+    assert(false && "invalid call to compute dot");
+    return nullptr;
+  }
+
+  if (!(AllValidOps(A) && AllValidOps(B)))
+    return nullptr;
+
+  const APFloat::roundingMode RM = APFloat::roundingMode::rmNearestTiesToEven;
+  // Start from a zero with the same semantics as the first element.
+  APFloat Accum = APFloat::getZero(A[0]->getValueAPF().getSemantics());
+  for (size_t Idx = 0; Idx < NumElts; ++Idx) {
+    APFloat Product(A[Idx]->getValueAPF());
+    Product.multiply(B[Idx]->getValueAPF(), RM);
+    Accum.add(Product, RM);
+  }
+
+  return ConstantFP::get(Ty->getContext(), Accum);
+}
+
+// Constant folding for dot2, dot3, and dot4.
+//
+// The operands hold all components of the first vector followed by all
+// components of the second vector. Returns nullptr for non-dot opcodes, when
+// the number of operands does not match the opcode (so a malformed call is
+// never indexed out of range), or when ComputeDot rejects the operands.
+static Constant *ConstantFoldDot(OP::OpCode opcode, Type *Ty, const DxilIntrinsicOperands &operands) {
+  size_t NumComponents = 0;
+  switch (opcode) {
+  default: return nullptr;
+  case OP::OpCode::Dot2: NumComponents = 2; break;
+  case OP::OpCode::Dot3: NumComponents = 3; break;
+  case OP::OpCode::Dot4: NumComponents = 4; break;
+  }
+
+  if (operands.Size() != 2 * NumComponents)
+    return nullptr;
+
+  // Operands that are not ConstantFP come back as null and are rejected by
+  // ComputeDot.
+  SmallVector<ConstantFP *, 4> A;
+  SmallVector<ConstantFP *, 4> B;
+  for (size_t i = 0; i != NumComponents; ++i) {
+    A.push_back(operands.GetConstantFloat(i));
+    B.push_back(operands.GetConstantFloat(i + NumComponents));
+  }
+
+  return ComputeDot(Ty, A, B);
+}
+
+// Constant fold a Bfrev (bit reverse) dxil intrinsic.
+//
+// The width of the reversal is selected by the result type: i16, i32 and
+// i64 are supported, any other type returns nullptr.
+static Constant *HLSLConstantFoldBfrev(ConstantInt *C, Type *Ty) {
+  const uint64_t Raw = C->getValue().getLimitedValue();
+
+  uint64_t Reversed = 0;
+  if (Ty == Type::getInt16Ty(Ty->getContext()))
+    Reversed = llvm::reverseBits(static_cast<uint16_t>(Raw));
+  else if (Ty == Type::getInt32Ty(Ty->getContext()))
+    Reversed = llvm::reverseBits(static_cast<uint32_t>(Raw));
+  else if (Ty == Type::getInt64Ty(Ty->getContext()))
+    Reversed = llvm::reverseBits(static_cast<uint64_t>(Raw));
+  else
+    return nullptr;
+
+  return ConstantInt::get(Ty, Reversed);
+}
+
+// Build the result of a findfirst* bit function.
+// A position equal to the bitwidth means no bit was found, in which case
+// the result is -1 (all ones); otherwise the result is the position itself.
+static Constant *HLSLConstantFoldFindBit(Type *Ty, unsigned position, unsigned bitwidth) {
+  const bool Found = position != bitwidth;
+  if (Found)
+    return ConstantInt::get(Ty, position);
+
+  return ConstantInt::get(Ty, APInt::getAllOnesValue(Ty->getScalarSizeInBits()));
+}
+
+// Constant fold unary integer intrinsics (bit reverse, population count and
+// the first-bit searches). Returns nullptr for any other opcode.
+static Constant *ConstantFoldUnaryIntIntrinsic(OP::OpCode opcode, Type *Ty, ConstantInt *Op) {
+  const APInt Value = Op->getValue();
+  const unsigned NumBits = Value.getBitWidth();
+
+  switch (opcode) {
+  case OP::OpCode::Bfrev:
+    return HLSLConstantFoldBfrev(Op, Ty);
+  case OP::OpCode::Countbits:
+    return ConstantInt::get(Ty, Value.countPopulation());
+  case OP::OpCode::FirstbitLo:
+    return HLSLConstantFoldFindBit(Ty, Value.countTrailingZeros(), NumBits);
+  case OP::OpCode::FirstbitHi:
+    return HLSLConstantFoldFindBit(Ty, Value.countLeadingZeros(), NumBits);
+  case OP::OpCode::FirstbitSHi: {
+    // Signed search: skip the leading run of bits that match the sign bit.
+    const unsigned LeadingSignRun = Value.isNegative() ? Value.countLeadingOnes()
+                                                       : Value.countLeadingZeros();
+    return HLSLConstantFoldFindBit(Ty, LeadingSignRun, NumBits);
+  }
+  default:
+    break;
+  }
+
+  return nullptr;
+}
+
+// Constant fold binary integer intrinsics: signed and unsigned min/max.
+// Returns nullptr for any other opcode.
+static Constant *ConstantFoldBinaryIntIntrinsic(OP::OpCode opcode, Type *Ty, ConstantInt *Op1, ConstantInt *Op2) {
+  const APInt LHS = Op1->getValue();
+  const APInt RHS = Op2->getValue();
+
+  // Decide which operand wins; on ties the second operand is selected.
+  bool PickLHS = false;
+  switch (opcode) {
+  default: return nullptr;
+  case OP::OpCode::IMin: PickLHS = LHS.slt(RHS); break;
+  case OP::OpCode::IMax: PickLHS = LHS.sgt(RHS); break;
+  case OP::OpCode::UMin: PickLHS = LHS.ult(RHS); break;
+  case OP::OpCode::UMax: PickLHS = LHS.ugt(RHS); break;
+  }
+
+  return ConstantInt::get(Ty, PickLHS ? LHS : RHS);
+}
+
+// Compute bit field extract for ibfe and ubfe.
+// The computation for ibfe and ubfe is the same except for the right shift,
+// which is an arithmetic shift for ibfe and a logical shift for ubfe; the
+// caller supplies the shift through `shr`.
+// ubfe: https://msdn.microsoft.com/en-us/library/windows/desktop/hh447243(v=vs.85).aspx
+// ibfe: https://msdn.microsoft.com/en-us/library/windows/desktop/hh447243(v=vs.85).aspx
+static Constant *ComputeBFE(Type *Ty, APInt width, APInt offset, APInt val, std::function<APInt(APInt, APInt)> shr) {
+  const unsigned NumBits = width.getBitWidth();
+  const APInt bitwidth(NumBits, NumBits);
+  const APInt FieldMask = bitwidth - 1;
+
+  // Limit width and offset by masking them with (bitwidth - 1).
+  width = width.And(FieldMask);
+  offset = offset.And(FieldMask);
+
+  // An empty field extracts to zero.
+  if (width == 0)
+    return ConstantInt::get(Ty, 0);
+
+  const APInt FieldEnd = width + offset;
+
+  // The field reaches the top of the value: a single right shift suffices.
+  if (!FieldEnd.ult(bitwidth))
+    return ConstantInt::get(Ty, shr(val, offset));
+
+  // Otherwise move the field to the top, then shift it back down.
+  const APInt Shifted = val.shl(bitwidth - FieldEnd);
+  return ConstantInt::get(Ty, shr(Shifted, bitwidth - width));
+}
+
+// Constant fold ternary integer intrinsics: multiply-add and bit field
+// extract. Returns nullptr for any other opcode.
+static Constant *ConstantFoldTernaryIntIntrinsic(OP::OpCode opcode, Type *Ty, ConstantInt *Op1, ConstantInt *Op2, ConstantInt *Op3) {
+  const APInt A = Op1->getValue();
+  const APInt B = Op2->getValue();
+  const APInt C = Op3->getValue();
+
+  switch (opcode) {
+  case OP::OpCode::IMad:
+  case OP::OpCode::UMad:
+    // Result is same for signed/unsigned since this is twos complement and
+    // we only keep the lower half of the multiply.
+    return ConstantInt::get(Ty, A * B + C);
+  case OP::OpCode::Ubfe:
+    // Unsigned extract uses a logical right shift.
+    return ComputeBFE(Ty, A, B, C, [](APInt val, APInt amt) { return val.lshr(amt); });
+  case OP::OpCode::Ibfe:
+    // Signed extract uses an arithmetic right shift.
+    return ComputeBFE(Ty, A, B, C, [](APInt val, APInt amt) { return val.ashr(amt); });
+  default:
+    break;
+  }
+
+  return nullptr;
+}
+
+// Constant fold quaternary integer intrinsic.
+//
+// Currently we only have one quaternary intrinsic: Bfi (bit field insert);
+// any other opcode returns nullptr.
+// The Bfi computation is described here:
+// https://msdn.microsoft.com/en-us/library/windows/desktop/hh446837(v=vs.85).aspx
+static Constant *ConstantFoldQuaternaryIntInstrinsic(OP::OpCode opcode, Type *Ty, ConstantInt *Op1, ConstantInt *Op2, ConstantInt *Op3, ConstantInt *Op4) {
+  if (opcode != OP::OpCode::Bfi)
+    return nullptr;
+
+  const unsigned NumBits = Op1->getValue().getBitWidth();
+  const APInt bitwidth(NumBits, NumBits);
+  const APInt FieldMask = bitwidth - 1;
+
+  // Width and offset are limited by masking with (bitwidth - 1).
+  const APInt width = Op1->getValue().And(FieldMask);
+  const APInt offset = Op2->getValue().And(FieldMask);
+  const APInt src = Op3->getValue();
+  const APInt dst = Op4->getValue();
+  const APInt one(NumBits, 1);
+  const APInt allOnes = APInt::getAllOnesValue(NumBits);
+
+  // bitmask = (((1 << width)-1) << offset) & 0xffffffff
+  const APInt lowMask = one.shl(width) - 1;
+  const APInt bitmask = lowMask.shl(offset).And(allOnes);
+
+  // dest = ((src2 << offset) & bitmask) | (src3 & ~bitmask)
+  const APInt inserted = src.shl(offset).And(bitmask);
+  const APInt preserved = dst.And(~bitmask);
+
+  return ConstantInt::get(Ty, inserted.Or(preserved));
+}
+
+// Return true if opcode is one of the dot product operations
+// (Dot2, Dot3 or Dot4).
+static bool IsDotOpcode(OP::OpCode opcode) {
+  switch (opcode) {
+  case OP::OpCode::Dot2:
+  case OP::OpCode::Dot3:
+  case OP::OpCode::Dot4:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Top level function to constant fold floating point intrinsics.
+//
+// Only half, float and double result types are folded. Calls with one, two
+// or three arguments are dispatched by argument count and require every
+// argument to be a finite ConstantFP; any other argument count is only
+// folded for the dot opcodes. Returns nullptr when nothing could be folded.
+static Constant *ConstantFoldFPIntrinsic(OP::OpCode opcode, Type *Ty, const DxilIntrinsicOperands &IntrinsicOperands) {
+  const bool IsSupportedType = Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy();
+  if (!IsSupportedType)
+    return nullptr;
+
+  switch (IntrinsicOperands.Size()) {
+  case 1: {
+    ConstantFP *Op = IntrinsicOperands.GetConstantFloat(0);
+    if (!IsValidOp(Op))
+      return nullptr;
+    return ConstantFoldUnaryFPIntrinsic(opcode, Ty, Op);
+  }
+  case 2: {
+    ConstantFP *Op1 = IntrinsicOperands.GetConstantFloat(0);
+    ConstantFP *Op2 = IntrinsicOperands.GetConstantFloat(1);
+    if (!(IsValidOp(Op1) && IsValidOp(Op2)))
+      return nullptr;
+    return ConstantFoldBinaryFPIntrinsic(opcode, Ty, Op1, Op2);
+  }
+  case 3: {
+    ConstantFP *Op1 = IntrinsicOperands.GetConstantFloat(0);
+    ConstantFP *Op2 = IntrinsicOperands.GetConstantFloat(1);
+    ConstantFP *Op3 = IntrinsicOperands.GetConstantFloat(2);
+    if (!(IsValidOp(Op1) && IsValidOp(Op2) && IsValidOp(Op3)))
+      return nullptr;
+    return ConstantFoldTernaryFPIntrinsic(opcode, Ty, Op1, Op2, Op3);
+  }
+  default:
+    break;
+  }
+
+  if (IsDotOpcode(opcode))
+    return ConstantFoldDot(opcode, Ty, IntrinsicOperands);
+
+  return nullptr;
+}
+
+// Top level function to constant fold integer intrinsics.
+//
+// Result types wider than 64 bits are not folded. Calls with one to four
+// arguments are dispatched by argument count, and every argument must be a
+// ConstantInt. Returns nullptr when nothing could be folded.
+static Constant *ConstantFoldIntIntrinsic(OP::OpCode opcode, Type *Ty, const DxilIntrinsicOperands &IntrinsicOperands) {
+  if (Ty->getScalarSizeInBits() > (sizeof(int64_t) * CHAR_BIT))
+    return nullptr;
+
+  const size_t NumOps = IntrinsicOperands.Size();
+  if (NumOps < 1 || NumOps > 4)
+    return nullptr;
+
+  // Collect the arguments, bailing out on the first non-ConstantInt.
+  ConstantInt *Ops[4] = { nullptr, nullptr, nullptr, nullptr };
+  for (size_t Idx = 0; Idx != NumOps; ++Idx) {
+    Ops[Idx] = IntrinsicOperands.GetConstantInt(Idx);
+    if (!Ops[Idx])
+      return nullptr;
+  }
+
+  switch (NumOps) {
+  case 1: return ConstantFoldUnaryIntIntrinsic(opcode, Ty, Ops[0]);
+  case 2: return ConstantFoldBinaryIntIntrinsic(opcode, Ty, Ops[0], Ops[1]);
+  case 3: return ConstantFoldTernaryIntIntrinsic(opcode, Ty, Ops[0], Ops[1], Ops[2]);
+  case 4: return ConstantFoldQuaternaryIntInstrinsic(opcode, Ty, Ops[0], Ops[1], Ops[2], Ops[3]);
+  default: break;
+  }
+  return nullptr;
+}
+
+// External entry point to constant fold dxil intrinsics.
+// Called from the llvm constant folding routine.
+//
+// Returns the folded constant, or nullptr when the call is not a dxil
+// intrinsic with a valid opcode, when the result type is neither floating
+// point nor integer, or when the intrinsic could not be folded.
+Constant *hlsl::ConstantFoldScalarCall(StringRef Name, Type *Ty, ArrayRef<Constant *> RawOperands) {
+  OP::OpCode opcode;
+  if (!GetDxilOpcode(Name, RawOperands, opcode))
+    return nullptr;
+
+  // Hide the opcode operand from the per-type folding routines.
+  DxilIntrinsicOperands IntrinsicOperands(RawOperands);
+
+  if (Ty->isFloatingPointTy())
+    return ConstantFoldFPIntrinsic(opcode, Ty, IntrinsicOperands);
+
+  if (Ty->isIntegerTy())
+    return ConstantFoldIntIntrinsic(opcode, Ty, IntrinsicOperands);
+
+  return nullptr;
+}
+
+// External entry point to determine if we can constant fold calls to
+// the given function. We have to overestimate the set of functions because
+// we only have the function value here instead of the call. We need the
+// actual call to get the opcode for the intrinsic.
+//
+// Returns true when F is a dxil op function whose name starts with one of
+// the foldable operation class prefixes, and false otherwise.
+bool hlsl::CanConstantFoldCallTo(const Function *F) {
+  if (!OP::IsDxilOpFunc(F))
+    return false;
+
+  // Prefixes are matched with startswith to get all overloads. Note that
+  // "dx.op.unary" also covers every "dx.op.unaryBits" overload, so the
+  // latter needs no entry of its own.
+  static const char *const FoldablePrefixes[] = {
+    "dx.op.unary",
+    "dx.op.binary",
+    "dx.op.tertiary",
+    "dx.op.quaternary",
+    "dx.op.dot",
+  };
+
+  const StringRef Name = F->getName();
+  for (const char *Prefix : FoldablePrefixes) {
+    if (Name.startswith(Prefix))
+      return true;
+  }
+
+  return false;
+}

+ 7 - 2
lib/HLSL/DxilOperations.cpp

@@ -324,11 +324,16 @@ bool OP::CheckOpCodeTable() {
   return true;
   return true;
 }
 }
 
 
-bool OP::IsDxilOpFunc(const llvm::Function *F) {
-  StringRef name = F->getName();
+bool OP::IsDxilOpFuncName(StringRef name) {
   return name.startswith(OP::m_NamePrefix);
   return name.startswith(OP::m_NamePrefix);
 }
 }
 
 
+bool OP::IsDxilOpFunc(const llvm::Function *F) {
+  if (!F->hasName())
+    return false;
+  return IsDxilOpFuncName(F->getName());
+}
+
 bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) {
 bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) {
   const CallInst *CI = dyn_cast<CallInst>(I);
   const CallInst *CI = dyn_cast<CallInst>(I);
   if (CI == nullptr) return false;
   if (CI == nullptr) return false;

+ 0 - 1
tools/clang/test/CodeGenHLSL/Samples/DX11/SubD11_SmoothPS.hlsl

@@ -9,7 +9,6 @@
 // CHECK: sample
 // CHECK: sample
 // CHECK: Log
 // CHECK: Log
 // CHECK: Exp
 // CHECK: Exp
-// CHECK: dot3
 // CHECK: Sqrt
 // CHECK: Sqrt
 // CHECK: dot3
 // CHECK: dot3
 // CHECK: Saturate
 // CHECK: Saturate

+ 4 - 4
tools/clang/test/CodeGenHLSL/firstbitHi.hlsl

@@ -1,19 +1,19 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
 
-// CHECK: FirstbitHi
+// CHECK: call {{.*}} FirstbitHi
 // CHECK: sub i32 31
 // CHECK: sub i32 31
 // CHECK: icmp eq i32 {{.*}}, -1
 // CHECK: icmp eq i32 {{.*}}, -1
 // CHECK: select
 // CHECK: select
 // CHECK: i32 -1
 // CHECK: i32 -1
 
 
-// CHECK: FirstbitSHi
+// CHECK: call {{.*}} FirstbitSHi
 // CHECK: sub i32 31
 // CHECK: sub i32 31
 // CHECK: icmp eq i32 {{.*}}, -1
 // CHECK: icmp eq i32 {{.*}}, -1
 // CHECK: select
 // CHECK: select
 // CHECK: i32 -1
 // CHECK: i32 -1
 
 
-// CHECK: FirstbitSHi
-// CHECK: FirstbitSHi
+// CHECK: call void @dx.op.bufferStore.i32{{.*}}, i32 5 
+// CHECK: call void @dx.op.bufferStore.i32{{.*}}, i32 8
 
 
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63
 // CHECK: sub i32 63

+ 5 - 6
tools/clang/test/CodeGenHLSL/firstbitLo.hlsl

@@ -1,14 +1,13 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
 
-// CHECK: FirstbitLo
+// CHECK: call i32 @dx.op.unaryBits.i32{{.*}} FirstbitLo
 
 
-// CHECK: FirstbitLo
+// CHECK: call i32 @dx.op.unaryBits.i32{{.*}} FirstbitLo
 
 
-// CHECK: FirstbitLo
-// CHECK: FirstbitLo
+// CHECK: call void @dx.op.bufferStore.i32{{.*}}, i32 5 
+// CHECK: call void @dx.op.bufferStore.i32{{.*}}, i32 9 
 
 
-// CHECK: dx.op.unaryBits.i64
-// CHECK: FirstbitLo
+// CHECK: call i32 @dx.op.unaryBits.i64{{.*}} FirstbitLo
 
 
 uint a;
 uint a;
 int2 b;
 int2 b;

+ 1 - 1
tools/clang/test/CodeGenHLSL/firstbitshi_const.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
 
-// CHECK: FirstbitSHi
+// CHECK: call void @dx.op.storeOutput.i32{{.*}}, i32 -1
 
 
 [RootSignature("")]
 [RootSignature("")]
 int main() : SV_Target {
 int main() : SV_Target {

+ 8 - 0
tools/clang/test/HLSL/constprop/Acos.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FF0C15240000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return acos(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Asin.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FE0C15240000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return asin(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Atan.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FDDAC6700000000 
+
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return atan(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Bfrev.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 1532713819
+
+[RootSignature("")]
+int main() : SV_Target {
+    uint64_t s = reversebits(114ULL);
+    int x = 0xdadadada;
+    return reversebits(x);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Cos.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FB21BD540000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 1.5;
+    return cos(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Countbits.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 23
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = 0xdadadada;
+    uint64_t y = 0x30100000000ULL;
+    return countbits(x) + countbits(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Dot2.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.328125e-01
+
+[RootSignature("")]
+float main() : SV_Target {
+    float2 x = float2(0.5, 0.125);
+    float2 y = float2(0.25, 0.0625);
+    return dot(x, y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Dot3.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x400F100000000000
+
+[RootSignature("")]
+float main() : SV_Target {
+    float3 x = float3(0.5, 0.125, 1.5);
+    float3 y = float3(0.25, 0.0625, 2.5);
+    return dot(x, y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Dot4.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x4044010000000000
+
+[RootSignature("")]
+float main() : SV_Target {
+    float4 x = float4(0.5, 0.125, 1.5, 8.5);
+    float4 y = float4(0.25, 0.0625, 2.5, 4.25);
+    return dot(x, y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Exp.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FD346C420000000
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = -1.2;
+    return exp(x);
+}
+

+ 9 - 0
tools/clang/test/HLSL/constprop/FAbs.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FF3333340000000
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = -1.2;
+    return abs(x);
+}
+

+ 17 - 0
tools/clang/test/HLSL/constprop/FMad.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 5.078125e-01
+
+[RootSignature("")]
+float main() : SV_Target {
+    float   x = 0.5;
+    float   y = 0.25;
+    float   z = 0.125;
+    float   f = mad(x, y, z);    // 0.25
+
+    double d1 = 0.0625;
+    double d2 = 0.125;
+    double d3 = 0.25;
+    double d  = mad(d1, d2, d3); // 0.2578125
+
+    return f + d;
+}

+ 13 - 0
tools/clang/test/HLSL/constprop/FMax.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = 0.5;
+    float y = 0.25;
+    
+    double d1 = 0.5;
+    double d2 = 0.25;
+
+    return max(x, y) + max(d1, d2);
+}

+ 13 - 0
tools/clang/test/HLSL/constprop/FMin.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 5.000000e-01
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = 0.5;
+    float y = 0.25;
+    
+    double d1 = 0.5;
+    double d2 = 0.25;
+
+    return min(x, y) + min(d1, d2);
+}

+ 37 - 0
tools/clang/test/HLSL/constprop/Firstbithi.hlsl

@@ -0,0 +1,37 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 189
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = 0x0;           // firstbithigh(x) = -1
+    int y = 0x0010;        // firstbithigh(y) =  4
+    int z = 0x80000000;    // firstbithigh(z) = 30
+    int w = 0xffffffff;    // firstbithigh(w) = -1
+    
+    uint ux = 0x0;         // firstbithigh(ux) = -1
+    uint uy = 0x1000;      // firstbithigh(uy) = 12
+    uint uz = 0x80000000;  // firstbithigh(uz) = 31
+    uint uw = 0xffffffff;  // firstbithigh(uw) = 31
+    
+    // TODO: add tests for int64_t when constant literals are fixed
+    
+    uint64_t lux = 0x0ULL;           // firstbithigh(lux) = -1
+    uint64_t luy = 0x1000ULL;        // firstbithigh(luy) = 12
+    uint64_t luz = 0x00100000000ULL; // firstbithigh(luz) = 32
+    uint64_t luw = 0x30000000000ULL; // firstbithigh(luw) = 41
+    
+    
+    return firstbithigh(x)
+         + firstbithigh(y)
+         + firstbithigh(z)
+         + firstbithigh(w)
+         + firstbithigh(ux)
+         + firstbithigh(uy)
+         + firstbithigh(uz)
+         + firstbithigh(uw)
+         + firstbithigh(lux)
+         + firstbithigh(luy)
+         + firstbithigh(luz)
+         + firstbithigh(luw)
+         ;
+}

+ 12 - 0
tools/clang/test/HLSL/constprop/Firstbitlo.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 42
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = 0x0;                   // firstbitlow(x) = -1
+    int y = 0x100;                 // firstbitlow(y) =  8
+    int z = 0x110;                 // firstbitlow(z) =  4
+    uint64_t w = 0x30100000000ULL; // firstbitlow(w) = 32
+    uint64_t s = 0x0ULL;           // firstbitlow(s) = -1
+    return firstbitlow(x) + firstbitlow(y) + firstbitlow(z) + firstbitlow(w) + firstbitlow(s);
+}

+ 12 - 0
tools/clang/test/HLSL/constprop/Fma.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 2.500000e-01
+
+[RootSignature("")]
+float main() : SV_Target {
+    double x = 0.5;
+    double y = 0.25;
+    double z = 0.125;
+    double d = fma(x, y, z); // 0.25
+
+    return d;
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Frc.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 5.000000e-01
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = -1.5;
+    return frac(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Hcos.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FF20AC180000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return cosh(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Hsin.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FE0ACD000000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return sinh(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Htan.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FDD9353E0000000 
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.5;
+    return tanh(y);
+}

+ 10 - 0
tools/clang/test/HLSL/constprop/IMad.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 -7
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = -2;
+    int y = 5;
+    int z = 3;
+    return mad(x, y ,z); // -7
+}

+ 11 - 0
tools/clang/test/HLSL/constprop/IMax.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 25
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = 5;
+    int y = 25;
+    int z = -1;
+
+    return max(max(x, y), z);
+}

+ 11 - 0
tools/clang/test/HLSL/constprop/IMin.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 -1
+
+[RootSignature("")]
+int main() : SV_Target {
+    int x = 5;
+    int y = 25;
+    int z = -1;
+
+    return min(min(x, y), z);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Log.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FD9F323E0000000
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 1.5;
+    return log(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Round_ne.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 4.000000e+00
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = 1.5;
+    float y = 2.5;
+    return round(x) + round(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Round_ni.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 3.000000e+00
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = 1.5;
+    float y = 2.5;
+    return floor(x) + floor(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Round_pi.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 5.000000e+00
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = 1.5;
+    float y = 2.5;
+    return ceil(x) + ceil(y);
+}

+ 9 - 0
tools/clang/test/HLSL/constprop/Round_z.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.000000e+00
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = -1.5;
+    float y = 2.5;
+    return trunc(x) + trunc(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Rsqrt.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 4.000000e+00
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.0625;
+    return rsqrt(y);
+}

+ 12 - 0
tools/clang/test/HLSL/constprop/Saturate_double.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.500000e+00 
+
+[RootSignature("")]
+float main() : SV_Target {
+    double x = -1.2;
+    double y = 1.2;
+    double z = 0.5;
+    return saturate(x)
+         + saturate(y)
+         + saturate(z);
+}

+ 12 - 0
tools/clang/test/HLSL/constprop/Saturate_float.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.500000e+00 
+
+[RootSignature("")]
+float main() : SV_Target {
+    float x = -1.2;
+    float y =  1.2;
+    float z =  0.5;
+    return saturate(x)
+         + saturate(y)
+         + saturate(z);
+}

+ 12 - 0
tools/clang/test/HLSL/constprop/Saturate_half.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 1.500000e+00 
+
+[RootSignature("")]
+float main() : SV_Target {
+    min16float x = -1.2;
+    min16float y = 1.2;
+    min16float z = 0.5;
+    return saturate(x)
+         + saturate(y)
+         + saturate(z);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Sin.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x3FEFEB7AA0000000
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 1.5;
+    return sin(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Sqrt.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 2.500000e-01
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 0.0625;
+    return sqrt(y);
+}

+ 8 - 0
tools/clang/test/HLSL/constprop/Tan.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} float 0x402C33ED60000000
+
+[RootSignature("")]
+float main(float x : A) : SV_Target {
+    float y = 1.5;
+    return tan(y);
+}

+ 15 - 0
tools/clang/test/HLSL/constprop/UMad.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 -4
+
+[RootSignature("")]
+uint main() : SV_Target {
+    uint x = -2;
+    uint y = 5;
+    uint z = 3;
+
+    uint64_t xl = 0x100000000ULL;
+    uint64_t yl = 0x000000002ULL;
+    uint64_t zl = 0x000000003ULL;
+    return mad(x, y ,z)     // -7
+         + mad(xl, yl, zl); // 0x200000003
+}

+ 15 - 0
tools/clang/test/HLSL/constprop/UMax.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 -2
+
+[RootSignature("")]
+uint main() : SV_Target {
+    uint ux = 5;
+    uint uy = 25;
+    uint uz = -1;
+    
+    uint64_t lx = 7ULL;
+    uint64_t ly = 25ULL;
+    uint64_t lz = -1ULL;
+
+    return max(max(ux, uy), uz) + max(max(lx, ly), lz);
+}

+ 15 - 0
tools/clang/test/HLSL/constprop/UMin.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -T ps_6_0 %s -E main | %FileCheck %s
+// CHECK: call void @dx.op.storeOutput{{.*}} i32 12
+
+[RootSignature("")]
+uint main() : SV_Target {
+    uint ux = 5;
+    uint uy = 25;
+    uint uz = -1;
+    
+    uint64_t lx = 7ULL;
+    uint64_t ly = 25ULL;
+    uint64_t lz = -1ULL;
+
+    return min(min(ux, uy), uz) + min(min(lx, ly), lz);
+}

+ 95 - 0
tools/clang/test/HLSL/constprop/bfi.ll

@@ -0,0 +1,95 @@
+; RUN: %opt %s -sccp -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "dxil-ms-dx"
+
+%dx.types.Handle = type { i8* }
+%struct.RWByteAddressBuffer = type { i32 }
+
+define void @main() {
+entry:
+  %buf_UAV_rawbuf = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 510,
+  %0 = call i32 @dx.op.quaternary.i32(i32 53, i32 8, i32 1, i32 255, i32 0)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 0, i32 undef, i32 %0, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 255,
+  %1 = call i32 @dx.op.quaternary.i32(i32 53, i32 8, i32 32, i32 255, i32 0)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 4, i32 undef, i32 %1, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -1091584273,
+  %2 = call i32 @dx.op.quaternary.i32(i32 53, i32 16, i32 16, i32 48879, i32 3735928559)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 8, i32 undef, i32 %2, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 322420463,
+  %3 = call i32 @dx.op.quaternary.i32(i32 53, i32 8, i32 16, i32 55, i32 318815983)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 12, i32 undef, i32 %3, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 15,
+  %4 = call i32 @dx.op.quaternary.i32(i32 53, i32 0, i32 8, i32 0, i32 15)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 16, i32 undef, i32 %4, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 2560,
+  %5 = call i64 @dx.op.quaternary.i64(i32 53, i64 4, i64 8, i64 4010, i64 0)
+  %6 = trunc i64 %5 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 20, i32 undef, i32 %6, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 10,
+  %7 = call i64 @dx.op.quaternary.i64(i32 53, i64 4, i64 32, i64 4010, i64 0)
+  %8 = lshr i64 %7, 32
+  %9 = trunc i64 %8 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 24, i32 undef, i32 %9, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.bufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
+
+declare i32 @dx.op.quaternary.i32(i32, i32, i32, i32, i32) #1
+declare i64 @dx.op.quaternary.i64(i32, i64, i64, i64, i64) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!llvm.ident = !{!0}
+!dx.valver = !{!1}
+!dx.version = !{!1}
+!dx.shaderModel = !{!2}
+!dx.resources = !{!3}
+!dx.typeAnnotations = !{!6, !9}
+!dx.entryPoints = !{!13}
+
+!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
+!1 = !{i32 1, i32 0}
+!2 = !{!"ps", i32 6, i32 0}
+!3 = !{null, !4, null, null}
+!4 = !{!5}
+!5 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"buf", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!6 = !{i32 0, %struct.RWByteAddressBuffer undef, !7}
+!7 = !{i32 4, !8}
+!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4}
+!9 = !{i32 1, void ()* @main, !10}
+!10 = !{!11}
+!11 = !{i32 0, !12, !12}
+!12 = !{}
+!13 = !{void ()* @main, !"main", !14, !3, !20}
+!14 = !{!15, !18, null}
+!15 = !{!16}
+!16 = !{i32 0, !"A", i8 4, i8 0, !17, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!17 = !{i32 0}
+!18 = !{!19}
+!19 = !{i32 0, !"SV_Target", i8 4, i8 16, !17, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!20 = !{i32 0, i64 16}

+ 103 - 0
tools/clang/test/HLSL/constprop/ibfe.ll

@@ -0,0 +1,103 @@
+; RUN: %opt %s -sccp -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "dxil-ms-dx"
+
+%dx.types.Handle = type { i8* }
+%struct.RWByteAddressBuffer = type { i32 }
+
+define void @main() {
+entry:
+  %buf_UAV_rawbuf = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -9510,
+  %0 = call i32 @dx.op.tertiary.i32(i32 51, i32 16, i32 16, i32 3671719936)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 0, i32 undef, i32 %0, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 0,
+  %1 = call i32 @dx.op.tertiary.i32(i32 51, i32 32, i32 16, i32 3671719936)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 4, i32 undef, i32 %1, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -255,
+  %2 = call i32 @dx.op.tertiary.i32(i32 51, i32 11, i32 0, i32 3841)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 8, i32 undef, i32 %2, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -1,
+  %3 = call i32 @dx.op.tertiary.i32(i32 51, i32 1, i32 0, i32 3841)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 12, i32 undef, i32 %3, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -1,
+  %4 = call i64 @dx.op.tertiary.i64(i32 51, i64 1, i64 32, i64 4294967296)
+  %5 = lshr i64 %4, 0
+  %6 = trunc i64 %5 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 16, i32 undef, i32 %6, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %7 = call i64 @dx.op.tertiary.i64(i32 51, i64 2, i64 32, i64 4294967296)
+  %8 = lshr i64 %7, 0
+  %9 = trunc i64 %8 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 20, i32 undef, i32 %9, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 -15,
+  %10 = call i64 @dx.op.tertiary.i64(i32 51, i64 39, i64 0, i64 1035087118336)
+  %11 = lshr i64 %10, 32
+  %12 = trunc i64 %11 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 24, i32 undef, i32 %12, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %13 = call i64 @dx.op.tertiary.i64(i32 51, i64 66, i64 0, i64 1)
+  %14 = trunc i64 %13 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 28, i32 undef, i32 %14, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.bufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
+
+declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #1
+declare i64 @dx.op.tertiary.i64(i32, i64, i64, i64) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!llvm.ident = !{!0}
+!dx.valver = !{!1}
+!dx.version = !{!1}
+!dx.shaderModel = !{!2}
+!dx.resources = !{!3}
+!dx.typeAnnotations = !{!6, !9}
+!dx.entryPoints = !{!13}
+
+!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
+!1 = !{i32 1, i32 0}
+!2 = !{!"ps", i32 6, i32 0}
+!3 = !{null, !4, null, null}
+!4 = !{!5}
+!5 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"buf", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!6 = !{i32 0, %struct.RWByteAddressBuffer undef, !7}
+!7 = !{i32 4, !8}
+!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4}
+!9 = !{i32 1, void ()* @main, !10}
+!10 = !{!11}
+!11 = !{i32 0, !12, !12}
+!12 = !{}
+!13 = !{void ()* @main, !"main", !14, !3, !20}
+!14 = !{!15, !18, null}
+!15 = !{!16}
+!16 = !{i32 0, !"A", i8 4, i8 0, !17, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!17 = !{i32 0}
+!18 = !{!19}
+!19 = !{i32 0, !"SV_Target", i8 4, i8 16, !17, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!20 = !{i32 0, i64 16}

+ 103 - 0
tools/clang/test/HLSL/constprop/ubfe.ll

@@ -0,0 +1,103 @@
+; RUN: %opt %s -sccp -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "dxil-ms-dx"
+
+%dx.types.Handle = type { i8* }
+%struct.RWByteAddressBuffer = type { i32 }
+
+define void @main() {
+entry:
+  %buf_UAV_rawbuf = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1793,
+  %0 = call i32 @dx.op.tertiary.i32(i32 52, i32 11, i32 0, i32 3841)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 0, i32 undef, i32 %0, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %1 = call i32 @dx.op.tertiary.i32(i32 52, i32 1, i32 0, i32 3841)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 4, i32 undef, i32 %1, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 56026,
+  %2 = call i32 @dx.op.tertiary.i32(i32 52, i32 16, i32 16, i32 3671719936)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 8, i32 undef, i32 %2, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 0,
+  %3 = call i32 @dx.op.tertiary.i32(i32 52, i32 32, i32 16, i32 3671719936)
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 12, i32 undef, i32 %3, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %4 = call i64 @dx.op.tertiary.i64(i32 52, i64 1, i64 32, i64 4294967296)
+  %5 = lshr i64 %4, 0
+  %6 = trunc i64 %5 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 16, i32 undef, i32 %6, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %7 = call i64 @dx.op.tertiary.i64(i32 52, i64 2, i64 32, i64 4294967296)
+  %8 = lshr i64 %7, 0
+  %9 = trunc i64 %8 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 20, i32 undef, i32 %9, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 113,
+  %10 = call i64 @dx.op.tertiary.i64(i32 52, i64 39, i64 0, i64 1035087118336)
+  %11 = lshr i64 %10, 32
+  %12 = trunc i64 %11 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 24, i32 undef, i32 %12, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+  
+  ; CHECK: @dx.op.bufferStore{{.*}}, i32 1,
+  %13 = call i64 @dx.op.tertiary.i64(i32 52, i64 66, i64 0, i64 1)
+  %14 = trunc i64 %13 to i32
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %buf_UAV_rawbuf, i32 28, i32 undef, i32 %14, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.bufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
+
+declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #1
+declare i64 @dx.op.tertiary.i64(i32, i64, i64, i64) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!llvm.ident = !{!0}
+!dx.valver = !{!1}
+!dx.version = !{!1}
+!dx.shaderModel = !{!2}
+!dx.resources = !{!3}
+!dx.typeAnnotations = !{!6, !9}
+!dx.entryPoints = !{!13}
+
+!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
+!1 = !{i32 1, i32 0}
+!2 = !{!"ps", i32 6, i32 0}
+!3 = !{null, !4, null, null}
+!4 = !{!5}
+!5 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"buf", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!6 = !{i32 0, %struct.RWByteAddressBuffer undef, !7}
+!7 = !{i32 4, !8}
+!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4}
+!9 = !{i32 1, void ()* @main, !10}
+!10 = !{!11}
+!11 = !{i32 0, !12, !12}
+!12 = !{}
+!13 = !{void ()* @main, !"main", !14, !3, !20}
+!14 = !{!15, !18, null}
+!15 = !{!16}
+!16 = !{i32 0, !"A", i8 4, i8 0, !17, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!17 = !{i32 0}
+!18 = !{!19}
+!19 = !{i32 0, !"SV_Target", i8 4, i8 16, !17, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!20 = !{i32 0, i64 16}

+ 51 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -839,6 +839,7 @@ public:
   TEST_METHOD(CodeGenDx12MiniEngineTonemapcs)
   TEST_METHOD(CodeGenDx12MiniEngineTonemapcs)
   TEST_METHOD(CodeGenDx12MiniEngineUpsampleandblurcs)
   TEST_METHOD(CodeGenDx12MiniEngineUpsampleandblurcs)
   TEST_METHOD(DxilGen_StoreOutput)
   TEST_METHOD(DxilGen_StoreOutput)
+  TEST_METHOD(ConstantFolding)
 
 
   dxc::DxcDllSupport m_dllSupport;
   dxc::DxcDllSupport m_dllSupport;
   bool m_CompilerPreservesBBNames;
   bool m_CompilerPreservesBBNames;
@@ -4102,6 +4103,56 @@ TEST_F(CompilerTest, DxilGen_StoreOutput) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\dxilgen_storeoutput.hlsl");
   CodeGenTestCheck(L"..\\CodeGenHLSL\\dxilgen_storeoutput.hlsl");
 }
 }
 
 
+TEST_F(CompilerTest, ConstantFolding) {  // Runs CodeGenTestCheck once per input under constprop\, in the order listed.
+  CodeGenTestCheck(L"constprop\\FAbs.hlsl");  // First group: single-operand floating-point inputs (judging by file names).
+  CodeGenTestCheck(L"constprop\\Saturate_half.hlsl");
+  CodeGenTestCheck(L"constprop\\Saturate_float.hlsl");
+  CodeGenTestCheck(L"constprop\\Saturate_double.hlsl");
+  CodeGenTestCheck(L"constprop\\Cos.hlsl");
+  CodeGenTestCheck(L"constprop\\Sin.hlsl");
+  CodeGenTestCheck(L"constprop\\Tan.hlsl");
+  CodeGenTestCheck(L"constprop\\Acos.hlsl");
+  CodeGenTestCheck(L"constprop\\Asin.hlsl");
+  CodeGenTestCheck(L"constprop\\Atan.hlsl");
+  CodeGenTestCheck(L"constprop\\Hcos.hlsl");
+  CodeGenTestCheck(L"constprop\\Hsin.hlsl");
+  CodeGenTestCheck(L"constprop\\Htan.hlsl");
+  CodeGenTestCheck(L"constprop\\Exp.hlsl");
+  CodeGenTestCheck(L"constprop\\Frc.hlsl");
+  CodeGenTestCheck(L"constprop\\Log.hlsl");
+  CodeGenTestCheck(L"constprop\\Sqrt.hlsl");
+  CodeGenTestCheck(L"constprop\\Rsqrt.hlsl");
+  CodeGenTestCheck(L"constprop\\Round_ne.hlsl");
+  CodeGenTestCheck(L"constprop\\Round_ni.hlsl");
+  CodeGenTestCheck(L"constprop\\Round_pi.hlsl");
+  CodeGenTestCheck(L"constprop\\Round_z.hlsl");
+  // Bit-oriented inputs: bit reverse, bit count, first-bit-lo/hi (judging by file names).
+  CodeGenTestCheck(L"constprop\\Bfrev.hlsl");
+  CodeGenTestCheck(L"constprop\\Countbits.hlsl");
+  CodeGenTestCheck(L"constprop\\Firstbitlo.hlsl");
+  CodeGenTestCheck(L"constprop\\Firstbithi.hlsl");
+  // Min/max inputs; the F/I/U prefixes presumably mean float/signed/unsigned -- TODO confirm.
+  CodeGenTestCheck(L"constprop\\FMin.hlsl");
+  CodeGenTestCheck(L"constprop\\FMax.hlsl");
+  CodeGenTestCheck(L"constprop\\IMin.hlsl");
+  CodeGenTestCheck(L"constprop\\IMax.hlsl");
+  CodeGenTestCheck(L"constprop\\UMin.hlsl");
+  CodeGenTestCheck(L"constprop\\UMax.hlsl");
+  // Multiply-add style inputs (FMad, Fma, IMad, UMad).
+  CodeGenTestCheck(L"constprop\\FMad.hlsl");
+  CodeGenTestCheck(L"constprop\\Fma.hlsl");
+  CodeGenTestCheck(L"constprop\\IMad.hlsl");
+  CodeGenTestCheck(L"constprop\\UMad.hlsl");
+  // Dot-product inputs for 2, 3 and 4 components (judging by file names).
+  CodeGenTestCheck(L"constprop\\Dot2.hlsl");
+  CodeGenTestCheck(L"constprop\\Dot3.hlsl");
+  CodeGenTestCheck(L"constprop\\Dot4.hlsl");
+  // The remaining three inputs are .ll files rather than .hlsl sources.
+  CodeGenTestCheck(L"constprop\\ibfe.ll");
+  CodeGenTestCheck(L"constprop\\ubfe.ll");
+  CodeGenTestCheck(L"constprop\\bfi.ll");
+}
+
 TEST_F(CompilerTest, PreprocessWhenValidThenOK) {
 TEST_F(CompilerTest, PreprocessWhenValidThenOK) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;
   CComPtr<IDxcOperationResult> pResult;

+ 4 - 1
tools/clang/unittests/HLSL/FileCheckerTest.cpp

@@ -341,9 +341,12 @@ static string trim(string value) {
       for (llvm::StringRef S : splitArgs) {
       for (llvm::StringRef S : splitArgs) {
         optionStrings.push_back(
         optionStrings.push_back(
             Unicode::UTF8ToUTF16StringOrThrow(trim(S.str()).c_str()));
             Unicode::UTF8ToUTF16StringOrThrow(trim(S.str()).c_str()));
-        options.push_back(optionStrings.back().c_str());
       }
       }
 
 
+      // Add the options outside the above loop in case the vector is resized.
+      for (const std::wstring& str : optionStrings)
+        options.push_back(str.c_str());
+
       IFT(pOptimizer->RunOptimizer(pSource, options.data(), options.size(),
       IFT(pOptimizer->RunOptimizer(pSource, options.data(), options.size(),
                                    &pOutputModule, &pOutputText));
                                    &pOutputModule, &pOutputText));
       StdOut = BlobToUtf8(pOutputText);
       StdOut = BlobToUtf8(pOutputText);