пре 7 година · 953ba46999
--- a/include/dxc/HLSL/DxilUtil.h
+++ b/include/dxc/HLSL/DxilUtil.h
@@ -14,6 +14,7 @@
 
															 #include <string>
														
 
															 #include <memory>
														
 
															 #include "llvm/ADT/StringRef.h"
														
 
															+#include "llvm/IR/Constants.h"
														
 
															 namespace llvm {
														
 
															 class Type;
														
@@ -92,6 +93,7 @@ namespace dxilutil {
 
															   void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context);
														
 
															   // Returns true if type contains HLSL Object type (resource)
														
 
															   bool ContainsHLSLObjectType(llvm::Type *Ty);
														
 
															+  bool IsSplat(llvm::ConstantDataVector *cdv);
														
 
															 }
														
 
															 }
														
--- a/include/dxc/HLSL/HLModule.h
+++ b/include/dxc/HLSL/HLModule.h
@@ -49,7 +49,7 @@ class RootSignatureHandle;
 
															 struct HLOptions {
														
 
															   HLOptions()
														
 
															       : bDefaultRowMajor(false), bIEEEStrict(false), bDisableOptimizations(false),
														
 
															-        bLegacyCBufferLoad(false), PackingStrategy(0), bBackCompatMode(0), unused(0) {
														
 
															+        bLegacyCBufferLoad(false), PackingStrategy(0), bDX9CompatMode(0), bFXCCompatMode(0), unused(0) {
														
 
															   }
														
 
															   uint32_t GetHLOptionsRaw() const;
														
 
															   void SetHLOptionsRaw(uint32_t data);
														
@@ -61,8 +61,9 @@ struct HLOptions {
 
															   unsigned PackingStrategy         : 2;
														
 
															   static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy");
														
 
															   unsigned bUseMinPrecision        : 1;
														
 
															-  unsigned bBackCompatMode         : 1;
														
 
															-  unsigned unused                  : 23;
														
 
															+  unsigned bDX9CompatMode          : 1;
														
 
															+  unsigned bFXCCompatMode          : 1;
														
 
															+  unsigned unused                  : 22;
														
 
															 };
														
 
															 typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>> DxilFunctionPropsMap;
														
--- a/include/dxc/Support/HLSLOptions.h
+++ b/include/dxc/Support/HLSLOptions.h
@@ -132,7 +132,8 @@ public:
 
															   bool AvoidFlowControl = false;     // OPT_Gfa
														
 
															   bool PreferFlowControl = false;    // OPT_Gfp
														
 
															   bool EnableStrictMode = false;     // OPT_Ges
														
 
															-  bool EnableBackCompatMode = false;     // OPT_Gec
														
 
															+  bool EnableDX9CompatMode = false;     // OPT_Gec
														
 
															+  bool EnableFXCCompatMode = false;     // internal flag
														
 
															   unsigned long HLSLVersion = 0; // OPT_hlsl_version (2015-2018)
														
 
															   bool Enable16BitTypes = false; // OPT_enable_16bit_types
														
 
															   bool OptDump = false; // OPT_ODump - dump optimizer commands
														
--- a/lib/DxcSupport/HLSLOptions.cpp
+++ b/lib/DxcSupport/HLSLOptions.cpp
@@ -363,10 +363,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
															     }
														
 
															   }
														
 
															-  opts.EnableBackCompatMode = Args.hasFlag(OPT_Gec, OPT_INVALID, false);
														
 
															+  opts.EnableDX9CompatMode = Args.hasFlag(OPT_Gec, OPT_INVALID, false);
														
 
															   llvm::StringRef ver = Args.getLastArgValue(OPT_hlsl_version);
														
 
															   if (ver.empty()) {
														
 
															-    if (opts.EnableBackCompatMode)
														
 
															+    if (opts.EnableDX9CompatMode)
														
 
															       opts.HLSLVersion = 2016; // Default to max supported version with /Gec flag
														
 
															     else
														
 
															       opts.HLSLVersion = 2018; // Default to latest version
														
@@ -393,11 +393,15 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
															     return 1;
														
 
															   }
														
 
															-  if (opts.EnableBackCompatMode && opts.HLSLVersion > 2016) {
														
 
															+  if (opts.EnableDX9CompatMode && opts.HLSLVersion > 2016) {
														
 
															     errors << "/Gec is not supported with HLSLVersion " << opts.HLSLVersion;
														
 
															     return 1;
														
 
															   }
														
 
															+  if (opts.HLSLVersion <= 2016) {
														
 
															+    opts.EnableFXCCompatMode = true;
														
 
															+  }
														
 
															+
														
 
															   // AssemblyCodeHex not supported (Fx)
														
 
															   // OutputLibrary not supported (Fl)
														
 
															   opts.AssemblyCode = Args.getLastArgValue(OPT_Fc);
														
--- a/lib/HLSL/DxilUtil.cpp
+++ b/lib/HLSL/DxilUtil.cpp
@@ -394,6 +394,19 @@ bool ContainsHLSLObjectType(llvm::Type *Ty) {
 
															   return false;
														
 
															 }
														
 
															+// Splat test for constant data vectors, modeled on the implementation
															+// available in LLVM's trunk:
															+// http://llvm.org/doxygen/Constants_8cpp_source.html#l02734
															+//
															+// Returns true when the raw bytes of every element of cdv are identical to
															+// the raw bytes of element 0, and false as soon as one element differs.
															+bool IsSplat(llvm::ConstantDataVector *cdv) {
															+  const unsigned elemBytes = cdv->getElementByteSize();
															+  const unsigned elemCount = cdv->getNumElements();
															+  const char *const first = cdv->getRawDataValues().data();
															+
															+  // Walk elements 1..N-1 and compare each one bytewise against element 0.
															+  const char *candidate = first;
															+  for (unsigned idx = 1; idx != elemCount; ++idx) {
															+    candidate += elemBytes;
															+    if (memcmp(first, candidate, elemBytes) != 0)
															+      return false;
															+  }
															+
															+  return true;
															+}
														
 
															 }
														
 
															 }
														
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -27,6 +27,7 @@
 
															 #include "llvm/IR/IRBuilder.h"
														
 
															 #include "llvm/IR/Instructions.h"
														
 
															 #include "llvm/IR/Module.h"
														
 
															+#include "llvm/ADT/APSInt.h"
														
 
															 using namespace llvm;
														
 
															 using namespace hlsl;
														
@@ -2113,21 +2114,132 @@ Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
															   return Builder.CreateSelect(cond, zero, one);
														
 
															 }
														
 
															-Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
														
 
															-                    HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
														
 
															-  hlsl::OP *hlslOP = &helper.hlslOP;
														
 
															-  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
														
 
															-  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
														
 
															-  IRBuilder<> Builder(CI);
														
 
															+// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
															+// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
															+// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
															+// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
															+//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
															+// Rule 3: Power must be an exact value.
															+// +----------+---------------------+------------------+
															+// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
															+// +----------+---------------------+------------------+
															+// | float4x4 | True                |               33 |
															+// | float4x4 | False               |               17 |
															+// | float4x2 | True                |               17 |
															+// | float4x2 | False               |                9 |
															+// | float2x4 | True                |               17 |
															+// | float2x4 | False               |                9 |
															+// | float4   | True                |                9 |
															+// | float4   | False               |                5 |
															+// | float2   | True                |                5 |
															+// | float2   | False               |                3 |
															+// | float    | True                |                3 |
															+// | float    | False               |                2 |
															+// +----------+---------------------+------------------+
															+//
															+// Parameters:
															+//   Builder - not used by this check; kept so existing callers keep compiling.
															+//   x       - base operand; its vector element count is read only when pow is
															+//             a ConstantDataVector.
															+//   pow     - exponent operand; must be a ConstantFP or a splat ConstantDataVector.
															+//   powI    - out: receives the exponent as an integer whenever the float to
															+//             integer conversion is exact. Note that it is written even when
															+//             the function then returns false because of the mul-op threshold.
															+bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
															+  (void)Builder;
															+
															+  // Applicable only when power is a literal.
															+  ConstantDataVector *powCDV = dyn_cast<ConstantDataVector>(pow);
															+  ConstantFP *powCFP = dyn_cast<ConstantFP>(pow);
															+  if (!powCDV && !powCFP) {
															+    return false;
															+  }
															+
															+  // Only apply this code gen on splat values.
															+  if (powCDV && !hlsl::dxilutil::IsSplat(powCDV)) {
															+    return false;
															+  }
															+
															+  // For a vector every element is identical (splat), so element 0 is representative.
															+  APFloat powAPF = powCDV ? powCDV->getElementAsAPFloat(0) : powCFP->getValueAPF();
															+  APSInt powAPS(32, false);
															+  bool isExact = false;
															+  // Try converting float value of power to integer and also check if the float value is exact.
															+  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
															+  if (status != APFloat::opStatus::opOK || !isExact) {
															+    return false;
															+  }
															+
															+  powI = static_cast<int32_t>(powAPS.getExtValue());
															+
															+  // Take the magnitude in unsigned arithmetic: abs(INT32_MIN) is undefined
															+  // behavior, and -2147483648.0 is exactly representable as a float literal.
															+  const uint32_t powU = powI < 0 ? 0u - static_cast<uint32_t>(powI)
															+                                 : static_cast<uint32_t>(powI);
															+
															+  int setBitCount = 0;
															+  int maxBitSetPos = -1;
															+  for (int i = 0; i < 32; i++) {
															+    if ((powU >> i) & 1) {
															+      setBitCount++;
															+      maxBitSetPos = i;
															+    }
															+  }
															+
															+  // Square-and-multiply cost: one squaring per bit position below the highest
															+  // set bit plus one combining mul per additional set bit. For INT32_MIN
															+  // (bit 31 set) this yields 31, which is above every threshold in the table
															+  // above, so that exponent is rejected here instead of tripping an assert.
															+  const unsigned numElem = powCDV ? x->getType()->getVectorNumElements() : 1;
															+  const int mulOpThreshold = static_cast<int>(powI < 0 ? numElem + 1 : 2 * numElem + 1);
															+  const int mulOpNeeded = maxBitSetPos + setBitCount - 1;
															+  return mulOpNeeded <= mulOpThreshold;
															+}
														
 
															+
														
 
															+// Emits pow(x, y) for an integer exponent y using only fmul instructions
															+// (plus one fdiv for negative exponents), via square-and-multiply over the
															+// bits of |y|.
															+//
															+//   Builder - IR builder used to create the fmul/fdiv instructions.
															+//   x       - base value; the constant 1 is created with x's type.
															+//   y       - integer exponent.
															+//
															+// Returns the constant 1 when y is zero, otherwise the value holding x^y.
															+Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
															+  // Take the magnitude in unsigned arithmetic: abs(INT32_MIN) is undefined behavior.
															+  const uint32_t absY = y < 0 ? 0u - static_cast<uint32_t>(y)
															+                              : static_cast<uint32_t>(y);
															+  // If y is zero then always return 1.
															+  if (absY == 0) {
															+    return ConstantFP::get(x->getType(), 1);
															+  }
															+
															+  int lastSetPos = -1;
															+  Value *result = nullptr; // product of the powers selected so far
															+  Value *mul = nullptr;    // running x^(2^k), advanced lazily up to the current set bit
															+  for (int i = 0; i < 32; i++) {
															+    if ((absY >> i) & 1) {
															+      // Bring mul from x^(2^lastSetPos) up to x^(2^i). The very first step
															+      // just seeds mul with x; every later step squares it.
															+      for (int j = i; j > lastSetPos; j--) {
															+        if (!mul) {
															+          mul = x;
															+        } else {
															+          mul = Builder.CreateFMul(mul, mul);
															+        }
															+      }
															+
															+      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
															+      lastSetPos = i;
															+    }
															+  }
															+
															+  // Compute reciprocal for negative power values.
															+  if (y < 0) {
															+    Value* constOne = ConstantFP::get(x->getType(), 1);
															+    result = Builder.CreateFDiv(constOne, result);
															+  }
															+
															+  return result;
															+}
														
 
															+
														
 
															+Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) { // lowers pow(x, y); returns the value holding the result
														
 
															+  // As applicable implement pow using only mul ops as done by Fxc.
														
 
															+  int32_t p=0; // receives the integer exponent from the eligibility check below
														
 
															+  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) { // mul-only path is considered only in FXC compat mode
														
 
															+    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
														
 
															+  }
														
 
															+
														
 
															+  // Default to log-mul-exp pattern if previous scenarios don't apply.
														
 
															   // t = log(x);
														
 
															   Value *logX =
														
 
															-      TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
														
 
															+    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
														
 
															   // t = y * t;
														
 
															   Value *mulY = Builder.CreateFMul(logX, y);
														
 
															   // pow = exp(t);
														
 
															   return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
														
 
															 }
														
 
															+// Lowering entry point for the HL pow intrinsic call CI. Reads the base and
															+// exponent operands from the call, looks up the module's FXC compatibility
															+// option, and delegates the actual code generation to TranslatePowImpl with a
															+// builder positioned at CI. IOP, opcode, pObjHelper and Translated are not
															+// used here.
															+Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
															+                    HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
															+  IRBuilder<> Builder(CI);
															+  Value *base = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
															+  Value *exponent = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
															+  const bool fxcCompat =
															+      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
															+  return TranslatePowImpl(&helper.hlslOP, Builder, base, exponent, fxcCompat);
															+}
														
 
															+
														
 
															 Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
														
 
															                             HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
														
 
															   hlsl::OP *hlslOP = &helper.hlslOP;
														
--- a/lib/HLSL/HLSignatureLower.cpp
+++ b/lib/HLSL/HLSignatureLower.cpp
@@ -231,7 +231,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
 
															   }
														
 
															   //  back-compat mode - remap obsolete semantics
														
 
															-  if (HLM.GetHLOptions().bBackCompatMode && paramAnnotation.HasSemanticString()) {
														
 
															+  if (HLM.GetHLOptions().bDX9CompatMode && paramAnnotation.HasSemanticString()) {
														
 
															     hlsl::RemapObsoleteSemantic(paramAnnotation, sigPoint->GetKind(), HLM.GetCtx());
														
 
															   }
														
--- a/tools/clang/include/clang/Basic/LangOptions.h
+++ b/tools/clang/include/clang/Basic/LangOptions.h
@@ -156,7 +156,8 @@ public:
 
															   unsigned RootSigMinor;
														
 
															   bool IsHLSLLibrary;
														
 
															   bool UseMinPrecision; // use min precision, not native precision.
														
 
															-  bool EnableBackCompatMode;
														
 
															+  bool EnableDX9CompatMode;
														
 
															+  bool EnableFXCCompatMode;
														
 
															   // HLSL Change Ends
														
 
															   bool SPIRV = false;  // SPIRV Change
														
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@@ -385,7 +385,8 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
 
															   opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
														
 
															   opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision;
														
 
															-  opts.bBackCompatMode = CGM.getLangOpts().EnableBackCompatMode;
														
 
															+  opts.bDX9CompatMode = CGM.getLangOpts().EnableDX9CompatMode;
														
 
															+  opts.bFXCCompatMode = CGM.getLangOpts().EnableFXCCompatMode;
														
 
															   m_pHLModule->SetHLOptions(opts);
														
 
															   m_pHLModule->SetAutoBindingSpace(CGM.getCodeGenOpts().HLSLDefaultSpace);
														
@@ -1559,7 +1560,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															   SourceLocation retTySemanticLoc = SetSemantic(FD, retTyAnnotation);
														
 
															   retTyAnnotation.SetParamInputQual(DxilParamInputQual::Out);
														
 
															   if (isEntry) {
														
 
															-    if (CGM.getLangOpts().EnableBackCompatMode && retTyAnnotation.HasSemanticString()) {
														
 
															+    if (CGM.getLangOpts().EnableDX9CompatMode && retTyAnnotation.HasSemanticString()) {
														
 
															       RemapObsoleteSemantic(retTyAnnotation, /*isPatchConstantFunction*/ false);
														
 
															     }
														
 
															     CheckParameterAnnotation(retTySemanticLoc, retTyAnnotation,
														
@@ -1840,7 +1841,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															     paramAnnotation.SetParamInputQual(dxilInputQ);
														
 
															     if (isEntry) {
														
 
															-      if (CGM.getLangOpts().EnableBackCompatMode && paramAnnotation.HasSemanticString()) {
														
 
															+      if (CGM.getLangOpts().EnableDX9CompatMode && paramAnnotation.HasSemanticString()) {
														
 
															         RemapObsoleteSemantic(paramAnnotation, /*isPatchConstantFunction*/ false);
														
 
															       }
														
 
															       CheckParameterAnnotation(paramSemanticLoc, paramAnnotation,
														
@@ -1941,7 +1942,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															 }
														
 
															 void CGMSHLSLRuntime::RemapObsoleteSemantic(DxilParameterAnnotation &paramInfo, bool isPatchConstantFunction) {
														
 
															-  DXASSERT(CGM.getLangOpts().EnableBackCompatMode, "should be used only in back-compat mode");
														
 
															+  DXASSERT(CGM.getLangOpts().EnableDX9CompatMode, "should be used only in back-compat mode");
														
 
															   const ShaderModel *SM = m_pHLModule->GetShaderModel();
														
 
															   DXIL::SigPointKind sigPointKind = SigPointFromInputQual(paramInfo.GetParamInputQual(), SM->GetKind(), isPatchConstantFunction);
														
@@ -4577,7 +4578,7 @@ void CGMSHLSLRuntime::FinishCodeGen() {
 
															     // In back-compat mode (with /Gec flag) create a static global for each const global
														
 
															     // to allow writing to it.
														
 
															     // TODO: Verfiy the behavior of static globals in hull shader
														
 
															-    if(CGM.getLangOpts().EnableBackCompatMode && CGM.getLangOpts().HLSLVersion <= 2016)
														
 
															+    if(CGM.getLangOpts().EnableDX9CompatMode && CGM.getLangOpts().HLSLVersion <= 2016)
														
 
															       CreateWriteEnabledStaticGlobals(m_pHLModule->GetModule(), m_pHLModule->GetEntryFunction());
														
 
															     if (m_pHLModule->GetShaderModel()->IsHS()) {
														
 
															       SetPatchConstantFunction(Entry);
														
--- a/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/tools/clang/lib/Parse/ParseDecl.cpp
@@ -2177,7 +2177,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
 
															   // global variable can be inside a global structure as a static member.
														
 
															   // Check if the global is a static member and skip global const pass.
														
 
															   // in backcompat mode, the check for global const is deferred to later stage in CGMSHLSLRuntime::FinishCodeGen()
														
 
															-  bool CheckGlobalConst = getLangOpts().HLSL && getLangOpts().EnableBackCompatMode && getLangOpts().HLSLVersion <= 2016 ? false : true;
														
 
															+  bool CheckGlobalConst = getLangOpts().HLSL && getLangOpts().EnableDX9CompatMode && getLangOpts().HLSLVersion <= 2016 ? false : true;
														
 
															   if (NestedNameSpecifier *nameSpecifier = D.getCXXScopeSpec().getScopeRep()) {
														
 
															     if (nameSpecifier->getKind() == NestedNameSpecifier::SpecifierKind::TypeSpec) {
														
 
															       const Type *type = D.getCXXScopeSpec().getScopeRep()->getAsType();
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count01.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count01.hlsl
@@ -0,0 +1,35 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fdiv
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+
														
 
															+float4 main (float x1 : A, float4x4 x2 : B, float2 x3 : C, float4 x4 : D) : SV_Target
														
 
															+{
														
 
															+    float p1 = 8.0;
														
 
															+    float4x4 p2 =         {57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0};
														
 
															+    float2 p3 = float2(-5.0,-5.0);
														
 
															+    float4 p4 = float4(17.0,17.0,17.0,17.0);
														
 
															+
														
 
															+    return float4(pow(x1, p1), pow(x2, p2)[0][0], pow(x3, p3)[0], pow(x4, p4)[0]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count02.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count02.hlsl
@@ -0,0 +1,18 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+
														
 
															+float2 main (float4 x : A) : SV_Target
														
 
															+{
														
 
															+    float2 y = float2(11.0,11.0);
														
 
															+    return pow(x, y);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-correctness.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-correctness.hlsl
@@ -0,0 +1,38 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// Verify the mul-only pattern implemented to support Fxc compatibility.
														
 
															+
														
 
															+// 2.0^8.0.
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.560000e+02)
														
 
															+
														
 
															+// 2.0^57.0 = 144115188075855872 (0x4380000000000000)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0x4380000000000000)
														
 
															+
														
 
															+// 2.0^-5.0
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 3.125000e-02)
														
 
															+
														
 
															+//2.0^17.0
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.310720e+05)
														
 
															+
														
 
															+float4 main () : SV_Target
														
 
															+{
														
 
															+    float x1 = 2.0;
														
 
															+    float p1 = 8.0;
														
 
															+
														
 
															+    float4x4 x2 = {2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0};
														
 
															+    float4x4 p2 = {57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0};
														
 
															+
														
 
															+    float2 x3 = float2(2.0,2.0);
														
 
															+    float2 p3 = float2(-5.0,-5.0);
														
 
															+
														
 
															+    float4 x4 = float4(2.0,2.0,2.0,2.0);
														
 
															+    float4 p4 = float4(17.0,17.0,17.0,17.0);
														
 
															+
														
 
															+    return float4(pow(x1, p1), pow(x2, p2)[0][0], pow(x3, p3)[0], pow(x4, p4)[0]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria01.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria01.hlsl
@@ -0,0 +1,27 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// dxc should use log-mul-exp pattern to implement all scenarios listed below.
														
 
															+
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+
														
 
															+float main (float4x4 a : A, float b : B, float4 c: C) : SV_Target
														
 
															+{
														
 
															+    float4x4 p1 = {2.0, 2.0, 3.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, -1.0, 2.0,}; // not a splat vector
														
 
															+    float4 p2 = {2.33, 2.33, 2.33, 2.33}; // a splat vector but not exact
														
 
															+    float p3 = 2.001; // not an exact value
														
 
															+    float p4 = 4294967296.0; // value greater than int max
														
 
															+    float p5 = 7; // exceeds the mulop threshold criteria for float
														
 
															+
														
 
															+    return pow(a,p1)[0][0] + pow(b,p2)[0] + pow(a,p3)[0][0] + pow(a,p4)[0][0] + pow(c,p4)[0] + pow(b,p5);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria02.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria02.hlsl
@@ -0,0 +1,17 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// dxc should use the mul-only pattern (no log-mul-exp) to implement all scenarios listed below.
														
 
															+
														
 
															+// CHECK-NOT: Log
														
 
															+// CHECK-NOT: Exp
														
 
															+
														
 
															+float main (float4x4 a : A, float b : B, float4 c: C) : SV_Target
														
 
															+{
														
 
															+    float4x4 p1 = {2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,}; // a splat
														
 
															+    float4 p2 = {9, 9, 9, 9}; // another splat
														
 
															+    float p3 = 8; // meets the threshold criteria
														
 
															+
														
 
															+    return pow(a,p1)[0][0] + pow(b,p2)[0] + pow(a,p3)[0][0];
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-lit-types.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-lit-types.hlsl
@@ -0,0 +1,14 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// check that different float literals are being considered for mul-only code gen for pow.
														
 
															+// CHECK-NOT: Log
														
 
															+// CHECK-NOT: Exp
														
 
															+
														
 
															+float main ( float a : A, float4x4 b: B, float4 c: C, float2 d: D) : SV_Target
														
 
															+{
														
 
															+    return pow(a, 8.0f) + 
														
 
															+           pow(d, 14.0h)[0] +
														
 
															+           pow(c, 384.0H)[0] +
														
 
															+           pow(c, -32.0F)[0] +
														
 
															+           pow(b, -131072.0L)[0][0] +
														
 
															+           pow(b, 1073741824.0L)[0][0];
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-one-as-power.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-one-as-power.hlsl
@@ -0,0 +1,11 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+
														
 
															+float4 main ( float a : A, float2 b : B, float4 c: C, float4x4 d: D) : SV_Target
														
 
															+{
														
 
															+    return float4(pow(a, 1), pow(b, float2(1.00,1.00))[0], pow(c, float4(1.00,1.00,1.00,1.00))[2], pow(d, 1.00)[1][2]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-zero-as-power.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-zero-as-power.hlsl
@@ -0,0 +1,11 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00)
														
 
															+
														
 
															+float4 main ( float a : A, float2 b : B, float4 c: C, float4x4 d: D) : SV_Target
														
 
															+{
														
 
															+    return float4(pow(a, 0), pow(b, float2(0.00,0.00))[0], pow(c, -0.00)[2], pow(d, 0.00)[1][2]);
														
 
															+}
														
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -800,7 +800,7 @@ public:
 
															     compiler.createSourceManager(compiler.getFileManager());
														
 
															     compiler.setTarget(
														
 
															         TargetInfo::CreateTargetInfo(compiler.getDiagnostics(), targetOptions));
														
 
															-    if (Opts.EnableBackCompatMode) {
														
 
															+    if (Opts.EnableDX9CompatMode) {
														
 
															       auto const ID = compiler.getDiagnostics().getCustomDiagID(clang::DiagnosticsEngine::Warning, "/Gec flag is a deprecated functionality.");
														
 
															       compiler.getDiagnostics().Report(ID);
														
 
															     }
														
@@ -855,7 +855,8 @@ public:
 
															     compiler.getLangOpts().RootSigMajor = 1;
														
 
															     compiler.getLangOpts().RootSigMinor = rootSigMinor;
														
 
															     compiler.getLangOpts().HLSLVersion = (unsigned) Opts.HLSLVersion;
														
 
															-    compiler.getLangOpts().EnableBackCompatMode = Opts.EnableBackCompatMode;
														
 
															+    compiler.getLangOpts().EnableDX9CompatMode = Opts.EnableDX9CompatMode;
														
 
															+    compiler.getLangOpts().EnableFXCCompatMode = Opts.EnableFXCCompatMode;
														
 
															     compiler.getLangOpts().UseMinPrecision = !Opts.Enable16BitTypes;
														
--- a/tools/clang/tools/libclang/dxcrewriteunused.cpp
+++ b/tools/clang/tools/libclang/dxcrewriteunused.cpp
@@ -129,7 +129,8 @@ void SetupCompilerForRewrite(CompilerInstance &compiler,
 
															   compiler.getDiagnostics().setIgnoreAllWarnings(!opts.OutputWarnings);
														
 
															   compiler.getLangOpts().HLSLVersion = (unsigned)opts.HLSLVersion;
														
 
															   compiler.getLangOpts().UseMinPrecision = !opts.Enable16BitTypes;
														
 
															-  compiler.getLangOpts().EnableBackCompatMode = opts.EnableBackCompatMode;
														
 
															+  compiler.getLangOpts().EnableDX9CompatMode = opts.EnableDX9CompatMode;
														
 
															+  compiler.getLangOpts().EnableFXCCompatMode = opts.EnableFXCCompatMode;
														
 
															   PreprocessorOptions &PPOpts = compiler.getPreprocessorOpts();
														
 
															   if (rewrite != nullptr) {