7 anni fa · 2d530bd3ea
--- a/external/SPIRV-Headers
+++ b/external/SPIRV-Headers
@@ -1 +1 @@
 
															-Subproject commit dcf23bdabacc3c54b83b1f9367e7a8adb27f8d87
														
 
															+Subproject commit d5b2e1255f706ce1f88812217e9a554f299848af
														
--- a/external/SPIRV-Tools
+++ b/external/SPIRV-Tools
@@ -1 +1 @@
 
															-Subproject commit 9fbcce4ca17de7b2d8f6b322bcd1d43a7d6adc29
														
 
															+Subproject commit 4b4bd4c53aaa020f7e349aede394d42476b7e3aa
														
--- a/external/googletest
+++ b/external/googletest
@@ -1 +1 @@
 
															-Subproject commit d25268a55f6f6f38c65a7d1b7b119e33a46d1688
														
 
															+Subproject commit 440527a61e1c91188195f7de212c63c77e8f0a45
														
--- a/include/dxc/HLSL/DxilUtil.h
+++ b/include/dxc/HLSL/DxilUtil.h
@@ -14,6 +14,7 @@
 
															 #include <string>
														
 
															 #include <memory>
														
 
															 #include "llvm/ADT/StringRef.h"
														
 
															+#include "llvm/IR/Constants.h"
														
 
															 namespace llvm {
														
 
															 class Type;
														
@@ -92,6 +93,7 @@ namespace dxilutil {
 
															   void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context);
														
 
															   // Returns true if type contains HLSL Object type (resource)
														
 
															   bool ContainsHLSLObjectType(llvm::Type *Ty);
														
 
															+  bool IsSplat(llvm::ConstantDataVector *cdv);
														
 
															 }
														
 
															 }
														
--- a/include/dxc/HLSL/HLModule.h
+++ b/include/dxc/HLSL/HLModule.h
@@ -49,7 +49,7 @@ class RootSignatureHandle;
 
															 struct HLOptions {
														
 
															   HLOptions()
														
 
															       : bDefaultRowMajor(false), bIEEEStrict(false), bDisableOptimizations(false),
														
 
															-        bLegacyCBufferLoad(false), PackingStrategy(0), bBackCompatMode(0), unused(0) {
														
 
															+        bLegacyCBufferLoad(false), PackingStrategy(0), bDX9CompatMode(0), bFXCCompatMode(0), unused(0) {
														
 
															   }
														
 
															   uint32_t GetHLOptionsRaw() const;
														
 
															   void SetHLOptionsRaw(uint32_t data);
														
@@ -61,8 +61,9 @@ struct HLOptions {
 
															   unsigned PackingStrategy         : 2;
														
 
															   static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy");
														
 
															   unsigned bUseMinPrecision        : 1;
														
 
															-  unsigned bBackCompatMode         : 1;
														
 
															-  unsigned unused                  : 23;
														
 
															+  unsigned bDX9CompatMode          : 1;
														
 
															+  unsigned bFXCCompatMode          : 1;
														
 
															+  unsigned unused                  : 22;
														
 
															 };
														
 
															 typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>> DxilFunctionPropsMap;
														
--- a/include/dxc/Support/HLSLOptions.h
+++ b/include/dxc/Support/HLSLOptions.h
@@ -132,7 +132,8 @@ public:
 
															   bool AvoidFlowControl = false;     // OPT_Gfa
														
 
															   bool PreferFlowControl = false;    // OPT_Gfp
														
 
															   bool EnableStrictMode = false;     // OPT_Ges
														
 
															-  bool EnableBackCompatMode = false;     // OPT_Gec
														
 
															+  bool EnableDX9CompatMode = false;     // OPT_Gec
														
 
															+  bool EnableFXCCompatMode = false;     // internal flag
														
 
															   unsigned long HLSLVersion = 0; // OPT_hlsl_version (2015-2018)
														
 
															   bool Enable16BitTypes = false; // OPT_enable_16bit_types
														
 
															   bool OptDump = false; // OPT_ODump - dump optimizer commands
														
--- a/include/dxc/Support/WinAdapter.h
+++ b/include/dxc/Support/WinAdapter.h
@@ -172,6 +172,7 @@
 
															 #define _atoi64 atoll
														
 
															 #define sprintf_s snprintf
														
 
															 #define _strdup strdup
														
 
															+#define _strnicmp strnicmp
														
 
															 #define vsprintf_s vsprintf
														
 
															 #define strcat_s strcat
														
--- a/include/dxc/Support/WinFunctions.h
+++ b/include/dxc/Support/WinFunctions.h
@@ -26,6 +26,8 @@ HRESULT UIntAdd(UINT uAugend, UINT uAddend, UINT *puResult);
 
															 HRESULT IntToUInt(int in, UINT *out);
														
 
															 HRESULT SizeTToInt(size_t in, INT *out);
														
 
															 HRESULT UInt32Mult(UINT a, UINT b, UINT *out);
														
 
															+
														
 
															+int strnicmp(const char *str1, const char *str2, size_t count);
														
 
															 int _stricmp(const char *str1, const char *str2);
														
 
															 int _wcsicmp(const wchar_t *str1, const wchar_t *str2);
														
 
															 int _wcsnicmp(const wchar_t *str1, const wchar_t *str2, size_t n);
														
--- a/lib/DxcSupport/HLSLOptions.cpp
+++ b/lib/DxcSupport/HLSLOptions.cpp
@@ -363,10 +363,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
															     }
														
 
															   }
														
 
															-  opts.EnableBackCompatMode = Args.hasFlag(OPT_Gec, OPT_INVALID, false);
														
 
															+  opts.EnableDX9CompatMode = Args.hasFlag(OPT_Gec, OPT_INVALID, false);
														
 
															   llvm::StringRef ver = Args.getLastArgValue(OPT_hlsl_version);
														
 
															   if (ver.empty()) {
														
 
															-    if (opts.EnableBackCompatMode)
														
 
															+    if (opts.EnableDX9CompatMode)
														
 
															       opts.HLSLVersion = 2016; // Default to max supported version with /Gec flag
														
 
															     else
														
 
															       opts.HLSLVersion = 2018; // Default to latest version
														
@@ -393,11 +393,15 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
															     return 1;
														
 
															   }
														
 
															-  if (opts.EnableBackCompatMode && opts.HLSLVersion > 2016) {
														
 
															+  if (opts.EnableDX9CompatMode && opts.HLSLVersion > 2016) {
														
 
															     errors << "/Gec is not supported with HLSLVersion " << opts.HLSLVersion;
														
 
															     return 1;
														
 
															   }
														
 
															+  if (opts.HLSLVersion <= 2016) {
														
 
															+    opts.EnableFXCCompatMode = true;
														
 
															+  }
														
 
															+
														
 
															   // AssemblyCodeHex not supported (Fx)
														
 
															   // OutputLibrary not supported (Fl)
														
 
															   opts.AssemblyCode = Args.getLastArgValue(OPT_Fc);
														
--- a/lib/DxcSupport/WinFunctions.cpp
+++ b/lib/DxcSupport/WinFunctions.cpp
@@ -98,6 +98,23 @@ HRESULT UInt32Mult(UINT a, UINT b, UINT *out) {
 
															   return S_OK;
														
 
															 }
														
 
															+int strnicmp(const char *str1, const char *str2, size_t count) {
														
 
															+  size_t i = 0;
														
 
															+  for (; i < count && str1[i] && str2[i]; ++i) {
														
 
															+    int d = std::tolower(str1[i]) - std::tolower(str2[i]);
														
 
															+    if (d != 0)
														
 
															+      return d;
														
 
															+  }
														
 
															+
														
 
															+  if (i == count) {
														
 
															+    // All 'count' characters matched.
														
 
															+    return 0;
														
 
															+  }
														
 
															+
														
 
															+  // str1 or str2 reached NULL before 'count' characters were compared.
														
 
															+  return str1[i] - str2[i];
														
 
															+}
														
 
															+
														
 
															 int _stricmp(const char *str1, const char *str2) {
														
 
															   size_t i = 0;
														
 
															   for (; str1[i] && str2[i]; ++i) {
														
--- a/lib/HLSL/DxilTypeSystem.cpp
+++ b/lib/HLSL/DxilTypeSystem.cpp
@@ -11,6 +11,7 @@
 
															 #include "dxc/HLSL/DxilModule.h"
														
 
															 #include "dxc/HLSL/HLModule.h"
														
 
															 #include "dxc/Support/Global.h"
														
 
															+#include "dxc/Support/WinFunctions.h"
														
 
															 #include "llvm/IR/Module.h"
														
 
															 #include "llvm/IR/LLVMContext.h"
														
--- a/lib/HLSL/DxilUtil.cpp
+++ b/lib/HLSL/DxilUtil.cpp
@@ -394,6 +394,19 @@ bool ContainsHLSLObjectType(llvm::Type *Ty) {
 
															   return false;
														
 
															 }
														
 
															+// Based on the implementation available in LLVM's trunk:
														
 
															+// http://llvm.org/doxygen/Constants_8cpp_source.html#l02734
														
 
															+bool IsSplat(llvm::ConstantDataVector *cdv) {
														
 
															+  const char *Base = cdv->getRawDataValues().data();
														
 
															+
														
 
															+  // Compare elements 1+ to the 0'th element.
														
 
															+  unsigned EltSize = cdv->getElementByteSize();
														
 
															+  for (unsigned i = 1, e = cdv->getNumElements(); i != e; ++i)
														
 
															+    if (memcmp(Base, Base + i * EltSize, EltSize))
														
 
															+      return false;
														
 
															+
														
 
															+  return true;
														
 
															+}
														
 
															 }
														
 
															 }
														
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -27,6 +27,7 @@
 
															 #include "llvm/IR/IRBuilder.h"
														
 
															 #include "llvm/IR/Instructions.h"
														
 
															 #include "llvm/IR/Module.h"
														
 
															+#include "llvm/ADT/APSInt.h"
														
 
															 using namespace llvm;
														
 
															 using namespace hlsl;
														
@@ -609,6 +610,123 @@ Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
 
															   return Builder.CreateBitCast(byte4, CI->getType());
														
 
															 }
														
 
															+// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
														
 
															+// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
														
 
															+// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
														
 
															+// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
														
 
															+//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
														
 
															+// Rule 3: Power must be an exact value.
														
 
															+// +----------+---------------------+------------------+
														
 
															+// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
														
 
															+// +----------+---------------------+------------------+
														
 
															+// | float4x4 | True                |               33 |
														
 
															+// | float4x4 | False               |               17 |
														
 
															+// | float4x2 | True                |               17 |
														
 
															+// | float4x2 | False               |                9 |
														
 
															+// | float2x4 | True                |               17 |
														
 
															+// | float2x4 | False               |                9 |
														
 
															+// | float4   | True                |                9 |
														
 
															+// | float4   | False               |                5 |
														
 
															+// | float2   | True                |                5 |
														
 
															+// | float2   | False               |                3 |
														
 
															+// | float    | True                |                3 |
														
 
															+// | float    | False               |                2 |
														
 
															+// +----------+---------------------+------------------+
														
 
															+
														
 
															+bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
														
 
															+  // Applicable only when power is a literal.
														
 
															+  if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
														
 
															+    return false;
														
 
															+  }
														
 
															+
														
 
															+  // Only apply this code gen on splat values.
														
 
															+  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
														
 
															+    if (!hlsl::dxilutil::IsSplat(cdv)) {
														
 
															+      return false;
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  APFloat powAPF = isa<ConstantDataVector>(pow) ?
														
 
															+    cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
														
 
															+    cast<ConstantFP>(pow)->getValueAPF();
														
 
															+  APSInt powAPS(32, false);
														
 
															+  bool isExact = false;
														
 
															+  // Try converting float value of power to integer and also check if the float value is exact.
														
 
															+  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
														
 
															+  if (status == APFloat::opStatus::opOK && isExact) {
														
 
															+    powI = powAPS.getExtValue();
														
 
															+    uint32_t powU = abs(powI);
														
 
															+    int setBitCount = 0;
														
 
															+    int maxBitSetPos = -1;
														
 
															+    for (int i = 0; i < 32; i++) {
														
 
															+      if ((powU >> i) & 1) {
														
 
															+        setBitCount++;
														
 
															+        maxBitSetPos = i;
														
 
															+      }
														
 
															+    }
														
 
															+
														
 
															+    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
														
 
															+    unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
														
 
															+    int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
														
 
															+    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
														
 
															+    return mulOpNeeded <= mulOpThreshold;
														
 
															+  }
														
 
															+
														
 
															+  return false;
														
 
															+}
														
 
															+
														
 
															+Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
														
 
															+  uint32_t absY = abs(y);
														
 
															+  // If y is zero then always return 1.
														
 
															+  if (absY == 0) {
														
 
															+    return ConstantFP::get(x->getType(), 1);
														
 
															+  }
														
 
															+
														
 
															+  int lastSetPos = -1;
														
 
															+  Value *result = nullptr;
														
 
															+  Value *mul = nullptr;
														
 
															+  for (int i = 0; i < 32; i++) {
														
 
															+    if ((absY >> i) & 1) {
														
 
															+      for (int j = i; j > lastSetPos; j--) {
														
 
															+        if (!mul) {
														
 
															+          mul = x;
														
 
															+        }
														
 
															+        else {
														
 
															+          mul = Builder.CreateFMul(mul, mul);
														
 
															+        }
														
 
															+      }
														
 
															+
														
 
															+      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
														
 
															+      lastSetPos = i;
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  // Compute reciprocal for negative power values.
														
 
															+  if (y < 0) {
														
 
															+    Value* constOne = ConstantFP::get(x->getType(), 1);
														
 
															+    result = Builder.CreateFDiv(constOne, result);
														
 
															+  }
														
 
															+
														
 
															+  return result;
														
 
															+}
														
 
															+
														
 
															+Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
														
 
															+  // As applicable implement pow using only mul ops as done by Fxc.
														
 
															+  int32_t p = 0;
														
 
															+  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
														
 
															+    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
														
 
															+  }
														
 
															+
														
 
															+  // Default to log-mul-exp pattern if previous scenarios don't apply.
														
 
															+  // t = log(x);
														
 
															+  Value *logX =
														
 
															+    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
														
 
															+  // t = y * t;
														
 
															+  Value *mulY = Builder.CreateFMul(logX, y);
														
 
															+  // pow = exp(t);
														
 
															+  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
														
 
															+}
														
 
															+
														
 
															 Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
														
 
															                                  OP::OpCode opcode,
														
 
															                                  HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
														
@@ -1501,11 +1619,12 @@ Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
															   Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
														
 
															   Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
														
 
															   Result = Builder.CreateInsertElement(Result, diffuse, 1);
														
 
															-  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h * m).
														
 
															+  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
														
 
															   Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
														
 
															   Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
														
 
															-  Value *nhMulM = Builder.CreateFMul(n_dot_h, m);
														
 
															-  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhMulM);
														
 
															+  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
														
 
															+  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
														
 
															+  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
														
 
															   Result = Builder.CreateInsertElement(Result, spec, 2);
														
 
															   return Result;
														
 
															 }
														
@@ -2118,14 +2237,9 @@ Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
															   hlsl::OP *hlslOP = &helper.hlslOP;
														
 
															   Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
														
 
															   Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
														
 
															+  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
														
 
															   IRBuilder<> Builder(CI);
														
 
															-  // t = log(x);
														
 
															-  Value *logX =
														
 
															-      TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
														
 
															-  // t = y * t;
														
 
															-  Value *mulY = Builder.CreateFMul(logX, y);
														
 
															-  // pow = exp(t);
														
 
															-  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
														
 
															+  return TranslatePowImpl(hlslOP,Builder,x,y,isFXCCompatMode);
														
 
															 }
														
 
															 Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
														
--- a/lib/HLSL/HLSignatureLower.cpp
+++ b/lib/HLSL/HLSignatureLower.cpp
@@ -231,7 +231,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
 
															   }
														
 
															   //  back-compat mode - remap obsolete semantics
														
 
															-  if (HLM.GetHLOptions().bBackCompatMode && paramAnnotation.HasSemanticString()) {
														
 
															+  if (HLM.GetHLOptions().bDX9CompatMode && paramAnnotation.HasSemanticString()) {
														
 
															     hlsl::RemapObsoleteSemantic(paramAnnotation, sigPoint->GetKind(), HLM.GetCtx());
														
 
															   }
														
--- a/tools/clang/include/clang/Basic/LangOptions.h
+++ b/tools/clang/include/clang/Basic/LangOptions.h
@@ -156,7 +156,8 @@ public:
 
															   unsigned RootSigMinor;
														
 
															   bool IsHLSLLibrary;
														
 
															   bool UseMinPrecision; // use min precision, not native precision.
														
 
															-  bool EnableBackCompatMode;
														
 
															+  bool EnableDX9CompatMode;
														
 
															+  bool EnableFXCCompatMode;
														
 
															   // HLSL Change Ends
														
 
															   bool SPIRV = false;  // SPIRV Change
														
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@@ -385,7 +385,8 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
 
															   opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
														
 
															   opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision;
														
 
															-  opts.bBackCompatMode = CGM.getLangOpts().EnableBackCompatMode;
														
 
															+  opts.bDX9CompatMode = CGM.getLangOpts().EnableDX9CompatMode;
														
 
															+  opts.bFXCCompatMode = CGM.getLangOpts().EnableFXCCompatMode;
														
 
															   m_pHLModule->SetHLOptions(opts);
														
 
															   m_pHLModule->SetAutoBindingSpace(CGM.getCodeGenOpts().HLSLDefaultSpace);
														
@@ -1559,7 +1560,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															   SourceLocation retTySemanticLoc = SetSemantic(FD, retTyAnnotation);
														
 
															   retTyAnnotation.SetParamInputQual(DxilParamInputQual::Out);
														
 
															   if (isEntry) {
														
 
															-    if (CGM.getLangOpts().EnableBackCompatMode && retTyAnnotation.HasSemanticString()) {
														
 
															+    if (CGM.getLangOpts().EnableDX9CompatMode && retTyAnnotation.HasSemanticString()) {
														
 
															       RemapObsoleteSemantic(retTyAnnotation, /*isPatchConstantFunction*/ false);
														
 
															     }
														
 
															     CheckParameterAnnotation(retTySemanticLoc, retTyAnnotation,
														
@@ -1840,7 +1841,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															     paramAnnotation.SetParamInputQual(dxilInputQ);
														
 
															     if (isEntry) {
														
 
															-      if (CGM.getLangOpts().EnableBackCompatMode && paramAnnotation.HasSemanticString()) {
														
 
															+      if (CGM.getLangOpts().EnableDX9CompatMode && paramAnnotation.HasSemanticString()) {
														
 
															         RemapObsoleteSemantic(paramAnnotation, /*isPatchConstantFunction*/ false);
														
 
															       }
														
 
															       CheckParameterAnnotation(paramSemanticLoc, paramAnnotation,
														
@@ -1941,7 +1942,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
															 }
														
 
															 void CGMSHLSLRuntime::RemapObsoleteSemantic(DxilParameterAnnotation &paramInfo, bool isPatchConstantFunction) {
														
 
															-  DXASSERT(CGM.getLangOpts().EnableBackCompatMode, "should be used only in back-compat mode");
														
 
															+  DXASSERT(CGM.getLangOpts().EnableDX9CompatMode, "should be used only in back-compat mode");
														
 
															   const ShaderModel *SM = m_pHLModule->GetShaderModel();
														
 
															   DXIL::SigPointKind sigPointKind = SigPointFromInputQual(paramInfo.GetParamInputQual(), SM->GetKind(), isPatchConstantFunction);
														
@@ -4577,7 +4578,7 @@ void CGMSHLSLRuntime::FinishCodeGen() {
 
															     // In back-compat mode (with /Gec flag) create a static global for each const global
														
 
															     // to allow writing to it.
														
 
															     // TODO: Verfiy the behavior of static globals in hull shader
														
 
															-    if(CGM.getLangOpts().EnableBackCompatMode && CGM.getLangOpts().HLSLVersion <= 2016)
														
 
															+    if(CGM.getLangOpts().EnableDX9CompatMode && CGM.getLangOpts().HLSLVersion <= 2016)
														
 
															       CreateWriteEnabledStaticGlobals(m_pHLModule->GetModule(), m_pHLModule->GetEntryFunction());
														
 
															     if (m_pHLModule->GetShaderModel()->IsHS()) {
														
 
															       SetPatchConstantFunction(Entry);
														
--- a/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/tools/clang/lib/Parse/ParseDecl.cpp
@@ -2177,7 +2177,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
 
															   // global variable can be inside a global structure as a static member.
														
 
															   // Check if the global is a static member and skip global const pass.
														
 
															   // in backcompat mode, the check for global const is deferred to later stage in CGMSHLSLRuntime::FinishCodeGen()
														
 
															-  bool CheckGlobalConst = getLangOpts().HLSL && getLangOpts().EnableBackCompatMode && getLangOpts().HLSLVersion <= 2016 ? false : true;
														
 
															+  bool CheckGlobalConst = getLangOpts().HLSL && getLangOpts().EnableDX9CompatMode && getLangOpts().HLSLVersion <= 2016 ? false : true;
														
 
															   if (NestedNameSpecifier *nameSpecifier = D.getCXXScopeSpec().getScopeRep()) {
														
 
															     if (nameSpecifier->getKind() == NestedNameSpecifier::SpecifierKind::TypeSpec) {
														
 
															       const Type *type = D.getCXXScopeSpec().getScopeRep()->getAsType();
														
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
@@ -2241,8 +2241,8 @@ SpirvEvalInfo SPIRVEmitter::doCastExpr(const CastExpr *expr) {
 
															     if (const uint32_t valueId = tryToEvaluateAsConst(expr))
														
 
															       return SpirvEvalInfo(valueId).setConstant().setRValue();
														
 
															-    const auto valueId =
														
 
															-        castToInt(doExpr(subExpr), subExprType, toType, subExpr->getExprLoc());
														
 
															+    const auto valueId = castToInt(loadIfGLValue(subExpr), subExprType, toType,
														
 
															+                                   subExpr->getExprLoc());
														
 
															     return SpirvEvalInfo(valueId).setRValue();
														
 
															   }
														
 
															   case CastKind::CK_FloatingCast:
														
@@ -6640,9 +6640,7 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
															     retVal = processWaveVote(callExpr, spv::Op::OpGroupNonUniformAllEqual);
														
 
															     break;
														
 
															   case hlsl::IntrinsicOp::IOP_WaveActiveCountBits:
														
 
															-    retVal = processWaveReductionOrPrefix(
														
 
															-        callExpr, spv::Op::OpGroupNonUniformBallotBitCount,
														
 
															-        spv::GroupOperation::Reduce);
														
 
															+    retVal = processWaveCountBits(callExpr, spv::GroupOperation::Reduce);
														
 
															     break;
														
 
															   case hlsl::IntrinsicOp::IOP_WaveActiveUSum:
														
 
															   case hlsl::IntrinsicOp::IOP_WaveActiveSum:
														
@@ -6670,9 +6668,7 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
															         spv::GroupOperation::ExclusiveScan);
														
 
															   } break;
														
 
															   case hlsl::IntrinsicOp::IOP_WavePrefixCountBits:
														
 
															-    retVal = processWaveReductionOrPrefix(
														
 
															-        callExpr, spv::Op::OpGroupNonUniformBallotBitCount,
														
 
															-        spv::GroupOperation::ExclusiveScan);
														
 
															+    retVal = processWaveCountBits(callExpr, spv::GroupOperation::ExclusiveScan);
														
 
															     break;
														
 
															   case hlsl::IntrinsicOp::IOP_WaveReadLaneAt:
														
 
															   case hlsl::IntrinsicOp::IOP_WaveReadLaneFirst:
														
@@ -7106,7 +7102,8 @@ uint32_t SPIRVEmitter::processWaveQuery(const CallExpr *callExpr,
 
															   featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation",
														
 
															                                   callExpr->getExprLoc());
														
 
															   theBuilder.requireCapability(getCapabilityForGroupNonUniform(opcode));
														
 
															-  const uint32_t subgroupScope = theBuilder.getConstantInt32(3);
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															   const uint32_t retType =
														
 
															       typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
 
															   return theBuilder.createGroupNonUniformOp(opcode, retType, subgroupScope);
														
@@ -7123,7 +7120,8 @@ uint32_t SPIRVEmitter::processWaveVote(const CallExpr *callExpr,
 
															                                   callExpr->getExprLoc());
														
 
															   theBuilder.requireCapability(getCapabilityForGroupNonUniform(opcode));
														
 
															   const uint32_t predicate = doExpr(callExpr->getArg(0));
														
 
															-  const uint32_t subgroupScope = theBuilder.getConstantInt32(3);
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															   const uint32_t retType =
														
 
															       typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
 
															   return theBuilder.createGroupNonUniformUnaryOp(opcode, retType, subgroupScope,
														
@@ -7199,11 +7197,39 @@ spv::Op SPIRVEmitter::translateWaveOp(hlsl::IntrinsicOp op, QualType type,
 
															   return spv::Op::OpNop;
														
 
															 }
														
 
															+uint32_t SPIRVEmitter::processWaveCountBits(const CallExpr *callExpr,
														
 
															+                                            spv::GroupOperation groupOp) {
														
 
															+  // Signatures:
														
 
															+  // uint WaveActiveCountBits(bool bBit)
														
 
															+  // uint WavePrefixCountBits(bool bBit)
														
 
															+  assert(callExpr->getNumArgs() == 1);
														
 
															+
														
 
															+  featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation",
														
 
															+                                  callExpr->getExprLoc());
														
 
															+  theBuilder.requireCapability(getCapabilityForGroupNonUniform(
														
 
															+      spv::Op::OpGroupNonUniformBallotBitCount));
														
 
															+
														
 
															+  const uint32_t predicate = doExpr(callExpr->getArg(0));
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															+
														
 
															+  const uint32_t u32Type = theBuilder.getUint32Type();
														
 
															+  const uint32_t v4u32Type = theBuilder.getVecType(u32Type, 4);
														
 
															+  const uint32_t retType =
														
 
															+      typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
 
															+
														
 
															+  const uint32_t ballot = theBuilder.createGroupNonUniformUnaryOp(
														
 
															+      spv::Op::OpGroupNonUniformBallot, v4u32Type, subgroupScope, predicate);
														
 
															+
														
 
															+  return theBuilder.createGroupNonUniformUnaryOp(
														
 
															+      spv::Op::OpGroupNonUniformBallotBitCount, retType, subgroupScope, ballot,
														
 
															+      llvm::Optional<spv::GroupOperation>(groupOp));
														
 
															+}
														
 
															+
														
 
															 uint32_t SPIRVEmitter::processWaveReductionOrPrefix(
														
 
															     const CallExpr *callExpr, spv::Op opcode, spv::GroupOperation groupOp) {
														
 
															   // Signatures:
														
 
															   // bool WaveActiveAllEqual( <type> expr )
														
 
															-  // uint WaveActiveCountBits( bool bBit )
														
 
															   // <type> WaveActiveSum( <type> expr )
														
 
															   // <type> WaveActiveProduct( <type> expr )
														
 
															   // <int_type> WaveActiveBitAnd( <int_type> expr )
														
@@ -7212,7 +7238,6 @@ uint32_t SPIRVEmitter::processWaveReductionOrPrefix(
 
															   // <type> WaveActiveMin( <type> expr)
														
 
															   // <type> WaveActiveMax( <type> expr)
														
 
															   //
														
 
															-  // uint WavePrefixCountBits(Bool bBit)
														
 
															   // <type> WavePrefixProduct(<type> value)
														
 
															   // <type> WavePrefixSum(<type> value)
														
 
															   assert(callExpr->getNumArgs() == 1);
														
@@ -7220,7 +7245,8 @@ uint32_t SPIRVEmitter::processWaveReductionOrPrefix(
 
															                                   callExpr->getExprLoc());
														
 
															   theBuilder.requireCapability(getCapabilityForGroupNonUniform(opcode));
														
 
															   const uint32_t predicate = doExpr(callExpr->getArg(0));
														
 
															-  const uint32_t subgroupScope = theBuilder.getConstantInt32(3);
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															   const uint32_t retType =
														
 
															       typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
 
															   return theBuilder.createGroupNonUniformUnaryOp(
														
@@ -7238,7 +7264,8 @@ uint32_t SPIRVEmitter::processWaveBroadcast(const CallExpr *callExpr) {
 
															                                   callExpr->getExprLoc());
														
 
															   theBuilder.requireCapability(spv::Capability::GroupNonUniformBallot);
														
 
															   const uint32_t value = doExpr(callExpr->getArg(0));
														
 
															-  const uint32_t subgroupScope = theBuilder.getConstantInt32(3);
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															   const uint32_t retType =
														
 
															       typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
 
															   if (numArgs == 2)
														
@@ -7264,7 +7291,8 @@ uint32_t SPIRVEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr,
 
															   theBuilder.requireCapability(spv::Capability::GroupNonUniformQuad);
														
 
															   const uint32_t value = doExpr(callExpr->getArg(0));
														
 
															-  const uint32_t subgroupScope = theBuilder.getConstantInt32(3);
														
 
															+  const uint32_t subgroupScope =
														
 
															+      theBuilder.getConstantInt32(static_cast<int32_t>(spv::Scope::Subgroup));
														
 
															   const uint32_t retType =
														
 
															       typeTranslator.translateType(callExpr->getCallReturnType(astContext));
														
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.h
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.h
@@ -467,6 +467,9 @@ private:
 
															   /// Processes SM6.0 wave vote intrinsic calls.
														
 
															   uint32_t processWaveVote(const CallExpr *, spv::Op opcode);
														
 
															+  /// Processes SM6.0 wave active/prefix count bits.
														
 
															+  uint32_t processWaveCountBits(const CallExpr *, spv::GroupOperation groupOp);
														
 
															+
														
 
															   /// Processes SM6.0 wave reduction or scan/prefix intrinsic calls.
														
 
															   uint32_t processWaveReductionOrPrefix(const CallExpr *, spv::Op op,
														
 
															                                         spv::GroupOperation groupOp);
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/lit-function.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/lit-function.hlsl
@@ -0,0 +1,20 @@
 
															+// RUN: %dxc -T ps_6_0 -E main  %s | FileCheck %s
														
 
															+
														
 
															+// Verify lit function defined as lit(ambient, diffuse, specular, 1) where:
														
 
															+// ambient = 1.
														
 
															+// diffuse = ((n · l) < 0) ? 0 : n · l.
														
 
															+// specular = ((n · l) < 0) || ((n · h) < 0) ? 0 : ((n · h) ^ m).
														
 
															+
														
 
															+// CHECK: fcmp
														
 
															+// CHECK: select
														
 
															+// CHECK: fcmp
														
 
															+// CHECK: or
														
 
															+// CHECK: Log
														
 
															+// CHECK: fmul
														
 
															+// CHECK: Exp
														
 
															+// CHECK: select
														
 
															+
														
 
															+float4 main(float a : A, float b : B, float c : C) : SV_Target
														
 
															+{
														
 
															+  return lit(a, b, c);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count01.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count01.hlsl
@@ -0,0 +1,35 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fdiv
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+
														
 
															+float4 main (float x1 : A, float4x4 x2 : B, float2 x3 : C, float4 x4 : D) : SV_Target
														
 
															+{
														
 
															+    float p1 = 8.0;
														
 
															+    float4x4 p2 =         {57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0};
														
 
															+    float2 p3 = float2(-5.0,-5.0);
														
 
															+    float4 p4 = float4(17.0,17.0,17.0,17.0);
														
 
															+
														
 
															+    return float4(pow(x1, p1), pow(x2, p2)[0][0], pow(x3, p3)[0], pow(x4, p4)[0]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count02.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-check-count02.hlsl
@@ -0,0 +1,18 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+// CHECK: fmul
														
 
															+
														
 
															+float2 main (float4 x : A) : SV_Target
														
 
															+{
														
 
															+    float2 y = float2(11.0,11.0);
														
 
															+    return pow(x, y);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-correctness.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-correctness.hlsl
@@ -0,0 +1,38 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// Verify the mul-only pattern implemented to support Fxc compatibility.
														
 
															+
														
 
															+// 2.0^8.0.
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.560000e+02)
														
 
															+
														
 
															+// 2.0^57.0 = 144115188075855872 (0x4380000000000000)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0x4380000000000000)
														
 
															+
														
 
															+// 2.0^-5.0
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 3.125000e-02)
														
 
															+
														
 
															+//2.0^17.0
														
 
															+// call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.310720e+05)
														
 
															+
														
 
															+float4 main () : SV_Target
														
 
															+{
														
 
															+    float x1 = 2.0;
														
 
															+    float p1 = 8.0;
														
 
															+
														
 
															+    float4x4 x2 = {2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0,
														
 
															+                           2.0, 2.0, 2.0, 2.0};
														
 
															+    float4x4 p2 = {57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0,
														
 
															+                           57.0, 57.0, 57.0, 57.0};
														
 
															+
														
 
															+    float2 x3 = float2(2.0,2.0);
														
 
															+    float2 p3 = float2(-5.0,-5.0);
														
 
															+
														
 
															+    float4 x4 = float4(2.0,2.0,2.0,2.0);
														
 
															+    float4 p4 = float4(17.0,17.0,17.0,17.0);
														
 
															+
														
 
															+    return float4(pow(x1, p1), pow(x2, p2)[0][0], pow(x3, p3)[0], pow(x4, p4)[0]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria01.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria01.hlsl
@@ -0,0 +1,27 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// dxc should use log-mul-exp pattern to implement all scenarios listed below.
														
 
															+
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+// CHECK: Log
														
 
															+// CHECK: Exp
														
 
															+
														
 
															+float main (float4x4 a : A, float b : B, float4 c: C) : SV_Target
														
 
															+{
														
 
															+    float4x4 p1 = {2.0, 2.0, 3.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, -1.0, 2.0,}; // not a splat vector
														
 
															+    float4 p2 = {2.33, 2.33, 2.33, 2.33}; // a splat vector but not exact
														
 
															+    float p3 = 2.001; // not an exact value
														
 
															+    float p4 = 4294967296.0; // value greater than int max
														
 
															+    float p5 = 7; // exceeds the mulop threshold criteria for float
														
 
															+
														
 
															+    return pow(a,p1)[0][0] + pow(b,p2)[0] + pow(a,p3)[0][0] + pow(a,p4)[0][0] + pow(c,p4)[0] + pow(b,p5);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria02.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-criteria02.hlsl
@@ -0,0 +1,17 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// dxc should use the mul-only pattern (no log-mul-exp) to implement all scenarios listed below.
														
 
															+
														
 
															+// CHECK-NOT: Log
														
 
															+// CHECK-NOT: Exp
														
 
															+
														
 
															+float main (float4x4 a : A, float b : B, float4 c: C) : SV_Target
														
 
															+{
														
 
															+    float4x4 p1 = {2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,
														
 
															+                  2.0, 2.0, 2.0, 2.0,}; // a splat
														
 
															+    float4 p2 = {9, 9, 9, 9}; // another splat
														
 
															+    float p3 = 8; // meets the threshold criteria
														
 
															+
														
 
															+    return pow(a,p1)[0][0] + pow(b,p2)[0] + pow(a,p3)[0][0];
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-lit-types.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-lit-types.hlsl
@@ -0,0 +1,14 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// check that different float literals are being considered for mul-only code gen for pow.
														
 
															+// CHECK-NOT: Log
														
 
															+// CHECK-NOT: Exp
														
 
															+
														
 
															+float main ( float a : A, float4x4 b: B, float4 c: C, float2 d: D) : SV_Target
														
 
															+{
														
 
															+    return pow(a, 8.0f) + 
														
 
															+           pow(d, 14.0h)[0] +
														
 
															+           pow(c, 384.0H)[0] +
														
 
															+           pow(c, -32.0F)[0] +
														
 
															+           pow(b, -131072.0L)[0][0] +
														
 
															+           pow(b, 1073741824.0L)[0][0];
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-one-as-power.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-one-as-power.hlsl
@@ -0,0 +1,11 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %{{[a-z0-9]+.*[a-z0-9]*}})
														
 
															+
														
 
															+float4 main ( float a : A, float2 b : B, float4 c: C, float4x4 d: D) : SV_Target
														
 
															+{
														
 
															+    return float4(pow(a, 1), pow(b, float2(1.00,1.00))[0], pow(c, float4(1.00,1.00,1.00,1.00))[2], pow(d, 1.00)[1][2]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-zero-as-power.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/pow-mulonly-zero-as-power.hlsl
@@ -0,0 +1,11 @@
 
															+// RUN: %dxc -HV 2016 -E main -T ps_6_0 %s | FileCheck %s
														
 
															+
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00)
														
 
															+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00)
														
 
															+
														
 
															+float4 main ( float a : A, float2 b : B, float4 c: C, float4x4 d: D) : SV_Target
														
 
															+{
														
 
															+    return float4(pow(a, 0), pow(b, float2(0.00,0.00))[0], pow(c, -0.00)[2], pow(d, 0.00)[1][2]);
														
 
															+}
														
--- a/tools/clang/test/CodeGenSPIRV/cf.switch.opswitch.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cf.switch.opswitch.hlsl
@@ -342,6 +342,19 @@ void main() {
 
															 // CHECK-NEXT: %switch_merge_8 = OpLabel
														
 
															   }
														
 
															-// CHECK-NEXT: OpReturn
														
 
															-// CHECK-NEXT: OpFunctionEnd
														
 
															+
														
 
															+  //////////////////////////////////////////////////////////////////
														
 
															+  // Using float as selector results in multiple casts in the AST //
														
 
															+  //////////////////////////////////////////////////////////////////
														
 
															+  float sel;
														
 
															+// CHECK:      [[floatSelector:%\d+]] = OpLoad %float %sel
														
 
															+// CHECK-NEXT:           [[sel:%\d+]] = OpConvertFToS %int [[floatSelector]]
														
 
															+// CHECK-NEXT:                          OpSelectionMerge %switch_merge_9 None
														
 
															+// CHECK-NEXT:                          OpSwitch [[sel]] %switch_merge_9 0 %switch_0_0
														
 
															+  switch (sel) {
														
 
															+  case 0:
														
 
															+    result = 0;
														
 
															+    break;
														
 
															+  }
														
 
															+
														
 
															 }
														
--- a/tools/clang/test/CodeGenSPIRV/sm6.wave-active-count-bits.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/sm6.wave-active-count-bits.hlsl
@@ -7,6 +7,7 @@ struct S {
 
															 };
														
 
															 RWStructuredBuffer<S> values;
														
 
															+RWStructuredBuffer<S> results;
														
 
															 // CHECK: OpCapability GroupNonUniformBallot
														
@@ -14,7 +15,9 @@ RWStructuredBuffer<S> values;
 
															 void main(uint3 id: SV_DispatchThreadID) {
														
 
															     uint x = id.x;
														
 
															-// CHECK:  {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %int_3 Reduce {{%\d+}}
														
 
															-    values[x].val = WaveActiveCountBits(values[x].val == 0);
														
 
															+// CHECK:         [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_0
														
 
															+// CHECK-NEXT: [[ballot:%\d+]] = OpGroupNonUniformBallot %v4uint %int_3 [[cmp]]
														
 
															+// CHECK:             {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %int_3 Reduce [[ballot]]
														
 
															+    results[x].val = WaveActiveCountBits(values[x].val == 0);
														
 
															 }
														
--- a/tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-count-bits.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-count-bits.hlsl
@@ -14,6 +14,8 @@ RWStructuredBuffer<S> values;
 
															 void main(uint3 id: SV_DispatchThreadID) {
														
 
															     uint x = id.x;
														
 
															-// CHECK:  {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %int_3 ExclusiveScan {{%\d+}}
														
 
															+// CHECK:         [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_0
														
 
															+// CHECK-NEXT: [[ballot:%\d+]] = OpGroupNonUniformBallot %v4uint %int_3 [[cmp]]
														
 
															+// CHECK:             {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %int_3 ExclusiveScan [[ballot]]
														
 
															     values[x].val = WavePrefixCountBits(values[x].val == 0);
														
 
															 }
														
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -800,7 +800,7 @@ public:
 
															     compiler.createSourceManager(compiler.getFileManager());
														
 
															     compiler.setTarget(
														
 
															         TargetInfo::CreateTargetInfo(compiler.getDiagnostics(), targetOptions));
														
 
															-    if (Opts.EnableBackCompatMode) {
														
 
															+    if (Opts.EnableDX9CompatMode) {
														
 
															       auto const ID = compiler.getDiagnostics().getCustomDiagID(clang::DiagnosticsEngine::Warning, "/Gec flag is a deprecated functionality.");
														
 
															       compiler.getDiagnostics().Report(ID);
														
 
															     }
														
@@ -855,7 +855,8 @@ public:
 
															     compiler.getLangOpts().RootSigMajor = 1;
														
 
															     compiler.getLangOpts().RootSigMinor = rootSigMinor;
														
 
															     compiler.getLangOpts().HLSLVersion = (unsigned) Opts.HLSLVersion;
														
 
															-    compiler.getLangOpts().EnableBackCompatMode = Opts.EnableBackCompatMode;
														
 
															+    compiler.getLangOpts().EnableDX9CompatMode = Opts.EnableDX9CompatMode;
														
 
															+    compiler.getLangOpts().EnableFXCCompatMode = Opts.EnableFXCCompatMode;
														
 
															     compiler.getLangOpts().UseMinPrecision = !Opts.Enable16BitTypes;
														
--- a/tools/clang/tools/libclang/dxcrewriteunused.cpp
+++ b/tools/clang/tools/libclang/dxcrewriteunused.cpp
@@ -129,7 +129,8 @@ void SetupCompilerForRewrite(CompilerInstance &compiler,
 
															   compiler.getDiagnostics().setIgnoreAllWarnings(!opts.OutputWarnings);
														
 
															   compiler.getLangOpts().HLSLVersion = (unsigned)opts.HLSLVersion;
														
 
															   compiler.getLangOpts().UseMinPrecision = !opts.Enable16BitTypes;
														
 
															-  compiler.getLangOpts().EnableBackCompatMode = opts.EnableBackCompatMode;
														
 
															+  compiler.getLangOpts().EnableDX9CompatMode = opts.EnableDX9CompatMode;
														
 
															+  compiler.getLangOpts().EnableFXCCompatMode = opts.EnableFXCCompatMode;
														
 
															   PreprocessorOptions &PPOpts = compiler.getPreprocessorOpts();
														
 
															   if (rewrite != nullptr) {
	`@@ -1 +1 @@`
	`-Subproject commit dcf23bdabacc3c54b83b1f9367e7a8adb27f8d87`
			`+Subproject commit d5b2e1255f706ce1f88812217e9a554f299848af`
	`@@ -1 +1 @@`
	`-Subproject commit 9fbcce4ca17de7b2d8f6b322bcd1d43a7d6adc29`
			`+Subproject commit 4b4bd4c53aaa020f7e349aede394d42476b7e3aa`
	`@@ -1 +1 @@`
	`-Subproject commit d25268a55f6f6f38c65a7d1b7b119e33a46d1688`
			`+Subproject commit 440527a61e1c91188195f7de212c63c77e8f0a45`