7 lat temu · d36eb1731c
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -610,6 +610,123 @@ Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
 
				   return Builder.CreateBitCast(byte4, CI->getType());
			
 
				 }
			
 
				 
			
 
				+// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
				+// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
				+// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
				+// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
				+//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
				+// Rule 3: Power must be an exact value.
				+// +----------+---------------------+------------------+
				+// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
				+// +----------+---------------------+------------------+
				+// | float4x4 | True                |               33 |
				+// | float4x4 | False               |               17 |
				+// | float4x2 | True                |               17 |
				+// | float4x2 | False               |                9 |
				+// | float2x4 | True                |               17 |
				+// | float2x4 | False               |                9 |
				+// | float4   | True                |                9 |
				+// | float4   | False               |                5 |
				+// | float2   | True                |                5 |
				+// | float2   | False               |                3 |
				+// | float    | True                |                3 |
				+// | float    | False               |                2 |
				+// +----------+---------------------+------------------+
				+//
				+// Parameters:
				+//   Builder - not used by this function.
				+//   x       - base operand; its vector element count is used as "n" when pow is a vector constant.
				+//   pow     - exponent operand; must be a ConstantFP or a splat ConstantDataVector to qualify.
				+//   powI    - written with the integer exponent as soon as the literal converts exactly to a
				+//             signed 32-bit integer, even if the mul-op budget check then fails. It is left
				+//             untouched otherwise, so it is only meaningful when true is returned.
				+bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
				+  // Applicable only when power is a literal.
				+  ConstantDataVector *powVec = dyn_cast<ConstantDataVector>(pow);
				+  if (!powVec && !isa<ConstantFP>(pow)) {
				+    return false;
				+  }
				+
				+  // Only apply this code gen on splat values.
				+  if (powVec && !hlsl::dxilutil::IsSplat(powVec)) {
				+    return false;
				+  }
				+
				+  // For a splat vector every element is the same, so element 0 stands for all of them.
				+  APFloat powAPF = powVec ? powVec->getElementAsAPFloat(0)
				+                          : cast<ConstantFP>(pow)->getValueAPF();
				+  // 32 bits wide; the second argument (isUnsigned) is false, i.e. a signed integer.
				+  APSInt powAPS(32, false);
				+  bool isExact = false;
				+  // Try converting float value of power to integer and also check if the float value is exact.
				+  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
				+  if (status != APFloat::opStatus::opOK || !isExact) {
				+    return false;
				+  }
				+
				+  powI = static_cast<int32_t>(powAPS.getExtValue());
				+  // Take the magnitude in unsigned arithmetic: abs() is undefined for -2^31, which is a
				+  // legal result of the signed 32-bit conversion above.
				+  const uint32_t powU = powI < 0 ? 0u - static_cast<uint32_t>(powI)
				+                                 : static_cast<uint32_t>(powI);
				+  int setBitCount = 0;
				+  int maxBitSetPos = -1;
				+  for (int i = 0; i < 32; i++) {
				+    if ((powU >> i) & 1) {
				+      setBitCount++;
				+      maxBitSetPos = i;
				+    }
				+  }
				+
				+  // Bit 31 of the magnitude can only be set for an exponent of -2^31. That would need 31
				+  // multiplies, far beyond every budget in the table above, so reject it here instead of
				+  // asserting on a value a shader author can legitimately write.
				+  if (maxBitSetPos > 30) {
				+    return false;
				+  }
				+
				+  unsigned numElem = powVec ? x->getType()->getVectorNumElements() : 1;
				+  int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
				+  // maxBitSetPos squarings to reach the highest power of two, plus one multiply for each
				+  // additional set bit that has to be folded into the result.
				+  int mulOpNeeded = maxBitSetPos + setBitCount - 1;
				+  return mulOpNeeded <= mulOpThreshold;
				+}
			
 
				+
			
 
				+// Builds x^y for a compile-time integer exponent y by repeated squaring, using
				+// Builder.CreateFMul for every step and a single Builder.CreateFDiv (1 / result) when y is
				+// negative. For y == 0 no instruction is created and the constant 1 of x's type is returned.
				+//
				+// Parameters:
				+//   Builder - IR builder used to create the multiplies and the optional divide.
				+//   x       - base operand; the result has the same type.
				+//   y       - integer exponent.
				+Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
				+  // If y is zero then always return 1.
				+  if (y == 0) {
				+    return ConstantFP::get(x->getType(), 1);
				+  }
				+
				+  // Magnitude of y in unsigned arithmetic; abs() would be undefined for -2^31.
				+  const uint32_t absY = y < 0 ? 0u - static_cast<uint32_t>(y)
				+                              : static_cast<uint32_t>(y);
				+
				+  Value *result = nullptr; // product of the powers selected so far
				+  Value *mul = x;          // x^(2^k) for the bit position k currently being examined
				+  for (uint32_t bits = absY; bits != 0; bits >>= 1) {
				+    if (bits & 1) {
				+      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
				+    }
				+    // Square only while a higher bit is still pending, so no unused multiply is created
				+    // after the most significant set bit.
				+    if ((bits >> 1) != 0) {
				+      mul = Builder.CreateFMul(mul, mul);
				+    }
				+  }
				+
				+  // Compute reciprocal for negative power values.
				+  if (y < 0) {
				+    Value* constOne = ConstantFP::get(x->getType(), 1);
				+    result = Builder.CreateFDiv(constOne, result);
				+  }
				+
				+  return result;
				+}
			
 
				+
			
 
				+// Lowers pow(x, y). In FXC compatibility mode a qualifying literal exponent is expanded
				+// into multiplies via TranslatePowUsingFxcMulOnlyPattern; in every other case the result
				+// is built as Exp(y * Log(x)) from the DXIL Log and Exp opcodes.
				+//
				+// Parameters:
				+//   hlslOP          - passed through to TrivialDxilUnaryOperation for the Log and Exp calls.
				+//   Builder         - IR builder used for all created instructions.
				+//   x, y            - base and exponent operands.
				+//   isFXCCompatMode - enables the mul-only expansion check; defaults to false.
				+Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
				+  if (isFXCCompatMode) {
				+    // The helper reports the integer exponent through this out-parameter.
				+    int32_t exponent = 0;
				+    if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, exponent)) {
				+      return TranslatePowUsingFxcMulOnlyPattern(Builder, x, exponent);
				+    }
				+  }
				+
				+  // General path: log, then multiply by the exponent, then exp.
				+  Value *const logOfBase =
				+    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
				+  Value *const scaledLog = Builder.CreateFMul(logOfBase, y);
				+  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, scaledLog, hlslOP, Builder);
				+}
			
 
				+
			
 
				 Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
			
 
				                                  OP::OpCode opcode,
			
 
				                                  HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
			
@@ -1502,11 +1619,12 @@ Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
				   Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
			
 
				   Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
			
 
				   Result = Builder.CreateInsertElement(Result, diffuse, 1);
			
 
				-  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h * m).
			
 
				+  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
			
 
				   Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
			
 
				   Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
			
 
				-  Value *nhMulM = Builder.CreateFMul(n_dot_h, m);
			
 
				-  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhMulM);
			
 
				+  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
			
 
				+  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
			
 
				+  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
			
 
				   Result = Builder.CreateInsertElement(Result, spec, 2);
			
 
				   return Result;
			
 
				 }
			
@@ -2114,122 +2232,6 @@ Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
				   return Builder.CreateSelect(cond, zero, one);
			
 
				 }
			
 
				 
			
 
				-// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
			
 
				-// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
			
 
				-// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
			
 
				-// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
			
 
				-//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
			
 
				-// Rule 3: Power must be an exact value.
			
 
				-// +----------+---------------------+------------------+
			
 
				-// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
			
 
				-// +----------+---------------------+------------------+
			
 
				-// | float4x4 | True                |               33 |
			
 
				-// | float4x4 | False               |               17 |
			
 
				-// | float4x2 | True                |               17 |
			
 
				-// | float4x2 | False               |                9 |
			
 
				-// | float2x4 | True                |               17 |
			
 
				-// | float2x4 | False               |                9 |
			
 
				-// | float4   | True                |                9 |
			
 
				-// | float4   | False               |                5 |
			
 
				-// | float2   | True                |                5 |
			
 
				-// | float2   | False               |                3 |
			
 
				-// | float    | True                |                3 |
			
 
				-// | float    | False               |                2 |
			
 
				-// +----------+---------------------+------------------+
			
 
				-
			
 
				-bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
			
 
				-  // Applicable only when power is a literal.
			
 
				-  if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
			
 
				-    return false;
			
 
				-  }
			
 
				-
			
 
				-  // Only apply this code gen on splat values.
			
 
				-  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
			
 
				-    if (!hlsl::dxilutil::IsSplat(cdv)) {
			
 
				-      return false;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  APFloat powAPF = isa<ConstantDataVector>(pow) ?
			
 
				-    cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
			
 
				-    cast<ConstantFP>(pow)->getValueAPF();
			
 
				-  APSInt powAPS(32, false);
			
 
				-  bool isExact = false;
			
 
				-  // Try converting float value of power to integer and also check if the float value is exact.
			
 
				-  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
			
 
				-  if (status == APFloat::opStatus::opOK && isExact) {
			
 
				-    powI = powAPS.getExtValue();
			
 
				-    uint32_t powU = abs(powI);
			
 
				-    int setBitCount = 0;
			
 
				-    int maxBitSetPos = -1;
			
 
				-    for (int i = 0; i < 32; i++) {
			
 
				-      if ((powU >> i) & 1) {
			
 
				-        setBitCount++;
			
 
				-        maxBitSetPos = i;
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
			
 
				-    unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
			
 
				-    int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
			
 
				-    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
			
 
				-    return mulOpNeeded <= mulOpThreshold;
			
 
				-  }
			
 
				-
			
 
				-  return false;
			
 
				-}
			
 
				-
			
 
				-Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
			
 
				-  uint32_t absY = abs(y);
			
 
				-  // If y is zero then always return 1.
			
 
				-  if (absY == 0) {
			
 
				-    return ConstantFP::get(x->getType(), 1);
			
 
				-  }
			
 
				-
			
 
				-  int lastSetPos = -1;
			
 
				-  Value *result = nullptr;
			
 
				-  Value *mul = nullptr;
			
 
				-  for (int i = 0; i < 32; i++) {
			
 
				-    if ((absY >> i) & 1) {
			
 
				-      for (int j = i; j > lastSetPos; j--) {
			
 
				-        if (!mul) {
			
 
				-          mul = x;
			
 
				-        } else {
			
 
				-          mul = Builder.CreateFMul(mul, mul);
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
			
 
				-      lastSetPos = i;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // Compute reciprocal for negative power values.
			
 
				-  if (y < 0) {
			
 
				-    Value* constOne = ConstantFP::get(x->getType(), 1);
			
 
				-    result = Builder.CreateFDiv(constOne, result);
			
 
				-  }
			
 
				-
			
 
				-  return result;
			
 
				-}
			
 
				-
			
 
				-Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
			
 
				-  // As applicable implement pow using only mul ops as done by Fxc.
			
 
				-  int32_t p=0;
			
 
				-  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
			
 
				-    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
			
 
				-  }
			
 
				-
			
 
				-  // Default to log-mul-exp pattern if previous scenarios don't apply.
			
 
				-  // t = log(x);
			
 
				-  Value *logX =
			
 
				-    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
			
 
				-  // t = y * t;
			
 
				-  Value *mulY = Builder.CreateFMul(logX, y);
			
 
				-  // pow = exp(t);
			
 
				-  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
			
 
				-}
			
 
				-
			
 
				 Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
			
 
				                     HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
			
 
				   hlsl::OP *hlslOP = &helper.hlslOP;
			
--- a/tools/clang/test/CodeGenHLSL/quick-test/lit-function.hlsl
+++ b/tools/clang/test/CodeGenHLSL/quick-test/lit-function.hlsl
@@ -0,0 +1,20 @@
 
				+// RUN: %dxc -T ps_6_0 -E main  %s | FileCheck %s
			
 
				+
			
 
				+// Verify lit function defined as lit(ambient, diffuse, specular, 1) where:
			
 
				+// ambient = 1.
			
 
				+// diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
			
 
				+// specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0 : (n_dot_h ^ m).
			
 
				+
			
 
				+// CHECK: fcmp
			
 
				+// CHECK: select
			
 
				+// CHECK: fcmp
			
 
				+// CHECK: or
			
 
				+// CHECK: Log
			
 
				+// CHECK: fmul
			
 
				+// CHECK: Exp
			
 
				+// CHECK: select
			
 
				+
			
 
				+float4 main(float a : A, float b : B, float c : C) : SV_Target // entry point named by "-E main" in the RUN line
			
 
				+{
			
 
				+  return lit(a, b, c); // all three arguments are shader inputs, not literals
			
 
				+}