Przeglądaj źródła

Fix lit function (#1565)

Vishal Sharma 7 lat temu
rodzic
commit
d36eb1731c

+ 121 - 119
lib/HLSL/HLOperationLower.cpp

@@ -610,6 +610,123 @@ Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
   return Builder.CreateBitCast(byte4, CI->getType());
 }
 
+// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
+// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
+// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
+// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
+//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
+// Rule 3: Power must be an exact value.
+// +----------+---------------------+------------------+
+// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
+// +----------+---------------------+------------------+
+// | float4x4 | True                |               33 |
+// | float4x4 | False               |               17 |
+// | float4x2 | True                |               17 |
+// | float4x2 | False               |                9 |
+// | float2x4 | True                |               17 |
+// | float2x4 | False               |                9 |
+// | float4   | True                |                9 |
+// | float4   | False               |                5 |
+// | float2   | True                |                5 |
+// | float2   | False               |                3 |
+// | float    | True                |                3 |
+// | float    | False               |                2 |
+// +----------+---------------------+------------------+
+
+bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
+  // Applicable only when power is a literal.
+  if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
+    return false;
+  }
+
+  // Only apply this code gen on splat values.
+  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
+    if (!hlsl::dxilutil::IsSplat(cdv)) {
+      return false;
+    }
+  }
+
+  APFloat powAPF = isa<ConstantDataVector>(pow) ?
+    cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
+    cast<ConstantFP>(pow)->getValueAPF();
+  APSInt powAPS(32, false);
+  bool isExact = false;
+  // Try converting float value of power to integer and also check if the float value is exact.
+  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
+  if (status == APFloat::opStatus::opOK && isExact) {
+    powI = powAPS.getExtValue();
+    uint32_t powU = abs(powI);
+    int setBitCount = 0;
+    int maxBitSetPos = -1;
+    for (int i = 0; i < 32; i++) {
+      if ((powU >> i) & 1) {
+        setBitCount++;
+        maxBitSetPos = i;
+      }
+    }
+
+    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
+    unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
+    int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
+    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
+    return mulOpNeeded <= mulOpThreshold;
+  }
+
+  return false;
+}
+
+Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
+  uint32_t absY = abs(y);
+  // If y is zero then always return 1.
+  if (absY == 0) {
+    return ConstantFP::get(x->getType(), 1);
+  }
+
+  int lastSetPos = -1;
+  Value *result = nullptr;
+  Value *mul = nullptr;
+  for (int i = 0; i < 32; i++) {
+    if ((absY >> i) & 1) {
+      for (int j = i; j > lastSetPos; j--) {
+        if (!mul) {
+          mul = x;
+        }
+        else {
+          mul = Builder.CreateFMul(mul, mul);
+        }
+      }
+
+      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
+      lastSetPos = i;
+    }
+  }
+
+  // Compute reciprocal for negative power values.
+  if (y < 0) {
+    Value* constOne = ConstantFP::get(x->getType(), 1);
+    result = Builder.CreateFDiv(constOne, result);
+  }
+
+  return result;
+}
+
+Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
+  // As applicable implement pow using only mul ops as done by Fxc.
+  int32_t p = 0;
+  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
+    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
+  }
+
+  // Default to log-mul-exp pattern if previous scenarios don't apply.
+  // t = log(x);
+  Value *logX =
+    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
+  // t = y * t;
+  Value *mulY = Builder.CreateFMul(logX, y);
+  // pow = exp(t);
+  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
+}
+
 Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
                                  OP::OpCode opcode,
                                  HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
@@ -1502,11 +1619,12 @@ Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
   Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
   Result = Builder.CreateInsertElement(Result, diffuse, 1);
-  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h * m).
+  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
   Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
   Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
-  Value *nhMulM = Builder.CreateFMul(n_dot_h, m);
-  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhMulM);
+  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
+  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
+  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
   Result = Builder.CreateInsertElement(Result, spec, 2);
   return Result;
 }
@@ -2114,122 +2232,6 @@ Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   return Builder.CreateSelect(cond, zero, one);
 }
 
-// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
-// Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
-// Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
-// Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
-//         is a positive or a negative value. Here "n" is the number of scalar elements in power.
-// Rule 3: Power must be an exact value.
-// +----------+---------------------+------------------+
-// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
-// +----------+---------------------+------------------+
-// | float4x4 | True                |               33 |
-// | float4x4 | False               |               17 |
-// | float4x2 | True                |               17 |
-// | float4x2 | False               |                9 |
-// | float2x4 | True                |               17 |
-// | float2x4 | False               |                9 |
-// | float4   | True                |                9 |
-// | float4   | False               |                5 |
-// | float2   | True                |                5 |
-// | float2   | False               |                3 |
-// | float    | True                |                3 |
-// | float    | False               |                2 |
-// +----------+---------------------+------------------+
-
-bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
-  // Applicable only when power is a literal.
-  if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
-    return false;
-  }
-
-  // Only apply this code gen on splat values.
-  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
-    if (!hlsl::dxilutil::IsSplat(cdv)) {
-      return false;
-    }
-  }
-
-  APFloat powAPF = isa<ConstantDataVector>(pow) ?
-    cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
-    cast<ConstantFP>(pow)->getValueAPF();
-  APSInt powAPS(32, false);
-  bool isExact = false;
-  // Try converting float value of power to integer and also check if the float value is exact.
-  APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
-  if (status == APFloat::opStatus::opOK && isExact) {
-    powI = powAPS.getExtValue();
-    uint32_t powU = abs(powI);
-    int setBitCount = 0;
-    int maxBitSetPos = -1;
-    for (int i = 0; i < 32; i++) {
-      if ((powU >> i) & 1) {
-        setBitCount++;
-        maxBitSetPos = i;
-      }
-    }
-
-    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
-    unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
-    int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
-    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
-    return mulOpNeeded <= mulOpThreshold;
-  }
-
-  return false;
-}
-
-Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
-  uint32_t absY = abs(y);
-  // If y is zero then always return 1.
-  if (absY == 0) {
-    return ConstantFP::get(x->getType(), 1);
-  }
-
-  int lastSetPos = -1;
-  Value *result = nullptr;
-  Value *mul = nullptr;
-  for (int i = 0; i < 32; i++) {
-    if ((absY >> i) & 1) {
-      for (int j = i; j > lastSetPos; j--) {
-        if (!mul) {
-          mul = x;
-        } else {
-          mul = Builder.CreateFMul(mul, mul);
-        }
-      }
-
-      result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
-      lastSetPos = i;
-    }
-  }
-
-  // Compute reciprocal for negative power values.
-  if (y < 0) {
-    Value* constOne = ConstantFP::get(x->getType(), 1);
-    result = Builder.CreateFDiv(constOne, result);
-  }
-
-  return result;
-}
-
-Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
-  // As applicable implement pow using only mul ops as done by Fxc.
-  int32_t p=0;
-  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
-    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
-  }
-
-  // Default to log-mul-exp pattern if previous scenarios don't apply.
-  // t = log(x);
-  Value *logX =
-    TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
-  // t = y * t;
-  Value *mulY = Builder.CreateFMul(logX, y);
-  // pow = exp(t);
-  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
-}
-
 Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;

+ 20 - 0
tools/clang/test/CodeGenHLSL/quick-test/lit-function.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -T ps_6_0 -E main  %s | FileCheck %s
+
+// Verify lit function defined as lit(ambient, diffuse, specular, 1) where:
+// ambient = 1.
+// diffuse = ((n l) < 0) ? 0 : n l.
+// specular = ((n l) < 0) || ((n h) < 0) ? 0 : ((n h) ^ m).
+
+// CHECK: fcmp
+// CHECK: select
+// CHECK: fcmp
+// CHECK: or
+// CHECK: Log
+// CHECK: fmul
+// CHECK: Exp
+// CHECK: select
+
+float4 main(float a : A, float b : B, float c : C) : SV_Target
+{
+  return lit(a, b, c);
+}