Przeglądaj źródła

Create x*x for pow(x,2). (#2777)

Xiang Li 5 lat temu
rodzic
commit
729307c4d9

+ 6 - 1
lib/HLSL/HLOperationLower.cpp

@@ -712,8 +712,13 @@ Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const
 Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
 Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
   // As applicable implement pow using only mul ops as done by Fxc.
   // As applicable implement pow using only mul ops as done by Fxc.
   int32_t p = 0;
   int32_t p = 0;
-  if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
+  if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
+    if (isFXCCompatMode) {
     return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
     return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
+    } else if (p == 2) {
+      // Only special-case an exponent of 2, since x*x will not affect register pressure.
+      return Builder.CreateFMul(x, x);
+    }
   }
   }
 
 
   // Default to log-mul-exp pattern if previous scenarios don't apply.
   // Default to log-mul-exp pattern if previous scenarios don't apply.

+ 16 - 0
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/power/pow2.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -T ps_6_0 -E main %s | FileCheck %s
+
+// Make sure pow(x, 2) does not generate the log -> mul by 2 -> exp sequence.
+// Log
+// CHECK-NOT:call float @dx.op.unary.f32(i32 23,
+// CHECK-NOT:fmul fast float %{{.*}}, 2.000000e+00
+// Exp
+// CHECK-NOT:call float @dx.op.unary.f32(i32 21,
+// CHECK: fmul fast float %[[a:.*]], %[[a]]
+// CHECK: fmul fast float %[[b:.*]], %[[b]]
+// CHECK: fmul fast float %[[c:.*]], %[[c]]
+// CHECK: fmul fast float %[[d:.*]], %[[d]]
+
+float4 main(float a :A, float3 b:B) : SV_Target {
+  return float4(pow(a, 2), pow(b,2));
+}

+ 0 - 2
tools/clang/test/HLSLFileCheck/samples/MiniEngine/FXAAPass1_Luma_CS.hlsl

@@ -9,8 +9,6 @@
 // CHECK: FMax
 // CHECK: FMax
 // CHECK: FMin
 // CHECK: FMin
 // CHECK: FAbs
 // CHECK: FAbs
-// CHECK: Log
-// CHECK: Exp
 // CHECK: Saturate
 // CHECK: Saturate
 // CHECK: bufferUpdateCounter
 // CHECK: bufferUpdateCounter
 // CHECK: bufferStore
 // CHECK: bufferStore

+ 0 - 2
tools/clang/test/HLSLFileCheck/samples/MiniEngine/FXAAPass1_RGB_CS.hlsl

@@ -10,8 +10,6 @@
 // CHECK: FMax
 // CHECK: FMax
 // CHECK: FMin
 // CHECK: FMin
 // CHECK: FAbs
 // CHECK: FAbs
-// CHECK: Log
-// CHECK: Exp
 // CHECK: Saturate
 // CHECK: Saturate
 // CHECK: bufferUpdateCounter
 // CHECK: bufferUpdateCounter
 // CHECK: bufferStore
 // CHECK: bufferStore

+ 0 - 2
tools/clang/test/HLSLFileCheck/samples/SubD11_SmoothPS.hlsl

@@ -7,8 +7,6 @@
 // CHECK: Rsqrt
 // CHECK: Rsqrt
 // CHECK: sample
 // CHECK: sample
 // CHECK: sample
 // CHECK: sample
-// CHECK: Log
-// CHECK: Exp
 // CHECK: Rsqrt
 // CHECK: Rsqrt
 // CHECK: dot3
 // CHECK: dot3
 // CHECK: Saturate
 // CHECK: Saturate

+ 0 - 2
tools/clang/test/HLSLFileCheck/samples/d3d11/SubD11_SmoothPS.hlsl

@@ -7,8 +7,6 @@
 // CHECK: Rsqrt
 // CHECK: Rsqrt
 // CHECK: sample
 // CHECK: sample
 // CHECK: sample
 // CHECK: sample
-// CHECK: Log
-// CHECK: Exp
 // CHECK: Rsqrt
 // CHECK: Rsqrt
 // CHECK: dot3
 // CHECK: dot3
 // CHECK: Saturate
 // CHECK: Saturate