瀏覽代碼

Fixed sign and abs intrinsics on unsigned operands. (#1825)

Makes abs(uint) a no-op and sign(uint) equivalent to uint != 0, matching the correct FXC behavior.
Tristan Labelle 6 年之前
父節點
當前提交
20c61bc577

+ 8 - 0
include/dxc/HlslIntrinsicOp.h

@@ -265,12 +265,14 @@ import hctdb_instrhelp
   IOP_WaveActiveUSum,
   IOP_WavePrefixUProduct,
   IOP_WavePrefixUSum,
+  IOP_uabs,
   IOP_uclamp,
   IOP_ufirstbithigh,
   IOP_umad,
   IOP_umax,
   IOP_umin,
   IOP_umul,
+  IOP_usign,
   MOP_InterlockedUMax,
   MOP_InterlockedUMin,
   Num_Intrinsics,
@@ -293,12 +295,14 @@ import hctdb_instrhelp
   case IntrinsicOp::IOP_WaveActiveSum:
   case IntrinsicOp::IOP_WavePrefixProduct:
   case IntrinsicOp::IOP_WavePrefixSum:
+  case IntrinsicOp::IOP_abs:
   case IntrinsicOp::IOP_clamp:
   case IntrinsicOp::IOP_firstbithigh:
   case IntrinsicOp::IOP_mad:
   case IntrinsicOp::IOP_max:
   case IntrinsicOp::IOP_min:
   case IntrinsicOp::IOP_mul:
+  case IntrinsicOp::IOP_sign:
   case IntrinsicOp::MOP_InterlockedMax:
   case IntrinsicOp::MOP_InterlockedMin:
 // HLSL-HAS-UNSIGNED-INTRINSICS:END
@@ -332,6 +336,8 @@ import hctdb_instrhelp
     return static_cast<unsigned>(IntrinsicOp::IOP_WavePrefixUProduct);
   case IntrinsicOp::IOP_WavePrefixSum:
     return static_cast<unsigned>(IntrinsicOp::IOP_WavePrefixUSum);
+  case IntrinsicOp::IOP_abs:
+    return static_cast<unsigned>(IntrinsicOp::IOP_uabs);
   case IntrinsicOp::IOP_clamp:
     return static_cast<unsigned>(IntrinsicOp::IOP_uclamp);
   case IntrinsicOp::IOP_firstbithigh:
@@ -344,6 +350,8 @@ import hctdb_instrhelp
     return static_cast<unsigned>(IntrinsicOp::IOP_umin);
   case IntrinsicOp::IOP_mul:
     return static_cast<unsigned>(IntrinsicOp::IOP_umul);
+  case IntrinsicOp::IOP_sign:
+    return static_cast<unsigned>(IntrinsicOp::IOP_usign);
   case IntrinsicOp::MOP_InterlockedMax:
     return static_cast<unsigned>(IntrinsicOp::MOP_InterlockedUMax);
   case IntrinsicOp::MOP_InterlockedMin:

+ 29 - 26
lib/HLSL/HLOperationLower.cpp

@@ -1226,7 +1226,7 @@ Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
                               CI->getOperand(1)->getType(), CI, hlslOP);
 }
 
-Value *TransalteAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
   Type *pOverloadTy = CI->getType()->getScalarType();
@@ -1243,6 +1243,11 @@ Value *TransalteAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   }
 }
 
+Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+  HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
+  return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op: abs of an unsigned value is the value itself, so hand back the source operand unchanged
+}
+
 Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
   Type *Ty = val->getType();
   Type *EltTy = Ty->getScalarType();
@@ -2195,30 +2200,26 @@ Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
   Type *Ty = val->getType();
-  Type *EltTy = Ty->getScalarType();
+  bool IsInt = Ty->getScalarType()->isIntegerTy();
+
   IRBuilder<> Builder(CI);
+  Constant *zero = Constant::getNullValue(Ty);
+  Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val) : Builder.CreateFCmpOLT(zero, val);
+  Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero) : Builder.CreateFCmpOLT(val, zero);
+  zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
+  valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
+  return Builder.CreateSub(zeroLtVal, valLtZero);
+}
 
-  if (EltTy->isIntegerTy()) {
-    Constant *zero = ConstantInt::get(Ty->getScalarType(), 0);
-    if (Ty != EltTy) {
-      zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
-    }
-    Value *zeroLtVal = Builder.CreateICmpSLT(zero, val);
-    zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
-    Value *valLtZero = Builder.CreateICmpSLT(val, zero);
-    valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
-    return Builder.CreateSub(zeroLtVal, valLtZero);
-  } else {
-    Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
-    if (Ty != EltTy) {
-      zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
-    }
-    Value *zeroLtVal = Builder.CreateFCmpOLT(zero, val);
-    zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
-    Value *valLtZero = Builder.CreateFCmpOLT(val, zero);
-    valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
-    return Builder.CreateSub(zeroLtVal, valLtZero);
-  }
+Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+  HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
+  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); // The single source operand of sign()
+  Type *Ty = val->getType();
+
+  IRBuilder<> Builder(CI);
+  Constant *zero = Constant::getNullValue(Ty); // Zero of the operand's own type, so scalars and vectors need no separate splat
+  Value *nonZero = Builder.CreateICmpNE(val, zero); // An unsigned value is never negative: sign(uint) reduces to (uint != 0)
+  return Builder.CreateZExt(nonZero, CI->getType()); // Zero-extend the comparison result to the call's result type
 }
 
 Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
@@ -4746,7 +4747,7 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
     {IntrinsicOp::IOP_WorldToObject4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
     {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::IOP_abs, TransalteAbs, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
     {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
@@ -4911,12 +4912,14 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
     { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
     { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
+    { IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes },
     { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
     { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
     { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
     { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
-    { IntrinsicOp::IOP_umin,   TranslateFUIBinary, DXIL::OpCode::UMin },
-    { IntrinsicOp::IOP_umul,   TranslateFUIBinary, DXIL::OpCode::UMul },
+    { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
+    { IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul },
+    { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
     { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
     { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
 };

+ 1 - 1
tools/clang/lib/Sema/gen_intrin_main_tables_15.h

@@ -1585,7 +1585,7 @@ static const HLSL_INTRINSIC g_Intrinsics[] =
     {(UINT)hlsl::IntrinsicOp::IOP_round, false, true, -1, 2, g_Intrinsics_Args170},
     {(UINT)hlsl::IntrinsicOp::IOP_rsqrt, false, true, -1, 2, g_Intrinsics_Args171},
     {(UINT)hlsl::IntrinsicOp::IOP_saturate, false, true, -1, 2, g_Intrinsics_Args172},
-    {(UINT)hlsl::IntrinsicOp::IOP_sign, false, true, -1, 2, g_Intrinsics_Args173},
+    {(UINT)hlsl::IntrinsicOp::IOP_sign, false, true, 0, 2, g_Intrinsics_Args173},
     {(UINT)hlsl::IntrinsicOp::IOP_sin, false, true, -1, 2, g_Intrinsics_Args174},
     {(UINT)hlsl::IntrinsicOp::IOP_sincos, false, false, -1, 4, g_Intrinsics_Args175},
     {(UINT)hlsl::IntrinsicOp::IOP_sinh, false, true, -1, 2, g_Intrinsics_Args176},

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/intrinsic_uabs_usign.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
+
+// Test the unsigned version of the abs and sign intrinsics
+
+// CHECK: call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 -1)
+// CHECK: call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 1)
+
+uint2 main() : OUT
+{
+    return uint2(abs((uint)0xFFFFFFFF), sign((uint)0xFFFFFFFF)); // Per the CHECK lines: abs keeps 0xFFFFFFFF (printed as i32 -1), sign yields 1
+}

+ 1 - 3
tools/clang/test/CodeGenHLSL/uint64_1.hlsl

@@ -5,8 +5,6 @@
 // CHECK: sdiv i64
 // CHECK: shl i64
 // CHECK: mul i64
-// For iabs.
-// CHECK: IMax
 // CHECK: UMax
 // CHECK: UMin
 // CHECK: uitofp i64
@@ -32,7 +30,7 @@ float4 main(float idx1 : Idx1, float idx2 : Idx2, int2 c : C) : SV_Target
   buf2[idx1*3].b = r;
 
   r *= b << 5;
-  r = abs(r);
+  r = abs(r); // No-op on uints
   r = max(r, c.x);
   r = min(r, c.y);
   return r;

+ 2 - 2
utils/hct/gen_intrin_main.txt

@@ -83,7 +83,7 @@ int<4> [[rn]] D3DCOLORtoUBYTE4(in $match<0, 1> float<4> x) : d3dcolortoubyte4;
 uint [[rn]]  GetRenderTargetSampleCount() : rtsampleinfo;
 float<2> [[rn]] GetRenderTargetSamplePosition(in int s) : rtsamplepos;
 void [[]]   abort();
-$type1 [[rn]] abs(in numeric<> x);
+$type1 [[rn,unsigned_op=uabs]] abs(in numeric<> x);
 $type1 [[rn]] acos(in float_like<> x);
 bool [[rn]] all(in any<> x);
 void [[]] AllMemoryBarrier() : syncallmemory_ug;
@@ -201,7 +201,7 @@ $type1 [[rn]] reversebits(in any_int<> x);
 $type1 [[rn]] round(in float_like<> x);
 $type1 [[rn]] rsqrt(in float_like<> x);
 $type1 [[rn]] saturate(in any_float<> x);
-$match<1, 0> int<> [[rn]] sign(in numeric<> x);
+$match<1, 0> int<> [[rn,unsigned_op=usign,overload=0]] sign(in numeric<> x);
 $type1 [[rn]] sin(in float_like<> x);
 void [[]] sincos(in float_like<> x, out $type1 s, out $type1 c);
 $type1 [[rn]] sinh(in float_like<> x);