2
0
Эх сурвалжийг харах

Implement fallback path for IsHelper on SM < 6.6 (#3408)

Tex Riddell 4 жил өмнө
parent
commit
3b99af518d

+ 1 - 0
include/dxc/DXIL/DxilConstants.h

@@ -1486,6 +1486,7 @@ namespace DXIL {
   extern const char *kDxBreakFuncName;
   extern const char *kDxBreakCondName;
   extern const char *kDxBreakMDName;
+  extern const char *kDxIsHelperGlobalName;
 
 } // namespace DXIL
 

+ 1 - 0
lib/DXIL/DxilModule.cpp

@@ -80,6 +80,7 @@ const char* kFP32DenormValueFtzString      = "ftz";
 const char *kDxBreakFuncName = "dx.break";
 const char *kDxBreakCondName = "dx.break.cond";
 const char *kDxBreakMDName = "dx.break.br";
+const char *kDxIsHelperGlobalName = "dx.ishelper";
 }
 
 // Avoid dependency on DxilModule from llvm::Module using this:

+ 232 - 5
lib/HLSL/DxilPreparePasses.cpp

@@ -16,6 +16,7 @@
 #include "dxc/Support/Global.h"
 #include "dxc/DXIL/DxilTypeSystem.h"
 #include "dxc/DXIL/DxilUtil.h"
+#include "dxc/DXIL/DxilEntryProps.h"
 #include "dxc/DXIL/DxilFunctionProps.h"
 #include "dxc/DXIL/DxilInstructions.h"
 #include "dxc/DXIL/DxilConstants.h"
@@ -31,6 +32,7 @@
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -364,7 +366,7 @@ public:
     }
   }
 
-  void patchDxil_1_6(Module &M, hlsl::OP *hlslOP, unsigned ValMajor, unsigned ValMinor) {
+  void RemoveAnnotateHandle(hlsl::OP *hlslOP) {
     for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::AnnotateHandle)) {
       Function *F = it.second;
       if (!F)
@@ -379,6 +381,229 @@ public:
     }
   }
 
+  ///////////////////////////////////////////////////
+  // IsHelperLane() lowering for SM < 6.6
+
+  // Returns true when V is an integer equality compare between a zero
+  // constant and a call to the Coverage DXIL op, with the two operands in
+  // either order. Returns false for any other value.
+  bool IsCmpZOfCoverage(Value *V, hlsl::OP *hlslOP) {
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(V);
+    if (!Cmp || Cmp->getPredicate() != ICmpInst::ICMP_EQ)
+      return false;
+
+    // Put the integer constant, if there is one, on the left-hand side.
+    Value *Lhs = Cmp->getOperand(0);
+    Value *Rhs = Cmp->getOperand(1);
+    if (!isa<ConstantInt>(Lhs))
+      std::swap(Lhs, Rhs);
+
+    ConstantInt *Const = dyn_cast<ConstantInt>(Lhs);
+    if (!Const)
+      return false;
+    CallInst *Call = dyn_cast<CallInst>(Rhs);
+    if (!Call)
+      return false;
+
+    // The constant has to be zero and the call has to be dx.op.coverage.
+    return Const->isZero() &&
+           hlslOP->IsDxilOpFuncCallInst(Call, DXIL::OpCode::Coverage);
+  }
+
+  // Returns true when U is a store located in the entry block of its
+  // function whose stored value is one of:
+  //  - non-PS form: the constant i32 0
+  //  - PS form:     zext(icmp_eq(0, dx.coverage()))
+  // Any other user yields false.
+  bool IsInitOfIsHelperGV(User *U, hlsl::OP *hlslOP) {
+    StoreInst *Store = dyn_cast<StoreInst>(U);
+    if (!Store)
+      return false;
+
+    // Only stores sitting in the function's entry block qualify.
+    BasicBlock *Block = Store->getParent();
+    if (Block != &Block->getParent()->getEntryBlock())
+      return false;
+
+    Value *Stored = Store->getValueOperand();
+    if (ConstantInt *Const = dyn_cast<ConstantInt>(Stored))
+      return Const->isZero();
+    if (ZExtInst *Ext = dyn_cast<ZExtInst>(Stored))
+      return IsCmpZOfCoverage(Ext->getOperand(0), hlslOP);
+    return false;
+  }
+
+  // If U is an instruction that initializes the IsHelper global (see
+  // IsInitOfIsHelperGV) and its function is still in psEntries, remove that
+  // function from the set. Users that are not instructions are ignored.
+  void RemoveFnIfIsHelperInit(User *U, hlsl::OP *hlslOP,
+                              SmallSetVector<Function *, 4> &psEntries) {
+    Instruction *Inst = dyn_cast<Instruction>(U);
+    if (!Inst)
+      return;
+
+    // Check set membership first so the pattern match is skipped for
+    // functions that are not (or no longer) candidates.
+    Function *Owner = Inst->getParent()->getParent();
+    if (psEntries.count(Owner) && IsInitOfIsHelperGV(Inst, hlslOP))
+      psEntries.remove(Owner);
+  }
+
+  // For each pixel-shader entry point that does not already initialize the
+  // dx.ishelper global, insert at the first insertion point of its entry
+  // block:
+  //   store zext(icmp eq 0, dx.op.coverage()) -> dx.ishelper
+  // Does nothing when the module has no dx.ishelper global.
+  void InitIsHelperGV(Module &M) {
+    GlobalVariable *IsHelperGV =
+        M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true);
+    if (IsHelperGV == nullptr)
+      return;
+
+    DxilModule &DM = M.GetDxilModule();
+    hlsl::OP *hlslOP = DM.GetOP();
+    const ShaderModel *pSM = DM.GetShaderModel();
+    const bool TargetIsPS = pSM->IsPS();
+
+    // A PS target can still carry an externally linked global (this can
+    // happen after linking to the final PS); make it internal in that case.
+    if (TargetIsPS &&
+        IsHelperGV->getLinkage() == GlobalValue::ExternalLinkage)
+      IsHelperGV->setLinkage(GlobalValue::InternalLinkage);
+
+    // Candidate entry points: the single entry function for a PS target, or
+    // every function whose entry props say PS for a library target.
+    SmallSetVector<Function *, 4> Pending;
+    if (TargetIsPS) {
+      Pending.insert(DM.GetEntryFunction());
+    } else if (pSM->IsLib()) {
+      for (Function &Fn : M.functions()) {
+        if (!DM.HasDxilEntryProps(&Fn))
+          continue;
+        if (DM.GetDxilEntryProps(&Fn).props.IsPS())
+          Pending.insert(&Fn);
+      }
+    }
+
+    // Drop candidates that already contain an init store of the global.
+    for (Use &GVUse : IsHelperGV->uses())
+      RemoveFnIfIsHelperInit(GVUse.getUser(), hlslOP, Pending);
+
+    // Values shared by every inserted init sequence.
+    Type *I32Ty = Type::getInt32Ty(hlslOP->GetCtx());
+    Constant *Zero = hlslOP->GetI32Const(0);
+    Constant *CoverageOpcode =
+        hlslOP->GetI32Const((int)DXIL::OpCode::Coverage);
+    if (Pending.empty())
+      return;
+
+    // Only request the coverage op function when some entry needs it.
+    Function *CoverageF = hlslOP->GetOpFunc(DXIL::OpCode::Coverage, I32Ty);
+    for (Function *Entry : Pending) {
+      IRBuilder<> Builder(Entry->getEntryBlock().getFirstInsertionPt());
+      Value *Coverage = Builder.CreateCall(CoverageF, {CoverageOpcode});
+      Value *NoCoverage = Builder.CreateICmpEQ(Zero, Coverage);
+      Value *AsI32 = Builder.CreateZExt(NoCoverage, I32Ty);
+      Builder.CreateStore(AsI32, IsHelperGV);
+    }
+  }
+
+  // Returns the module's dx.ishelper global, creating it when absent.
+  // A newly created global is a non-constant i32 initialized to 0, with
+  // external linkage for library targets and internal linkage otherwise.
+  GlobalVariable *GetOrCreateIsHelperGV(Module &M, hlsl::OP *hlslOP) {
+    if (GlobalVariable *Existing = M.getGlobalVariable(
+            DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true))
+      return Existing;
+
+    const ShaderModel *pSM = M.GetDxilModule().GetShaderModel();
+    GlobalValue::LinkageTypes Linkage = GlobalValue::InternalLinkage;
+    if (pSM->IsLib())
+      Linkage = GlobalValue::ExternalLinkage;
+
+    Type *I32Ty = IntegerType::get(M.getContext(), 32);
+    Constant *ZeroInit = hlslOP->GetI32Const(0);
+    return new GlobalVariable(M, I32Ty, /*constant*/ false, Linkage,
+                              /*Initializer*/ ZeroInit,
+                              DXIL::kDxIsHelperGlobalName);
+  }
+
+  // Replace IsHelperLane() with false (for non-lib, non-PS SM).
+  // Every call to each IsHelperLane op function is replaced by the i1
+  // constant 0 and erased; the op function itself is then removed from
+  // hlslOP and erased from its module.
+  void ReplaceIsHelperWithConstFalse(hlsl::OP *hlslOP) {
+    Constant *False = hlslOP->GetI1Const(0);
+    bool bDone = false;
+    while (!bDone) {
+      bDone = true;
+      for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::IsHelperLane)) {
+        Function *F = it.second;
+        if (!F)
+          continue;
+        for (auto uit = F->user_begin(); uit != F->user_end();) {
+          // Advance the iterator before erasing the call it points at.
+          // cast<> rather than dyn_cast<>: the result is used
+          // unconditionally, so a non-call user must assert instead of
+          // yielding a null pointer that is dereferenced on the next line.
+          CallInst *CI = cast<CallInst>(*(uit++));
+          CI->replaceAllUsesWith(False);
+          CI->eraseFromParent();
+        }
+        hlslOP->RemoveFunction(F);
+        F->eraseFromParent();
+        // Leave the range-for and rescan from the start; presumably
+        // RemoveFunction changes the list being iterated (confirm).
+        bDone = false;
+        break;
+      }
+    }
+  }
+
+  // Rewrite every IsHelperLane() call as trunc(load dx.ishelper) to i1,
+  // then remove the op function from hlslOP and erase it. The global is
+  // fetched (or created) only once a call is actually found.
+  void ConvertIsHelperToLoadGV(hlsl::OP *hlslOP) {
+    Type *I1Ty = Type::getInt1Ty(hlslOP->GetCtx());
+    GlobalVariable *IsHelperGV = nullptr;
+    for (;;) {
+      // Pick the first op function still present in the list; the list is
+      // re-read from the start after each function is removed.
+      Function *OpFn = nullptr;
+      for (const auto &Entry :
+           hlslOP->GetOpFuncList(DXIL::OpCode::IsHelperLane)) {
+        if (Entry.second) {
+          OpFn = Entry.second;
+          break;
+        }
+      }
+      if (!OpFn)
+        return;
+
+      for (auto UI = OpFn->user_begin(); UI != OpFn->user_end();) {
+        // Step past this user before the call is erased below.
+        CallInst *Call = cast<CallInst>(*(UI++));
+        if (!IsHelperGV)
+          IsHelperGV = GetOrCreateIsHelperGV(*OpFn->getParent(), hlslOP);
+        IRBuilder<> Builder(Call);
+        Value *Loaded = Builder.CreateLoad(IsHelperGV);
+        Value *AsBool = Builder.CreateTrunc(Loaded, I1Ty);
+        Call->replaceAllUsesWith(AsBool);
+        Call->eraseFromParent();
+      }
+      hlslOP->RemoveFunction(OpFn);
+      OpFn->eraseFromParent();
+    }
+  }
+
+  // Before every Discard call, store zext(discard condition) to i32 into
+  // the dx.ishelper global. The discard calls themselves are kept. The
+  // global is fetched (or created) only once a discard call is found.
+  // NOTE(review): the store overwrites whatever the global held, so a
+  // discard whose condition is false writes 0 - confirm this is intended
+  // for lanes that were already flagged as helpers.
+  void ConvertDiscardToStoreGV(hlsl::OP *hlslOP) {
+    Type *I32Ty = Type::getInt32Ty(hlslOP->GetCtx());
+    GlobalVariable *IsHelperGV = nullptr;
+    for (const auto &Entry : hlslOP->GetOpFuncList(DXIL::OpCode::Discard)) {
+      Function *DiscardFn = Entry.second;
+      if (DiscardFn == nullptr)
+        continue;
+      // No user is erased here, so a plain range-for over users is safe.
+      for (User *U : DiscardFn->users()) {
+        CallInst *Call = cast<CallInst>(U);
+        if (IsHelperGV == nullptr)
+          IsHelperGV = GetOrCreateIsHelperGV(*DiscardFn->getParent(), hlslOP);
+        IRBuilder<> Builder(Call);
+        Value *Flag =
+            Builder.CreateZExt(DxilInst_Discard(Call).get_condition(), I32Ty);
+        Builder.CreateStore(Flag, IsHelperGV);
+      }
+    }
+  }
+  ///////////////////////////////////////////////////
+
+  // Down-conversions applied when targeting DXIL < 1.6: remove
+  // AnnotateHandle, then lower IsHelperLane() either to a constant false
+  // (non-lib, non-PS) or to loads of the dx.ishelper global (lib and PS).
+  void patchDxil_1_6(Module &M, hlsl::OP *hlslOP, unsigned ValMajor, unsigned ValMinor) {
+    RemoveAnnotateHandle(hlslOP);
+
+    const ShaderModel *pSM = M.GetDxilModule().GetShaderModel();
+    const bool TargetIsLib = pSM->IsLib();
+
+    // Neither a library nor a pixel shader: IsHelperLane() becomes false.
+    if (!TargetIsLib && !pSM->IsPS()) {
+      ReplaceIsHelperWithConstFalse(hlslOP);
+      return;
+    }
+
+    // Library or PS: route IsHelperLane() through the dx.ishelper global.
+    ConvertIsHelperToLoadGV(hlslOP);
+    ConvertDiscardToStoreGV(hlslOP);
+    InitIsHelperGV(M);
+
+    // For library targets built for an older validator, force dx.ishelper
+    // to internal linkage. This means the IsHelperLane() fallback code will
+    // not return the correct result in an exported function linked to a PS
+    // in another library, but it won't pass validation otherwise.
+    // NOTE(review): the comparison also matches validator 1.6 itself, not
+    // only older versions - confirm that is intended.
+    if (!TargetIsLib)
+      return;
+    if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 6) > 0)
+      return;
+    if (GlobalVariable *GV = M.getGlobalVariable(DXIL::kDxIsHelperGlobalName,
+                                                 /*AllowLocal*/ true))
+      GV->setLinkage(GlobalValue::InternalLinkage);
+  }
+
   // Replace llvm.lifetime.start/.end intrinsics with undef or zeroinitializer
   // stores (for earlier validator versions) unless the pointer is a global
   // that has an initializer.
@@ -485,7 +710,7 @@ public:
       bool IsLib = DM.GetShaderModel()->IsLib();
       // Skip validation patch for lib.
       if (!IsLib) {
-        if (ValMajor == 1 && ValMinor <= 1) {
+        if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 1) <= 0) {
           patchValidation_1_1(M);
         }
 
@@ -497,15 +722,17 @@ public:
 
       // Replace lifetime intrinsics if requested or necessary.
       const bool forceZeroStoreLifetimes = DM.GetForceZeroStoreLifetimes();
-      if (forceZeroStoreLifetimes || DxilMinor < 6) {
+      if (forceZeroStoreLifetimes ||
+          DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 6) < 0) {
         patchLifetimeIntrinsics(M, ValMajor, ValMinor, forceZeroStoreLifetimes);
       }
 
-      // Remove store undef output.
       hlsl::OP *hlslOP = DM.GetOP();
-      if (DxilMinor < 6) {
+      // Basic down-conversions for Dxil < 1.6
+      if (DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 6) < 0) {
         patchDxil_1_6(M, hlslOP, ValMajor, ValMinor);
       }
+      // Remove store undef output.
       RemoveStoreUndefOutput(M, hlslOP);
 
       // Turn dx.break() conditional into global

+ 8 - 0
lib/HLSL/DxilValidation.cpp

@@ -3675,6 +3675,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV,
     isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs());
     isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers());
     isInternalGV |= isRes;
+
+    // Allow special dx.ishelper for library target
+    if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) {
+      Type *Ty = GV.getType()->getPointerElementType();
+      if (Ty->isIntegerTy() && Ty->getScalarSizeInBits() == 32) {
+        isInternalGV = true;
+      }
+    }
   }
 
   if (!isInternalGV) {

+ 3 - 0
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -5774,6 +5774,9 @@ public:
       // only for non-constant static globals
       if (!dxilutil::IsStaticGlobal(&GV) || GV.isConstant())
         continue;
+      // Skip dx.ishelper
+      if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0)
+        continue;
       // Skip if GV used in functions other than entry.
       if (!usedOnlyInEntry(&GV, entryAndInitFunctionSet))
         continue;

+ 114 - 2
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/helper/IsHelperLane.hlsl

@@ -16,21 +16,43 @@
 // RUN: %dxc -T lib_6_6 %s | FileCheck %s -check-prefixes=CHECKLIB
 // RUN: %dxc -T lib_6_6 -fcgl %s | FileCheck %s -check-prefixes=CHECKHLLIB
 
+// RUN: %dxc -E vs -T vs_6_0 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E gs -T gs_6_0 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E hs -T hs_6_0 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ds -T ds_6_0 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ps -T ps_6_0 %s | FileCheck %s -check-prefixes=CHECKGV
+// RUN: %dxc -E cs -T cs_6_0 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E as -T as_6_5 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ms -T ms_6_5 %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E vs -T vs_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E gs -T gs_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E hs -T hs_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ds -T ds_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ps -T ps_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKGV
+// RUN: %dxc -E cs -T cs_6_0 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E as -T as_6_5 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -E ms -T ms_6_5 -Od %s | FileCheck %s -check-prefixes=CHECKCONST
+// RUN: %dxc -T lib_6_5 %s | FileCheck %s -check-prefixes=CHECKLIBGV
+
+
 // Exactly one call
 // CHECK define void @{{.*}}()
 // CHECK: call i1 @dx.op.isHelperLane.i1(i32 221)
 // CHECK-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
 
+
 // Exactly two calls for HS and PC func
 // CHECKHS define void @{{.*}}()
 // CHECKHS: call i1 @dx.op.isHelperLane.i1(i32 221)
 // CHECKHS: call i1 @dx.op.isHelperLane.i1(i32 221)
 // CHECKHS-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
 
+
 // Translated to constant zero, so no call:
 // CHECKCONST: define void @{{.*}}()
 // CHECKCONST-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
 
+
 // No calls simplified for lib target.
 // 10 for: vs, gs, hs + pc, ds, cs, as, ms, and exported testfn
 // CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
@@ -45,6 +67,7 @@
 // CHECKLIB: call i1 @dx.op.isHelperLane.i1(i32 221)
 // CHECKLIB-NOT: call i1 @dx.op.isHelperLane.i1(i32 221)
 
+
 // One HL call from each function
 // 18 functions for HL lib due to entry cloning
 // CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id:.*]])
@@ -67,6 +90,93 @@
 // CHECKHLLIB: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
 // CHECKHLLIB-NOT: call i1 @"dx.hl.op..i1 (i32)"(i32 [[id]])
 
+
+// CHECKGV:   %[[cov:.*]] = call i32 @dx.op.coverage.i32(i32 91)  ; Coverage()
+// CHECKGV:   %[[cmp:.*]] = icmp eq i32 0, %[[cov]]
+// CHECKGV:   %[[zext:.*]] = zext i1 %[[cmp]] to i32
+// CHECKGV:   store i32 %[[zext]], i32* @dx.ishelper
+// CHECKGV:   store i32 1, i32* @dx.ishelper
+// CHECKGV-NEXT:   call void @dx.op.discard
+// CHECKGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKGV:   trunc i32 %[[load]] to i1
+
+
+// CHECKLIBGV: @dx.ishelper = {{(internal )?}}global i32 0
+
+// CHECKLIBGV-LABEL: define void @cs()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @as()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define <4 x float> @{{.*}}?testfn{{.*}}()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret <4 x float>
+
+// CHECKLIBGV-LABEL: define void @vs()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @gs()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @{{.*}}?pc{{.*}}()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @hs()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @ds()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @ps()
+// CHECKLIBGV:   %[[cov:.*]] = call i32 @dx.op.coverage.i32(i32 91)  ; Coverage()
+// CHECKLIBGV:   %[[cmp:.*]] = icmp eq i32 0, %[[cov]]
+// CHECKLIBGV:   %[[zext:.*]] = zext i1 %[[cmp]] to i32
+// CHECKLIBGV:   store i32 %[[zext]], i32* @dx.ishelper
+// CHECKLIBGV:   store i32 1, i32* @dx.ishelper
+// CHECKLIBGV-NEXT:   call void @dx.op.discard
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
+// CHECKLIBGV-LABEL: define void @ms()
+// CHECKLIBGV-NOT: call i32 @dx.op.coverage.i32(i32 91)
+// CHECKLIBGV-NOT: store i32 %{{.*}}, i32* @dx.ishelper
+// CHECKLIBGV:   %[[load:.*]] = load i32, i32* @dx.ishelper
+// CHECKLIBGV:   trunc i32 %[[load]] to i1
+// CHECKLIBGV-LABEL: ret void
+
 float4 a;
 
 /// Vertex Shader
@@ -150,8 +260,10 @@ PosStruct ds(const float3 bary : SV_DomainLocation,
 /// Pixel Shader
 
 [shader("pixel")]
-float4 ps(): SV_Target
+float4 ps(float f : IN): SV_Target
 {
+  if (f < 0.0)
+    discard;
   float4 result = a + IsHelperLane();
   return ddx(result);
 }
@@ -165,7 +277,7 @@ RWStructuredBuffer<float4> SB;
 void cs(uint gidx : SV_GroupIndex)
 {
   float4 result = a + IsHelperLane();
-  SB[gidx] = ddx(result);
+  SB[gidx] = QuadReadAcrossX(result);
 }
 
 /// Amplification Shader