Răsfoiți Sursa

Handle null dominator tree in memcpy replace (#2901)

The previous code was written under the belief that static globals
used as parameters couldn't result in a ReplaceMemCpy. They can when
they are not initialized to zero.

To handle this and any other circumstances that might result in a null
dominatortree, the function checking for user domination will regenerate
the dominatortree when necessary.
Greg Roth 5 ani în urmă
părinte
comite
624c41d397

+ 15 - 7
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -2222,7 +2222,7 @@ void SROA_Helper::RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder) {
         NewGEPs.emplace_back(NewGEP);
       }
       const bool bAllowReplace = isa<AllocaInst>(OldVal);
-      if (!SROA_Helper::LowerMemcpy(GEP, /*annoation*/ nullptr, typeSys, DL, DT, bAllowReplace)) {
+      if (!SROA_Helper::LowerMemcpy(GEP, /*annotation*/ nullptr, typeSys, DL, DT, bAllowReplace)) {
         SROA_Helper helper(GEP, NewGEPs, DeadInsts, typeSys, DL, DT);
         helper.RewriteForScalarRepl(GEP, Builder);
         for (Value *NewGEP : NewGEPs) {
@@ -3627,13 +3627,15 @@ static bool ReplaceUseOfZeroInitBeforeDef(Instruction *I, GlobalVariable *GV) {
   }
 }
 
+// Use `DT` to trace all users and make sure `I`'s BB dominates them all
 static bool DominateAllUsersDom(Instruction *I, Value *V, DominatorTree *DT) {
   BasicBlock *BB = I->getParent();
+  Function *F = I->getParent()->getParent();
   for (auto U = V->user_begin(); U != V->user_end(); ) {
     Instruction *UI = dyn_cast<Instruction>(*(U++));
-    if (!UI)
+    // If not an instruction or from a differnt function, nothing to check, move along.
+    if (!UI || UI->getParent()->getParent() != F)
       continue;
-    assert (UI->getParent()->getParent() == I->getParent()->getParent());
 
     if (!DT->dominates(BB, UI->getParent()))
       return false;
@@ -3654,7 +3656,13 @@ static bool DominateAllUsers(Instruction *I, Value *V, DominatorTree *DT) {
   if (&F->getEntryBlock() == I->getParent())
     return true;
 
-  return DominateAllUsersDom(I, V, DT);
+  if (!DT) {
+    DominatorTree TempDT;
+    TempDT.recalculate(*F);
+    return DominateAllUsersDom(I, V, &TempDT);
+  } else {
+    return DominateAllUsersDom(I, V, DT);
+  }
 }
 
 bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
@@ -4180,8 +4188,8 @@ void SROA_Parameter_HLSL::flattenGlobal(GlobalVariable *GV) {
     GlobalVariable *EltGV = cast<GlobalVariable>(WorkList.front());
     WorkList.pop_front();
     const bool bAllowReplace = true;
-    // Globals don't need DomTree here because they take another path
-    if (SROA_Helper::LowerMemcpy(EltGV, /*annoation*/ nullptr, dxilTypeSys, DL,
+    // SROA_Parameter_HLSL has no access to a domtree, if one is needed, it'll be generated
+    if (SROA_Helper::LowerMemcpy(EltGV, /*annotation*/ nullptr, dxilTypeSys, DL,
                                  nullptr /*DT */, bAllowReplace)) {
       continue;
     }
@@ -4206,7 +4214,7 @@ void SROA_Parameter_HLSL::flattenGlobal(GlobalVariable *GV) {
       }
       EltGV = NewEltGV;
     } else {
-      // Globals don't need DomTree
+      // SROA_Parameter_HLSL has no access to a domtree, if one is needed, it'll be generated
       SROAed = SROA_Helper::DoScalarReplacement(
           EltGV, Elts, Builder, bFlatVector,
           // TODO: set precise.

+ 60 - 0
tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/memcpy_gv.hlsl

@@ -0,0 +1,60 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// This test contrives a way to reach a potential memcopy replacement involving a static global
+// Previously, this was believed impossible so a nullptr was provided for the domtree.
+// Since it is possible, this verifies that a crash doesn't occur as the null case is now handled.
+
+// A struct, however minimal, is needed to force a memcpy
+struct struct_val_t
+{
+    float val;
+};
+
+// A retrieval function to prevent an additional load that would prevent the problem optimization
+float GetVal(struct struct_val_t sv) {
+  return sv.val;
+}
+
+cbuffer cbuf : register(b0 , space2 ) { struct_val_t sv; };
+
+// Trivial conditional initialization of the first global
+// meant to keep the memcopy src GEP out of the entry block
+float CondInit(float val)
+{
+    if (val <= 0.0)
+      return 0.0;
+    return val;
+}
+
+// A global initialization that makes use of memcpy
+struct_val_t MemCpyInit()
+{
+    struct_val_t i;
+    i = sv;
+    return i;
+}
+
+static const float global1 = CondInit( 0.4242424242);
+static const struct_val_t global2 = MemCpyInit();
+
+// This function is embarassingly optimizable in later passes.
+// This check is mostly just to verify it doesn't crash anymore
+// So checks are very simple
+//CHECK: define void @main
+//CHECK: ret void
+float main(int a : A) : SV_Target
+{
+  // Implicit inititalization of global1 including conditional will go here.
+  // This initialization prevents memcpy source being in the entry block which makes it dominate everything
+  // global1 isn't even used, but the conditionals it introduces remain
+  // at the point of memcopy lowering
+
+  // Implicit initialization of global2 which includes the memcopy operation in question will go here.
+
+  // This conditional prevents the source from dominating all uses of the memcopy dest
+  if (a)
+    // This function call keeps the memcopy being the only load use of the alloca
+    // at least at this stage of the passes
+    return GetVal(global2);
+  return 0;
+}