Quellcode durchsuchen

Handle more complex addressing modes (#1147)

Fix HoistConstantArray pass to recognize more complex load & store patterns.

Previously it was expecting no nesting and just a simple list of GetElementPtr's that fed loads or stores. Now it can accept a tree (DAG) of GetElementPtrs that feed leaf node loads or stores.
These patterns cannot be directly derived from HLSL, however they can occur due to previous optimizations that transform the array accesses (like a partial CSE or similar).
Grant vor 7 Jahren
Ursprung
Commit
18138ff8cf

+ 55 - 24
lib/Transforms/Scalar/HoistConstantArray.cpp

@@ -127,9 +127,11 @@ namespace {
     std::vector<Constant *> m_Values;
     bool m_IsConstArray;
 
-    bool AnalyzeStore(StoreInst *);
-    bool StoreConstant(uint64_t index, Constant *value);
+    bool AnalyzeStore(StoreInst *SI);
+    bool StoreConstant(int64_t index, Constant *value);
     void EnsureSize();
+    void GetArrayStores(GEPOperator *gep,
+                        std::vector<StoreInst *> &stores) const;
     bool AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps);
     bool AllGEPUsersAreValid(GEPOperator *gep);
     UndefValue *UndefElement();
@@ -184,17 +186,29 @@ GlobalVariable *CandidateArray::GetGlobalArray() const {
   return GV;
 }
 
-// Get a list of all the stores that write to the array.
-std::vector<StoreInst*> CandidateArray::GetArrayStores() const {
-  std::vector<StoreInst*> stores;
+// Get a list of all the stores that write to the array through one or more
+// GetElementPtrInst operations.
+std::vector<StoreInst *> CandidateArray::GetArrayStores() const {
+  std::vector<StoreInst *> stores;
   for (User *U : m_Alloca->users())
     if (GEPOperator *gep = dyn_cast<GEPOperator>(U))
-      for (User *GU : gep->users())
-        if (StoreInst *SI = dyn_cast<StoreInst>(GU))
-          stores.push_back(SI);
+      GetArrayStores(gep, stores);
   return stores;
 }
 
+// Recursively collect all the stores that write to the pointer/buffer
+// referred to by this GetElementPtrInst.
+void CandidateArray::GetArrayStores(GEPOperator *gep,
+                                    std::vector<StoreInst *> &stores) const {
+  for (User *GU : gep->users()) {
+    if (StoreInst *SI = dyn_cast<StoreInst>(GU)) {
+      stores.push_back(SI);
+    }
+    else if (GEPOperator *GEPI = dyn_cast<GEPOperator>(GU)) {
+      GetArrayStores(GEPI, stores);
+    }
+  }
+}
 // Check to see that all the users of the array are GEPs.
 // If so, populate the `geps` vector with a list of all geps that use the array.
 bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
@@ -214,6 +228,7 @@ bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
 //  1. A store of a constant value that does not overwrite an existing constant
 //     with a different value.
 //  2. A load instruction.
+//  3. Another GetElementPtrInst that itself only has valid uses (recursively)
 // Any other use is considered invalid.
 bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
   for (User *U : gep->users()) {
@@ -221,6 +236,10 @@ bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
       if (!AnalyzeStore(SI))
         return false;
     }
+    else if (GEPOperator *recursive_gep = dyn_cast<GEPOperator>(U)) {
+      if (!AllGEPUsersAreValid(recursive_gep))
+        return false;
+    }
     else if (!isa<LoadInst>(U)) {
       return false;
     }
@@ -254,29 +273,41 @@ void CandidateArray::AnalyzeUses() {
 bool CandidateArray::AnalyzeStore(StoreInst *SI) {
   if (!isa<Constant>(SI->getValueOperand()))
     return false;
+  // Walk up the ladder of GetElementPtr instructions to accumulate the index
+  int64_t index = 0;
+  for (auto iter = SI->getPointerOperand(); iter != m_Alloca;) {
+    GEPOperator *gep = cast<GEPOperator>(iter);
+    if (!gep->hasAllConstantIndices())
+      return false;
 
-  GEPOperator *gep = cast<GEPOperator>(SI->getPointerOperand());
-  if (!gep->hasAllConstantIndices())
-    return false;
-
-  assert(gep->getPointerOperand() == m_Alloca);
-  assert(gep->getNumIndices() == 2);
-
-  ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
-  ConstantInt *index = cast<ConstantInt>(gep->getOperand(2));
-
-  // Non-zero offset is unexpected, but could occur in the wild. Bail out if we see it.
-  if (!ptrOffset->isZero())
-    return false;
-
-  return StoreConstant(index->getLimitedValue(), cast<Constant>(SI->getValueOperand()));
+    // Deal with the 'extra 0' index from what might have been a global pointer
+    // https://www.llvm.org/docs/GetElementPtr.html#why-is-the-extra-0-index-required
+    if ((gep->getNumIndices() == 2) && (gep->getPointerOperand() == m_Alloca)) {
+      // Non-zero offset is unexpected, but could occur in the wild. Bail out if
+      // we see it.
+      ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
+      if (!ptrOffset->isZero())
+        return false;
+    }
+    else if (gep->getNumIndices() != 1) {
+      return false;
+    }
+
+    // Accumulate the index
+    ConstantInt *c = cast<ConstantInt>(gep->getOperand(gep->getNumIndices()));
+    index += c->getSExtValue();
+
+    iter = gep->getPointerOperand();
+  }
+
+  return StoreConstant(index, cast<Constant>(SI->getValueOperand()));
 }
 
 // Check if the store is valid and record the value if so.
 // A valid constant store is either:
 //  1. A store of a new constant
 //  2. A store of the same constant to the same location
-bool CandidateArray::StoreConstant(uint64_t index, Constant *value) {
+bool CandidateArray::StoreConstant(int64_t index, Constant *value) {
   EnsureSize();
   size_t i = static_cast<size_t>(index);
   if (i >= m_Values.size())

+ 104 - 0
tools/clang/test/HLSL/hca/15.ll

@@ -0,0 +1,104 @@
+; RUN: %opt %s -hlsl-hca -S | FileCheck %s
+
+; Make sure we added the global and removed the alloca
+; CHECK: @a.gva.hca = internal unnamed_addr constant [6 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00]
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: call void @dx.op.storeOutput.f32
+
+;static const float a[]= { 1, 2, 3, 4};
+;uint b;
+;float main() : SV_Target {
+;  return a[b];
+;}
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f:64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%"$Globals" = type { i32 }
+
+@"\01?b@@3IB" = constant i32 0, align 4
+
+define void @main() {
+entry:
+  %a.gva = alloca [6 x float]
+  %gva.init = getelementptr [6 x float], [6 x float]* %a.gva, i32 0, i32 0
+  store float 1.000000e+00, float* %gva.init
+  %gva.init1 = getelementptr float, float* %gva.init, i32 2
+  store float 3.000000e+00, float* %gva.init1
+  %gva.init2 = getelementptr float, float* %gva.init1, i32 -1
+  store float 2.000000e+00, float* %gva.init2
+  %gva.init3 = getelementptr float, float* %gva.init2, i32 2
+  store float 4.000000e+00, float* %gva.init3
+  %gva.init4 = getelementptr float, float* %gva.init1, i32 2
+  store float 5.000000e+00, float* %gva.init4
+  %gva.init5 = getelementptr float, float* %gva.init1, i32 3
+  store float 6.000000e+00, float* %gva.init5
+  %"$Globals_buffer" = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+  %0 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %"$Globals_buffer", i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  %1 = extractvalue %dx.types.CBufRet.i32 %0, 0
+  %arrayidx = getelementptr inbounds [6 x float], [6 x float]* %a.gva, i32 0, i32 %1
+  %2 = load float, float* %arrayidx, align 4, !tbaa !20
+  %arrayidx1 = getelementptr float, float* %arrayidx, i32 1
+  %3 = load float, float* %arrayidx1, align 4, !tbaa !20
+  %arrayidx2 = getelementptr float, float* %arrayidx1, i32 1
+  %4 = load float, float* %arrayidx2, align 4, !tbaa !20
+  %arrayidx3 = getelementptr float, float* %arrayidx2, i32 1
+  %5 = load float, float* %arrayidx3, align 4, !tbaa !20
+  %arrayidx4 = getelementptr float, float* %arrayidx1, i32 3
+  %6 = load float, float* %arrayidx4, align 4, !tbaa !20
+  %7 = fadd fast float %2, %3
+  %8 = fadd fast float %4, %5
+  %9 = fadd fast float %6, %7
+  %10 = fadd fast float %8, %9
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %10)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
+!llvm.ident = !{!0}
+!dx.version = !{!1}
+!dx.valver = !{!2}
+!dx.shaderModel = !{!3}
+!dx.resources = !{!4}
+!dx.typeAnnotations = !{!7, !10}
+!dx.viewIdState = !{!14}
+!dx.entryPoints = !{!15}
+
+!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}
+!3 = !{!"ps", i32 6, i32 0}
+!4 = !{null, null, !5, null}
+!5 = !{!6}
+!6 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
+!7 = !{i32 0, %"$Globals" undef, !8}
+!8 = !{i32 4, !9}
+!9 = !{i32 6, !"b", i32 3, i32 0, i32 7, i32 5}
+!10 = !{i32 1, void ()* @main, !11}
+!11 = !{!12}
+!12 = !{i32 0, !13, !13}
+!13 = !{}
+!14 = !{[2 x i32] [i32 0, i32 1]}
+!15 = !{void ()* @main, !"main", !16, !4, null}
+!16 = !{null, !17, null}
+!17 = !{!18}
+!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !19, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!19 = !{i32 0}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"float", !22, i64 0}
+!22 = !{!"omnipotent char", !23, i64 0}
+!23 = !{!"Simple C/C++ TBAA"}

+ 1 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -5878,6 +5878,7 @@ TEST_F(CompilerTest, HoistConstantArray) {
   CodeGenTestCheck(L"hca\\12.hlsl");
   CodeGenTestCheck(L"hca\\13.hlsl");
   CodeGenTestCheck(L"hca\\14.hlsl");
+  CodeGenTestCheck(L"hca\\15.ll");
 }
 
 TEST_F(CompilerTest, VecElemConstEval) {