瀏覽代碼

Turn stores to initializer for static variable where possible. (#2122)

Adam Yang 6 年之前
父節點
當前提交
91f37618db

+ 1 - 0
include/llvm/InitializePasses.h

@@ -260,6 +260,7 @@ void initializeResourceToHandlePass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
+void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 // HLSL Change Ends
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
 void initializeScalarEvolutionPass(PassRegistry&);

+ 7 - 0
include/llvm/Transforms/Scalar.h

@@ -124,6 +124,13 @@ void initializeSROA_DT_HLSLPass(PassRegistry&);
 ModulePass *createSROA_Parameter_HLSL();
 void initializeSROA_Parameter_HLSLPass(PassRegistry&);
 
+//===----------------------------------------------------------------------===//
+//
+// Cleans up constant stores that didn't get a chance to be turned into initializers
+//
+Pass *createDxilFixConstArrayInitializerPass();
+void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
+
 Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 //===----------------------------------------------------------------------===//

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -100,6 +100,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilLoadMetadataPass(Registry);
     initializeDxilLoopUnrollPass(Registry);
+    initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilLowerCreateHandleForLibPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDxilPreserveAllOutputsPass(Registry);

+ 4 - 0
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -294,6 +294,10 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createCFGSimplificationPass());
 
   MPM.add(createDeadCodeEliminationPass());
+
+  if (OptLevel > 0) {
+    MPM.add(createDxilFixConstArrayInitializerPass());
+  }
 }
 // HLSL Change Ends
 

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -45,6 +45,7 @@ add_llvm_library(LLVMScalarOpts
   ScalarReplAggregates.cpp
   ScalarReplAggregatesHLSL.cpp  # HLSL Change
   DxilLoopUnroll.cpp # HLSL Change
+  DxilFixConstArrayInitializer.cpp # HLSL Change
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp

+ 180 - 0
lib/Transforms/Scalar/DxilFixConstArrayInitializer.cpp

@@ -0,0 +1,180 @@
+//===- DxilFixConstArrayInitializer.cpp - Special Construct Initializer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Transforms/Scalar.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"
+
+#include <unordered_map>
+#include <limits>
+
+using namespace llvm;
+
+namespace {
+
+// Module pass that tries to turn constant stores into static array globals
+// (performed in the entry function's entry block) into initializers on the
+// globals themselves. The actual work is done in runOnModule.
+class DxilFixConstArrayInitializer : public ModulePass {
+public:
+  // Pass identification; its address is what identifies the pass.
+  static char ID;
+  // Registers the pass with the global PassRegistry on construction.
+  DxilFixConstArrayInitializer() : ModulePass(ID) {
+    initializeDxilFixConstArrayInitializerPass(*PassRegistry::getPassRegistry());
+  }
+  // Returns true if the module was modified.
+  bool runOnModule(Module &M) override;
+  // Human-readable pass name.
+  const char *getPassName() const override { return "Dxil Fix Const Array Initializer"; }
+};
+
+char DxilFixConstArrayInitializer::ID;
+}
+
+/// Attempts to replace entry-block constant stores into \p GV with a constant
+/// array initializer.
+///
+/// \p GV must currently have an undef initializer and be an array of at most
+/// 1024 scalar (non-aggregate, non-vector) elements; otherwise nothing is done.
+/// Stores located in \p EntryBlock that write a constant value through a
+/// two-index GEP with all-constant indices are candidates for folding, as long
+/// as they come before the first "unsafe" instruction of the block (a load of
+/// GV, or a store with a non-constant index or value). Loads and stores in
+/// other blocks are ignored. The fold only happens when every array element
+/// receives a value; in that case the folded stores are erased.
+///
+/// \param GV          Global variable to examine and possibly rewrite.
+/// \param EntryBlock  Block whose stores are considered for folding.
+/// \param InstOrder   Position of every instruction of \p EntryBlock within
+///                    the block (smaller means earlier).
+/// \returns true if the initializer of \p GV was set and stores were removed,
+///          false if \p GV was left untouched.
+static bool TryFixGlobalVariable(GlobalVariable &GV, BasicBlock *EntryBlock, const std::unordered_map<Instruction *, unsigned> &InstOrder) {
+  // Only proceed if the variable has an undef initializer
+  if (!GV.hasInitializer() || !isa<UndefValue>(GV.getInitializer()))
+    return false;
+
+  // Only handle cases when it's an array of scalars.
+  Type *Ty = GV.getType()->getPointerElementType();
+  if (!Ty->isArrayTy())
+    return false;
+
+  // Don't handle arrays that are too big
+  if (Ty->getArrayNumElements() > 1024)
+    return false;
+
+  Type *ElementTy = Ty->getArrayElementType();
+
+  // Only handle arrays of scalar types
+  if (ElementTy->isAggregateType() || ElementTy->isVectorTy())
+    return false;
+
+  // The instruction index at which point we no longer consider it
+  // safe to fold Stores. It's the earliest store with non-constant index,
+  // earliest store with non-constant value, or a load
+  unsigned FirstUnsafeIndex = std::numeric_limits<unsigned>::max();
+
+  SmallVector<StoreInst *, 8> PossibleFoldableStores;
+
+  // First do a pass to find the boundary for where we could fold stores. Get a
+  // list of stores that may be folded.
+  for (User *U : GV.users()) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+      bool AllConstIndices = GEP->hasAllConstantIndices();
+      unsigned NumIndices = GEP->getNumIndices();
+
+      if (NumIndices != 2)
+        return false;
+
+      for (User *GEPUser : GEP->users()) {
+        if (StoreInst *Store = dyn_cast<StoreInst>(GEPUser)) {
+          // The GEP must be the address being written. If it is the value
+          // being stored instead, the element address escapes and we cannot
+          // reason about later accesses, so give up.
+          if (Store->getPointerOperand() != GEP)
+            return false;
+          if (Store->getParent() != EntryBlock)
+            continue;
+          unsigned StoreIndex = InstOrder.at(Store);
+          if (!AllConstIndices || !isa<Constant>(Store->getValueOperand())) {
+            FirstUnsafeIndex = std::min(StoreIndex, FirstUnsafeIndex);
+            continue;
+          }
+          PossibleFoldableStores.push_back(Store);
+        }
+        else if (LoadInst *Load = dyn_cast<LoadInst>(GEPUser)) {
+          if (Load->getParent() != EntryBlock)
+            continue;
+          FirstUnsafeIndex = std::min(FirstUnsafeIndex, InstOrder.at(Load));
+        }
+        // If we have something weird like chained GEPS, or bitcasts, give up.
+        else {
+          return false;
+        }
+      }
+    }
+  }
+
+  // InitValue[i] is the constant chosen for element i (null while unset);
+  // LatestStores[i] is the order of the store that supplied it, so that the
+  // last store to an element wins regardless of use-list iteration order.
+  SmallVector<Constant *, 16> InitValue;
+  SmallVector<unsigned, 16>   LatestStores;
+  SmallVector<StoreInst *, 8> StoresToRemove;
+
+  InitValue.resize(Ty->getArrayNumElements());
+  LatestStores.resize(Ty->getArrayNumElements());
+
+  for (StoreInst *Store : PossibleFoldableStores) {
+    unsigned StoreIndex = InstOrder.at(Store);
+    // Skip stores that come at or after the first unsafe instruction
+    if (StoreIndex >= FirstUnsafeIndex)
+      continue;
+
+    GEPOperator *GEP = cast<GEPOperator>(Store->getPointerOperand());
+    uint64_t Index = cast<ConstantInt>(GEP->getOperand(2))->getLimitedValue();
+
+    // A constant index past the end of the array has no slot in the
+    // initializer; leave such a store alone instead of indexing out of range.
+    if (Index >= InitValue.size())
+      continue;
+
+    if (LatestStores[Index] <= StoreIndex) {
+      InitValue[Index] = cast<Constant>(Store->getValueOperand());
+      LatestStores[Index] = StoreIndex;
+    }
+    StoresToRemove.push_back(Store);
+  }
+
+  // Give up if we have missing indices
+  for (Constant *C : InitValue)
+    if (!C)
+      return false;
+
+  GV.setInitializer(ConstantArray::get(cast<ArrayType>(Ty), InitValue));
+
+  for (StoreInst *Store : StoresToRemove)
+    Store->eraseFromParent();
+
+  return true;
+}
+
+/// Tries to fold entry-block constant stores into initializers for every
+/// global variable of \p M.
+///
+/// The entry function is taken from the DxilModule if the module has one,
+/// otherwise from the HLModule. Nothing is done when there is no entry
+/// function or when its entry block has predecessors.
+///
+/// \returns true if at least one global variable was rewritten.
+bool DxilFixConstArrayInitializer::runOnModule(Module &M) {
+  BasicBlock *EntryBlock = nullptr;
+
+  if (M.HasDxilModule()) {
+    hlsl::DxilModule &DM = M.GetDxilModule();
+    if (DM.GetEntryFunction()) {
+      EntryBlock = &DM.GetEntryFunction()->getEntryBlock();
+    }
+  }
+  else if (M.HasHLModule()) {
+    hlsl::HLModule &HM = M.GetHLModule();
+    if (HM.GetEntryFunction())
+      EntryBlock = &HM.GetEntryFunction()->getEntryBlock();
+  }
+
+  if (!EntryBlock)
+    return false;
+
+  // If some block might branch to the entry for some reason (like if it's a loop header),
+  // give up now. Have to make sure this block is not preceded by anything.
+  if (pred_begin(EntryBlock) != pred_end(EntryBlock))
+    return false;
+
+  // Find the instruction order for everything in the entry block. An explicit
+  // counter is used so the numbering does not depend on whether the map
+  // insertion or the size query is evaluated first.
+  std::unordered_map<Instruction *, unsigned> InstOrder;
+  unsigned NextOrder = 0;
+  for (Instruction &I : *EntryBlock) {
+    InstOrder[&I] = NextOrder++;
+  }
+
+  // Accumulate the result: the module is changed if any global was fixed,
+  // not only the last one visited.
+  bool Changed = false;
+  for (GlobalVariable &GV : M.globals()) {
+    Changed |= TryFixGlobalVariable(GV, EntryBlock, InstOrder);
+  }
+
+  return Changed;
+}
+
+
+// Factory for the pass: returns a newly allocated DxilFixConstArrayInitializer.
+Pass *llvm::createDxilFixConstArrayInitializerPass() {
+  return new DxilFixConstArrayInitializer();
+}
+
+INITIALIZE_PASS(DxilFixConstArrayInitializer, "dxil-fix-array-init", "Dxil Fix Array Initializer", false, false)

+ 26 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK-NOT: store float
+
+// Helper called with literal arguments to produce each initial value of GLOB.
+float f(float a) {
+  return a * 2;
+}
+
+// Static array whose initializer consists of function calls rather than
+// literals. The FileCheck directives in this file expect it to be emitted as
+// a constant [10 x float] with no float stores left over.
+static float GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+// Entry point: reads GLOB at an index taken from the shader input.
+[RootSignature("")]
+float main(float a : A) : SV_Target {
+  return GLOB[a];
+}

+ 27 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_uint.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: = internal unnamed_addr constant [10 x i32] [
+// CHECK-NOT: store i32
+
+// Helper called with literal arguments to produce each initial value of GLOB.
+uint f(uint a) {
+  return a * 2;
+}
+
+// Unsigned-integer variant of the function-call-initialized static array. The
+// FileCheck directives in this file expect a constant [10 x i32] with no i32
+// stores left over.
+static uint GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+// Entry point: reads GLOB at an index taken from the shader input.
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  return GLOB[a];
+}
+

+ 26 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_vec.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK-NOT: store float
+
+// Helper returning a two-component vector built from its literal argument.
+float2 f(float a) {
+  return float2(a, a * 2);
+}
+
+// Static array of float2 initialized through function calls. The FileCheck
+// directives in this file expect two constant [10 x float] globals and no
+// float stores left over.
+static float2 GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+// Entry point: reads both components of GLOB at a shader-input index.
+[RootSignature("")]
+float main(float a : A) : SV_Target {
+  return GLOB[a].x + GLOB[a].y;
+}

+ 20 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [4 x i32] [i32 0, i32 2, i32 4, i32 20]
+// CHECK-NOT: store i32
+
+// Helper called with literal arguments; doubles its input.
+uint f(uint a) {
+  return a * 2;
+}
+
+// Static array initialized through function calls.
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+// Entry point: element 3 is written a second time (with f(10)) before any
+// read. The FileCheck directives in this file expect that later value (20) in
+// the constant initializer and no i32 stores left over.
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  GLOB[3] = f(10);
+  return GLOB[a];
+}
+}

+ 23 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store2.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr global [4 x i32] [i32 0, i32 2, i32 4, i32 6]
+// CHECK: store i32
+// CHECK: store i32
+// CHECK-NOT: store i32
+
+// Helper called with literal arguments; doubles its input.
+uint f(uint a) {
+  return a * 2;
+}
+
+// Static array initialized through function calls.
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+// Entry point: a store through a runtime index (b) comes before the store to
+// element 3. The FileCheck directives in this file expect the global to keep
+// its original values {0, 2, 4, 6}, stay non-constant, and exactly two i32
+// stores to remain.
+[RootSignature("")]
+float main(uint a : A, uint b : B) : SV_Target {
+  GLOB[b] = 20;
+  GLOB[3] = 10;
+  return GLOB[a];
+}

+ 21 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store3.hlsl

@@ -0,0 +1,21 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [4 x i32] [i32 0, i32 2, i32 4, i32 20]
+
+// CHECK-NOT: store i32
+
+// Helper called with literal arguments; doubles its input.
+uint f(uint a) {
+  return a * 2;
+}
+
+// Static array initialized through function calls.
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+// Entry point: element 3 is overwritten with the literal 20 before any read.
+// The FileCheck directives in this file expect that value in the constant
+// initializer and no i32 stores left over. Parameter b is not used.
+[RootSignature("")]
+float main(uint a : A, uint b : B) : SV_Target {
+  GLOB[3] = 20;
+  return GLOB[a];
+}

+ 25 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_load_store.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr global [4 x i32] [i32 0, i32 2, i32 4, i32 6]
+// CHECK: load i32
+// CHECK: store i32
+// CHECK: load i32
+// CHECK-NOT: store i32
+// CHECK-NOT: load i32
+
+// Helper called with literal arguments; doubles its input.
+uint f(uint a) {
+  return a * 2;
+}
+
+// Static array initialized through function calls.
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+// Entry point: GLOB is read before element 3 is overwritten, then read again.
+// The FileCheck directives in this file expect the global to keep its
+// original values {0, 2, 4, 6} and a load, store, load sequence to remain.
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  uint result = GLOB[a];
+  GLOB[3] = f(10);
+  return result + GLOB[a];
+}