Prechádzať zdrojové kódy

Add pass to hoist constant arrays (#205)

This commit adds a new pass to hoist arrays from local to global scope
when the array only contains constant values. A local array will use
alloca and stores for initialization, but a global array uses a global
variable with a constant initializer.

The idea is that a global array with constant initializer will allow the
backend compilers to produce better code because it is much easier to see
that the array is fully constant.
David Peixotto 8 rokov pred
rodič
commit
0ba250e9dc

+ 1 - 0
include/llvm/InitializePasses.h

@@ -256,6 +256,7 @@ void initializeSROA_Parameter_HLSLPass(PassRegistry&);
 void initializeDynamicIndexingVectorToArrayPass(PassRegistry&);
 void initializeMultiDimArrayToOneDimArrayPass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
+void initializeHoistConstantArrayPass(PassRegistry&);
 // HLSL Change Ends
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
 void initializeScalarEvolutionPass(PassRegistry&);

+ 7 - 0
include/llvm/Transforms/Scalar.h

@@ -134,6 +134,13 @@ void initializeDynamicIndexingVectorToArrayPass(PassRegistry&);
 //
 ModulePass *createMultiDimArrayToOneDimArrayPass();
 void initializeMultiDimArrayToOneDimArrayPass(PassRegistry&);
+
+//===----------------------------------------------------------------------===//
+// Hoist a local array initialized with constant values to a global array with
+// a constant initializer.
+//
+ModulePass *createHoistConstantArrayPass();
+void initializeHoistConstantArrayPass(PassRegistry&);
 // HLSL Change Ends
 
 //===----------------------------------------------------------------------===//

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -92,6 +92,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeHLEmitMetadataPass(Registry);
     initializeHLEnsureMetadataPass(Registry);
     initializeHLMatrixLowerPassPass(Registry);
+    initializeHoistConstantArrayPass(Registry);
     initializeIPSCCPPass(Registry);
     initializeIndVarSimplifyPass(Registry);
     initializeInstructionCombiningPassPass(Registry);

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -11,6 +11,7 @@ add_llvm_library(LLVMScalarOpts
   FlattenCFGPass.cpp
   Float2Int.cpp
   GVN.cpp
+  HoistConstantArray.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp

+ 375 - 0
lib/Transforms/Scalar/HoistConstantArray.cpp

@@ -0,0 +1,375 @@
+//===- HoistConstantArray.cpp - Code to perform constant array hoisting ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hoisting of constant local arrays to global arrays.
+// The idea is to change the array initialization from function local memory
+// using alloca and stores to global constant memory using a global variable
+// and constant initializer. We only hoist arrays that have all constant elements.
+// The frontend will hoist the arrays if they are declared static, but we can
+// hoist any array that is only ever initialized with constant data.
+//
+// This transformation was developed to work with the dxil produced from the
+// hlsl compiler. Hoisting the array to use a constant initializer should allow
+// a dxil backend compiler to generate more efficient code than a local array.
+// For example, it could use an immediate constant pool to represent the array.
+//
+// We limit hoisting to those arrays that are initialized by constant values.
+// We still hoist if the array is partially initialized as long as no
+// non-constant values are written. The uninitialized values will be hoisted 
+// as undef values.
+//
+// Improvements:
+// Currently we do not merge arrays that have the same constant values. We
+// create the global variables with `unnamed_addr` set which means they
+// can be merged with other constants. We should probably use a separate
+// pass to merge all the unnamed_addr constants.
+//
+// Example:
+//
+// float main(int i : I) : SV_Target{
+//   float A[] = { 1, 2, 3 };
+//   return A[i];
+// }
+//
+// Without array hoisting, we generate the following dxil
+//
+// define void @main() {
+// entry:
+//   %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+//   %A = alloca[3 x float], align 4
+//   %1 = getelementptr inbounds[3 x float], [3 x float] * %A, i32 0, i32 0
+//   store float 1.000000e+00, float* %1, align 4
+//   %2 = getelementptr inbounds[3 x float], [3 x float] * %A, i32 0, i32 1
+//   store float 2.000000e+00, float* %2, align 4
+//   %3 = getelementptr inbounds[3 x float], [3 x float] * %A, i32 0, i32 2
+//   store float 3.000000e+00, float* %3, align 4
+//   %arrayidx = getelementptr inbounds[3 x float], [3 x float] * %A, i32 0, i32 %0
+//   %4 = load float, float* %arrayidx, align 4, !tbaa !14
+//   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %4);
+//   ret void
+// }
+//
+// With array hoisting enabled we generate this dxil
+//
+// @A.hca = internal unnamed_addr constant [3 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00]
+// define void @main() {
+// entry:
+//   %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+//   %arrayidx = getelementptr inbounds[3 x float], [3 x float] * @A.hca, i32 0, i32 %0
+//   %1 = load float, float* %arrayidx, align 4, !tbaa !14
+//   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+//   ret void
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Casting.h"
+using namespace llvm;
+
+namespace {
+  class CandidateArray;
+
+  //===--------------------------------------------------------------------===//
+  // HoistConstantArray pass implementation
+  //
+  // Module pass that visits every function definition and replaces local
+  // arrays that are only ever written with constants by constant globals.
+  class HoistConstantArray : public ModulePass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    // Registers the pass with the global pass registry on construction.
+    HoistConstantArray() : ModulePass(ID) {
+      initializeHoistConstantArrayPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Runs the hoisting on every function definition in the module.
+    // Returns true if any array was hoisted.
+    bool runOnModule(Module &M) override;
+
+    // The pass only rewrites allocas/stores, so it declares the CFG preserved.
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+    }
+  private:
+    // Hoists all qualifying arrays in a single function.
+    bool runOnFunction(Function &F);
+    // Collects the array allocas in the entry block that may be hoistable.
+    std::vector<AllocaInst *> findCandidateAllocas(Function &F);
+    // Replaces the local array of an analyzed candidate with a global.
+    void hoistArray(const CandidateArray &candidate);
+    // Erases the stores that initialized the local array.
+    void removeLocalArrayStores(const CandidateArray &candidate);
+ };
+
+  // Represents an array we are considering for hoisting.
+  // Contains helper routines for analyzing if hoisting is possible
+  // and creating the global variable for the hoisted array.
+  class CandidateArray {
+  public:
+    // The alloca must satisfy isHoistableArrayAlloca (asserted).
+    explicit CandidateArray(AllocaInst *);
+    // Result of the last AnalyzeUses call; false until it has been called.
+    bool IsConstArray() const { return m_IsConstArray; }
+    // Inspects all uses of the alloca and updates IsConstArray accordingly.
+    void AnalyzeUses();
+    // Creates a new constant global holding the recorded element values.
+    // Only valid when IsConstArray() is true.
+    GlobalVariable *GetGlobalArray() const;
+    AllocaInst *GetLocalArray() const { return m_Alloca; }
+    // Returns every store that writes through a GEP of the alloca.
+    std::vector<StoreInst*> GetArrayStores() const;
+
+  private:
+    AllocaInst *m_Alloca;             // The local array being analyzed.
+    ArrayType *m_ArrayType;           // Array type allocated by m_Alloca.
+    std::vector<Constant *> m_Values; // Constant recorded per element; sized lazily by EnsureSize.
+    bool m_IsConstArray;              // True once the analysis proved the array constant.
+
+    // Validates a single store and records the constant it writes.
+    bool AnalyzeStore(StoreInst *);
+    // Records `value` at `index`; fails on out-of-range or conflicting writes.
+    bool StoreConstant(uint64_t index, Constant *value);
+    // Fills m_Values with undef placeholders on first use.
+    void EnsureSize();
+    // True when every user of the alloca is a GEP; collects those GEPs.
+    bool AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps);
+    // True when every user of the GEP is a load or a valid constant store.
+    bool AllGEPUsersAreValid(GEPOperator *gep);
+    // Undef value of the array's element type.
+    UndefValue *UndefElement();
+  };
+}
+
+// Look up the type allocated by `allocaInst` and hand it back as an
+// ArrayType. Yields nullptr when the alloca allocates anything but an array.
+static ArrayType *getAllocaArrayType(AllocaInst *allocaInst) {
+  Type *allocatedTy = allocaInst->getType()->getPointerElementType();
+  return dyn_cast<ArrayType>(allocatedTy);
+}
+
+// Check if the instruction is an alloca that we should consider for hoisting.
+// The alloca must allocate exactly one array of primitive (single value)
+// elements. Returns the alloca when it qualifies and nullptr otherwise.
+static AllocaInst *isHoistableArrayAlloca(Instruction *I) {
+  AllocaInst *allocaInst = dyn_cast<AllocaInst>(I);
+  if (!allocaInst)
+    return nullptr;
+
+  // An alloca with an explicit element count (`alloca [N x T], i32 K`)
+  // reserves K copies of the array. The hoisted global only holds a single
+  // copy, so accesses beyond the first copy would fall outside the global.
+  if (allocaInst->isArrayAllocation())
+    return nullptr;
+
+  ArrayType *arrayTy = getAllocaArrayType(allocaInst);
+  if (!arrayTy)
+    return nullptr;
+
+  // Aggregate elements (structs, nested arrays) are not supported.
+  if (!arrayTy->getElementType()->isSingleValueType())
+    return nullptr;
+
+  return allocaInst;
+}
+
+// ----------------------------------------------------------------------------
+// CandidateArray implementation
+// ----------------------------------------------------------------------------
+
+// Build a candidate for `AI`. The alloca has to pass isHoistableArrayAlloca;
+// the array starts out as "not constant" until AnalyzeUses proves otherwise.
+CandidateArray::CandidateArray(AllocaInst *AI)
+  : m_Alloca(AI)
+  , m_ArrayType(getAllocaArrayType(AI))
+  , m_Values()
+  , m_IsConstArray(false)
+{
+  assert(isHoistableArrayAlloca(AI));
+}
+
+// Materialize the hoisted array: an internal, constant, unnamed_addr global
+// named "<alloca name>.hca" whose initializer holds the recorded values.
+// Must only be called after the array has been analyzed as constant.
+GlobalVariable *CandidateArray::GetGlobalArray() const {
+  assert(IsConstArray());
+  Module *parent = m_Alloca->getModule();
+  Constant *init = ConstantArray::get(m_ArrayType, m_Values);
+  GlobalVariable *hoisted = new GlobalVariable(
+      *parent, m_ArrayType, /*isConstant=*/true,
+      GlobalVariable::LinkageTypes::InternalLinkage, init,
+      Twine(m_Alloca->getName()) + ".hca");
+  hoisted->setUnnamedAddr(true);
+  return hoisted;
+}
+
+// Collect every store instruction that uses a GEP of the array.
+std::vector<StoreInst*> CandidateArray::GetArrayStores() const {
+  std::vector<StoreInst*> stores;
+  for (User *arrayUser : m_Alloca->users()) {
+    GEPOperator *gep = dyn_cast<GEPOperator>(arrayUser);
+    if (!gep)
+      continue;
+
+    for (User *gepUser : gep->users()) {
+      StoreInst *store = dyn_cast<StoreInst>(gepUser);
+      if (store)
+        stores.push_back(store);
+    }
+  }
+  return stores;
+}
+
+// Verify that the array is used by GEPs only, appending each GEP to `geps`
+// as it is visited. Stops and returns false at the first non-GEP user.
+bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
+  for (User *arrayUser : m_Alloca->users()) {
+    if (GEPOperator *gep = dyn_cast<GEPOperator>(arrayUser))
+      geps.push_back(gep);
+    else
+      return false;
+  }
+
+  return true;
+}
+
+// Verify every user of `gep`. Accepted users are
+//  1. load instructions, and
+//  2. stores that AnalyzeStore accepts, i.e. stores of a constant that do
+//     not overwrite a previously recorded constant with a different value.
+// Anything else makes the GEP (and therefore the array) invalid.
+bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
+  for (User *gepUser : gep->users()) {
+    if (isa<LoadInst>(gepUser))
+      continue;
+
+    StoreInst *store = dyn_cast<StoreInst>(gepUser);
+    if (!store || !AnalyzeStore(store))
+      return false;
+  }
+
+  return true;
+}
+
+// Decide whether the array qualifies as a constant array by walking all of
+// its uses. The array qualifies when
+//  1. the alloca is only used by GEPs,
+//  2. each GEP is only used by loads and stores,
+//  3. all stores use constant indices,
+//  4. all stores write constants, and
+//  5. all stores to the same location write the same constant.
+// The verdict is available through IsConstArray() afterwards.
+void CandidateArray::AnalyzeUses() {
+  m_IsConstArray = false;
+
+  std::vector<GEPOperator *> arrayGeps;
+  if (AllArrayUsersAreGEP(arrayGeps)) {
+    bool everyGepValid = true;
+    for (GEPOperator *gep : arrayGeps) {
+      if (!AllGEPUsersAreValid(gep)) {
+        everyGepValid = false;
+        break;
+      }
+    }
+    m_IsConstArray = everyGepValid;
+  }
+}
+
+// Analyze a store to see if it is a valid constant store.
+// A valid store is a plain (non-volatile, non-atomic) store that writes a
+// constant value to a known (constant) element of the array. On success the
+// written constant is recorded for the element.
+// Returns false if the store prevents the array from being hoisted.
+bool CandidateArray::AnalyzeStore(StoreInst *SI) {
+  // Hoisting erases the store, which is not allowed for volatile or atomic
+  // stores.
+  if (!SI->isSimple())
+    return false;
+
+  if (!isa<Constant>(SI->getValueOperand()))
+    return false;
+
+  GEPOperator *gep = cast<GEPOperator>(SI->getPointerOperand());
+  if (!gep->hasAllConstantIndices())
+    return false;
+
+  assert(gep->getPointerOperand() == m_Alloca);
+
+  // Only the `gep %A, 0, idx` form addresses exactly one array element.
+  // Other shapes (e.g. a single index addressing the whole array, or a third
+  // index into a vector element) are legal IR, so bail out instead of
+  // asserting and then reading operands that may not exist.
+  if (gep->getNumIndices() != 2)
+    return false;
+
+  ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
+  ConstantInt *index = cast<ConstantInt>(gep->getOperand(2));
+
+  // Non-zero offset is unexpected, but could occur in the wild. Bail out if we see it.
+  if (!ptrOffset->isZero())
+    return false;
+
+  return StoreConstant(index->getLimitedValue(), cast<Constant>(SI->getValueOperand()));
+}
+
+// Try to record `value` as the content of element `index`.
+// The write is accepted when the index is in range and the element either
+// has no recorded constant yet (still undef) or already holds the very same
+// constant. Returns false for out-of-range indices and conflicting values.
+bool CandidateArray::StoreConstant(uint64_t index, Constant *value) {
+  EnsureSize();
+
+  const size_t slot = static_cast<size_t>(index);
+  if (slot >= m_Values.size())
+    return false;
+
+  Constant *&recorded = m_Values[slot];
+  if (recorded == UndefElement()) {
+    // First write to this element: remember it.
+    recorded = value;
+    return true;
+  }
+
+  return recorded == value;
+}
+
+// The value table is allocated on demand, the first time a constant store
+// needs to be remembered, so obviously non-constant arrays never pay for it.
+// Every element starts out as undef.
+void CandidateArray::EnsureSize() {
+  if (m_Values.empty())
+    m_Values.resize(m_ArrayType->getNumElements(), UndefElement());
+
+  assert(m_Values.size() == m_ArrayType->getNumElements());
+}
+
+// Produce the undef constant matching the element type of the array.
+UndefValue *CandidateArray::UndefElement() {
+  Type *elementTy = m_ArrayType->getElementType();
+  return UndefValue::get(elementTy);
+}
+
+
+// ----------------------------------------------------------------------------
+// Pass Implementation
+// ----------------------------------------------------------------------------
+
+// Scan the entry block of `F` and gather every alloca that passes
+// isHoistableArrayAlloca. Allocas outside the entry block are not considered.
+std::vector<AllocaInst*> HoistConstantArray::findCandidateAllocas(Function &F) {
+  std::vector<AllocaInst*> found;
+  for (Instruction &inst : F.getEntryBlock()) {
+    AllocaInst *arrayAlloca = isHoistableArrayAlloca(&inst);
+    if (!arrayAlloca)
+      continue;
+    found.push_back(arrayAlloca);
+  }
+
+  return found;
+}
+
+// Erase the stores that write to the local array.
+// They are deleted here explicitly instead of being left for DCE to prove
+// dead. The remaining uses (e.g. geps) are simple enough for DCE to clean up.
+void HoistConstantArray::removeLocalArrayStores(const CandidateArray &candidate) {
+  for (StoreInst *deadStore : candidate.GetArrayStores())
+    deadStore->eraseFromParent();
+}
+
+// Turn the analyzed local array into a global one: drop the initializing
+// stores, create the constant global, redirect all remaining uses of the
+// alloca to it and finally delete the alloca.
+void HoistConstantArray::hoistArray(const CandidateArray &candidate) {
+  assert(candidate.IsConstArray());
+
+  removeLocalArrayStores(candidate);
+
+  GlobalVariable *hoisted = candidate.GetGlobalArray();
+  AllocaInst *original = candidate.GetLocalArray();
+  original->replaceAllUsesWith(hoisted);
+  original->eraseFromParent();
+}
+
+// Hoist every qualifying array of one function.
+// The candidate list is gathered up front, before any instruction is
+// rewritten. Returns true when at least one array was hoisted.
+bool HoistConstantArray::runOnFunction(Function &F) {
+  bool hoistedAny = false;
+
+  for (AllocaInst *arrayAlloca : findCandidateAllocas(F)) {
+    CandidateArray candidate(arrayAlloca);
+    candidate.AnalyzeUses();
+    if (!candidate.IsConstArray())
+      continue;
+
+    hoistArray(candidate);
+    hoistedAny = true;
+  }
+
+  return hoistedAny;
+}
+
+char HoistConstantArray::ID = 0;
+INITIALIZE_PASS(HoistConstantArray, "hlsl-hca", "Hoist constant arrays", false, false)
+
+// Pass entry point: process every function that has a body.
+// Returns true when any function in the module was modified.
+bool HoistConstantArray::runOnModule(Module &M) {
+  bool modified = false;
+  for (Function &F : M) {
+    // Declarations have no body and therefore nothing to hoist.
+    if (!F.isDeclaration() && runOnFunction(F))
+      modified = true;
+  }
+
+  return modified;
+}
+
+// Factory for the constant array hoisting pass.
+// Returns a newly allocated pass instance; the caller takes ownership
+// (presumably by adding it to a pass manager).
+ModulePass *llvm::createHoistConstantArrayPass() {
+  return new HoistConstantArray();
+}
+

+ 16 - 0
tools/clang/test/HLSL/hca/01.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: the array in the helper function is initialized only with
+// constants, so it is expected to become a constant global.
+float foo(int i) {
+    float A[] = {
+        1, 2, 3
+    };
+    return A[i];
+}
+
+float main(int i : I) : SV_Target {
+
+    return foo(i);
+}
+

+ 14 - 0
tools/clang/test/HLSL/hca/02.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x float]
+
+// Negative test: A[2] is overwritten with two different constants on the two
+// branches, so the array is expected to remain a local alloca.
+float main(int i : I, int b : B) : SV_Target {
+    float A[] = {
+        1, 2, 3
+    };
+    if (b)
+        A[2] = 4;
+    else 
+        A[2] = 5;
+    return A[i];
+}
+

+ 10 - 0
tools/clang/test/HLSL/hca/03.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x float]
+
+// Negative test: the last element is initialized from the shader input f,
+// which is not a constant, so the array is expected to remain a local alloca.
+float main(int i : I, float f : F) : SV_Target {
+    float A[] = {
+        1, 2, f
+    };
+    return A[i];
+}
+

+ 16 - 0
tools/clang/test/HLSL/hca/04.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: the helper holding the constant array is called twice from
+// main; no local [3 x float] array is expected to remain.
+float foo(int i) {
+    float A[] = {
+        1, 2, 3
+    };
+    return A[i];
+}
+
+float main(int i : I) : SV_Target {
+
+    return foo(i) + foo(i+1);
+}
+

+ 13 - 0
tools/clang/test/HLSL/hca/05.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x float]
+
+// Negative test: an element selected by a runtime index is passed as an inout
+// argument and modified, so the array is expected to remain a local alloca.
+void foo(inout float f) {
+    f += 1;
+}
+
+float main(int i : I, int j : J) : SV_Target {
+    float A[] = {1,2,3};
+    foo(A[i]);
+    return A[j];
+}
+

+ 10 - 0
tools/clang/test/HLSL/hca/06.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: the array is only partially initialized (A[1] is never
+// written), but every write is a constant, so hoisting is still expected.
+float main(int i : I) : SV_Target {
+    float A[3];
+    A[0] = 1;
+    A[2] = 2;
+    return A[i];
+}

+ 10 - 0
tools/clang/test/HLSL/hca/07.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x float]
+
+// Negative test: A[2] is initialized to 3 and then conditionally overwritten
+// with 4, so the array is expected to remain a local alloca.
+float main(int i : I, int b : B) : SV_Target {
+    float A[3] = {1,2,3};
+    [branch]
+    if (b)
+        A[2] = 4;
+    return A[i];
+}

+ 13 - 0
tools/clang/test/HLSL/hca/08.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: A[2] is written on one path only, and only with a single
+// constant; it is otherwise uninitialized, so hoisting is still expected.
+float main(int i : I, int b : B) : SV_Target {
+    float A[3];
+    A[0] = 1;
+    A[1] = 2;
+    [branch]
+    if (b)
+        A[2] = 4;
+    return A[i];
+}

+ 8 - 0
tools/clang/test/HLSL/hca/09.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: the loaded element is reinterpreted with asuint; the constant
+// float array is still expected to be hoisted.
+int main(int i : I, int b : B) : SV_Target {
+    float A[3] = {1,2,3};
+    return asuint(A[i]);
+}

+ 10 - 0
tools/clang/test/HLSL/hca/10.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x float]
+
+// Negative test: A[j] is written at a runtime index, so the array is expected
+// to remain a local alloca even though all written values are constants.
+int main(int i : I, int j : J) : SV_Target {
+    float A[3];
+    A[0] = 1;
+    A[2] = 2;
+    A[j] = 3;
+    return A[i];
+}

+ 20 - 0
tools/clang/test/HLSL/hca/11.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK: alloca [3 x i32]
+
+// We could hoist the arrays individually, but the two allocas are
+// merged by inlining and that prevents hoisting. It becomes a good
+// negative test because different constants are written to the alloca.
+
+int foo(int i) {
+    int A[] = {1,2,3};
+    return A[i];
+}
+
+int bar(int i) {
+    int A[] = {4,5,6};
+    return A[i];
+}
+
+int main(int i : I) : SV_Target {
+    return foo(i) + bar(i);
+}

+ 29 - 0
tools/clang/test/HLSL/hca/12.hlsl

@@ -0,0 +1,29 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK:     internal unnamed_addr constant [3 x i32]
+// CHECK:     internal unnamed_addr constant [3 x i32]
+// CHECK-NOT: alloca [3 x i32]
+
+// Positive test: an array of structs with two int fields, filled only with
+// constants. Two [3 x i32] constants are expected, presumably one per field
+// after the struct array has been split up earlier in the pipeline
+// (TODO confirm).
+struct S {
+    int x;
+    int y;
+};
+
+int foo(int i) {
+    S A[3];
+    S s;
+    s.x = 1;
+    s.y = 2;
+    A[0] = s;
+    s.x = 3;
+    s.y = 4;
+    A[1] = s;
+    s.x = 5;
+    s.y = 6;
+    A[2] = s;
+
+    return A[i].x + A[i+1].y;
+}
+
+int main(int i : I) : SV_Target {
+    return foo(i);
+}

+ 19 - 0
tools/clang/test/HLSL/hca/13.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK-DAG: internal unnamed_addr constant [3 x i32]
+// CHECK-DAG: internal unnamed_addr constant [4 x i32]
+// CHECK-NOT: alloca [
+
+// Positive test: several constant arrays of different sizes, in helper
+// functions and in main; no array alloca at all is expected to remain.
+int foo(int i) {
+    int A[] = {1,2,3};
+    return A[i];
+}
+
+int bar(int i) {
+    int A[] = {4,5,6,7};
+    return A[i];
+}
+
+int main(int i : I) : SV_Target {
+    int B[] = {1,2,3};
+    return foo(i) + bar(i) + B[i];
+}

+ 30 - 0
tools/clang/test/HLSL/hca/14.hlsl

@@ -0,0 +1,30 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-hca -dce | %FileCheck %s
+// CHECK-DAG:     internal unnamed_addr constant [3 x i32]
+// CHECK-DAG:     internal unnamed_addr constant [3 x float]
+// CHECK-NOT: alloca [3 x i32]
+// CHECK-NOT: alloca [3 x float]
+
+// Positive test: an array of structs with an int and a float field, filled
+// only with constants. One i32 and one float constant array are expected,
+// presumably one per field after the struct array has been split up earlier
+// in the pipeline (TODO confirm).
+struct S {
+    int x;
+    float y;
+};
+
+int foo(int i) {
+    S A[3];
+    S s;
+    s.x = 1;
+    s.y = 2;
+    A[0] = s;
+    s.x = 3;
+    s.y = 4;
+    A[1] = s;
+    s.x = 5;
+    s.y = 6;
+    A[2] = s;
+
+    return A[i].x + A[i+1].y;
+}
+
+int main(int i : I) : SV_Target {
+    return foo(i);
+}

+ 18 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -843,6 +843,7 @@ public:
   TEST_METHOD(CodeGenDx12MiniEngineUpsampleandblurcs)
   TEST_METHOD(DxilGen_StoreOutput)
   TEST_METHOD(ConstantFolding)
+  TEST_METHOD(HoistConstantArray)
 
   dxc::DxcDllSupport m_dllSupport;
   bool m_CompilerPreservesBBNames;
@@ -4196,6 +4197,23 @@ TEST_F(CompilerTest, ConstantFolding) {
   CodeGenTestCheck(L"constprop\\bfi.ll");
 }
 
+// Runs the constant array hoisting (hca) shader tests. Each file carries its
+// own RUN line and expectations; the set covers both arrays that should be
+// hoisted and arrays that must stay local.
+TEST_F(CompilerTest, HoistConstantArray) {
+  CodeGenTestCheck(L"hca\\01.hlsl");
+  CodeGenTestCheck(L"hca\\02.hlsl");
+  CodeGenTestCheck(L"hca\\03.hlsl");
+  CodeGenTestCheck(L"hca\\04.hlsl");
+  CodeGenTestCheck(L"hca\\05.hlsl");
+  CodeGenTestCheck(L"hca\\06.hlsl");
+  CodeGenTestCheck(L"hca\\07.hlsl");
+  CodeGenTestCheck(L"hca\\08.hlsl");
+  CodeGenTestCheck(L"hca\\09.hlsl");
+  CodeGenTestCheck(L"hca\\10.hlsl");
+  CodeGenTestCheck(L"hca\\11.hlsl");
+  CodeGenTestCheck(L"hca\\12.hlsl");
+  CodeGenTestCheck(L"hca\\13.hlsl");
+  CodeGenTestCheck(L"hca\\14.hlsl");
+}
+
 TEST_F(CompilerTest, PreprocessWhenValidThenOK) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;

+ 1 - 0
utils/hct/hctdb.py

@@ -1219,6 +1219,7 @@ class db_dxil(object):
         add_pass('hlsl-dxil-condense', 'DxilCondenseResources', 'DXIL Condense Resources', [])
         add_pass('hlsl-dxilemit', 'DxilEmitMetadata', 'HLSL DXIL Metadata Emit', [])
         add_pass('hlsl-dxilload', 'DxilLoadMetadata', 'HLSL DXIL Metadata Load', [])
+        add_pass('hlsl-hca', 'HoistConstantArray', 'HLSL constant array hoisting', [])
         add_pass('ipsccp', 'IPSCCP', 'Interprocedural Sparse Conditional Constant Propagation', [])
         add_pass('globalopt', 'GlobalOpt', 'Global Variable Optimizer', [])
         add_pass('deadargelim', 'DAE', 'Dead Argument Elimination', [])