Переглянути джерело

Added instructions to preserve intermediate values of computations. (#2721)

Adam Yang 5 роки тому
батько
коміт
227c8e6f5a
28 змінених файлів з 997 додано та 188 видалено
  1. 3 0
      include/dxc/DXIL/DxilMetadataHelper.h
  2. 5 2
      include/llvm/Analysis/DxilValueCache.h
  3. 3 0
      include/llvm/InitializePasses.h
  4. 7 4
      include/llvm/Transforms/Scalar.h
  5. 18 5
      lib/Analysis/ScalarEvolution.cpp
  6. 1 0
      lib/DXIL/DxilMetadataHelper.cpp
  7. 3 2
      lib/HLSL/DxcOptimizer.cpp
  8. 0 5
      lib/HLSL/DxilLegalizeSampleOffsetPass.cpp
  9. 459 81
      lib/HLSL/DxilNoops.cpp
  10. 8 2
      lib/Transforms/IPO/PassManagerBuilder.cpp
  11. 0 38
      lib/Transforms/Scalar/DxilLoopUnroll.cpp
  12. 2 0
      tools/clang/lib/CodeGen/CGDecl.cpp
  13. 5 0
      tools/clang/lib/CodeGen/CodeGenFunction.cpp
  14. 45 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noop_no_fold_double.hlsl
  15. 47 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noop_no_fold_int.hlsl
  16. 56 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noop_out_args.hlsl
  17. 41 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noop_resource_var.hlsl
  18. 28 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noop_void_return.hlsl
  19. 47 29
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl
  20. 34 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_groupshare.hlsl
  21. 31 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_mandatory_immed.hlsl
  22. 38 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_mandatory_immed_load.hlsl
  23. 29 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_memcpy.hlsl
  24. 13 6
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl
  25. 25 12
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl
  26. 19 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_simple_call.hlsl
  27. 27 0
      tools/clang/test/HLSLFileCheck/dxil/debug/preserve_rewrite.hlsl
  28. 3 2
      utils/hct/hctdb.py

+ 3 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -230,6 +230,9 @@ public:
   // Variable debug layout metadata.
   static const char kDxilVariableDebugLayoutMDName[];
 
+  // Indication of temporary storage metadata.
+  static const char kDxilTempAllocaMDName[];
+
   // Validator version.
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*

+ 5 - 2
include/llvm/Analysis/DxilValueCache.h

@@ -1,4 +1,4 @@
-//===--------- DxilValueCache.cpp - Dxil Constant Value Cache ------------===//
+//===--------- DxilValueCache.h - Dxil Constant Value Cache --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,6 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_ANALYSIS_DXILVALUECACHE_H
+#define LLVM_ANALYSIS_DXILVALUECACHE_H
+
 #include "llvm/Pass.h"
 #include "llvm/IR/ValueMap.h"
 
@@ -80,4 +83,4 @@ Pass *createDxilValueCachePass();
 
 }
 
-
+#endif

+ 3 - 0
include/llvm/InitializePasses.h

@@ -266,6 +266,9 @@ void initializeDxilFinalizeNoopsPass(PassRegistry&);
 void initializeDxilEliminateVectorPass(PassRegistry&);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
+void initializeDxilInsertPreservesPass(PassRegistry&);
+void initializeDxilFinalizePreservesPass(PassRegistry&);
+void initializeDxilPreserveToSelectPass(PassRegistry&);
 // HLSL Change Ends
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
 void initializeScalarEvolutionPass(PassRegistry&);

+ 7 - 4
include/llvm/Transforms/Scalar.h

@@ -146,11 +146,14 @@ void initializeDxilEraseDeadRegionPass(PassRegistry&);
 Pass *createDxilEliminateVectorPass();
 void initializeDxilEliminateVectorPass(PassRegistry&);
 
-Pass *createDxilInsertNoopsPass();
-void initializeDxilInsertNoopsPass(PassRegistry&);
+Pass *createDxilInsertPreservesPass();
+void initializeDxilInsertPreservesPass(PassRegistry&);
 
-Pass *createDxilFinalizeNoopsPass();
-void initializeDxilFinalizeNoopsPass(PassRegistry&);
+Pass *createDxilFinalizePreservesPass();
+void initializeDxilFinalizePreservesPass(PassRegistry&);
+
+Pass *createDxilPreserveToSelectPass();
+void initializeDxilPreserveToSelectPass(PassRegistry&);
 
 //===----------------------------------------------------------------------===//
 //

+ 18 - 5
lib/Analysis/ScalarEvolution.cpp

@@ -5619,11 +5619,24 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
   // HLSL Change begin
   SmallVector<std::pair<Instruction *, Constant *>, 4> KnownInvariantOps;
   if (Instruction *CondI = dyn_cast<Instruction>(Cond)) {
-    for (Use &U : CondI->operands()) {
-      if (Instruction *OpI = dyn_cast<Instruction>(U.get())) {
-        if (Value *V = getAnalysis<DxilValueCache>().GetValue(OpI)) {
-          if (Constant *C = dyn_cast<Constant>(V))
-            KnownInvariantOps.push_back({ OpI, C });
+    SmallVector<Instruction *, 4> Worklist;
+    DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
+
+    Worklist.push_back(CondI);
+    while (Worklist.size()) {
+      Instruction *I = Worklist.pop_back_val();
+
+      if (Constant *C = DVC->GetConstValue(I)) {
+        KnownInvariantOps.push_back({ I, C });
+      }
+      else if (CurrentIterVals.count(I)) {
+        continue;
+      }
+      else if (L->contains(I)) {
+        for (Use &U : I->operands()) {
+          if (Instruction *OpI = dyn_cast<Instruction>(U.get())) {
+            Worklist.push_back(OpI);
+          }
         }
       }
     }

+ 1 - 0
lib/DXIL/DxilMetadataHelper.cpp

@@ -51,6 +51,7 @@ const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.type
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
 const char DxilMDHelper::kDxilVariableDebugLayoutMDName[]             = "dx.dbg.varlayout";
+const char DxilMDHelper::kDxilTempAllocaMDName[]                      = "dx.temp";
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";

+ 3 - 2
lib/HLSL/DxcOptimizer.cpp

@@ -97,10 +97,10 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilEraseDeadRegionPass(Registry);
     initializeDxilExpandTrigIntrinsicsPass(Registry);
     initializeDxilFinalizeModulePass(Registry);
-    initializeDxilFinalizeNoopsPass(Registry);
+    initializeDxilFinalizePreservesPass(Registry);
     initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilGenerationPassPass(Registry);
-    initializeDxilInsertNoopsPass(Registry);
+    initializeDxilInsertPreservesPass(Registry);
     initializeDxilLegalizeEvalOperationsPass(Registry);
     initializeDxilLegalizeResourcesPass(Registry);
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
@@ -109,6 +109,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilLowerCreateHandleForLibPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDxilPreserveAllOutputsPass(Registry);
+    initializeDxilPreserveToSelectPass(Registry);
     initializeDxilPromoteLocalResourcesPass(Registry);
     initializeDxilPromoteStaticResourcesPass(Registry);
     initializeDxilSimpleGVNHoistPass(Registry);

+ 0 - 5
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -73,11 +73,6 @@ public:
     // Run simple optimization to legalize offsets.
     LegalizeOffsets(ssaIllegalOffsets);
 
-    // Remove PHINodes to keep code shape.
-    legacy::FunctionPassManager PM(F.getParent());
-    PM.add(createDemoteRegisterToMemoryHlslPass());
-    PM.run(F);
-
     FinalCheck(illegalOffsets, F, hlslOP);
 
     return true;

+ 459 - 81
lib/HLSL/DxilNoops.cpp

@@ -8,28 +8,106 @@
 // Passes to insert dx.noops() and replace them with llvm.donothing()        //
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
+//
+// Here is how dx.preserve and dx.noop work.
+//
+// For example, the following HLSL code:
+//
+//     float foo(float y) {
+//        float x = 10;
+//        x = 20;
+//        x += y;
+//        return x;
+//     }
+//
+//     float main() : SV_Target {
+//       float ret = foo(10);
+//       return ret;
+//     }
+//
+// Ordinarily, it gets lowered as:
+//
+//     dx.op.storeOutput(3.0)
+//
+// Intermediate steps at "x = 20;", "x += y;", "return x", and
+// even the call to "foo()" are lost.
+//
+// But with Preserve and Noop:
+//
+//     void call dx.noop()           // float ret = foo(10);
+//       %y = dx.preserve(10.0, 10.0)  // argument: y=10
+//       %x0 = dx.preserve(10.0, 10.0) // float x = 10;
+//       %x1 = dx.preserve(20.0, %x0)  // x = 20;
+//       %x2 = fadd %x1, %y            // x += y;
+//       void call dx.noop()           // return x
+//     %ret = dx.preserve(%x2, %x2)   // ret = returned from foo()
+//     dx.op.storeOutput(%ret)
+//
+// All the intermediate transformations are visible and could be
+// made inspectable in the debugger.
+//
+// The reason why dx.preserve takes 2 arguments is so that the previous
+// value of a variable does not get cleaned up by DCE. For example:
+//
+//    float x = ...;
+//    do_some_stuff_with(x);
+//    do_some_other_stuff(); // At this point, x's last value
+//                           // is dead and register allocators
+//                           // are free to reuse its location while
+//                           // this code runs.
+//                           // So until x is assigned a new value below,
+//                           // x could become unavailable.
+//                           //
+//                           // The second parameter in dx.preserve
+//                           // keeps x's previous value alive.
+//
+//    x = ...; // Assign something else
+//
+//
+// When emitting proper DXIL, dx.noop and dx.preserve are lowered to
+// ordinary LLVM instructions that do not affect the semantic of the
+// shader, but can be used by a debugger or backend generator if they
+// know what to look for.
+//
+// We generate two special internal constant global vars:
+//
+//      @dx.preserve.value = internal constant i32 0
+//      @dx.nothing = internal constant i32 0
+//
+// "call dx.noop()" is lowered to "load @dx.nothing"
+//
+// "... = call dx.preserve(%cur_val, %last_val)" is lowered to:
+//
+//    %p.load = load @dx.preserve.value
+//    %p = trunc i32 %p.load to i1
+//    ... = select i1 %p, %last_val, %cur_val
+//
+// Since %p is guaranteed to be false, the select is guaranteed
+// to return %cur_val.
+//
 
 #include "llvm/Pass.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "dxc/DXIL/DxilMetadataHelper.h"
+#include "dxc/DXIL/DxilConstants.h"
+
+#include <unordered_set>
 
 using namespace llvm;
 
 namespace {
 StringRef kNoopName = "dx.noop";
+StringRef kPreservePrefix = "dx.preserve.";
 StringRef kNothingName = "dx.nothing";
+StringRef kPreserveName = "dx.preserve.value";
 }
 
-//==========================================================
-// Insertion pass
-//
-
-namespace {
-
-Function *GetOrCreateNoopF(Module &M) {
+static Function *GetOrCreateNoopF(Module &M) {
   LLVMContext &Ctx = M.getContext();
   FunctionType *FT = FunctionType::get(Type::getVoidTy(Ctx), false);
   Function *NoopF = cast<Function>(M.getOrInsertFunction(::kNoopName, FT));
@@ -37,74 +115,334 @@ Function *GetOrCreateNoopF(Module &M) {
   return NoopF;
 }
 
-class DxilInsertNoops : public FunctionPass {
-public:
-  static char ID;
-  DxilInsertNoops() : FunctionPass(ID) {
-    initializeDxilInsertNoopsPass(*PassRegistry::getPassRegistry());
+// Returns true when V is a kind of value whose store FindAllStores records:
+// a constant, a function argument, a load or a call, or an element
+// extracted (possibly through several extractelements) from a vector that
+// is itself one of those.
+static bool ShouldPreserve(Value *V) {
+  // Peel extractelement chains down to the vector they ultimately read.
+  while (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(V))
+    V = Extract->getVectorOperand();
+
+  return isa<Constant>(V) || isa<Argument>(V) || isa<LoadInst>(V) ||
+         isa<CallInst>(V);
+}
 
-  bool runOnFunction(Function &F) override;
-  const char *getPassName() const override { return "Dxil Insert Noops"; }
+struct Store_Info { // One write (store or memcpy) recorded by FindAllStores.
+  Instruction *StoreOrMC = nullptr; // The StoreInst or MemCpyInst that performs the write.
+  Value *Source = nullptr; // Root pointer the walk started from: Alloca, GV, or Argument
+  bool AllowLoads = false; // True when the root is a GV or a load / memcpy read was seen during the walk.
 };
 
-char DxilInsertNoops::ID;
+// Walks every value reachable from Ptr through bitcasts and GEPs and appends
+// one Store_Info to *Stores for each memcpy that is queued, and for each
+// store whose value operand satisfies ShouldPreserve.
+//
+//   Ptr             - root pointer; must be an Argument, AllocaInst or
+//                     GlobalVariable.
+//   Stores          - output list; entries are only ever appended.
+//   WorklistStorage - scratch stack, cleared on entry so callers can reuse
+//                     its allocation across calls.
+//   SeenStorage     - values already queued by this or an earlier call;
+//                     deliberately NOT cleared here.
+//
+// Every entry appended by this call gets AllowLoads = true when Ptr is a
+// GlobalVariable, or when a load or a memcpy reading from the pointer was
+// encountered during the walk.
+static void FindAllStores(Value *Ptr, std::vector<Store_Info> *Stores,
+                          std::vector<Value *> &WorklistStorage,
+                          std::unordered_set<Value *> &SeenStorage) {
+  assert(isa<Argument>(Ptr) || isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr));
+
+  WorklistStorage.clear();
+  WorklistStorage.push_back(Ptr);
+  // Don't clear Seen Storage because two pointers can be involved with the same
+  // memcpy. Clearing it can get the memcpy added twice.
+  SeenStorage.insert(Ptr);
+
+  const size_t StartIdx = Stores->size();
+  // Globals are always treated as loadable.
+  bool AllowLoad = isa<GlobalVariable>(Ptr);
+  while (!WorklistStorage.empty()) {
+    Value *V = WorklistStorage.back();
+    WorklistStorage.pop_back();
+
+    if (isa<BitCastOperator>(V) || isa<GEPOperator>(V) || isa<GlobalVariable>(V) || isa<AllocaInst>(V) || isa<Argument>(V)) {
+      for (User *U : V->users()) {
+        MemCpyInst *MC = dyn_cast<MemCpyInst>(U);
+        // A load, or a memcpy that reads from this pointer, only tells us
+        // the location can be read; it is not a write to record.
+        if (isa<LoadInst>(U) || (MC && MC->getSource() == V)) {
+          AllowLoad = true;
+          continue;
+        }
+        // Everything else is queued, including a memcpy that does not read
+        // from this pointer; skipping those would leave the MemCpyInst case
+        // below unreachable. Marking values as seen when they are queued
+        // (rather than when popped) keeps a user that is reachable twice
+        // from being recorded twice.
+        if (SeenStorage.insert(U).second)
+          WorklistStorage.push_back(U);
+      }
+    }
+    else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+      if (ShouldPreserve(Store->getValueOperand())) {
+        Store_Info Info;
+        Info.StoreOrMC = Store;
+        Info.Source = Ptr;
+        Stores->push_back(Info);
+      }
+    }
+    else if (MemCpyInst *MC = dyn_cast<MemCpyInst>(V)) {
+      Store_Info Info;
+      Info.StoreOrMC = MC;
+      Info.Source = Ptr;
+      Stores->push_back(Info);
+    }
+  }
+
+  if (AllowLoad) {
+    // Only the entries appended by this call belong to Ptr.
+    for (size_t i = StartIdx; i < Stores->size(); ++i)
+      (*Stores)[i].AllowLoads = true;
+  }
+}
+
+// Returns the i1 value that lowered dx.preserve selects in F use as their
+// condition. The module-level internal constant named by kPreserveName (an
+// i32 initialized to 0) is created on first use. Each function gets a single
+// load of it followed by a trunc to i1, inserted after the leading allocas
+// and debug intrinsics of the entry block; later calls for the same function
+// return that existing trunc.
+static Value *GetOrCreatePreserveCond(Function *F) {
+  assert(!F->isDeclaration());
+
+  Module *M = F->getParent();
+  GlobalVariable *GV = M->getGlobalVariable(kPreserveName, true);
+  if (GV) {
+    // Reuse the condition when this function already loads the global.
+    for (User *U : GV->users()) {
+      LoadInst *LI = cast<LoadInst>(U);
+      if (LI->getParent()->getParent() != F)
+        continue;
+
+      // The load is expected to have exactly one user.
+      assert(LI->user_begin() != LI->user_end() &&
+        std::next(LI->user_begin()) == LI->user_end());
+      return *LI->user_begin();
+    }
+  }
+  else {
+    Type *Int32Ty = Type::getInt32Ty(M->getContext());
+    GV = new GlobalVariable(*M,
+      Int32Ty, true,
+      llvm::GlobalValue::InternalLinkage,
+      llvm::ConstantInt::get(Int32Ty, 0), kPreserveName);
+  }
+
+  // Skip past the allocas and debug intrinsics at the top of the entry block.
+  Instruction *InsertPt = &F->getEntryBlock().front();
+  while (isa<AllocaInst>(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+    InsertPt = InsertPt->getNextNode();
+
+  IRBuilder<> Builder(InsertPt);
+  LoadInst *CondLoad = Builder.CreateLoad(GV);
+  return Builder.CreateTrunc(CondLoad, Builder.getInt1Ty());
 }
 
-bool DxilInsertNoops::runOnFunction(Function &F) {
-  Module &M = *F.getParent();
-  Function *NoopF = nullptr;
-  bool Changed = false;
 
-  // Find instructions where we want to insert nops
-  for (BasicBlock &BB : F) {
-    for (BasicBlock::iterator It = BB.begin(), E = BB.end(); It != E;) {
-      bool InsertNop = false;
-      Instruction &I = *(It++);
-      // If we are calling a real function, insert one
-      // at the callsite.
-      if (CallInst *Call = dyn_cast<CallInst>(&I)) {
-        if (Function *F = Call->getCalledFunction()) {
-          if (!F->isDeclaration())
-            InsertNop = true;
+// Returns the declaration of the dx.preserve function for type Ty, creating
+// it in M if needed. The function is named kPreservePrefix followed by the
+// printed form of Ty, has the signature Ty(Ty, Ty), and is marked ReadNone
+// and NoUnwind.
+static Function *GetOrCreatePreserveF(Module *M, Type *Ty) {
+  // Build the per-type name: prefix + textual type.
+  std::string FnName = kPreservePrefix;
+  raw_string_ostream NameStream(FnName);
+  Ty->print(NameStream);
+  NameStream.flush();
+
+  Type *ParamTys[] = { Ty, Ty };
+  FunctionType *FnTy = FunctionType::get(Ty, ParamTys, false);
+  Function *PreserveF = cast<Function>(M->getOrInsertFunction(FnName, FnTy));
+  PreserveF->addFnAttr(Attribute::AttrKind::ReadNone);
+  PreserveF->addFnAttr(Attribute::AttrKind::NoUnwind);
+  return PreserveF;
+}
+
+// Inserts "call dx.preserve.<type>(V, LastV)" immediately before InsertPt
+// and returns the new call. V and LastV must have the same type.
+static Instruction *CreatePreserve(Value *V, Value *LastV, Instruction *InsertPt) {
+  assert(V->getType() == LastV->getType());
+  Function *PreserveF = GetOrCreatePreserveF(InsertPt->getModule(), V->getType());
+  Value *CallArgs[] = { V, LastV };
+  return CallInst::Create(PreserveF, CallArgs, "", InsertPt);
+}
+
+// Replaces one dx.preserve call with
+//   select i1 <preserve cond>, <previous value>, <current value>
+// at the same position, carrying over the call's debug location, and then
+// erases the call. When both call operands are the same value, undef is used
+// as the previous value.
+static void LowerPreserveToSelect(CallInst *CI) {
+  Value *CurV = CI->getArgOperand(0);
+  Value *PrevV = CI->getArgOperand(1);
+
+  Value *TrueV = PrevV;
+  if (PrevV == CurV)
+    TrueV = UndefValue::get(CurV->getType());
+
+  Function *ParentF = CI->getParent()->getParent();
+  Value *Cond = GetOrCreatePreserveCond(ParentF);
+
+  SelectInst *Replacement = SelectInst::Create(Cond, TrueV, CurV, "", CI);
+  Replacement->setDebugLoc(CI->getDebugLoc());
+  CI->replaceAllUsesWith(Replacement);
+  CI->eraseFromParent();
+}
+
+// Inserts a call to dx.noop immediately before I and gives it I's debug
+// location.
+static void InsertNoopAt(Instruction *I) {
+  Function *NoopF = GetOrCreateNoopF(*I->getModule());
+  CallInst *NoopCall = CallInst::Create(NoopF, {}, I);
+  NoopCall->setDebugLoc(I->getDebugLoc());
+}
+
+
+//==========================================================
+// Insertion pass
+//
+// This pass inserts dx.noop and dx.preserve where we want
+// to preserve line mapping or preserve some intermediate
+// values.
+
+struct DxilInsertPreserves : public ModulePass {
+  static char ID;
+  DxilInsertPreserves() : ModulePass(ID) {
+    initializeDxilInsertPreservesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+
+    std::vector<Store_Info> Stores;
+    std::vector<Value *> WorklistStorage;
+    std::unordered_set<Value *> SeenStorage;
+
+    for (GlobalVariable &GV : M.globals()) {
+      if (GV.getLinkage() != GlobalValue::LinkageTypes::InternalLinkage ||
+        GV.getType()->getPointerAddressSpace() == hlsl::DXIL::kTGSMAddrSpace)
+      {
+        continue;
+      }
+
+      for (User *U : GV.users()) {
+        if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+          InsertNoopAt(LI);
         }
       }
-      else if (MemCpyInst *MC = dyn_cast<MemCpyInst>(&I)) {
-        InsertNop = true;
+
+      FindAllStores(&GV, &Stores, WorklistStorage, SeenStorage);
+    }
+
+    bool Changed = false;
+    for (Function &F : M) {
+      if (F.isDeclaration())
+        continue;
+
+      // Collect Stores on Allocas in function
+      BasicBlock *Entry = &*F.begin();
+      for (Instruction &I : *Entry) {
+        AllocaInst *AI = dyn_cast<AllocaInst>(&I);
+        if (!AI)
+          continue;
+        // Skip temp allocas
+        if (!AI->getMetadata(hlsl::DxilMDHelper::kDxilTempAllocaMDName))
+          FindAllStores(AI, &Stores, WorklistStorage, SeenStorage);
+      }
+
+      // Collect Stores on pointer Arguments in function
+      for (Argument &Arg : F.args()) {
+        if (Arg.getType()->isPointerTy())
+          FindAllStores(&Arg, &Stores, WorklistStorage, SeenStorage);
       }
-      // If we have a copy, e.g:
-      //     float x = 0;
-      //     float y = x;    <---- copy
-      // insert a nop there.
-      else if (StoreInst *Store = dyn_cast<StoreInst>(&I)) {
+
+      // For every real function call, insert a nop
+      // so we can put a breakpoint there.
+      for (User *U : F.users()) {
+        if (CallInst *CI = dyn_cast<CallInst>(U)) {
+          InsertNoopAt(CI);
+        }
+      }
+
+      // Insert nops for void return statements
+      for (BasicBlock &BB : F) {
+        ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+        if (Ret)
+          InsertNoopAt(Ret);
+      }
+    }
+
+    // Insert preserves or noops for these stores
+    for (Store_Info &Info : Stores) {
+      if (StoreInst *Store = dyn_cast<StoreInst>(Info.StoreOrMC)) {
         Value *V = Store->getValueOperand();
-        if (isa<LoadInst>(V) || isa<Constant>(V))
-          InsertNop = true;
+
+        if (V &&
+          !V->getType()->isAggregateType() &&
+          !V->getType()->isPointerTy())
+        {
+          IRBuilder<> B(Store);
+          Value *Last_Value = nullptr;
+          // If there's never any loads for this memory location,
+          // don't generate a load.
+          if (Info.AllowLoads) {
+            Last_Value = B.CreateLoad(Store->getPointerOperand());
+          }
+          else {
+            Last_Value = UndefValue::get(V->getType());
+          }
+
+          Instruction *Preserve = CreatePreserve(V, Last_Value, Store);
+          Preserve->setDebugLoc(Store->getDebugLoc());
+          Store->replaceUsesOfWith(V, Preserve);
+
+          Changed = true;
+        }
+        else {
+          InsertNoopAt(Store);
+        }
       }
-      // If we have a return, just to be safe.
-      else if (ReturnInst *Ret = dyn_cast<ReturnInst>(&I)) {
-        InsertNop = true;
+      else if (MemCpyInst *MC = cast<MemCpyInst>(Info.StoreOrMC)) {
+        // TODO: Do something to preserve pointer's previous value.
+        InsertNoopAt(MC);
       }
+    }
+
+    return Changed;
+  }
+
+  const char *getPassName() const override { return "Dxil Insert Preserves"; }
+};
+
+char DxilInsertPreserves::ID;
+
+Pass *llvm::createDxilInsertPreservesPass() {
+  return new DxilInsertPreserves();
+}
+
+INITIALIZE_PASS(DxilInsertPreserves, "dxil-insert-preserves", "Dxil Insert Preserves", false, false)
+
+
+//==========================================================
+// Lower dx.preserve to select
+//
+// This pass replaces all dx.preserve calls with select instructions
+//
+
+namespace {
+
+class DxilPreserveToSelect : public ModulePass {
+public:
+  static char ID;
+
+  SmallDenseMap<Type *, Function *> PreserveFunctions;
+
+  DxilPreserveToSelect() : ModulePass(ID) {
+    initializeDxilPreserveToSelectPass(*PassRegistry::getPassRegistry());
+  }
 
-      // Do the insertion
-      if (InsertNop) {
-        if (!NoopF) 
-          NoopF = GetOrCreateNoopF(M);
-        CallInst *Noop = CallInst::Create(NoopF, {}, &I);
-        Noop->setDebugLoc(I.getDebugLoc());
+  bool runOnModule(Module &M) override {
+    bool Changed = false;
+    for (auto fit = M.getFunctionList().begin(), end = M.getFunctionList().end();
+      fit != end;)
+    {
+      Function *F = &*(fit++);
+      if (!F->isDeclaration())
+        continue;
+
+      if (F->getName().startswith(kPreservePrefix)) {
+        for (auto uit = F->user_begin(), end = F->user_end(); uit != end;) {
+          User *U = *(uit++);
+          CallInst *CI = cast<CallInst>(U);
+          LowerPreserveToSelect(CI);
+        }
+
+        F->eraseFromParent();
         Changed = true;
       }
     }
+
+    return Changed;
   }
+  const char *getPassName() const override { return "Dxil Lower Preserves to Selects"; }
+};
 
-  return Changed;
+char DxilPreserveToSelect::ID;
 }
 
-Pass *llvm::createDxilInsertNoopsPass() {
-  return new DxilInsertNoops();
+Pass *llvm::createDxilPreserveToSelectPass() {
+  return new DxilPreserveToSelect();
 }
 
-INITIALIZE_PASS(DxilInsertNoops, "dxil-insert-noops", "Dxil Insert Noops", false, false)
+// Register the pass under its own argument and description rather than
+// those of the removed "dxil-insert-noops" pass; the description matches
+// getPassName(). NOTE(review): the pass list in utils/hct/hctdb.py must use
+// the same argument string - confirm.
+INITIALIZE_PASS(DxilPreserveToSelect, "dxil-preserves-to-select", "Dxil Lower Preserves to Selects", false, false)
 
 
 //==========================================================
@@ -113,53 +451,82 @@ INITIALIZE_PASS(DxilInsertNoops, "dxil-insert-noops", "Dxil Insert Noops", false
 
 namespace {
 
-class DxilFinalizeNoops : public ModulePass {
+class DxilFinalizePreserves : public ModulePass {
 public:
   static char ID;
   GlobalVariable *NothingGV = nullptr;
 
-  DxilFinalizeNoops() : ModulePass(ID) {
-    initializeDxilFinalizeNoopsPass(*PassRegistry::getPassRegistry());
+  DxilFinalizePreserves() : ModulePass(ID) {
+    initializeDxilFinalizePreservesPass(*PassRegistry::getPassRegistry());
   }
 
   Instruction *GetFinalNoopInst(Module &M, Instruction *InsertBefore) {
-  if (!NothingGV) {
-    NothingGV = M.getGlobalVariable(kNothingName);
     if (!NothingGV) {
-      Type *i32Ty = Type::getInt32Ty(M.getContext());
-      NothingGV = new GlobalVariable(M,
-        i32Ty, true,
-        llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantInt::get(i32Ty, 0), kNothingName);
+      NothingGV = M.getGlobalVariable(kNothingName);
+      if (!NothingGV) {
+        Type *i32Ty = Type::getInt32Ty(M.getContext());
+        NothingGV = new GlobalVariable(M,
+          i32Ty, true,
+          llvm::GlobalValue::InternalLinkage,
+          llvm::ConstantInt::get(i32Ty, 0), kNothingName);
+      }
     }
-  }
 
-  return new llvm::LoadInst(NothingGV, nullptr, InsertBefore);
-}
+    return new llvm::LoadInst(NothingGV, nullptr, InsertBefore);
+  }
 
+  bool LowerPreserves(Module &M);
+  bool LowerNoops(Module &M);
   bool runOnModule(Module &M) override;
-  const char *getPassName() const override { return "Dxil Finalize Noops"; }
+  const char *getPassName() const override { return "Dxil Finalize Preserves"; }
 };
 
-char DxilFinalizeNoops::ID;
+char DxilFinalizePreserves::ID;
 }
 
-// Replace all @dx.noop's with @llvm.donothing
-bool DxilFinalizeNoops::runOnModule(Module &M) {
+// Fix undefs in the dx.preserve -> selects.
+//
+// For every select driven by the condition derived from the kPreserveName
+// global, a "previous value" (true) operand that is a constant is replaced
+// with the select's current (false) value. Returns true if any select was
+// rewritten.
+bool DxilFinalizePreserves::LowerPreserves(Module &M) {
+  GlobalVariable *GV = M.getGlobalVariable(kPreserveName, true);
+  if (!GV)
+    return false;
+
+  bool Changed = false;
+  for (User *U : GV->users()) {
+    LoadInst *LI = cast<LoadInst>(U);
+    // Each load of the global is expected to have exactly one user.
+    assert(LI->user_begin() != LI->user_end() &&
+      std::next(LI->user_begin()) == LI->user_end());
+    Instruction *Cond = cast<Instruction>(*LI->user_begin());
+
+    for (User *CondUser : Cond->users()) {
+      SelectInst *Select = cast<SelectInst>(CondUser);
+
+      // UndefValue is itself a Constant, so this single test also covers
+      // the undef case.
+      if (!isa<Constant>(Select->getTrueValue()))
+        continue;
+
+      // Operand 1 is the select's true value.
+      Select->setOperand(1, Select->getFalseValue());
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+// Replace all @dx.noop's with load @dx.nothing
+bool DxilFinalizePreserves::LowerNoops(Module &M) {
+  bool Changed = false;
+
   Function *NoopF = nullptr;
   for (Function &F : M) {
     if (!F.isDeclaration())
       continue;
-    if (F.getName() == ::kNoopName) {
+    if (F.getName() == kNoopName) {
       NoopF = &F;
-      break;
     }
   }
 
-  if (!NoopF)
-    return false;
-
-  if (!NoopF->user_empty()) {
+  if (NoopF) {
     for (auto It = NoopF->user_begin(), E = NoopF->user_end(); It != E;) {
       User *U = *(It++);
       CallInst *CI = cast<CallInst>(U);
@@ -168,18 +535,29 @@ bool DxilFinalizeNoops::runOnModule(Module &M) {
       Nop->setDebugLoc(CI->getDebugLoc());
 
       CI->eraseFromParent();
+      Changed = true;
     }
+
+    assert(NoopF->user_empty() && "dx.noop calls must be all removed now");
+    NoopF->eraseFromParent();
   }
 
-  assert(NoopF->user_empty() && "dx.noop calls must be all removed now");
-  NoopF->eraseFromParent();
+  return Changed;
+}
 
-  return true;
+// Replace all preserves and nops.
+// Runs LowerPreserves first, then LowerNoops; returns true if either one
+// changed the module.
+bool DxilFinalizePreserves::runOnModule(Module &M) {
+  const bool PreservesChanged = LowerPreserves(M);
+  const bool NoopsChanged = LowerNoops(M);
+  return PreservesChanged || NoopsChanged;
+}
 
-Pass *llvm::createDxilFinalizeNoopsPass() {
-  return new DxilFinalizeNoops();
+Pass *llvm::createDxilFinalizePreservesPass() {
+  return new DxilFinalizePreserves();
 }
 
-INITIALIZE_PASS(DxilFinalizeNoops, "dxil-finalize-noops", "Dxil Finalize Noops", false, false)
+INITIALIZE_PASS(DxilFinalizePreserves, "dxil-finalize-preserves", "Dxil Finalize Preserves", false, false)
 

+ 8 - 2
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -322,9 +322,11 @@ void PassManagerBuilder::populateModulePassManager(
   if (OptLevel == 0) {
     if (!HLSLHighLevel) {
       MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
-      MPM.add(createDxilInsertNoopsPass()); // HLSL Change - insert noop instructions
     }
 
+    if (!HLSLHighLevel)
+      MPM.add(createDxilInsertPreservesPass()); // HLSL Change - insert preserve instructions
+
     if (Inliner) {
       MPM.add(Inliner);
       Inliner = nullptr;
@@ -340,7 +342,11 @@ void PassManagerBuilder::populateModulePassManager(
     else if (!Extensions.empty()) // HLSL Change - GlobalExtensions not considered
       MPM.add(createBarrierNoopPass());
 
+    if (!HLSLHighLevel)
+      MPM.add(createDxilPreserveToSelectPass()); // HLSL Change - lower preserve instructions to selects
+
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+
     // HLSL Change Begins.
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
@@ -349,7 +355,7 @@ void PassManagerBuilder::populateModulePassManager(
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilLegalizeSampleOffsetPass());
-      MPM.add(createDxilFinalizeNoopsPass());
+      MPM.add(createDxilFinalizePreservesPass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createComputeViewIdStatePass());
       MPM.add(createDxilDeadFunctionEliminationPass());

+ 0 - 38
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -171,39 +171,6 @@ static bool GetConstantI1(Value *V, bool *Val=nullptr) {
   return false;
 }
 
-// Copied from llvm::SimplifyInstructionsInBlock
-static bool SimplifyInstructionsInBlock_NoDelete(BasicBlock *BB,
-                                       const TargetLibraryInfo *TLI) {
-  bool MadeChange = false;
-
-#ifndef NDEBUG
-  // In debug builds, ensure that the terminator of the block is never replaced
-  // or deleted by these simplifications. The idea of simplification is that it
-  // cannot introduce new instructions, and there is no way to replace the
-  // terminator of a block without introducing a new instruction.
-  AssertingVH<Instruction> TerminatorVH(--BB->end());
-#endif
-
-  for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
-    assert(!BI->isTerminator());
-    Instruction *Inst = BI++;
-
-    WeakVH BIHandle(BI);
-    if (recursivelySimplifyInstruction(Inst, TLI)) {
-      MadeChange = true;
-      if (BIHandle != BI)
-        BI = BB->begin();
-      continue;
-    }
-#if 0 // HLSL Change
-    MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
-#endif // HLSL Change
-    if (BIHandle != BI)
-      BI = BB->begin();
-  }
-  return MadeChange;
-}
-
 static bool IsMarkedFullUnroll(Loop *L) {
   if (MDNode *LoopID = L->getLoopID())
     return GetUnrollMetadata(LoopID, "llvm.loop.unroll.full");
@@ -972,11 +939,6 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
       }
     }
 
-    // Simplify instructions in the cloned blocks to create
-    // constant exit conditions.
-    for (BasicBlock *ClonedBB : CurIteration.Body)
-      SimplifyInstructionsInBlock_NoDelete(ClonedBB, NULL);
-
     // Check exit condition to see if we fully unrolled the loop
     if (BranchInst *BI = dyn_cast<BranchInst>(CurIteration.Latch->getTerminator())) {
       bool Cond = false;

+ 2 - 0
tools/clang/lib/CodeGen/CGDecl.cpp

@@ -29,6 +29,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
 #include "CGHLSLRuntime.h" // HLSL Change
+#include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change
 using namespace clang;
 using namespace CodeGen;
 
@@ -1788,6 +1789,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg,
       // Otherwise, create a temporary to hold the value.
       llvm::AllocaInst *Alloc =
           CreateTempAlloca(ConvertTypeForMem(Ty), D.getName() + ".addr");
+      Alloc->setMetadata(hlsl::DxilMDHelper::kDxilTempAllocaMDName, llvm::MDTuple::get(Alloc->getContext(), {})); // HLSL Change
       Alloc->setAlignment(Align.getQuantity());
       DeclPtr = Alloc;
       DoStore = true;

+ 5 - 0
tools/clang/lib/CodeGen/CodeGenFunction.cpp

@@ -32,6 +32,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
+#include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change
 using namespace clang;
 using namespace CodeGen;
 
@@ -731,6 +732,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     ReturnValue = Builder.CreateLoad(Addr, "agg.result");
   } else {
     ReturnValue = CreateIRTemp(RetTy, "retval");
+    // HLSL Change begin
+    cast<llvm::Instruction>(ReturnValue)
+      ->setMetadata(hlsl::DxilMDHelper::kDxilTempAllocaMDName, llvm::MDTuple::get(ReturnValue->getContext(), {}));
+    // HLSL Change end
 
     // Tell the epilog emitter to autorelease the result.  We do this
     // now so that various specialized functions can suppress it

+ 45 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noop_no_fold_double.hlsl

@@ -0,0 +1,45 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic are not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  double x = 10;
+  // select i1 %[[p]], double 1.000000e+01, %[[preserve_f64]]
+
+  double y = x + 5;
+  // CHECK: %[[a1:.+]] = fadd
+  // select i1 %[[p]], double [[a1]], double [[a1]]
+
+  double z = y * 2;
+  // CHECK: %[[b1:.+]] = fmul
+  // select i1 %[[p]], double [[b1]], double [[b1]]
+
+  double w = z / 0.5;
+  // CHECK: %[[c1:.+]] = fdiv
+  // select i1 %[[p]], double [[c1]], double [[c1]]
+
+  Texture2D tex = tex0; 
+  // CHECK: load i32, i32* @dx.nothing
+
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+
+  return tex.Load(0) + float4(x,y,z,w);
+}
+

+ 47 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noop_no_fold_int.hlsl

@@ -0,0 +1,47 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic are not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  int x = 10;
+  // select i1 %[[p]], i32 10, i32 10
+
+  int y = x + 5;
+  // CHECK: %[[a1:.+]] = add
+  // select i1 %[[p]], i32 [[a1]], i32 [[a1]]
+
+  int z = y * 2;
+  // CHECK: %[[b1:.+]] = mul
+  // select i1 %[[p]], i32 [[b1]], i32 [[b1]]
+
+  int w = z / 0.5;
+  // CHECK: sitofp
+  // CHECK: fdiv
+  // CHECK: %[[c1:.+]] = fptosi
+  // select i1 %[[p]], i32 [[c1]], i32 [[c1]]
+
+  Texture2D tex = tex0; 
+  // CHECK: load i32, i32* @dx.nothing
+
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+
+  return tex.Load(0) + float4(x,y,z,w);
+}
+

+ 56 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noop_out_args.hlsl

@@ -0,0 +1,56 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+struct S {
+  float x;
+  float y;
+};
+
+void foo(out S arg) {
+  arg.x = 20;
+  arg.y = 30;
+  return;
+}
+
+void bar(inout S arg) {
+  arg.x *= 2;
+  arg.y *= 3;
+  return;
+}
+
+void baz(inout float x, inout float y) {
+  x *= 0.5;
+  y *= 0.5;
+  return;
+}
+
+[RootSignature("")]
+float main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  S s;
+
+  // CHECK: load i32, i32* @dx.nothing
+  foo(s);
+    // CHECK: select i1 %[[p]]
+    // CHECK: select i1 %[[p]]
+    // CHECK: load i32, i32* @dx.nothing
+
+  // CHECK: load i32, i32* @dx.nothing
+  bar(s);
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: load i32, i32* @dx.nothing
+
+  // CHECK: load i32, i32* @dx.nothing
+  baz(s.x, s.y);
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: load i32, i32* @dx.nothing
+
+  // CHECK: fadd
+  return s.x + s.y;
+}
+
+
+

+ 41 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noop_resource_var.hlsl

@@ -0,0 +1,41 @@
+// RUN: %dxc -E main -T cs_6_0 %s -Od | FileCheck %s
+
+RWBuffer<float> uav : register(u0);
+
+cbuffer cb : register(b0) {
+  float foo;
+  uint i;
+}
+
+static RWBuffer<float> my_uav;
+
+void store_things() {
+  float val = sin(foo);
+  RWBuffer<float> local_uav = my_uav;
+  local_uav = my_uav;
+  local_uav[i] = val;
+}
+
+[numthreads(1, 1, 1)]
+[RootSignature("CBV(b0), DescriptorTable(UAV(u0))")]
+void main() {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  // CHECK: load i32, i32* @dx.nothing
+  my_uav = uav;
+
+  // select i1 [[p]],
+  float ret = foo;
+
+  // CHECK: load i32, i32* @dx.nothing
+  store_things();
+    // CHECK: unary.f32(i32 13
+    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: call void @dx.op.bufferStore.f32(
+
+  // CHECK: call void @dx.op.bufferStore.f32(
+  uav[i] = ret;
+}
+

+ 28 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noop_void_return.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+static float my_glob;
+
+void foo() {
+  my_glob = 10;
+  return;
+}
+
+[RootSignature("")]
+float main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  // CHECK: select i1 %[[p]]
+  my_glob = 0;
+
+  // Function call
+  // CHECK: load i32, i32* @dx.nothing
+  foo();
+    // CHECK: select i1 %[[p]]
+    // void return
+
+  return my_glob;
+}
+
+
+

+ 47 - 29
tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl

@@ -65,23 +65,30 @@ float4 depth2(float4 val)
 [RootSignature("")]
 float4 main( float4 unused : SV_POSITION, float4 color : COLOR ) : SV_Target
 {
+    // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+    // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
     float4 ret1 = localScopeVar_func(color);
     // ** call **
     // CHECK: load i32, i32* @dx.nothing
-    // CHECK: fmul
-    // CHECK: fmul
-    // CHECK: fmul
-    // CHECK: fmul
+    // CHECK: %[[v1:.+]] = fmul
+    // CHECK: %[[v2:.+]] = fmul
+    // CHECK: %[[v3:.+]] = fmul
+    // CHECK: %[[v4:.+]] = fmul
+    // CHECK: select i1 %[[p]], float %[[v1]], float %[[v1]]
+    // CHECK: select i1 %[[p]], float %[[v2]], float %[[v2]]
+    // CHECK: select i1 %[[p]], float %[[v3]], float %[[v3]]
+    // CHECK: select i1 %[[p]], float %[[v4]], float %[[v4]]
     // ** return **
-    // CHECK: load i32, i32* @dx.nothing
 
     float4 ret2 = localRegVar_func(ret1);
     // ** call **
     // CHECK: load i32, i32* @dx.nothing
     // ** copy **
-    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: select i1 %[[p]],
+    // CHECK: select i1 %[[p]],
+    // CHECK: select i1 %[[p]],
+    // CHECK: select i1 %[[p]],
     // ** return **
-    // CHECK: load i32, i32* @dx.nothing
 
     float4 ret3 = array_func(ret2);
     // ** call **
@@ -95,25 +102,29 @@ float4 main( float4 unused : SV_POSITION, float4 color : COLOR ) : SV_Target
     // CHECK: load
     // CHECK: load
     // ** return **
-    // CHECK: load i32, i32* @dx.nothing
 
     float4 ret4 = typedef_func(ret3);
     // ** call **
     // CHECK: load i32, i32* @dx.nothing
     // ** copy **
-    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: select i1 %[[p]], float %{{.+}}
+    // CHECK: select i1 %[[p]], float %{{.+}}
+    // CHECK: select i1 %[[p]], float %{{.+}}
+    // CHECK: select i1 %[[p]], float %{{.+}}
     // ** return **
-    // CHECK: load i32, i32* @dx.nothing
 
     float4 ret5 = global_func(ret4);
     // ** call **
     // CHECK: load i32, i32* @dx.nothing
-    // CHECK: fmul
-    // CHECK: fmul
-    // CHECK: fmul
-    // CHECK: fmul
+    // CHECK: %[[a1:.+]] = fmul
+    // CHECK: %[[a2:.+]] = fmul
+    // CHECK: %[[a3:.+]] = fmul
+    // CHECK: %[[a4:.+]] = fmul
+    // CHECK: select i1 %[[p]], float %[[a1]], float %[[a1]]
+    // CHECK: select i1 %[[p]], float %[[a2]], float %[[a2]]
+    // CHECK: select i1 %[[p]], float %[[a3]], float %[[a3]]
+    // CHECK: select i1 %[[p]], float %[[a4]], float %[[a4]]
     // ** return **
-    // CHECK: load i32, i32* @dx.nothing
 
     float4 ret6 = depth2(ret5);
     // ** call **
@@ -125,26 +136,33 @@ float4 main( float4 unused : SV_POSITION, float4 color : COLOR ) : SV_Target
         // ** call **
         // CHECK: load i32, i32* @dx.nothing
         // depth4() {
-          // CHECK: fmul
-          // CHECK: fmul
-          // CHECK: fmul
-          // CHECK: fmul
+          // CHECK: %[[b1:.+]] = fmul
+          // CHECK: %[[b2:.+]] = fmul
+          // CHECK: %[[b3:.+]] = fmul
+          // CHECK: %[[b4:.+]] = fmul
           // CHECK: load i32, i32* @dx.nothing
         // }
-        // CHECK: fmul
-        // CHECK: fmul
-        // CHECK: fmul
-        // CHECK: fmul
+        // CHECK: %[[c1:.+]] = fmul
+        // CHECK: %[[c2:.+]] = fmul
+        // CHECK: %[[c3:.+]] = fmul
+        // CHECK: %[[c4:.+]] = fmul
         // CHECK: load i32, i32* @dx.nothing
       // }
-      // CHECK: fmul
-      // CHECK: fmul
-      // CHECK: fmul
-      // CHECK: fmul
-      // CHECK: load i32, i32* @dx.nothing
+      // CHECK: %[[d1:.+]] = fmul
+      // CHECK: %[[d2:.+]] = fmul
+      // CHECK: %[[d3:.+]] = fmul
+      // CHECK: %[[d4:.+]] = fmul
     // }
+    // CHECK: select i1 %[[p]], float %{{.+}}, float %[[d1]]
+    // CHECK: select i1 %[[p]], float %{{.+}}, float %[[d2]]
+    // CHECK: select i1 %[[p]], float %{{.+}}, float %[[d3]]
+    // CHECK: select i1 %[[p]], float %{{.+}}, float %[[d4]]
 
     return max(ret6, color);
-    // CHECK: load i32, i32* @dx.nothing
+    // CHECK: call float @dx.op.binary.f32(i32 35
+    // CHECK: call float @dx.op.binary.f32(i32 35
+    // CHECK: call float @dx.op.binary.f32(i32 35
+    // CHECK: call float @dx.op.binary.f32(i32 35
+
 }
 

+ 34 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_groupshare.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T cs_6_0 %s -Od | FileCheck %s
+
+RWBuffer<float> uav : register(u0);
+
+cbuffer cb : register(b0) {
+  float foo;
+  uint i;
+}
+
+groupshared float bar;
+
+[numthreads(1, 1, 1)]
+[RootSignature("CBV(b0), DescriptorTable(UAV(u0))")]
+void main() {
+
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  // CHECK: store
+  bar = 1;
+
+  // CHECK: store
+  bar = foo;
+
+  // select i1 [[p]],
+  float ret = foo;
+
+  // select i1 [[p]],
+  ret = bar;
+
+  // CHECK: call void @dx.op.bufferStore.f32(
+  uav[i] = ret;
+}
+

+ 31 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_mandatory_immed.hlsl

@@ -0,0 +1,31 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away and
+// does not impact things that require a constant (like sample offset).
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+SamplerState samp0 : register(s0);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1)), DescriptorTable(Sampler(s0))")]
+float4 main(float2 uv : TEXCOORD) : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  int a = -8;
+  // CHECK: %[[preserve_a:.+]] = select i1 %[[p]], i32 -8, i32 -8
+
+  int b = 7;
+  // CHECK: %[[preserve_b:.+]] = select i1 %[[p]], i32 7, i32 7
+
+  int d = a;
+  // CHECK: %[[preserve_d:.+]] = select i1 %[[p]], i32 %[[preserve_a]], i32 %[[preserve_a]]
+
+  int e = b + a;
+  // CHECK: %[[add:.+]] = add
+
+  // CHECK: call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, 
+  // CHECK-SAME: i32 -8, i32 -1
+  return tex0.Sample(samp0, uv, int2(d,e));
+}
+

+ 38 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_mandatory_immed_load.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away and
+// does not impact things that require a constant (like sample offset).
+
+static const int2 offsets[] = {
+  int2(-1,-1),
+  int2(1,-1),
+  int2(1,1),
+  int2(7,-8),
+};
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+SamplerState samp0 : register(s0);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1)), DescriptorTable(Sampler(s0))")]
+float4 main(float2 uv : TEXCOORD) : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  int a = 1;
+  // CHECK: %[[a:.+]] = select i1 %[[p]], i32 1, i32 1
+
+  int b = 2;
+  // CHECK: %[[b:.+]] = select i1 %[[p]], i32 2, i32 2
+
+  int d = a;
+  // CHECK: %[[d:.+]] = select i1 %[[p]], i32 %[[a]], i32 %[[a]]
+
+  int e = d + b;
+  // CHECK: %[[add:.+]] = add
+
+  // CHECK: call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, 
+  // CHECK-SAME: i32 7, i32 -8
+  return tex0.Sample(samp0, uv, offsets[e]);
+}
+

+ 29 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_memcpy.hlsl

@@ -0,0 +1,29 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od /Zi | FileCheck %s
+
+struct S {
+  float x;
+  float y;
+};
+
+float main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  S a = { 0.f, 1.f};
+  // CHECK: select i1 %[[p]], float
+  // CHECK: select i1 %[[p]], float
+
+  S b = { 2.f, 3.f};
+  // CHECK: select i1 %[[p]], float
+  // CHECK: select i1 %[[p]], float
+
+  S c = { a.x+b.x, a.y+b.y };
+  // CHECK: fmul
+  // CHECK: fmul
+
+  S d = c;
+  // Memcpy should just get lowered to a noop for now.
+  // CHECK: load i32, i32* @dx.nothing
+
+  return d.x + d.y;
+}

+ 13 - 6
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
 
 // Test that non-const arithmetic are not optimized away
 
@@ -7,16 +7,23 @@ Texture2D tex1 : register(t1);
 
 [RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
 float4 main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
 
   float x = 10;
-  // CHECK: load i32, i32* @dx.nothing
+  // select i1 %[[p]], float 1.000000e+01, float 1.000000e+01
 
   float y = x + 5;
-  // CHECK: fadd
+  // CHECK: %[[a1:.+]] = fadd
+  // select i1 %[[p]], float [[a1]], float [[a1]]
+
   float z = y * 2;
-  // CHECK: fmul
+  // CHECK: %[[b1:.+]] = fmul
+  // select i1 %[[p]], float [[b1]], float [[b1]]
+
   float w = z / 0.5;
-  // CHECK: fdiv
+  // CHECK: %[[c1:.+]] = fdiv
+  // select i1 %[[p]], float [[c1]], float [[c1]]
 
   Texture2D tex = tex0; 
   // CHECK: load i32, i32* @dx.nothing
@@ -32,7 +39,7 @@ float4 main() : SV_Target {
   // CHECK: fadd
   // CHECK: fadd
   // CHECK: fadd
+
   return tex.Load(0) + float4(x,y,z,w);
-  // CHECK: load i32, i32* @dx.nothing
 }
 

+ 25 - 12
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl

@@ -9,21 +9,30 @@ Texture2D tex1 : register(t1);
 
 [RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
 float4 main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
 
   float2 xy = float2(10, 20);
-  // CHECK: load i32, i32* @dx.nothing
+  // select i1 %[[p]], float 1.000000e+01, float 1.000000e+01
+  // select i1 %[[p]], float 2.000000e+01, float 2.000000e+01
 
   float2 zw = xy + float2(5, 30);
-  // CHECK: fadd
-  // CHECK: fadd
+  // CHECK: %[[a1:.+]] = fadd
+  // CHECK: %[[a2:.+]] = fadd
+  // select i1 %[[p]], float [[a1]], float [[a1]]
+  // select i1 %[[p]], float [[a2]], float [[a2]]
 
   float2 foo = zw * 2;
-  // CHECK: fmul
-  // CHECK: fmul
+  // CHECK: %[[b1:.+]] = fmul
+  // CHECK: %[[b2:.+]] = fmul
+  // select i1 %[[p]], float [[b1]], float [[b1]]
+  // select i1 %[[p]], float [[b2]], float [[b2]]
 
   float2 bar = foo / 0.5;
-  // CHECK: fdiv
-  // CHECK: fdiv
+  // CHECK: %[[c1:.+]] = fdiv
+  // CHECK: %[[c2:.+]] = fdiv
+  // select i1 %[[p]], float [[c1]], float [[c1]]
+  // select i1 %[[p]], float [[c2]], float [[c2]]
 
   Texture2D tex = tex0; 
   // CHECK: load i32, i32* @dx.nothing
@@ -35,12 +44,16 @@ float4 main() : SV_Target {
     // CHECK: br
   }
 
-  // CHECK: fadd
-  // CHECK: fadd
-  // CHECK: fadd
-  // CHECK: fadd
+  // CHECK: %[[d1:.+]] = fadd
+  // CHECK: %[[d2:.+]] = fadd
+  // CHECK: %[[d3:.+]] = fadd
+  // CHECK: %[[d4:.+]] = fadd
+  // select i1 %[[p]], float [[d1]], %[[preserve_f32]]
+  // select i1 %[[p]], float [[d2]], %[[preserve_f32]]
+  // select i1 %[[p]], float [[d3]], %[[preserve_f32]]
+  // select i1 %[[p]], float [[d4]], %[[preserve_f32]]
+
   return tex.Load(0) + float4(foo,bar);
-  // CHECK: load i32, i32* @dx.nothing
 }
 
 

+ 19 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_simple_call.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+float foo(float arg) {
+  return arg;
+}
+
+float main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  float x = 10; // CHECK: %[[x:.+]] = select i1 %[[p]], float 1.000000e+01, float 1.000000e+01
+  float y = foo(x); // CHECK: load i32, i32* @dx.nothing
+    // Return
+    // CHECK: load i32, i32* @dx.nothing
+  // CHECK: %[[y:.+]] = select i1 %[[p]], float %[[x]], float %[[x]]
+
+  return y;
+}
+

+ 27 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/preserve_rewrite.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float main() : SV_Target {
+  // CHECK: %[[p_load:[0-9]+]] = load i32, i32* @dx.preserve.value
+  // CHECK: %[[p:[0-9]+]] = trunc i32 %[[p_load]] to i1
+
+  int x = 10;
+  // CHECK: %[[x1:.+]] = select i1 %[[p]], i32 10, i32 10
+  x = 6;
+  // CHECK: %[[x2:.+]] = select i1 %[[p]], i32 %[[x1]], i32 6
+  x = 10;
+  // CHECK: %[[x3:.+]] = select i1 %[[p]], i32 %[[x2]], i32 10
+  x = 40;
+  // CHECK: %[[x4:.+]] = select i1 %[[p]], i32 %[[x3]], i32 40
+  x = 80;
+  // CHECK: %[[x5:.+]] = select i1 %[[p]], i32 %[[x4]], i32 80
+  x = x * 5;
+  // CHECK: %[[x6:.+]] = mul 
+  // CHECK-SAME: %[[x5]]
+
+  return x;
+}
+

+ 3 - 2
utils/hct/hctdb.py

@@ -2009,8 +2009,9 @@ class db_dxil(object):
         add_pass('dxil-fix-array-init', 'DxilFixConstArrayInitializer', 'Dxil Fix Array Initializer', [])
         add_pass('hlsl-validate-wave-sensitivity', 'DxilValidateWaveSensitivity', 'HLSL DXIL wave sensitiveity validation', [])
         add_pass('dxil-elim-vector', 'DxilEliminateVector', 'Dxil Eliminate Vectors', [])
-        add_pass('dxil-finalize-noops', 'DxilFinalizeNoops', 'Dxil Finalize Noops', [])
-        add_pass('dxil-insert-noops', 'DxilInsertNoops', 'Dxil Insert Noops', [])
+        add_pass('dxil-finalize-preserves', 'DxilFinalizePreserves', 'Dxil Finalize Preserves', [])
+        add_pass('dxil-insert-preserves', 'DxilInsertPreserves', 'Dxil Insert Noops', [])
+        add_pass('dxil-preserve-to-select', 'DxilPreserveToSelect', 'Dxil Insert Noops', [])
         add_pass('dxil-value-cache', 'DxilValueCache', 'Dxil Value Cache',[])
 
         category_lib="llvm"