Browse Source

Backport -ftime-trace from upstream llvm & clang (#4873)

* Backport -ftime-trace from upstream llvm & clang

This change back-ports the -ftime-trace feature implemented by Aras
Pranckevičius (@aras-p) into DXC.

Aras has a great blog post describing the motivaiton for this feature
and how to use it (see:
https://aras-p.info/blog/2019/01/16/time-trace-timeline-flame-chart-prof
iler-for-Clang/).

As shaders are getting more and more complex we need better timing data
for ourselves as compiler writers and our users as HLSL writers to know
how to improve compile times of complex shaders.

This implementation is a starting point to give us data to help drive
our efforts to improve compile-time performance.

Because DXC's LLVM is very far removed from the version of LLVM the
patch was originally written for this change is not strictly a
cherry-pick of
https://github.com/llvm/llvm-project/commit/d880de2d19d46f7cfea7aa593602
458440bc1e8d. Instead this change is a manually applied and
reconstructed patch with significant alterations along the way. The
alaterations are performed by myself, but credit for the patch remains
with @aras-p.

Patch by Aras Pranckevičius (@aras-p).
Chris B 2 năm trước cách đây
mục cha
commit
45eec33ad4

+ 1 - 0
include/dxc/Support/HLSLOptions.h

@@ -207,6 +207,7 @@ public:
   bool EnableLifetimeMarkers = false; // OPT_enable_lifetime_markers
   bool ForceDisableLocTracking = false; // OPT_fdisable_loc_tracking
   bool TimeReport = false; // OPT_ftime_report
+  std::string TimeTrace = ""; // OPT_ftime_trace[EQ]
 
   // Optimization pass enables, disables and selects
   std::map<std::string, bool> DxcOptimizationToggles; // OPT_opt_enable & OPT_opt_disable

+ 6 - 0
include/dxc/Support/HLSLOptions.td

@@ -174,6 +174,12 @@ def fdisable_loc_tracking : Flag<["-"], "fdisable-loc-tracking">,
 
 
 def ftime_report : Flag<["-"], "ftime-report">, Group<hlslcomp_Group>, Flags<[CoreOption]>, HelpText<"Print time report">;
+def ftime_trace : Flag<["-"], "ftime-trace">,
+  Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"Print hierchial time tracing to stdout">;
+def ftime_trace_EQ : Joined<["-"], "ftime-trace=">,
+  Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"Print hierchial time tracing to file">;
 
 /*
 def fno_caret_diagnostics : Flag<["-"], "fno-caret-diagnostics">, Group<hlslcomp_Group>,

+ 1 - 0
include/dxc/Support/dxcapi.impl.h

@@ -98,6 +98,7 @@ inline DxcOutputType DxcGetOutputType(DXC_OUT_KIND kind) {
   case DXC_OUT_TEXT:
   case DXC_OUT_REMARKS:
   case DXC_OUT_TIME_REPORT:
+  case DXC_OUT_TIME_TRACE:
     return DxcOutputType_Text;
   default:
     return DxcOutputType_None;

+ 2 - 1
include/dxc/dxcapi.h

@@ -478,8 +478,9 @@ typedef enum DXC_OUT_KIND {
   DXC_OUT_EXTRA_OUTPUTS  = 10,// IDxcExtraResults - Extra outputs
   DXC_OUT_REMARKS = 11,       // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
   DXC_OUT_TIME_REPORT = 12,   // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
+  DXC_OUT_TIME_TRACE = 13,   // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
 
-  DXC_OUT_LAST = DXC_OUT_TIME_REPORT, // Last value for a counter
+  DXC_OUT_LAST = DXC_OUT_TIME_TRACE, // Last value for a counter
 
   DXC_OUT_NUM_ENUMS,
   DXC_OUT_FORCE_DWORD = 0xFFFFFFFF

+ 71 - 0
include/llvm/Support/TimeProfiler.h

@@ -0,0 +1,71 @@
+//===- llvm/Support/TimeProfiler.h - Hierarchical Time Profiler -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TIME_PROFILER_H
+#define LLVM_SUPPORT_TIME_PROFILER_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct TimeTraceProfiler;
+extern TimeTraceProfiler *TimeTraceProfilerInstance;
+
+/// Initialize the time trace profiler.
+/// This sets up the global \p TimeTraceProfilerInstance
+/// variable to be the profiler instance.
+void timeTraceProfilerInitialize();
+
+/// Cleanup the time trace profiler, if it was initialized.
+void timeTraceProfilerCleanup();
+
+/// Is the time trace profiler enabled, i.e. initialized?
+inline bool timeTraceProfilerEnabled() {
+  return TimeTraceProfilerInstance != nullptr;
+}
+
+/// Write profiling data to output file.
+/// Data produced is JSON, in Chrome "Trace Event" format, see
+/// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+void timeTraceProfilerWrite(raw_ostream &OS);
+
+/// Manually begin a time section, with the given \p Name and \p Detail.
+/// Profiler copies the string data, so the pointers can be given into
+/// temporaries. Time sections can be hierarchical; every Begin must have a
+/// matching End pair but they can nest.
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail);
+void timeTraceProfilerBegin(StringRef Name,
+                            llvm::function_ref<std::string()> Detail);
+
+/// Manually end the last time section.
+void timeTraceProfilerEnd();
+
+/// The TimeTraceScope is a helper class to call the begin and end functions
+/// of the time trace profiler.  When the object is constructed, it begins
+/// the section; and when it is destroyed, it stops it. If the time profiler
+/// is not initialized, the overhead is a single branch.
+struct TimeTraceScope {
+  TimeTraceScope(StringRef Name, StringRef Detail) {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerBegin(Name, Detail);
+  }
+  TimeTraceScope(StringRef Name, llvm::function_ref<std::string()> Detail) {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerBegin(Name, Detail);
+  }
+  ~TimeTraceScope() {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerEnd();
+  }
+};
+
+} // end namespace llvm
+
+#endif

+ 13 - 0
lib/Analysis/IPA/CallGraphSCCPass.cpp

@@ -26,6 +26,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -126,6 +127,12 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
     }
 
     {
+      // HLSL Change Begin - Support hierarchial time tracing.
+      StringRef FnName = (*CurSCC.begin())->getFunction()
+                             ? (*CurSCC.begin())->getFunction()->getName()
+                             : "Unnamed";
+      TimeTraceScope FunctionScope("CGSCCPass-Function", FnName);
+      // HLSL Change End - Support hierarchial time tracing.
       TimeRegion PassTimer(getPassTimer(CGSP));
       Changed = CGSP->runOnSCC(CurSCC);
     }
@@ -150,6 +157,9 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
     if (Function *F = CGN->getFunction()) {
       dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
       {
+        // HLSL Change Begin - Support hierarchial time tracing.
+        TimeTraceScope FunctionScope("CGSCCPass-Function", F->getName());
+        // HLSL Change End - Support hierarchial time tracing.
         TimeRegion PassTimer(getPassTimer(FPP));
         Changed |= FPP->runOnFunction(*F);
       }
@@ -398,6 +408,9 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
   for (unsigned PassNo = 0, e = getNumContainedPasses();
        PassNo != e; ++PassNo) {
     Pass *P = getContainedPass(PassNo);
+    // HLSL Change Begin - Support hierarchial time tracing.
+    TimeTraceScope PassScope("RunCallGraphSCCPass", P->getPassName());
+    // HLSL Change End - Support hierarchial time tracing.
     
     // If we're in -debug-pass=Executions mode, construct the SCC node list,
     // otherwise avoid constructing this string as it is expensive.

+ 3 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -782,6 +782,9 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.ForceDisableLocTracking =
       Args.hasFlag(OPT_fdisable_loc_tracking, OPT_INVALID, false);
   opts.TimeReport = Args.hasFlag(OPT_ftime_report, OPT_INVALID, false);
+  opts.TimeTrace = Args.hasFlag(OPT_ftime_trace, OPT_INVALID, false) ? "-" : "";
+  if (Args.hasArg(OPT_ftime_trace_EQ))
+    opts.TimeTrace = Args.getLastArgValue(OPT_ftime_trace_EQ);
   opts.EnablePayloadQualifiers = Args.hasFlag(OPT_enable_payload_qualifiers, OPT_INVALID,
                                             DXIL::CompareVersions(Major, Minor, 6, 7) >= 0); 
 

+ 16 - 1
lib/IR/LegacyPassManager.cpp

@@ -24,6 +24,7 @@
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/TimeValue.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <map>
@@ -1321,7 +1322,7 @@ bool BBPassManager::runOnFunction(Function &F) {
 
   bool Changed = doInitialization(F);
 
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       BasicBlockPass *BP = getContainedPass(Index);
       bool LocalChanged = false;
@@ -1552,10 +1553,18 @@ bool FPPassManager::runOnFunction(Function &F) {
   // Collect inherited analysis from Module level pass manager.
   populateInheritedAnalysis(TPM->activeStack);
 
+  // HLSL Change Begin - Support hierarchial time tracing.
+  llvm::TimeTraceScope FunctionScope("OptFunction", F.getName());
+  // HLSL Change End
+
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     FunctionPass *FP = getContainedPass(Index);
     bool LocalChanged = false;
 
+    // HLSL Change Begin - Support hierarchial time tracing.
+    llvm::TimeTraceScope PassScope("RunFunctionPass", FP->getPassName());
+    // HLSL Change End - Support hierarchial time tracing.
+
     dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
     dumpRequiredSet(FP);
 
@@ -1578,12 +1587,14 @@ bool FPPassManager::runOnFunction(Function &F) {
     recordAvailableAnalysis(FP);
     removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
   }
+
   return Changed;
 }
 
 bool FPPassManager::runOnModule(Module &M) {
   bool Changed = false;
 
+  llvm::TimeTraceScope TimeScope("OptModule", M.getName());
   for (Function &F : M)
     Changed |= runOnFunction(F);
 
@@ -1616,6 +1627,8 @@ bool FPPassManager::doFinalization(Module &M) {
 /// the module, and if so, return true.
 bool
 MPPassManager::runOnModule(Module &M) {
+  llvm::TimeTraceScope TimeScope("OptModule", M.getName());
+
   bool Changed = false;
 
   // Initialize on-the-fly passes
@@ -1632,6 +1645,8 @@ MPPassManager::runOnModule(Module &M) {
     ModulePass *MP = getContainedPass(Index);
     bool LocalChanged = false;
 
+    llvm::TimeTraceScope PassScope("RunModulePass", MP->getPassName());
+
     dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
     dumpRequiredSet(MP);
 

+ 1 - 0
lib/Support/CMakeLists.txt

@@ -131,6 +131,7 @@ add_llvm_library(LLVMSupport
   TargetRegistry.cpp
   ThreadLocal.cpp
   Threading.cpp
+  TimeProfiler.cpp # HLSL Change - Support hierarchial time tracing.
   TimeValue.cpp
   Valgrind.cpp
   Watchdog.cpp

+ 184 - 0
lib/Support/TimeProfiler.cpp

@@ -0,0 +1,184 @@
+//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Hierarchical time profiler implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include <cassert>
+#include <chrono>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace std::chrono;
+
+namespace llvm {
+
+TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
+
+static std::string escapeString(StringRef Src) {
+  std::string OS;
+  for (const char &C : Src) {
+    switch (C) {
+    case '"':
+    case '/':
+    case '\\':
+    case '\b':
+    case '\f':
+    case '\n':
+    case '\r':
+    case '\t':
+      OS += '\\';
+      OS += C;
+      break;
+    default:
+      if (std::isprint(C) != 0) {
+        OS += C;
+      }
+    }
+  }
+  return OS;
+}
+
+typedef duration<steady_clock::rep, steady_clock::period> DurationType;
+typedef std::pair<std::string, DurationType> NameAndDuration;
+
+struct Entry {
+  time_point<steady_clock> Start;
+  DurationType Duration;
+  std::string Name;
+  std::string Detail;
+};
+
+struct TimeTraceProfiler {
+  TimeTraceProfiler() {
+    Stack.reserve(8);
+    Entries.reserve(128);
+    StartTime = steady_clock::now();
+  }
+
+  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
+    Entry E = {steady_clock::now(), {}, Name, Detail()};
+    Stack.push_back(std::move(E));
+  }
+
+  void end() {
+    assert(!Stack.empty() && "Must call begin() first");
+    auto &E = Stack.back();
+    E.Duration = steady_clock::now() - E.Start;
+
+    // Only include sections longer than 500us.
+    if (duration_cast<microseconds>(E.Duration).count() > 500)
+      Entries.emplace_back(E);
+
+    // Track total time taken by each "name", but only the topmost levels of
+    // them; e.g. if there's a template instantiation that instantiates other
+    // templates from within, we only want to add the topmost one. "topmost"
+    // happens to be the ones that don't have any currently open entries above
+    // itself.
+    if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
+          return Val.Name == E.Name;
+        }) == Stack.rend()) {
+      TotalPerName[E.Name] += E.Duration;
+      CountPerName[E.Name]++;
+    }
+
+    Stack.pop_back();
+  }
+
+  void Write(raw_ostream &OS) {
+    assert(Stack.empty() &&
+           "All profiler sections should be ended when calling Write");
+
+    OS << "{ \"traceEvents\": [\n";
+
+    // Emit all events for the main flame graph.
+    for (const auto &E : Entries) {
+      auto StartUs = duration_cast<microseconds>(E.Start - StartTime).count();
+      auto DurUs = duration_cast<microseconds>(E.Duration).count();
+      OS << "{ \"pid\":1, \"tid\":0, \"ph\":\"X\", \"ts\":" << StartUs
+          << ", \"dur\":" << DurUs << ", \"name\":\"" << escapeString(E.Name)
+          << "\", \"args\":{ \"detail\":\"" << escapeString(E.Detail)
+          << "\"} },\n";
+    }
+
+    // Emit totals by section name as additional "thread" events, sorted from
+    // longest one.
+    int Tid = 1;
+    std::vector<NameAndDuration> SortedTotals;
+    SortedTotals.reserve(TotalPerName.size());
+    for (const auto &E : TotalPerName) {
+      SortedTotals.push_back(E);
+    }
+    std::sort(SortedTotals.begin(), SortedTotals.end(),
+              [](const NameAndDuration &A, const NameAndDuration &B) {
+                return A.second > B.second;
+              });
+    for (const auto &E : SortedTotals) {
+      auto DurUs = duration_cast<microseconds>(E.second).count();
+      OS << "{ \"pid\":1, \"tid\":" << Tid << ", \"ph\":\"X\", \"ts\":" << 0
+          << ", \"dur\":" << DurUs << ", \"name\":\"Total "
+          << escapeString(E.first)
+          << "\", \"args\":{ \"count\":" << CountPerName[E.first]
+          << ", \"avg ms\":" << (DurUs / CountPerName[E.first] / 1000)
+          << "} },\n";
+      ++Tid;
+    }
+
+    // Emit metadata event with process name.
+    OS << "{ \"cat\":\"\", \"pid\":1, \"tid\":0, \"ts\":0, \"ph\":\"M\", "
+           "\"name\":\"process_name\", \"args\":{ \"name\":\"clang\" } }\n";
+    OS << "] }\n";
+  }
+
+  std::vector<Entry> Stack;
+  std::vector<Entry> Entries;
+  std::unordered_map<std::string, DurationType> TotalPerName;
+  std::unordered_map<std::string, size_t> CountPerName;
+  time_point<steady_clock> StartTime;
+};
+
+void timeTraceProfilerInitialize() {
+  assert(TimeTraceProfilerInstance == nullptr &&
+         "Profiler should not be initialized");
+  TimeTraceProfilerInstance = new TimeTraceProfiler();
+}
+
+void timeTraceProfilerCleanup() {
+  delete TimeTraceProfilerInstance;
+  TimeTraceProfilerInstance = nullptr;
+}
+
+void timeTraceProfilerWrite(raw_ostream &OS) {
+  assert(TimeTraceProfilerInstance != nullptr &&
+         "Profiler object can't be null");
+  TimeTraceProfilerInstance->Write(OS);
+}
+
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; });
+}
+
+void timeTraceProfilerBegin(StringRef Name,
+                            llvm::function_ref<std::string()> Detail) {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->begin(Name, Detail);
+}
+
+void timeTraceProfilerEnd() {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->end();
+}
+
+} // namespace llvm

+ 2 - 0
tools/clang/include/clang/Driver/Options.td

@@ -1011,6 +1011,8 @@ def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group<f_Group>
 def : Flag<["-"], "fterminated-vtables">, Alias<fapple_kext>;
 def fthreadsafe_statics : Flag<["-"], "fthreadsafe-statics">, Group<f_Group>;
 def ftime_report : Flag<["-"], "ftime-report">, Group<f_Group>, Flags<[CC1Option]>;
+// HLSL Change - Support hierarchial time tracing.
+def ftime_trace : Flag<["-"], "ftime-trace">, Group<f_Group>, Flags<[CC1Option]>;
 def ftlsmodel_EQ : Joined<["-"], "ftls-model=">, Group<f_Group>, Flags<[CC1Option]>;
 def ftrapv : Flag<["-"], "ftrapv">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Trap on integer overflow">;

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.def

@@ -130,6 +130,8 @@ CODEGENOPT(SimplifyLibCalls  , 1, 1) ///< Set when -fbuiltin is enabled.
 CODEGENOPT(SoftFloat         , 1, 0) ///< -soft-float.
 CODEGENOPT(StrictEnums       , 1, 0) ///< Optimize based on strict enum definition.
 CODEGENOPT(TimePasses        , 1, 0) ///< Set when -ftime-report is enabled.
+CODEGENOPT(TimeTrace         , 1, 0) ///< HLSL Change:
+                                     ///< Set when -ftime-trace is enabled.
 CODEGENOPT(UnitAtATime       , 1, 1) ///< Unused. For mirroring GCC optimization
                                      ///< selection.
 CODEGENOPT(UnrollLoops       , 1, 0) ///< Control whether loops are unrolled.

+ 5 - 1
tools/clang/include/clang/Frontend/FrontendOptions.h

@@ -128,6 +128,8 @@ public:
                                            /// metrics and statistics.
   unsigned ShowTimers : 1;                 ///< Show timers for individual
                                            /// actions.
+  unsigned TimeTrace : 1;                  /// HLSL Change
+                                           /// Output time trace profile.
   unsigned ShowVersion : 1;                ///< Show the -version text.
   unsigned FixWhatYouCan : 1;              ///< Apply fixes even if there are
                                            /// unfixable errors.
@@ -256,7 +258,9 @@ public:
 public:
   FrontendOptions() :
     DisableFree(false), RelocatablePCH(false), ShowHelp(false),
-    ShowStats(false), ShowTimers(false), ShowVersion(false),
+// HLSL Change Begin - Support hierarchial time tracing.
+    ShowStats(false), ShowTimers(false), TimeTrace(false), ShowVersion(false),
+// HLSL Change End - Support hierarchial time tracing.
     FixWhatYouCan(false), FixOnlyWarnings(false), FixAndRecompile(false),
     FixToTemporaries(false), ARCMTMigrateEmitARCErrors(false),
     SkipFunctionBodies(false), UseGlobalModuleIndex(true),

+ 3 - 0
tools/clang/lib/CodeGen/BackendUtil.cpp

@@ -29,6 +29,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -763,6 +764,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
                               raw_pwrite_stream *OS) {
   EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M);
 
+  // HLSL Change - Support hierarchial time tracing.
+  TimeTraceScope TimeScope("Backend", StringRef(""));
 
   try { // HLSL Change Starts
     // Catch any fatal errors during optimization passes here

+ 7 - 0
tools/clang/lib/CodeGen/CodeGenModule.cpp

@@ -53,6 +53,7 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 #include "dxc/DXIL/DxilConstants.h"    // HLSL Change
 
 using namespace clang;
@@ -1573,6 +1574,12 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
     // linkage.
     if (!shouldEmitFunction(GD))
       return;
+    
+    // HLSL Change Begin - Support hierarchial time tracing.
+    const auto *FD = dyn_cast<FunctionDecl>(D);
+    llvm::TimeTraceScope TimeScope(
+        "CodeGen Function", [FD]() { return FD->getQualifiedNameAsString(); });
+    // HLSL Change End - Support hierarchial time tracing.
 
     if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
       CompleteDIClassType(Method);

+ 1 - 0
tools/clang/lib/Driver/Tools.cpp

@@ -4121,6 +4121,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info);
   Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
   Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
+  Args.AddLastArg(CmdArgs, options::OPT_ftime_trace); // HLSL Change
   Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
 
   if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {

+ 1 - 0
tools/clang/lib/Frontend/CompilerInstance.cpp

@@ -43,6 +43,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <sys/stat.h>

+ 3 - 0
tools/clang/lib/Frontend/CompilerInvocation.cpp

@@ -933,6 +933,9 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
   Opts.ShowHelp = Args.hasArg(OPT_help);
   Opts.ShowStats = Args.hasArg(OPT_print_stats);
   Opts.ShowTimers = Args.hasArg(OPT_ftime_report);
+  // HLSL Change Begin - Support hierarchial time tracing.
+  Opts.TimeTrace = Args.hasArg(OPT_ftime_trace);
+  // HLSL Change End - Support hierarchial time tracing.
   Opts.ShowVersion = Args.hasArg(OPT_version);
   Opts.ASTMergeFiles = Args.getAllArgValues(OPT_ast_merge);
   Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm);

+ 4 - 0
tools/clang/lib/Parse/ParseAST.cpp

@@ -24,6 +24,7 @@
 #include "clang/Sema/SemaConsumer.h"
 #include "clang/Sema/SemaHLSL.h" // HLSL Change
 #include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <cstdio>
 #include <memory>
 
@@ -98,6 +99,9 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer,
 }
 
 void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) {
+
+  // HLSL Change - Support hierarchial time tracing.
+  llvm::TimeTraceScope TimeScope("Frontend", StringRef(""));
   // Collect global stats on Decls/Stmts (until we have a module streamer).
   if (PrintStats) {
     Decl::EnableStatistics();

+ 9 - 0
tools/clang/lib/Parse/ParseDeclCXX.cpp

@@ -26,6 +26,7 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 using namespace clang;
 
 /// ParseNamespace - We know that the current token is a namespace keyword. This
@@ -2883,6 +2884,14 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc,
          TagType == DeclSpec::TST_union  ||
          TagType == DeclSpec::TST_class) && "Invalid TagType!");
 
+  // HLSL Change Begin - Support hierarchial time tracing.
+  llvm::TimeTraceScope TimeScope("ParseClass", [&]() {
+    if (auto *TD = dyn_cast_or_null<NamedDecl>(TagDecl))
+      return TD->getQualifiedNameAsString();
+    return std::string("<anonymous>");
+  });
+  // HLSL Change End - Support hierarchial time tracing.
+
   PrettyDeclStackTraceEntry CrashInfo(Actions, TagDecl, RecordLoc,
                                       "parsing struct/union/class body");
 

+ 9 - 0
tools/clang/lib/Parse/ParseTemplate.cpp

@@ -20,6 +20,7 @@
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 using namespace clang;
 
 /// \brief Parse a template declaration, explicit instantiation, or
@@ -238,6 +239,14 @@ Parser::ParseSingleDeclarationAfterTemplate(
     return nullptr;
   }
 
+  // HLSL Change Begin - Support hierarchial time tracing.
+  llvm::TimeTraceScope TimeScope("ParseTemplate", [&]() {
+    return DeclaratorInfo.getIdentifier() != nullptr
+               ? DeclaratorInfo.getIdentifier()->getName()
+               : "<unknown>";
+  });
+  // HLSL Change End - Support hierarchial time tracing.
+
   LateParsedAttrList LateParsedAttrs(true);
   if (DeclaratorInfo.isFunctionDeclarator())
     MaybeParseGNUAttributes(DeclaratorInfo, &LateParsedAttrs);

+ 9 - 1
tools/clang/lib/Sema/Sema.cpp

@@ -41,6 +41,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/TimeProfiler.h"
 using namespace clang;
 using namespace sema;
 
@@ -683,7 +684,14 @@ void Sema::ActOnEndOfTranslationUnit() {
       PendingInstantiations.insert(PendingInstantiations.begin(),
                                    Pending.begin(), Pending.end());
     }
-    PerformPendingInstantiations();
+
+    // HLSL Change Begin - Support hierarchial time tracing.
+    {
+      llvm::TimeTraceScope TimeScope("PerformPendingInstantiations",
+                                     StringRef(""));
+      PerformPendingInstantiations();
+    }
+    // HLSL Change Ends - Support hierarchial time tracing.
 
     if (LateTemplateParserCleanup)
       LateTemplateParserCleanup(OpaqueParser);

+ 8 - 0
tools/clang/lib/Sema/SemaTemplateInstantiate.cpp

@@ -23,6 +23,7 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateDeduction.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 
 using namespace clang;
 using namespace sema;
@@ -1941,6 +1942,13 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
                                 Instantiation->getInstantiatedFromMemberClass(),
                                      Pattern, PatternDef, TSK, Complain))
     return true;
+  
+  // HLSL Change Begin - Support hierarchial time tracing.
+  llvm::TimeTraceScope TimeScope("InstantiateClass", [&]() {
+    return Instantiation->getQualifiedNameAsString();
+  });
+  // HLSL Change End - Support hierarchial time tracing.
+
   Pattern = PatternDef;
 
   // \brief Record the point of instantiation.

+ 7 - 0
tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

@@ -24,6 +24,7 @@
 #include "clang/Sema/PrettyDeclStackTrace.h"
 #include "clang/Sema/SemaHLSL.h" // HLSL Change
 #include "clang/Sema/Template.h"
+#include "llvm/Support/TimeProfiler.h" // HLSL Change
 
 using namespace clang;
 
@@ -3352,6 +3353,12 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
   if (Function->isInvalidDecl() || Function->isDefined())
     return;
 
+  // HLSL Change Begin - Support hierarchial time tracing.
+  llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() {
+    return Function->getQualifiedNameAsString();
+  });
+  // HLSL Change End - Support hierarchial time tracing.
+
   // Never instantiate an explicit specialization except if it is a class scope
   // explicit specialization.
   if (Function->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&

+ 7 - 0
tools/clang/test/DXC/ftime-trace.hlsl

@@ -0,0 +1,7 @@
+// RUN: %dxc -E main -T vs_6_0 %s -ftime-trace | FileCheck %s
+// RUN: %dxc -E main -T vs_6_0 %s -ftime-trace=%t.json
+// RUN: cat %t.json | FileCheck %s
+
+// CHECK: { "traceEvents": [
+
+void main() {}

+ 28 - 23
tools/clang/tools/dxclib/dxc.cpp

@@ -266,28 +266,24 @@ static void WriteDxcExtraOuputs(IDxcResult *pResult) {
   }
 }
 
-static void WriteDxcOutputToConsole(IDxcOperationResult *pCompileResult, DXC_OUT_KIND kind) {
-  CComPtr<IDxcResult> pResult;
-  if (SUCCEEDED(pCompileResult->QueryInterface(&pResult))) {
-    if (!pResult->HasOutput(kind)) {
-      return;
-    }
-
-    CComPtr<IDxcBlob> pBlob;
-    IFT(pResult->GetOutput(kind, IID_PPV_ARGS(&pBlob), nullptr));
-    llvm::StringRef outputString((LPSTR)pBlob->GetBufferPointer(),
-                            pBlob->GetBufferSize());
-    llvm::SmallVector<llvm::StringRef, 20> lines;
-    outputString.split(lines, "\n");
-    
-    std::string outputStr;
-    llvm::raw_string_ostream SS(outputStr);
-    for (auto line : lines) {
-      SS << "; " << line << "\n";
-    }
+static void WriteDxcOutputToConsole(IDxcResult *pResult, DXC_OUT_KIND kind) {
+  if (!pResult->HasOutput(kind))
+    return;
 
-    WriteUtf8ToConsole(outputStr.data(), outputStr.size());
+  CComPtr<IDxcBlob> pBlob;
+  IFT(pResult->GetOutput(kind, IID_PPV_ARGS(&pBlob), nullptr));
+  llvm::StringRef outputString((LPSTR)pBlob->GetBufferPointer(),
+                          pBlob->GetBufferSize());
+  llvm::SmallVector<llvm::StringRef, 20> lines;
+  outputString.split(lines, "\n");
+  
+  std::string outputStr;
+  llvm::raw_string_ostream SS(outputStr);
+  for (auto line : lines) {
+    SS << "; " << line << "\n";
   }
+
+  WriteUtf8ToConsole(outputStr.data(), outputStr.size());
 }
 
 std::string getDependencyOutputFileName(llvm::StringRef inputFileName) {
@@ -867,14 +863,11 @@ int DxcContext::Compile() {
     WriteOperationErrorsToConsole(pCompileResult, m_Opts.OutputWarnings);
   }
   
-  if (m_Opts.TimeReport)
-    WriteDxcOutputToConsole(pCompileResult, DXC_OUT_TIME_REPORT);
 
   HRESULT status;
   IFT(pCompileResult->GetStatus(&status));
   if (SUCCEEDED(status) || m_Opts.AstDump || m_Opts.OptDump ||
       m_Opts.DumpDependencies) {
-    WriteDxcOutputToConsole(pCompileResult, DXC_OUT_REMARKS);
     CComPtr<IDxcBlob> pProgram;
     IFT(pCompileResult->GetResult(&pProgram));
     if (pProgram.p != nullptr) {
@@ -883,6 +876,18 @@ int DxcContext::Compile() {
       // Now write out extra parts
       CComPtr<IDxcResult> pResult;
       if (SUCCEEDED(pCompileResult->QueryInterface(&pResult))) {
+        WriteDxcOutputToConsole(pResult, DXC_OUT_REMARKS);
+        WriteDxcOutputToConsole(pResult, DXC_OUT_TIME_REPORT);
+        
+        if (m_Opts.TimeTrace == "-")
+          WriteDxcOutputToConsole(pResult, DXC_OUT_TIME_TRACE);
+        else if (!m_Opts.TimeTrace.empty()) {
+          CComPtr<IDxcBlob> pData;
+          CComPtr<IDxcBlobWide> pName;
+          IFT(pResult->GetOutput(DXC_OUT_TIME_TRACE, IID_PPV_ARGS(&pData), &pName));
+          WriteBlobToFile(pData, m_Opts.TimeTrace, m_Opts.DefaultTextCodePage);
+        }
+
         WriteDxcOutputToFile(DXC_OUT_ROOT_SIGNATURE, pResult, m_Opts.DefaultTextCodePage);
         WriteDxcOutputToFile(DXC_OUT_SHADER_HASH, pResult, m_Opts.DefaultTextCodePage);
         WriteDxcOutputToFile(DXC_OUT_REFLECTION, pResult, m_Opts.DefaultTextCodePage);

+ 13 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -24,6 +24,7 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/CodeGen/CodeGenAction.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Support/Timer.h"
 #include "dxc/Support/WinIncludes.h"
@@ -637,6 +638,7 @@ public:
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _In_ REFIID riid, _Out_ LPVOID *ppResult      // IDxcResult: status, buffer, and errors
   ) override {
+    llvm::TimeTraceScope TimeScope("Compile", StringRef(""));
     if (pSource == nullptr || ppResult == nullptr ||
         (argCount > 0 && pArguments == nullptr))
       return E_INVALIDARG;
@@ -1785,6 +1787,8 @@ HRESULT DxcCompilerAdapter::WrapCompile(
     bool finished = false;
     CComPtr<IDxcOperationResult> pOperationResult;
     dxcutil::ReadOptsAndValidate(mainArgs, opts, pOutputStream, &pOperationResult, finished);
+    if (!opts.TimeTrace.empty())
+      llvm::timeTraceProfilerInitialize();
     if (finished) {
       IFT(pOperationResult->QueryInterface(ppResult));
       return S_OK;
@@ -1835,6 +1839,15 @@ HRESULT DxcCompilerAdapter::WrapCompile(
                                    TimeReport.size()));
     }
 
+    if (llvm::timeTraceProfilerEnabled()) {
+      std::string TimeTrace;
+      raw_string_ostream OS(TimeTrace);
+      llvm::timeTraceProfilerWrite(OS);
+      llvm::timeTraceProfilerCleanup();
+      IFT(pResult->SetOutputString(DXC_OUT_TIME_TRACE, TimeTrace.c_str(),
+                                   TimeTrace.size()));
+    }
+
     outStream.flush();
 
     // Insert any warnings generated here

+ 27 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -173,6 +173,7 @@ public:
   TEST_METHOD(CompileWhenIncludeFlagsThenIncludeUsed)
   TEST_METHOD(CompileThenCheckDisplayIncludeProcess)
   TEST_METHOD(CompileThenPrintTimeReport)
+  TEST_METHOD(CompileThenPrintTimeTrace)
   TEST_METHOD(CompileWhenIncludeMissingThenFail)
   TEST_METHOD(CompileWhenIncludeHasPathThenOK)
   TEST_METHOD(CompileWhenIncludeEmptyThenOK)
@@ -2839,6 +2840,32 @@ TEST_F(CompilerTest, CompileThenPrintTimeReport) {
                        text.find("... Pass execution timing report ..."));
 }
 
+TEST_F(CompilerTest, CompileThenPrintTimeTrace) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<TestIncludeHandler> pInclude;
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("float4 main() : SV_Target { return 0.0; }", &pSource);
+
+  LPCWSTR args[] = {L"-ftime-trace"};
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", args, _countof(args), nullptr,
+                                      0, pInclude, &pResult));
+  VerifyOperationSucceeded(pResult);
+
+  CComPtr<IDxcResult> pCompileResult;
+  CComPtr<IDxcBlob> pReportBlob;
+  pResult->QueryInterface(&pCompileResult);
+  VERIFY_SUCCEEDED(pCompileResult->GetOutput(
+      DXC_OUT_TIME_TRACE, IID_PPV_ARGS(&pReportBlob), nullptr));
+  std::string text(BlobToUtf8(pReportBlob));
+
+  VERIFY_ARE_NOT_EQUAL(string::npos,
+                       text.find("{ \"traceEvents\": ["));
+}
+
 TEST_F(CompilerTest, CompileWhenIncludeMissingThenFail) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;

+ 5 - 0
utils/hct/hcttestcmds.cmd

@@ -140,6 +140,11 @@ call :run dxc.exe /T ps_6_0 "%testfiles%\smoke.hlsl" -ftime-report
 call :check_file log find "Pass execution timing report"
 if %Failed% neq 0 goto :failed
 
+set testname=time-report
+call :run dxc.exe /T ps_6_0 "%testfiles%\smoke.hlsl" -ftime-trace
+call :check_file log find "traceEvents"
+if %Failed% neq 0 goto :failed
+
 set testname=Check Warning
 call :run dxc.exe /T ps_6_0 "%testfiles%\smoke.hlsl" /Dcheck_warning
 call :check_file log find warning: