// CpuProfiler.cpp
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
#include <CpuProfiler.h>

#include <AzCore/Interface/Interface.h>
#include <AzCore/Serialization/SerializeContext.h>
#include <AzCore/Statistics/StatisticalProfilerProxy.h>
#include <AzCore/std/smart_ptr/shared_ptr.h>
#include <AzCore/std/time.h>

#include <cstdarg>
namespace Profiler
{
// Per-thread pointer to this thread's profiling storage. Created lazily by
// RegisterThreadStorage() the first time a region begins on a given thread;
// ownership of the pointed-to object lives in CpuProfiler::m_registeredThreads.
thread_local CpuTimingLocalStorage* CpuProfiler::ms_threadLocalStorage = nullptr;

// --- CachedTimeRegion ---
// Constructs a region that records only its group/region identity; the stack
// depth and start/end tick fields keep their defaults until the region is timed.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName)
: m_groupRegionName(groupRegionName)
{
}
// Constructs a fully-populated region sample: identity, nesting depth within
// its thread's region stack, and the raw start/end timestamps in ticks.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName, uint16_t stackDepth, uint64_t startTick, uint64_t endTick)
: m_groupRegionName(groupRegionName)
, m_stackDepth(stackDepth)
, m_startTick(startTick)
, m_endTick(endTick)
{
}
  29. // --- GroupRegionName ---
// Pairs the budget (group) name with the region/event name, forming the key
// under which samples are aggregated.
CachedTimeRegion::GroupRegionName::GroupRegionName(const char* const group, const char* const region)
: m_groupName(group)
, m_regionName(region)
{
}
  35. AZStd::size_t CachedTimeRegion::GroupRegionName::Hash::operator()(const CachedTimeRegion::GroupRegionName& name) const
  36. {
  37. AZStd::size_t seed = 0;
  38. AZStd::hash_combine(seed, name.m_groupName);
  39. AZStd::hash_combine(seed, name.m_regionName);
  40. return seed;
  41. }
  42. bool CachedTimeRegion::GroupRegionName::operator==(const GroupRegionName& other) const
  43. {
  44. return (m_groupName == other.m_groupName) && (m_regionName == other.m_regionName);
  45. }
  46. // --- CpuProfiler ---
// Registers this instance as the engine-wide profiler implementation and starts
// listening on the system tick bus (per-frame collection runs in OnSystemTick).
void CpuProfiler::Init()
{
AZ::Interface<AZ::Debug::Profiler>::Register(this);
m_initialized = true;
AZ::SystemTickBus::Handler::BusConnect();
// Initial ring-buffer capacity for continuous capture; grown on demand (1.5x,
// capped at MaxFramesToSave) in OnSystemTick.
m_continuousCaptureData.set_capacity(10);
}
// Unregisters the profiler and tears down all per-thread storage. Acquiring the
// shutdown mutex exclusively waits out any thread still inside
// BeginRegion/EndRegion, since those hold it in shared mode.
void CpuProfiler::Shutdown()
{
if (!m_initialized)
{
return;
}
// When this call is made, no more thread profiling calls can be performed anymore
AZ::Interface<AZ::Debug::Profiler>::Unregister(this);
// Wait for the remaining threads that might still be processing its profiling calls
AZStd::unique_lock<AZStd::shared_mutex> shutdownLock(m_shutdownMutex);
m_enabled = false;
// Cleanup all TLS
m_registeredThreads.clear();
m_timeRegionMap.clear();
m_initialized = false;
m_continuousCaptureInProgress.store(false);
m_continuousCaptureData.clear();
AZ::SystemTickBus::Handler::BusDisconnect();
}
  73. void CpuProfiler::BeginRegion(const AZ::Debug::Budget* budget, const char* eventName, size_t eventNameArgCount, ...)
  74. {
  75. // Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
  76. if (m_shutdownMutex.try_lock_shared())
  77. {
  78. if (m_enabled)
  79. {
  80. // Lazy initialization, creates an instance of the Thread local data if it's not created, and registers it
  81. RegisterThreadStorage();
  82. va_list args;
  83. va_start(args, eventNameArgCount);
  84. // Push it to the stack
  85. CachedTimeRegion timeRegion({ budget->Name(), AZStd::fixed_string<512>::format_arg(eventName, args).c_str() });
  86. ms_threadLocalStorage->RegionStackPushBack(timeRegion);
  87. }
  88. m_shutdownMutex.unlock_shared();
  89. }
  90. }
  91. void CpuProfiler::EndRegion([[maybe_unused]] const AZ::Debug::Budget* budget)
  92. {
  93. // Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
  94. if (m_shutdownMutex.try_lock_shared())
  95. {
  96. // guard against enabling mid-marker
  97. if (m_enabled && ms_threadLocalStorage != nullptr)
  98. {
  99. ms_threadLocalStorage->RegionStackPopBack();
  100. }
  101. m_shutdownMutex.unlock_shared();
  102. }
  103. }
// Returns the per-thread region data collected during the most recent tick.
// The reference is invalidated by the next OnSystemTick().
const TimeRegionMap& CpuProfiler::GetTimeRegionMap() const
{
return m_timeRegionMap;
}
  108. bool CpuProfiler::BeginContinuousCapture()
  109. {
  110. bool expected = false;
  111. if (m_continuousCaptureInProgress.compare_exchange_strong(expected, true))
  112. {
  113. m_enabled = true;
  114. AZ_TracePrintf("Profiler", "Continuous capture started\n");
  115. return true;
  116. }
  117. AZ_TracePrintf("Profiler", "Attempting to start a continuous capture while one already in progress");
  118. return false;
  119. }
  120. bool CpuProfiler::EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget)
  121. {
  122. if (!m_continuousCaptureInProgress.load())
  123. {
  124. AZ_TracePrintf("Profiler", "Attempting to end a continuous capture while one not in progress");
  125. return false;
  126. }
  127. if (m_continuousCaptureEndingMutex.try_lock())
  128. {
  129. m_enabled = false;
  130. flushTarget = AZStd::move(m_continuousCaptureData);
  131. m_continuousCaptureData.clear();
  132. AZ_TracePrintf("Profiler", "Continuous capture ended\n");
  133. m_continuousCaptureInProgress.store(false);
  134. m_continuousCaptureEndingMutex.unlock();
  135. return true;
  136. }
  137. return false;
  138. }
// True between a successful BeginContinuousCapture() and the matching
// EndContinuousCapture().
bool CpuProfiler::IsContinuousCaptureInProgress() const
{
return m_continuousCaptureInProgress.load();
}
  143. void CpuProfiler::SetProfilerEnabled(bool enabled)
  144. {
  145. AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
  146. // Early out if the state is already the same or a continuous capture is in progress
  147. if (m_enabled == enabled || m_continuousCaptureInProgress.load())
  148. {
  149. return;
  150. }
  151. // Set the dirty flag in all the TLS to clear the caches
  152. if (enabled)
  153. {
  154. // Iterate through all the threads, and set the clearing flag
  155. for (auto& threadLocal : m_registeredThreads)
  156. {
  157. threadLocal->m_clearContainers = true;
  158. }
  159. m_enabled = true;
  160. }
  161. else
  162. {
  163. m_enabled = false;
  164. }
  165. }
// Whether BeginRegion/EndRegion currently record data.
bool CpuProfiler::IsProfilerEnabled() const
{
return m_enabled;
}
  170. void CpuProfiler::OnSystemTick()
  171. {
  172. if (!m_enabled)
  173. {
  174. return;
  175. }
  176. if (m_continuousCaptureInProgress.load() && m_continuousCaptureEndingMutex.try_lock())
  177. {
  178. if (m_continuousCaptureData.full() && m_continuousCaptureData.size() != MaxFramesToSave)
  179. {
  180. const AZStd::size_t size = m_continuousCaptureData.size();
  181. m_continuousCaptureData.set_capacity(AZStd::min(MaxFramesToSave, size + size / 2));
  182. }
  183. m_continuousCaptureData.push_back(AZStd::move(m_timeRegionMap));
  184. m_timeRegionMap.clear();
  185. m_continuousCaptureEndingMutex.unlock();
  186. }
  187. AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
  188. // Iterate through all the threads, and collect the thread's cached time regions
  189. TimeRegionMap newMap;
  190. for (auto& threadLocal : m_registeredThreads)
  191. {
  192. ThreadTimeRegionMap& threadMapEntry = newMap[threadLocal->m_executingThreadId];
  193. threadLocal->TryFlushCachedMap(threadMapEntry);
  194. }
  195. // Clear all TLS that flagged themselves to be deleted, meaning that the thread is already terminated
  196. AZStd::remove_if(m_registeredThreads.begin(), m_registeredThreads.end(), [](const AZStd::intrusive_ptr<CpuTimingLocalStorage>& thread)
  197. {
  198. return thread->m_deleteFlag.load();
  199. });
  200. // Update our saved time regions to the last frame's collected data
  201. m_timeRegionMap = AZStd::move(newMap);
  202. }
  203. void CpuProfiler::RegisterThreadStorage()
  204. {
  205. AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
  206. if (!ms_threadLocalStorage)
  207. {
  208. ms_threadLocalStorage = aznew CpuTimingLocalStorage();
  209. m_registeredThreads.emplace_back(ms_threadLocalStorage);
  210. }
  211. }
  212. // --- CpuTimingLocalStorage ---
  213. CpuTimingLocalStorage::CpuTimingLocalStorage()
  214. {
  215. m_executingThreadId = AZStd::this_thread::get_id();
  216. }
  217. CpuTimingLocalStorage::~CpuTimingLocalStorage()
  218. {
  219. m_deleteFlag = true;
  220. }
// Pushes a new open region onto this thread's stack and stamps its start tick.
// Only ever called from the owning thread (via CpuProfiler::BeginRegion).
void CpuTimingLocalStorage::RegionStackPushBack(CachedTimeRegion& timeRegion)
{
// If it was (re)enabled, clear the lists first
if (m_clearContainers)
{
m_clearContainers = false;
m_stackLevel = 0;
m_cachedTimeRegionMap.clear();
m_timeRegionStack.clear();
ResetCachedData();
}
// Depth of this region = number of currently open ancestor regions.
timeRegion.m_stackDepth = aznumeric_cast<uint16_t>(m_stackLevel);
AZ_Assert(m_timeRegionStack.size() < TimeRegionStackSize, "Adding too many time regions to the stack. Increase the size of TimeRegionStackSize.");
m_timeRegionStack.push_back(timeRegion);
// Increment the stack
m_stackLevel++;
// Set the starting time at the end, to avoid recording the minor overhead
m_timeRegionStack.back().m_startTick = AZStd::GetTimeNowTicks();
}
  240. void CpuTimingLocalStorage::RegionStackPopBack()
  241. {
  242. // Early out when the stack is empty, this might happen when the profiler was enabled while the thread encountered profiling markers
  243. if (m_timeRegionStack.empty())
  244. {
  245. return;
  246. }
  247. // Get the end timestamp here, to avoid the minor overhead
  248. const AZStd::sys_time_t endRegionTime = AZStd::GetTimeNowTicks();
  249. AZ_Assert(!m_timeRegionStack.empty(), "Trying to pop an element in the stack, but it's empty.");
  250. CachedTimeRegion back = m_timeRegionStack.back();
  251. m_timeRegionStack.pop_back();
  252. // Set the ending time
  253. back.m_endTick = endRegionTime;
  254. // Decrement the stack
  255. m_stackLevel--;
  256. // Add an entry to the cached region
  257. AddCachedRegion(back);
  258. }
  259. // Gets called when region ends and all data is set
  260. void CpuTimingLocalStorage::AddCachedRegion(const CachedTimeRegion& timeRegionCached)
  261. {
  262. if (auto iter = m_hitSizeLimitMap.find(timeRegionCached.m_groupRegionName.m_regionName.GetStringView());
  263. iter != m_hitSizeLimitMap.end() && iter->second)
  264. {
  265. return;
  266. }
  267. // Add an entry to the cached region. Discard excess data in case there is too much to handle.
  268. if (m_cachedTimeRegions.size() < TimeRegionStackSize)
  269. {
  270. m_cachedTimeRegions.push_back(timeRegionCached);
  271. }
  272. // Warn only once per thread if the cached data limit has been reached.
  273. else if (!m_cachedDataLimitReached)
  274. {
  275. AZ_Warning(
  276. "Profiler", false,
  277. "Limit for profiling data has been reached by thread %i. Excess data will be discarded. Considering moving or reducing "
  278. "profiler markers to prevent data loss.",
  279. m_executingThreadId);
  280. m_cachedDataLimitReached = true;
  281. }
  282. // If the stack is empty, add it to the local cache map. Only gets called when the stack is empty
  283. // NOTE: this is where the largest overhead will be, but due to it only being called when the stack is empty
  284. // (i.e when the root region ended), this overhead won't affect any time regions.
  285. // The exception being for functions that are being profiled and create/spawn threads that are also profiled. Unfortunately, in this
  286. // case, the overhead of the profiled threads will be added to the main thread.
  287. if (m_timeRegionStack.empty())
  288. {
  289. AZStd::unique_lock<AZStd::mutex> lock(m_cachedTimeRegionMutex);
  290. // Add the cached regions to the map
  291. for (auto& cachedTimeRegion : m_cachedTimeRegions)
  292. {
  293. const AZStd::string regionName = cachedTimeRegion.m_groupRegionName.m_regionName.GetStringView();
  294. AZStd::vector<CachedTimeRegion>& regionVec = m_cachedTimeRegionMap[regionName];
  295. regionVec.push_back(cachedTimeRegion);
  296. if (regionVec.size() >= TimeRegionStackSize)
  297. {
  298. m_hitSizeLimitMap.insert_or_assign(AZStd::move(regionName), true);
  299. }
  300. }
  301. // Clear the cached regions
  302. ResetCachedData();
  303. }
  304. }
  305. void CpuTimingLocalStorage::TryFlushCachedMap(ThreadTimeRegionMap& cachedTimeRegionMap)
  306. {
  307. // Try to lock, if it's already in use (the cached regions in the array are being copied to the map)
  308. // it'll show up in the next iteration when the user requests it.
  309. if (m_cachedTimeRegionMutex.try_lock())
  310. {
  311. // Only flush cached time regions if there are entries available
  312. if (!m_cachedTimeRegionMap.empty())
  313. {
  314. cachedTimeRegionMap = AZStd::move(m_cachedTimeRegionMap);
  315. m_cachedTimeRegionMap.clear();
  316. m_hitSizeLimitMap.clear();
  317. }
  318. m_cachedTimeRegionMutex.unlock();
  319. }
  320. }
// Drops the flat per-frame region cache and re-arms the one-shot
// "data limit reached" warning for the next batch.
void CpuTimingLocalStorage::ResetCachedData()
{
m_cachedTimeRegions.clear();
m_cachedDataLimitReached = false;
}
  326. // --- CpuProfilingStatisticsSerializer ---
  327. CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<TimeRegionMap>& continuousData)
  328. {
  329. // Create serializable entries
  330. for (const auto& timeRegionMap : continuousData)
  331. {
  332. for (const auto& [threadId, regionMap] : timeRegionMap)
  333. {
  334. for (const auto& [regionName, regionVec] : regionMap)
  335. {
  336. for (const auto& region : regionVec)
  337. {
  338. m_cpuProfilingStatisticsSerializerEntries.emplace_back(region, threadId);
  339. }
  340. }
  341. }
  342. }
  343. m_timeTicksPerSecond = AZStd::GetTimeTicksPerSecond();
  344. }
// Registers the serializer (and, below, its entry type) with the AZ
// serialization system so captured statistics can be written out.
void CpuProfilingStatisticsSerializer::Reflect(AZ::ReflectContext* context)
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
serializeContext->Class<CpuProfilingStatisticsSerializer>()
->Version(1)
->Field("cpuProfilingStatisticsSerializerEntries", &CpuProfilingStatisticsSerializer::m_cpuProfilingStatisticsSerializerEntries)
->Field("timeTicksPerSecond", &CpuProfilingStatisticsSerializer::m_timeTicksPerSecond);
}
CpuProfilingStatisticsSerializerEntry::Reflect(context);
}
  356. // --- CpuProfilingStatisticsSerializerEntry ---
  357. CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::CpuProfilingStatisticsSerializerEntry(
  358. const CachedTimeRegion& cachedTimeRegion, AZStd::thread_id threadId)
  359. {
  360. m_groupName = cachedTimeRegion.m_groupRegionName.m_groupName;
  361. m_regionName = cachedTimeRegion.m_groupRegionName.m_regionName;
  362. m_stackDepth = cachedTimeRegion.m_stackDepth;
  363. m_startTick = cachedTimeRegion.m_startTick;
  364. m_endTick = cachedTimeRegion.m_endTick;
  365. m_threadId = AZStd::hash<AZStd::thread_id>{}(threadId);
  366. }
// Registers the per-sample entry type with the AZ serialization system.
void CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::Reflect(AZ::ReflectContext* context)
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
serializeContext->Class<CpuProfilingStatisticsSerializerEntry>()
->Version(1)
->Field("groupName", &CpuProfilingStatisticsSerializerEntry::m_groupName)
->Field("regionName", &CpuProfilingStatisticsSerializerEntry::m_regionName)
->Field("stackDepth", &CpuProfilingStatisticsSerializerEntry::m_stackDepth)
->Field("startTick", &CpuProfilingStatisticsSerializerEntry::m_startTick)
->Field("endTick", &CpuProfilingStatisticsSerializerEntry::m_endTick)
->Field("threadId", &CpuProfilingStatisticsSerializerEntry::m_threadId);
}
}
  381. } // namespace Profiler