Profiler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #include <Jolt/Jolt.h>
  5. #include <Jolt/Core/Profiler.h>
  6. #include <Jolt/Core/Color.h>
  7. #include <Jolt/Core/StringTools.h>
  8. #include <Jolt/Core/QuickSort.h>
  9. JPH_SUPPRESS_WARNINGS_STD_BEGIN
  10. #include <fstream>
  11. JPH_SUPPRESS_WARNINGS_STD_END
  12. #ifdef JPH_PROFILE_ENABLED
  13. JPH_NAMESPACE_BEGIN
  14. //////////////////////////////////////////////////////////////////////////////////////////
  15. // Profiler
  16. //////////////////////////////////////////////////////////////////////////////////////////
// Pointer to the profiler instance, starts out as nullptr
Profiler *Profiler::sInstance = nullptr;

#ifdef JPH_SHARED_LIBRARY

// When JPH_SHARED_LIBRARY is defined the per thread ProfileThread pointer lives in this file local
// thread_local variable and is only accessed through the two functions below
static thread_local ProfileThread *sInstance = nullptr;

// Returns the ProfileThread pointer stored for the calling thread (nullptr when none was stored)
ProfileThread *ProfileThread::sGetInstance()
{
	return sInstance;
}

// Stores inInstance as the ProfileThread pointer of the calling thread
void ProfileThread::sSetInstance(ProfileThread *inInstance)
{
	sInstance = inInstance;
}

#else

// Without JPH_SHARED_LIBRARY the per thread pointer is a thread_local static member of ProfileThread
thread_local ProfileThread *ProfileThread::sInstance = nullptr;

#endif

// Starts out false; presumably set once running out of sample slots has been reported (set elsewhere, confirm in Profiler.h / Profiler.inl)
bool ProfileMeasurement::sOutOfSamplesReported = false;
  32. void Profiler::UpdateReferenceTime()
  33. {
  34. mReferenceTick = GetProcessorTickCount();
  35. mReferenceTime = std::chrono::high_resolution_clock::now();
  36. }
  37. uint64 Profiler::GetProcessorTicksPerSecond() const
  38. {
  39. uint64 ticks = GetProcessorTickCount();
  40. std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now();
  41. return (ticks - mReferenceTick) * 1000000000ULL / std::chrono::duration_cast<std::chrono::nanoseconds>(time - mReferenceTime).count();
  42. }
  43. void Profiler::NextFrame()
  44. {
  45. std::lock_guard lock(mLock);
  46. if (mDump)
  47. {
  48. DumpInternal();
  49. mDump = false;
  50. }
  51. for (ProfileThread *t : mThreads)
  52. t->mCurrentSample = 0;
  53. UpdateReferenceTime();
  54. }
  55. void Profiler::Dump(const string_view &inTag)
  56. {
  57. mDump = true;
  58. mDumpTag = inTag;
  59. }
  60. void Profiler::AddThread(ProfileThread *inThread)
  61. {
  62. std::lock_guard lock(mLock);
  63. mThreads.push_back(inThread);
  64. }
  65. void Profiler::RemoveThread(ProfileThread *inThread)
  66. {
  67. std::lock_guard lock(mLock);
  68. Array<ProfileThread *>::iterator i = find(mThreads.begin(), mThreads.end(), inThread);
  69. JPH_ASSERT(i != mThreads.end());
  70. mThreads.erase(i);
  71. }
  72. void Profiler::sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator)
  73. {
  74. // Store depth
  75. ioSample->mDepth = uint8(min(255, inDepth));
  76. // Update color
  77. if (ioSample->mColor == 0)
  78. ioSample->mColor = inColor;
  79. else
  80. inColor = ioSample->mColor;
  81. // Start accumulating totals
  82. uint64 cycles_this_with_children = ioSample->mEndCycle - ioSample->mStartCycle;
  83. uint64 cycles_in_children = 0;
  84. // Loop over following samples until we find a sample that starts on or after our end
  85. ProfileSample *sample;
  86. for (sample = ioSample + 1; sample < inEnd && sample->mStartCycle < ioSample->mEndCycle; ++sample)
  87. {
  88. JPH_ASSERT(sample[-1].mStartCycle <= sample->mStartCycle);
  89. JPH_ASSERT(sample->mStartCycle >= ioSample->mStartCycle);
  90. JPH_ASSERT(sample->mEndCycle <= ioSample->mEndCycle);
  91. // This is a direct child of us, accumulate time
  92. cycles_in_children += sample->mEndCycle - sample->mStartCycle;
  93. // Recurse and skip over the children of this child
  94. sAggregate(inDepth + 1, inColor, sample, inEnd, ioAggregators, ioKeyToAggregator);
  95. }
  96. // Find the aggregator for this name / filename pair
  97. Aggregator *aggregator;
  98. KeyToAggregator::iterator aggregator_idx = ioKeyToAggregator.find(ioSample->mName);
  99. if (aggregator_idx == ioKeyToAggregator.end())
  100. {
  101. // Not found, add to map and insert in array
  102. ioKeyToAggregator.try_emplace(ioSample->mName, ioAggregators.size());
  103. ioAggregators.emplace_back(ioSample->mName);
  104. aggregator = &ioAggregators.back();
  105. }
  106. else
  107. {
  108. // Found
  109. aggregator = &ioAggregators[aggregator_idx->second];
  110. }
  111. // Add the measurement to the aggregator
  112. aggregator->AccumulateMeasurement(cycles_this_with_children, cycles_in_children);
  113. // Update ioSample to the last child of ioSample
  114. JPH_ASSERT(sample[-1].mStartCycle <= ioSample->mEndCycle);
  115. JPH_ASSERT(sample >= inEnd || sample->mStartCycle >= ioSample->mEndCycle);
  116. ioSample = sample - 1;
  117. }
  118. void Profiler::DumpInternal()
  119. {
  120. // Freeze data from threads
  121. // Note that this is not completely thread safe: As a profile sample is added mCurrentSample is incremented
  122. // but the data is not written until the sample finishes. So if we dump the profile information while
  123. // some other thread is running, we may get some garbage information from the previous frame
  124. Threads threads;
  125. for (ProfileThread *t : mThreads)
  126. threads.push_back({ t->mThreadName, t->mSamples, t->mSamples + t->mCurrentSample });
  127. // Shift all samples so that the first sample is at zero
  128. uint64 min_cycle = 0xffffffffffffffffUL;
  129. for (const ThreadSamples &t : threads)
  130. if (t.mSamplesBegin < t.mSamplesEnd)
  131. min_cycle = min(min_cycle, t.mSamplesBegin[0].mStartCycle);
  132. for (const ThreadSamples &t : threads)
  133. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  134. {
  135. s->mStartCycle -= min_cycle;
  136. s->mEndCycle -= min_cycle;
  137. }
  138. // Determine tag of this profile
  139. String tag;
  140. if (mDumpTag.empty())
  141. {
  142. // Next sequence number
  143. static int number = 0;
  144. ++number;
  145. tag = ConvertToString(number);
  146. }
  147. else
  148. {
  149. // Take provided tag
  150. tag = mDumpTag;
  151. mDumpTag.clear();
  152. }
  153. // Aggregate data across threads
  154. Aggregators aggregators;
  155. KeyToAggregator key_to_aggregators;
  156. for (const ThreadSamples &t : threads)
  157. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  158. sAggregate(0, Color::sGetDistinctColor(0).GetUInt32(), s, end, aggregators, key_to_aggregators);
  159. // Dump as list
  160. DumpList(tag.c_str(), aggregators);
  161. // Dump as chart
  162. DumpChart(tag.c_str(), threads, key_to_aggregators, aggregators);
  163. }
  164. static String sHTMLEncode(const char *inString)
  165. {
  166. String str(inString);
  167. StringReplace(str, "<", "&lt;");
  168. StringReplace(str, ">", "&gt;");
  169. return str;
  170. }
  171. void Profiler::DumpList(const char *inTag, const Aggregators &inAggregators)
  172. {
  173. // Open file
  174. std::ofstream f;
  175. f.open(StringFormat("profile_list_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  176. if (!f.is_open())
  177. return;
  178. // Write header
  179. f << R"(<!DOCTYPE html>
  180. <html>
  181. <head>
  182. <title>Profile List</title>
  183. <link rel="stylesheet" href="WebIncludes/semantic.min.css">
  184. <script type="text/javascript" src="WebIncludes/jquery-3.6.4.min.js"></script>
  185. <script type="text/javascript" src="WebIncludes/semantic.min.js"></script>
  186. <script type="text/javascript" src="WebIncludes/tablesort.js"></script>
  187. <script type="text/javascript">$(document).ready(function() { $('table').tablesort({ compare: function(a, b) { return isNaN(a) || isNaN(b)? a.localeCompare(b) : Number(a) - Number(b); } }); });</script>
  188. </head>
  189. <body class="minimal pushable">
  190. <table id="profile" class="ui sortable celled striped table">
  191. <thead>
  192. <tr>
  193. <th>Description</th>
  194. <th class="sorted descending">Total time with children (%)</th>
  195. <th>Total time (%)</th>
  196. <th>Calls</th>
  197. <th>&micro;s / call with children</th>
  198. <th>&micro;s / call</th>
  199. <th>Min. &micro;s / call</th>
  200. <th>Max. &micro;s / call</th>
  201. </tr>
  202. </thead>
  203. <tbody style="text-align: right;">
  204. )";
  205. // Get total time
  206. uint64 total_time = 0;
  207. for (const Aggregator &item : inAggregators)
  208. total_time += item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  209. // Get cycles per second
  210. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  211. // Sort the list
  212. Aggregators aggregators = inAggregators;
  213. QuickSort(aggregators.begin(), aggregators.end());
  214. // Write all aggregators
  215. for (const Aggregator &item : aggregators)
  216. {
  217. uint64 cycles_in_call_no_children = item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  218. char str[2048];
  219. snprintf(str, sizeof(str), R"(<tr>
  220. <td style="text-align: left;">%s</td>
  221. <td>%.1f</td>
  222. <td>%.1f</td>
  223. <td>%u</td>
  224. <td>%.2f</td>
  225. <td>%.2f</td>
  226. <td>%.2f</td>
  227. <td>%.2f</td>
  228. </tr>)",
  229. sHTMLEncode(item.mName).c_str(), // Description
  230. 100.0 * item.mTotalCyclesInCallWithChildren / total_time, // Total time with children
  231. 100.0 * cycles_in_call_no_children / total_time, // Total time no children
  232. item.mCallCounter, // Calls
  233. 1000000.0 * item.mTotalCyclesInCallWithChildren / cycles_per_second / item.mCallCounter, // us / call with children
  234. 1000000.0 * cycles_in_call_no_children / cycles_per_second / item.mCallCounter, // us / call no children
  235. 1000000.0 * item.mMinCyclesInCallWithChildren / cycles_per_second, // Min. us / call with children
  236. 1000000.0 * item.mMaxCyclesInCallWithChildren / cycles_per_second); // Max. us / call with children
  237. f << str;
  238. }
  239. // End table
  240. f << R"(</tbody></table></body></html>)";
  241. }
  242. void Profiler::DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators)
  243. {
  244. // Open file
  245. std::ofstream f;
  246. f.open(StringFormat("profile_chart_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  247. if (!f.is_open())
  248. return;
  249. // Write header
  250. f << R"(<!DOCTYPE html>
  251. <html>
  252. <head>
  253. <title>Profile Chart</title>
  254. <link rel="stylesheet" href="WebIncludes/profile_chart.css">
  255. <script type="text/javascript" src="WebIncludes/profile_chart.js"></script>
  256. </head>
  257. <body onload="startChart();">
  258. <script type="text/javascript">
  259. )";
  260. // Get cycles per second
  261. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  262. f << "var cycles_per_second = " << cycles_per_second << ";\n";
  263. // Dump samples
  264. f << "var threads = [\n";
  265. bool first_thread = true;
  266. for (const ThreadSamples &t : inThreads)
  267. {
  268. if (!first_thread)
  269. f << ",\n";
  270. first_thread = false;
  271. f << "{\nthread_name: \"" << t.mThreadName << "\",\naggregator: [";
  272. bool first = true;
  273. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  274. {
  275. if (!first)
  276. f << ",";
  277. first = false;
  278. f << inKeyToAggregators.find(s->mName)->second;
  279. }
  280. f << "],\ncolor: [";
  281. first = true;
  282. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  283. {
  284. if (!first)
  285. f << ",";
  286. first = false;
  287. Color c(s->mColor);
  288. f << StringFormat("\"#%02x%02x%02x\"", c.r, c.g, c.b);
  289. }
  290. f << "],\nstart: [";
  291. first = true;
  292. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  293. {
  294. if (!first)
  295. f << ",";
  296. first = false;
  297. f << s->mStartCycle;
  298. }
  299. f << "],\ncycles: [";
  300. first = true;
  301. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  302. {
  303. if (!first)
  304. f << ",";
  305. first = false;
  306. f << s->mEndCycle - s->mStartCycle;
  307. }
  308. f << "],\ndepth: [";
  309. first = true;
  310. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  311. {
  312. if (!first)
  313. f << ",";
  314. first = false;
  315. f << int(s->mDepth);
  316. }
  317. f << "]\n}";
  318. }
  319. // Dump aggregated data
  320. f << "];\nvar aggregated = {\nname: [";
  321. bool first = true;
  322. for (const Aggregator &a : inAggregators)
  323. {
  324. if (!first)
  325. f << ",";
  326. first = false;
  327. String name = "\"" + sHTMLEncode(a.mName) + "\"";
  328. f << name;
  329. }
  330. f << "],\ncalls: [";
  331. first = true;
  332. for (const Aggregator &a : inAggregators)
  333. {
  334. if (!first)
  335. f << ",";
  336. first = false;
  337. f << a.mCallCounter;
  338. }
  339. f << "],\nmin_cycles: [";
  340. first = true;
  341. for (const Aggregator &a : inAggregators)
  342. {
  343. if (!first)
  344. f << ",";
  345. first = false;
  346. f << a.mMinCyclesInCallWithChildren;
  347. }
  348. f << "],\nmax_cycles: [";
  349. first = true;
  350. for (const Aggregator &a : inAggregators)
  351. {
  352. if (!first)
  353. f << ",";
  354. first = false;
  355. f << a.mMaxCyclesInCallWithChildren;
  356. }
  357. f << "],\ncycles_per_frame: [";
  358. first = true;
  359. for (const Aggregator &a : inAggregators)
  360. {
  361. if (!first)
  362. f << ",";
  363. first = false;
  364. f << a.mTotalCyclesInCallWithChildren;
  365. }
  366. // Write footer
  367. f << R"(]};
  368. </script>
  369. <canvas id="canvas"></canvas>
  370. <div id="tooltip"></div>
  371. </tbody></table></body></html>)";
  372. }
  373. JPH_NAMESPACE_END
  374. #endif // JPH_PROFILE_ENABLED