// Profiler.cpp
  // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  // SPDX-License-Identifier: MIT
  4. #include <Jolt/Jolt.h>
  5. #include <Jolt/Core/Profiler.h>
  6. #include <Jolt/Core/Color.h>
  7. #include <Jolt/Core/StringTools.h>
  8. #include <Jolt/Core/QuickSort.h>
  9. JPH_SUPPRESS_WARNINGS_STD_BEGIN
  10. #include <fstream>
  11. JPH_SUPPRESS_WARNINGS_STD_END
  12. #ifdef JPH_PROFILE_ENABLED
  13. JPH_NAMESPACE_BEGIN
  //////////////////////////////////////////////////////////////////////////////////////////
  // Profiler
  //////////////////////////////////////////////////////////////////////////////////////////
  17. Profiler *Profiler::sInstance = nullptr;
  18. thread_local ProfileThread *ProfileThread::sInstance = nullptr;
  19. bool ProfileMeasurement::sOutOfSamplesReported = false;
  20. void Profiler::NextFrame()
  21. {
  22. std::lock_guard lock(mLock);
  23. if (mDump)
  24. {
  25. DumpInternal();
  26. mDump = false;
  27. }
  28. for (ProfileThread *t : mThreads)
  29. t->mCurrentSample = 0;
  30. }
  31. void Profiler::Dump(const string_view &inTag)
  32. {
  33. mDump = true;
  34. mDumpTag = inTag;
  35. }
  36. void Profiler::AddThread(ProfileThread *inThread)
  37. {
  38. std::lock_guard lock(mLock);
  39. mThreads.push_back(inThread);
  40. }
  41. void Profiler::RemoveThread(ProfileThread *inThread)
  42. {
  43. std::lock_guard lock(mLock);
  44. Array<ProfileThread *>::iterator i = find(mThreads.begin(), mThreads.end(), inThread);
  45. JPH_ASSERT(i != mThreads.end());
  46. mThreads.erase(i);
  47. }
  48. void Profiler::sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator)
  49. {
  50. // Store depth
  51. ioSample->mDepth = uint8(min(255, inDepth));
  52. // Update color
  53. if (ioSample->mColor == 0)
  54. ioSample->mColor = inColor;
  55. else
  56. inColor = ioSample->mColor;
  57. // Start accumulating totals
  58. uint64 cycles_this_with_children = ioSample->mEndCycle - ioSample->mStartCycle;
  59. uint64 cycles_in_children = 0;
  60. // Loop over following samples until we find a sample that starts on or after our end
  61. ProfileSample *sample;
  62. for (sample = ioSample + 1; sample < inEnd && sample->mStartCycle < ioSample->mEndCycle; ++sample)
  63. {
  64. JPH_ASSERT(sample[-1].mStartCycle <= sample->mStartCycle);
  65. JPH_ASSERT(sample->mStartCycle >= ioSample->mStartCycle);
  66. JPH_ASSERT(sample->mEndCycle <= ioSample->mEndCycle);
  67. // This is a direct child of us, accumulate time
  68. cycles_in_children += sample->mEndCycle - sample->mStartCycle;
  69. // Recurse and skip over the children of this child
  70. sAggregate(inDepth + 1, inColor, sample, inEnd, ioAggregators, ioKeyToAggregator);
  71. }
  72. // Find the aggregator for this name / filename pair
  73. Aggregator *aggregator;
  74. KeyToAggregator::iterator aggregator_idx = ioKeyToAggregator.find(ioSample->mName);
  75. if (aggregator_idx == ioKeyToAggregator.end())
  76. {
  77. // Not found, add to map and insert in array
  78. ioKeyToAggregator.try_emplace(ioSample->mName, ioAggregators.size());
  79. ioAggregators.emplace_back(ioSample->mName);
  80. aggregator = &ioAggregators.back();
  81. }
  82. else
  83. {
  84. // Found
  85. aggregator = &ioAggregators[aggregator_idx->second];
  86. }
  87. // Add the measurement to the aggregator
  88. aggregator->AccumulateMeasurement(cycles_this_with_children, cycles_in_children);
  89. // Update ioSample to the last child of ioSample
  90. JPH_ASSERT(sample[-1].mStartCycle <= ioSample->mEndCycle);
  91. JPH_ASSERT(sample >= inEnd || sample->mStartCycle >= ioSample->mEndCycle);
  92. ioSample = sample - 1;
  93. }
  94. void Profiler::DumpInternal()
  95. {
  96. // Freeze data from threads
  97. // Note that this is not completely thread safe: As a profile sample is added mCurrentSample is incremented
  98. // but the data is not written until the sample finishes. So if we dump the profile information while
  99. // some other thread is running, we may get some garbage information from the previous frame
  100. Threads threads;
  101. for (ProfileThread *t : mThreads)
  102. threads.push_back({ t->mThreadName, t->mSamples, t->mSamples + t->mCurrentSample });
  103. // Shift all samples so that the first sample is at zero
  104. uint64 min_cycle = 0xffffffffffffffffUL;
  105. for (const ThreadSamples &t : threads)
  106. if (t.mSamplesBegin < t.mSamplesEnd)
  107. min_cycle = min(min_cycle, t.mSamplesBegin[0].mStartCycle);
  108. for (const ThreadSamples &t : threads)
  109. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  110. {
  111. s->mStartCycle -= min_cycle;
  112. s->mEndCycle -= min_cycle;
  113. }
  114. // Determine tag of this profile
  115. String tag;
  116. if (mDumpTag.empty())
  117. {
  118. // Next sequence number
  119. static int number = 0;
  120. ++number;
  121. tag = ConvertToString(number);
  122. }
  123. else
  124. {
  125. // Take provided tag
  126. tag = mDumpTag;
  127. mDumpTag.clear();
  128. }
  129. // Aggregate data across threads
  130. Aggregators aggregators;
  131. KeyToAggregator key_to_aggregators;
  132. for (const ThreadSamples &t : threads)
  133. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  134. sAggregate(0, Color::sGetDistinctColor(0).GetUInt32(), s, end, aggregators, key_to_aggregators);
  135. // Dump as list
  136. DumpList(tag.c_str(), aggregators);
  137. // Dump as chart
  138. DumpChart(tag.c_str(), threads, key_to_aggregators, aggregators);
  139. }
  140. static String sHTMLEncode(const char *inString)
  141. {
  142. String str(inString);
  143. StringReplace(str, "<", "&lt;");
  144. StringReplace(str, ">", "&gt;");
  145. return str;
  146. }
  147. void Profiler::DumpList(const char *inTag, const Aggregators &inAggregators)
  148. {
  149. // Open file
  150. std::ofstream f;
  151. f.open(StringFormat("profile_list_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  152. if (!f.is_open())
  153. return;
  154. // Write header
  155. f << R"(<!DOCTYPE html>
  156. <html>
  157. <head>
  158. <title>Profile List</title>
  159. <link rel="stylesheet" href="WebIncludes/semantic.min.css">
  160. <script type="text/javascript" src="WebIncludes/jquery-3.6.4.min.js"></script>
  161. <script type="text/javascript" src="WebIncludes/semantic.min.js"></script>
  162. <script type="text/javascript" src="WebIncludes/tablesort.js"></script>
  163. <script type="text/javascript">$(document).ready(function() { $('table').tablesort({ compare: function(a, b) { return isNaN(a) || isNaN(b)? a.localeCompare(b) : Number(a) - Number(b); } }); });</script>
  164. </head>
  165. <body class="minimal pushable">
  166. <table id="profile" class="ui sortable celled striped table">
  167. <thead>
  168. <tr>
  169. <th>Description</th>
  170. <th class="sorted descending">Total time with children (%)</th>
  171. <th>Total time (%)</th>
  172. <th>Calls</th>
  173. <th>&micro;s / call with children</th>
  174. <th>&micro;s / call</th>
  175. <th>Min. &micro;s / call</th>
  176. <th>Max. &micro;s / call</th>
  177. </tr>
  178. </thead>
  179. <tbody style="text-align: right;">
  180. )";
  181. // Get total time
  182. uint64 total_time = 0;
  183. for (const Aggregator &item : inAggregators)
  184. total_time += item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  185. // Get cycles per second
  186. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  187. // Sort the list
  188. Aggregators aggregators = inAggregators;
  189. QuickSort(aggregators.begin(), aggregators.end());
  190. // Write all aggregators
  191. for (const Aggregator &item : aggregators)
  192. {
  193. uint64 cycles_in_call_no_children = item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  194. char str[2048];
  195. snprintf(str, sizeof(str), R"(<tr>
  196. <td style="text-align: left;">%s</td>
  197. <td>%.1f</td>
  198. <td>%.1f</td>
  199. <td>%u</td>
  200. <td>%.2f</td>
  201. <td>%.2f</td>
  202. <td>%.2f</td>
  203. <td>%.2f</td>
  204. </tr>)",
  205. sHTMLEncode(item.mName).c_str(), // Description
  206. 100.0 * item.mTotalCyclesInCallWithChildren / total_time, // Total time with children
  207. 100.0 * cycles_in_call_no_children / total_time, // Total time no children
  208. item.mCallCounter, // Calls
  209. 1000000.0 * item.mTotalCyclesInCallWithChildren / cycles_per_second / item.mCallCounter, // us / call with children
  210. 1000000.0 * cycles_in_call_no_children / cycles_per_second / item.mCallCounter, // us / call no children
  211. 1000000.0 * item.mMinCyclesInCallWithChildren / cycles_per_second, // Min. us / call with children
  212. 1000000.0 * item.mMaxCyclesInCallWithChildren / cycles_per_second); // Max. us / call with children
  213. f << str;
  214. }
  215. // End table
  216. f << R"(</tbody></table></body></html>)";
  217. }
  218. void Profiler::DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators)
  219. {
  220. // Open file
  221. std::ofstream f;
  222. f.open(StringFormat("profile_chart_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  223. if (!f.is_open())
  224. return;
  225. // Write header
  226. f << R"(<!DOCTYPE html>
  227. <html>
  228. <head>
  229. <title>Profile Chart</title>
  230. <link rel="stylesheet" href="WebIncludes/profile_chart.css">
  231. <script type="text/javascript" src="WebIncludes/profile_chart.js"></script>
  232. </head>
  233. <body onload="startChart();">
  234. <script type="text/javascript">
  235. )";
  236. // Get cycles per second
  237. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  238. f << "var cycles_per_second = " << cycles_per_second << ";\n";
  239. // Dump samples
  240. f << "var threads = [\n";
  241. bool first_thread = true;
  242. for (const ThreadSamples &t : inThreads)
  243. {
  244. if (!first_thread)
  245. f << ",\n";
  246. first_thread = false;
  247. f << "{\nthread_name: \"" << t.mThreadName << "\",\naggregator: [";
  248. bool first = true;
  249. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  250. {
  251. if (!first)
  252. f << ",";
  253. first = false;
  254. f << inKeyToAggregators.find(s->mName)->second;
  255. }
  256. f << "],\ncolor: [";
  257. first = true;
  258. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  259. {
  260. if (!first)
  261. f << ",";
  262. first = false;
  263. Color c(s->mColor);
  264. f << StringFormat("\"#%02x%02x%02x\"", c.r, c.g, c.b);
  265. }
  266. f << "],\nstart: [";
  267. first = true;
  268. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  269. {
  270. if (!first)
  271. f << ",";
  272. first = false;
  273. f << s->mStartCycle;
  274. }
  275. f << "],\ncycles: [";
  276. first = true;
  277. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  278. {
  279. if (!first)
  280. f << ",";
  281. first = false;
  282. f << s->mEndCycle - s->mStartCycle;
  283. }
  284. f << "],\ndepth: [";
  285. first = true;
  286. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  287. {
  288. if (!first)
  289. f << ",";
  290. first = false;
  291. f << int(s->mDepth);
  292. }
  293. f << "]\n}";
  294. }
  295. // Dump aggregated data
  296. f << "];\nvar aggregated = {\nname: [";
  297. bool first = true;
  298. for (const Aggregator &a : inAggregators)
  299. {
  300. if (!first)
  301. f << ",";
  302. first = false;
  303. String name = "\"" + sHTMLEncode(a.mName) + "\"";
  304. f << name;
  305. }
  306. f << "],\ncalls: [";
  307. first = true;
  308. for (const Aggregator &a : inAggregators)
  309. {
  310. if (!first)
  311. f << ",";
  312. first = false;
  313. f << a.mCallCounter;
  314. }
  315. f << "],\nmin_cycles: [";
  316. first = true;
  317. for (const Aggregator &a : inAggregators)
  318. {
  319. if (!first)
  320. f << ",";
  321. first = false;
  322. f << a.mMinCyclesInCallWithChildren;
  323. }
  324. f << "],\nmax_cycles: [";
  325. first = true;
  326. for (const Aggregator &a : inAggregators)
  327. {
  328. if (!first)
  329. f << ",";
  330. first = false;
  331. f << a.mMaxCyclesInCallWithChildren;
  332. }
  333. f << "],\ncycles_per_frame: [";
  334. first = true;
  335. for (const Aggregator &a : inAggregators)
  336. {
  337. if (!first)
  338. f << ",";
  339. first = false;
  340. f << a.mTotalCyclesInCallWithChildren;
  341. }
  342. // Write footer
  343. f << R"(]};
  344. </script>
  345. <canvas id="canvas"></canvas>
  346. <div id="tooltip"></div>
  347. </tbody></table></body></html>)";
  348. }
  349. JPH_NAMESPACE_END
  350. #endif // JPH_PROFILE_ENABLED