Profiler.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #include <Jolt/Jolt.h>
  5. #include <Jolt/Core/Profiler.h>
  6. #include <Jolt/Core/Color.h>
  7. #include <Jolt/Core/StringTools.h>
  8. #include <Jolt/Core/QuickSort.h>
  9. JPH_SUPPRESS_WARNINGS_STD_BEGIN
  10. #include <fstream>
  11. JPH_SUPPRESS_WARNINGS_STD_END
  12. #ifdef JPH_PROFILE_ENABLED
  13. JPH_NAMESPACE_BEGIN
  14. //////////////////////////////////////////////////////////////////////////////////////////
  15. // Profiler
  16. //////////////////////////////////////////////////////////////////////////////////////////
  17. Profiler *Profiler::sInstance = nullptr;
  18. #ifdef JPH_SHARED_LIBRARY
  19. static thread_local ProfileThread *sInstance = nullptr;
  20. ProfileThread *ProfileThread::sGetInstance()
  21. {
  22. return sInstance;
  23. }
  24. void ProfileThread::sSetInstance(ProfileThread *inInstance)
  25. {
  26. sInstance = inInstance;
  27. }
  28. #else
  29. thread_local ProfileThread *ProfileThread::sInstance = nullptr;
  30. #endif
  31. bool ProfileMeasurement::sOutOfSamplesReported = false;
  32. void Profiler::NextFrame()
  33. {
  34. std::lock_guard lock(mLock);
  35. if (mDump)
  36. {
  37. DumpInternal();
  38. mDump = false;
  39. }
  40. for (ProfileThread *t : mThreads)
  41. t->mCurrentSample = 0;
  42. }
  43. void Profiler::Dump(const string_view &inTag)
  44. {
  45. mDump = true;
  46. mDumpTag = inTag;
  47. }
  48. void Profiler::AddThread(ProfileThread *inThread)
  49. {
  50. std::lock_guard lock(mLock);
  51. mThreads.push_back(inThread);
  52. }
  53. void Profiler::RemoveThread(ProfileThread *inThread)
  54. {
  55. std::lock_guard lock(mLock);
  56. Array<ProfileThread *>::iterator i = find(mThreads.begin(), mThreads.end(), inThread);
  57. JPH_ASSERT(i != mThreads.end());
  58. mThreads.erase(i);
  59. }
  60. void Profiler::sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator)
  61. {
  62. // Store depth
  63. ioSample->mDepth = uint8(min(255, inDepth));
  64. // Update color
  65. if (ioSample->mColor == 0)
  66. ioSample->mColor = inColor;
  67. else
  68. inColor = ioSample->mColor;
  69. // Start accumulating totals
  70. uint64 cycles_this_with_children = ioSample->mEndCycle - ioSample->mStartCycle;
  71. uint64 cycles_in_children = 0;
  72. // Loop over following samples until we find a sample that starts on or after our end
  73. ProfileSample *sample;
  74. for (sample = ioSample + 1; sample < inEnd && sample->mStartCycle < ioSample->mEndCycle; ++sample)
  75. {
  76. JPH_ASSERT(sample[-1].mStartCycle <= sample->mStartCycle);
  77. JPH_ASSERT(sample->mStartCycle >= ioSample->mStartCycle);
  78. JPH_ASSERT(sample->mEndCycle <= ioSample->mEndCycle);
  79. // This is a direct child of us, accumulate time
  80. cycles_in_children += sample->mEndCycle - sample->mStartCycle;
  81. // Recurse and skip over the children of this child
  82. sAggregate(inDepth + 1, inColor, sample, inEnd, ioAggregators, ioKeyToAggregator);
  83. }
  84. // Find the aggregator for this name / filename pair
  85. Aggregator *aggregator;
  86. KeyToAggregator::iterator aggregator_idx = ioKeyToAggregator.find(ioSample->mName);
  87. if (aggregator_idx == ioKeyToAggregator.end())
  88. {
  89. // Not found, add to map and insert in array
  90. ioKeyToAggregator.try_emplace(ioSample->mName, ioAggregators.size());
  91. ioAggregators.emplace_back(ioSample->mName);
  92. aggregator = &ioAggregators.back();
  93. }
  94. else
  95. {
  96. // Found
  97. aggregator = &ioAggregators[aggregator_idx->second];
  98. }
  99. // Add the measurement to the aggregator
  100. aggregator->AccumulateMeasurement(cycles_this_with_children, cycles_in_children);
  101. // Update ioSample to the last child of ioSample
  102. JPH_ASSERT(sample[-1].mStartCycle <= ioSample->mEndCycle);
  103. JPH_ASSERT(sample >= inEnd || sample->mStartCycle >= ioSample->mEndCycle);
  104. ioSample = sample - 1;
  105. }
  106. void Profiler::DumpInternal()
  107. {
  108. // Freeze data from threads
  109. // Note that this is not completely thread safe: As a profile sample is added mCurrentSample is incremented
  110. // but the data is not written until the sample finishes. So if we dump the profile information while
  111. // some other thread is running, we may get some garbage information from the previous frame
  112. Threads threads;
  113. for (ProfileThread *t : mThreads)
  114. threads.push_back({ t->mThreadName, t->mSamples, t->mSamples + t->mCurrentSample });
  115. // Shift all samples so that the first sample is at zero
  116. uint64 min_cycle = 0xffffffffffffffffUL;
  117. for (const ThreadSamples &t : threads)
  118. if (t.mSamplesBegin < t.mSamplesEnd)
  119. min_cycle = min(min_cycle, t.mSamplesBegin[0].mStartCycle);
  120. for (const ThreadSamples &t : threads)
  121. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  122. {
  123. s->mStartCycle -= min_cycle;
  124. s->mEndCycle -= min_cycle;
  125. }
  126. // Determine tag of this profile
  127. String tag;
  128. if (mDumpTag.empty())
  129. {
  130. // Next sequence number
  131. static int number = 0;
  132. ++number;
  133. tag = ConvertToString(number);
  134. }
  135. else
  136. {
  137. // Take provided tag
  138. tag = mDumpTag;
  139. mDumpTag.clear();
  140. }
  141. // Aggregate data across threads
  142. Aggregators aggregators;
  143. KeyToAggregator key_to_aggregators;
  144. for (const ThreadSamples &t : threads)
  145. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  146. sAggregate(0, Color::sGetDistinctColor(0).GetUInt32(), s, end, aggregators, key_to_aggregators);
  147. // Dump as list
  148. DumpList(tag.c_str(), aggregators);
  149. // Dump as chart
  150. DumpChart(tag.c_str(), threads, key_to_aggregators, aggregators);
  151. }
  152. static String sHTMLEncode(const char *inString)
  153. {
  154. String str(inString);
  155. StringReplace(str, "<", "&lt;");
  156. StringReplace(str, ">", "&gt;");
  157. return str;
  158. }
  159. void Profiler::DumpList(const char *inTag, const Aggregators &inAggregators)
  160. {
  161. // Open file
  162. std::ofstream f;
  163. f.open(StringFormat("profile_list_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  164. if (!f.is_open())
  165. return;
  166. // Write header
  167. f << R"(<!DOCTYPE html>
  168. <html>
  169. <head>
  170. <title>Profile List</title>
  171. <link rel="stylesheet" href="WebIncludes/semantic.min.css">
  172. <script type="text/javascript" src="WebIncludes/jquery-3.6.4.min.js"></script>
  173. <script type="text/javascript" src="WebIncludes/semantic.min.js"></script>
  174. <script type="text/javascript" src="WebIncludes/tablesort.js"></script>
  175. <script type="text/javascript">$(document).ready(function() { $('table').tablesort({ compare: function(a, b) { return isNaN(a) || isNaN(b)? a.localeCompare(b) : Number(a) - Number(b); } }); });</script>
  176. </head>
  177. <body class="minimal pushable">
  178. <table id="profile" class="ui sortable celled striped table">
  179. <thead>
  180. <tr>
  181. <th>Description</th>
  182. <th class="sorted descending">Total time with children (%)</th>
  183. <th>Total time (%)</th>
  184. <th>Calls</th>
  185. <th>&micro;s / call with children</th>
  186. <th>&micro;s / call</th>
  187. <th>Min. &micro;s / call</th>
  188. <th>Max. &micro;s / call</th>
  189. </tr>
  190. </thead>
  191. <tbody style="text-align: right;">
  192. )";
  193. // Get total time
  194. uint64 total_time = 0;
  195. for (const Aggregator &item : inAggregators)
  196. total_time += item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  197. // Get cycles per second
  198. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  199. // Sort the list
  200. Aggregators aggregators = inAggregators;
  201. QuickSort(aggregators.begin(), aggregators.end());
  202. // Write all aggregators
  203. for (const Aggregator &item : aggregators)
  204. {
  205. uint64 cycles_in_call_no_children = item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  206. char str[2048];
  207. snprintf(str, sizeof(str), R"(<tr>
  208. <td style="text-align: left;">%s</td>
  209. <td>%.1f</td>
  210. <td>%.1f</td>
  211. <td>%u</td>
  212. <td>%.2f</td>
  213. <td>%.2f</td>
  214. <td>%.2f</td>
  215. <td>%.2f</td>
  216. </tr>)",
  217. sHTMLEncode(item.mName).c_str(), // Description
  218. 100.0 * item.mTotalCyclesInCallWithChildren / total_time, // Total time with children
  219. 100.0 * cycles_in_call_no_children / total_time, // Total time no children
  220. item.mCallCounter, // Calls
  221. 1000000.0 * item.mTotalCyclesInCallWithChildren / cycles_per_second / item.mCallCounter, // us / call with children
  222. 1000000.0 * cycles_in_call_no_children / cycles_per_second / item.mCallCounter, // us / call no children
  223. 1000000.0 * item.mMinCyclesInCallWithChildren / cycles_per_second, // Min. us / call with children
  224. 1000000.0 * item.mMaxCyclesInCallWithChildren / cycles_per_second); // Max. us / call with children
  225. f << str;
  226. }
  227. // End table
  228. f << R"(</tbody></table></body></html>)";
  229. }
  230. void Profiler::DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators)
  231. {
  232. // Open file
  233. std::ofstream f;
  234. f.open(StringFormat("profile_chart_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
  235. if (!f.is_open())
  236. return;
  237. // Write header
  238. f << R"(<!DOCTYPE html>
  239. <html>
  240. <head>
  241. <title>Profile Chart</title>
  242. <link rel="stylesheet" href="WebIncludes/profile_chart.css">
  243. <script type="text/javascript" src="WebIncludes/profile_chart.js"></script>
  244. </head>
  245. <body onload="startChart();">
  246. <script type="text/javascript">
  247. )";
  248. // Get cycles per second
  249. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  250. f << "var cycles_per_second = " << cycles_per_second << ";\n";
  251. // Dump samples
  252. f << "var threads = [\n";
  253. bool first_thread = true;
  254. for (const ThreadSamples &t : inThreads)
  255. {
  256. if (!first_thread)
  257. f << ",\n";
  258. first_thread = false;
  259. f << "{\nthread_name: \"" << t.mThreadName << "\",\naggregator: [";
  260. bool first = true;
  261. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  262. {
  263. if (!first)
  264. f << ",";
  265. first = false;
  266. f << inKeyToAggregators.find(s->mName)->second;
  267. }
  268. f << "],\ncolor: [";
  269. first = true;
  270. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  271. {
  272. if (!first)
  273. f << ",";
  274. first = false;
  275. Color c(s->mColor);
  276. f << StringFormat("\"#%02x%02x%02x\"", c.r, c.g, c.b);
  277. }
  278. f << "],\nstart: [";
  279. first = true;
  280. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  281. {
  282. if (!first)
  283. f << ",";
  284. first = false;
  285. f << s->mStartCycle;
  286. }
  287. f << "],\ncycles: [";
  288. first = true;
  289. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  290. {
  291. if (!first)
  292. f << ",";
  293. first = false;
  294. f << s->mEndCycle - s->mStartCycle;
  295. }
  296. f << "],\ndepth: [";
  297. first = true;
  298. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  299. {
  300. if (!first)
  301. f << ",";
  302. first = false;
  303. f << int(s->mDepth);
  304. }
  305. f << "]\n}";
  306. }
  307. // Dump aggregated data
  308. f << "];\nvar aggregated = {\nname: [";
  309. bool first = true;
  310. for (const Aggregator &a : inAggregators)
  311. {
  312. if (!first)
  313. f << ",";
  314. first = false;
  315. String name = "\"" + sHTMLEncode(a.mName) + "\"";
  316. f << name;
  317. }
  318. f << "],\ncalls: [";
  319. first = true;
  320. for (const Aggregator &a : inAggregators)
  321. {
  322. if (!first)
  323. f << ",";
  324. first = false;
  325. f << a.mCallCounter;
  326. }
  327. f << "],\nmin_cycles: [";
  328. first = true;
  329. for (const Aggregator &a : inAggregators)
  330. {
  331. if (!first)
  332. f << ",";
  333. first = false;
  334. f << a.mMinCyclesInCallWithChildren;
  335. }
  336. f << "],\nmax_cycles: [";
  337. first = true;
  338. for (const Aggregator &a : inAggregators)
  339. {
  340. if (!first)
  341. f << ",";
  342. first = false;
  343. f << a.mMaxCyclesInCallWithChildren;
  344. }
  345. f << "],\ncycles_per_frame: [";
  346. first = true;
  347. for (const Aggregator &a : inAggregators)
  348. {
  349. if (!first)
  350. f << ",";
  351. first = false;
  352. f << a.mTotalCyclesInCallWithChildren;
  353. }
  354. // Write footer
  355. f << R"(]};
  356. </script>
  357. <canvas id="canvas"></canvas>
  358. <div id="tooltip"></div>
  359. </tbody></table></body></html>)";
  360. }
  361. JPH_NAMESPACE_END
  362. #endif // JPH_PROFILE_ENABLED