Profiler.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  2. // SPDX-License-Identifier: MIT
  3. #include <Jolt/Jolt.h>
  4. #include <Jolt/Core/Profiler.h>
  5. #include <Jolt/Core/Color.h>
  6. #include <Jolt/Core/StringTools.h>
  7. JPH_SUPPRESS_WARNINGS_STD_BEGIN
  8. #include <fstream>
  9. JPH_SUPPRESS_WARNINGS_STD_END
  10. #ifdef JPH_PROFILE_ENABLED
  11. JPH_NAMESPACE_BEGIN
  12. //////////////////////////////////////////////////////////////////////////////////////////
  13. // Profiler
  14. //////////////////////////////////////////////////////////////////////////////////////////
  15. Profiler Profiler::sInstance;
  16. thread_local ProfileThread *ProfileThread::sInstance = nullptr;
  17. bool ProfileMeasurement::sOutOfSamplesReported = false;
  18. void Profiler::NextFrame()
  19. {
  20. lock_guard lock(mLock);
  21. if (mDump)
  22. {
  23. DumpInternal();
  24. mDump = false;
  25. }
  26. for (ProfileThread *t : mThreads)
  27. t->mCurrentSample = 0;
  28. }
  29. void Profiler::Dump(string inTag)
  30. {
  31. mDump = true;
  32. mDumpTag = inTag;
  33. }
  34. void Profiler::AddThread(ProfileThread *inThread)
  35. {
  36. lock_guard lock(mLock);
  37. mThreads.push_back(inThread);
  38. }
  39. void Profiler::RemoveThread(ProfileThread *inThread)
  40. {
  41. lock_guard lock(mLock);
  42. vector<ProfileThread *>::iterator i = find(mThreads.begin(), mThreads.end(), inThread);
  43. JPH_ASSERT(i != mThreads.end());
  44. mThreads.erase(i);
  45. }
  46. void Profiler::sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator)
  47. {
  48. // Store depth
  49. ioSample->mDepth = uint8(min(255, inDepth));
  50. // Update color
  51. if (ioSample->mColor == 0)
  52. ioSample->mColor = inColor;
  53. else
  54. inColor = ioSample->mColor;
  55. // Start accumulating totals
  56. uint64 cycles_this_with_children = ioSample->mEndCycle - ioSample->mStartCycle;
  57. uint64 cycles_in_children = 0;
  58. // Loop over following samples until we find a sample that starts on or after our end
  59. ProfileSample *sample;
  60. for (sample = ioSample + 1; sample < inEnd && sample->mStartCycle < ioSample->mEndCycle; ++sample)
  61. {
  62. JPH_ASSERT(sample[-1].mStartCycle <= sample->mStartCycle);
  63. JPH_ASSERT(sample->mStartCycle >= ioSample->mStartCycle);
  64. JPH_ASSERT(sample->mEndCycle <= ioSample->mEndCycle);
  65. // This is a direct child of us, accumulate time
  66. cycles_in_children += sample->mEndCycle - sample->mStartCycle;
  67. // Recurse and skip over the children of this child
  68. sAggregate(inDepth + 1, inColor, sample, inEnd, ioAggregators, ioKeyToAggregator);
  69. }
  70. // Find the aggregator for this name / filename pair
  71. Aggregator *aggregator;
  72. KeyToAggregator::iterator aggregator_idx = ioKeyToAggregator.find(ioSample->mName);
  73. if (aggregator_idx == ioKeyToAggregator.end())
  74. {
  75. // Not found, add to map and insert in array
  76. ioKeyToAggregator.try_emplace(ioSample->mName, ioAggregators.size());
  77. ioAggregators.emplace_back(ioSample->mName);
  78. aggregator = &ioAggregators.back();
  79. }
  80. else
  81. {
  82. // Found
  83. aggregator = &ioAggregators[aggregator_idx->second];
  84. }
  85. // Add the measurement to the aggregator
  86. aggregator->AccumulateMeasurement(cycles_this_with_children, cycles_in_children);
  87. // Update ioSample to the last child of ioSample
  88. JPH_ASSERT(sample[-1].mStartCycle <= ioSample->mEndCycle);
  89. JPH_ASSERT(sample >= inEnd || sample->mStartCycle >= ioSample->mEndCycle);
  90. ioSample = sample - 1;
  91. }
  92. void Profiler::DumpInternal()
  93. {
  94. // Freeze data from threads
  95. // Note that this is not completely thread safe: As a profile sample is added mCurrentSample is incremented
  96. // but the data is not written until the sample finishes. So if we dump the profile information while
  97. // some other thread is running, we may get some garbage information from the previous frame
  98. Threads threads;
  99. for (ProfileThread *t : mThreads)
  100. threads.push_back({ t->mThreadName, t->mSamples, t->mSamples + t->mCurrentSample });
  101. // Shift all samples so that the first sample is at zero
  102. uint64 min_cycle = 0xffffffffffffffffUL;
  103. for (const ThreadSamples &t : threads)
  104. if (t.mSamplesBegin < t.mSamplesEnd)
  105. min_cycle = min(min_cycle, t.mSamplesBegin[0].mStartCycle);
  106. for (const ThreadSamples &t : threads)
  107. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  108. {
  109. s->mStartCycle -= min_cycle;
  110. s->mEndCycle -= min_cycle;
  111. }
  112. // Determine tag of this profile
  113. string tag;
  114. if (mDumpTag.empty())
  115. {
  116. // Next sequence number
  117. static int number = 0;
  118. ++number;
  119. tag = ConvertToString(number);
  120. }
  121. else
  122. {
  123. // Take provided tag
  124. tag = mDumpTag;
  125. mDumpTag.clear();
  126. }
  127. // Aggregate data across threads
  128. Aggregators aggregators;
  129. KeyToAggregator key_to_aggregators;
  130. for (const ThreadSamples &t : threads)
  131. for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  132. sAggregate(0, Color::sGetDistinctColor(0).GetUInt32(), s, end, aggregators, key_to_aggregators);
  133. // Dump as list
  134. DumpList(tag, aggregators);
  135. // Dump as chart
  136. DumpChart(tag, threads, key_to_aggregators, aggregators);
  137. }
  138. static string sHTMLEncode(string inString)
  139. {
  140. string str = inString;
  141. StringReplace(str, "<", "&lt;");
  142. StringReplace(str, ">", "&gt;");
  143. return str;
  144. }
  145. void Profiler::DumpList(string inTag, const Aggregators &inAggregators)
  146. {
  147. // Open file
  148. ofstream f;
  149. f.open(StringFormat("profile_list_%s.html", inTag.c_str()).c_str(), ofstream::out | ofstream::trunc);
  150. if (!f.is_open())
  151. return;
  152. // Write header
  153. f << R"(<!DOCTYPE html>
  154. <html>
  155. <head>
  156. <title>Profile List</title>
  157. <link rel="stylesheet" href="WebIncludes/semantic.min.css">
  158. <script type="text/javascript" src="WebIncludes/jquery-3.2.1.min.js"></script>
  159. <script type="text/javascript" src="WebIncludes/semantic.min.js"></script>
  160. <script type="text/javascript" src="WebIncludes/tablesort.js"></script>
  161. <script type="text/javascript">$(document).ready(function() { $('table').tablesort({ compare: function(a, b) { return isNaN(a) || isNaN(b)? a.localeCompare(b) : Number(a) - Number(b); } }); });</script>
  162. </head>
  163. <body class="minimal pushable">
  164. <table id="profile" class="ui sortable celled striped table">
  165. <thead>
  166. <tr>
  167. <th>Description</th>
  168. <th class="sorted descending">Total time with children (%)</th>
  169. <th>Total time (%)</th>
  170. <th>Calls</th>
  171. <th>&micro;s / call with children</th>
  172. <th>&micro;s / call</th>
  173. <th>Min. &micro;s / call</th>
  174. <th>Max. &micro;s / call</th>
  175. </tr>
  176. </thead>
  177. <tbody style="text-align: right;">
  178. )";
  179. // Get total time
  180. uint64 total_time = 0;
  181. for (const Aggregator &item : inAggregators)
  182. total_time += item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  183. // Get cycles per second
  184. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  185. // Sort the list
  186. Aggregators aggregators = inAggregators;
  187. sort(aggregators.begin(), aggregators.end());
  188. // Write all aggregators
  189. for (const Aggregator &item : aggregators)
  190. {
  191. uint64 cycles_in_call_no_children = item.mTotalCyclesInCallWithChildren - item.mTotalCyclesInChildren;
  192. char str[2048];
  193. snprintf(str, sizeof(str), R"(<tr>
  194. <td style="text-align: left;">%s</td>
  195. <td>%.1f</td>
  196. <td>%.1f</td>
  197. <td>%u</td>
  198. <td>%.2f</td>
  199. <td>%.2f</td>
  200. <td>%.2f</td>
  201. <td>%.2f</td>
  202. </tr>)",
  203. sHTMLEncode(item.mName).c_str(), // Description
  204. 100.0 * item.mTotalCyclesInCallWithChildren / total_time, // Total time with children
  205. 100.0 * cycles_in_call_no_children / total_time, // Total time no children
  206. item.mCallCounter, // Calls
  207. 1000000.0 * item.mTotalCyclesInCallWithChildren / cycles_per_second / item.mCallCounter, // us / call with children
  208. 1000000.0 * cycles_in_call_no_children / cycles_per_second / item.mCallCounter, // us / call no children
  209. 1000000.0 * item.mMinCyclesInCallWithChildren / cycles_per_second, // Min. us / call with children
  210. 1000000.0 * item.mMaxCyclesInCallWithChildren / cycles_per_second); // Max. us / call with children
  211. f << str;
  212. }
  213. // End table
  214. f << R"(</tbody></table></body></html>)";
  215. }
  216. void Profiler::DumpChart(string inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators)
  217. {
  218. // Open file
  219. ofstream f;
  220. f.open(StringFormat("profile_chart_%s.html", inTag.c_str()).c_str(), ofstream::out | ofstream::trunc);
  221. if (!f.is_open())
  222. return;
  223. // Write header
  224. f << R"(<!DOCTYPE html>
  225. <html>
  226. <head>
  227. <title>Profile Chart</title>
  228. <link rel="stylesheet" href="WebIncludes/profile_chart.css">
  229. <script type="text/javascript" src="WebIncludes/profile_chart.js"></script>
  230. </head>
  231. <body onload="startChart();">
  232. <script type="text/javascript">
  233. )";
  234. // Get cycles per second
  235. uint64 cycles_per_second = GetProcessorTicksPerSecond();
  236. f << "var cycles_per_second = " << cycles_per_second << ";\n";
  237. // Dump samples
  238. f << "var threads = [\n";
  239. bool first_thread = true;
  240. for (const ThreadSamples &t : inThreads)
  241. {
  242. if (!first_thread)
  243. f << ",\n";
  244. first_thread = false;
  245. f << "{\nthread_name: \"" << t.mThreadName << "\",\naggregator: [";
  246. bool first = true;
  247. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  248. {
  249. if (!first)
  250. f << ",";
  251. first = false;
  252. f << inKeyToAggregators.find(s->mName)->second;
  253. }
  254. f << "],\ncolor: [";
  255. first = true;
  256. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  257. {
  258. if (!first)
  259. f << ",";
  260. first = false;
  261. Color c(s->mColor);
  262. f << StringFormat("\"#%02x%02x%02x\"", c.r, c.g, c.b);
  263. }
  264. f << "],\nstart: [";
  265. first = true;
  266. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  267. {
  268. if (!first)
  269. f << ",";
  270. first = false;
  271. f << s->mStartCycle;
  272. }
  273. f << "],\ncycles: [";
  274. first = true;
  275. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  276. {
  277. if (!first)
  278. f << ",";
  279. first = false;
  280. f << s->mEndCycle - s->mStartCycle;
  281. }
  282. f << "],\ndepth: [";
  283. first = true;
  284. for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
  285. {
  286. if (!first)
  287. f << ",";
  288. first = false;
  289. f << int(s->mDepth);
  290. }
  291. f << "]\n}";
  292. }
  293. // Dump aggregated data
  294. f << "];\nvar aggregated = {\nname: [";
  295. bool first = true;
  296. for (const Aggregator &a : inAggregators)
  297. {
  298. if (!first)
  299. f << ",";
  300. first = false;
  301. string name = "\"" + sHTMLEncode(a.mName) + "\"";
  302. f << name;
  303. }
  304. f << "],\ncalls: [";
  305. first = true;
  306. for (const Aggregator &a : inAggregators)
  307. {
  308. if (!first)
  309. f << ",";
  310. first = false;
  311. f << a.mCallCounter;
  312. }
  313. f << "],\nmin_cycles: [";
  314. first = true;
  315. for (const Aggregator &a : inAggregators)
  316. {
  317. if (!first)
  318. f << ",";
  319. first = false;
  320. f << a.mMinCyclesInCallWithChildren;
  321. }
  322. f << "],\nmax_cycles: [";
  323. first = true;
  324. for (const Aggregator &a : inAggregators)
  325. {
  326. if (!first)
  327. f << ",";
  328. first = false;
  329. f << a.mMaxCyclesInCallWithChildren;
  330. }
  331. f << "],\ncycles_per_frame: [";
  332. first = true;
  333. for (const Aggregator &a : inAggregators)
  334. {
  335. if (!first)
  336. f << ",";
  337. first = false;
  338. f << a.mTotalCyclesInCallWithChildren;
  339. }
  340. // Write footer
  341. f << R"(]};
  342. </script>
  343. <canvas id="canvas"></canvas>
  344. <div id="tooltip"></div>
  345. </tbody></table></body></html>)";
  346. }
  347. JPH_NAMESPACE_END
  348. #endif // JPH_PROFILE_ENABLED