BsCPUProfiler.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. #pragma once
  2. #include "BsCorePrerequisites.h"
  3. namespace BansheeEngine
  4. {
  5. class CPUProfilerReport;
  6. /**
  7. * @brief Provides various performance measuring methods.
  8. *
  9. * @note Thread safe. Matching begin*\end* calls
  10. * must belong to the same thread though.
  11. */
  12. class BS_CORE_EXPORT CPUProfiler
  13. {
  /**
   * @brief Helper that tracks the time elapsed between a start() and a stop() call.
   */
  class Timer
  {
  public:
      Timer();

      /**
       * @brief Marks the current time as the beginning of the measurement.
       */
      void start();

      /**
       * @brief Ends the measurement and computes the time that elapsed
       *        between the start time and now.
       */
      void stop();

      /**
       * @brief Sets the elapsed time back to zero.
       */
      void reset();

      /**
       * Elapsed time tracked by the timer.
       * NOTE(review): presumably in milliseconds, matching getCurrentTime() - confirm.
       */
      double time;

  private:
      /** Start time of the measurement, set by start(). */
      double startTime;

      /**
       * @brief Returns the time elapsed since the CPU was started, in milliseconds.
       */
      static inline double getCurrentTime();
  };
  42. /**
  43. * @brief Timer class responsible for tracking number of elapsed CPU cycles.
  44. */
  45. class TimerPrecise
  46. {
  47. public:
  48. TimerPrecise();
  49. /**
  50. * @brief Starts the counter marking the current number of executed
  51. * CPU cycles since CPU was started.
  52. */
  53. void start();
  54. /**
  55. * @brief Ends the counter and calculates the number of CPU cycles between
  56. * now and the start time.
  57. */
  58. void stop();
  59. /**
  60. * @brief Resets the cycle count to zero.
  61. */
  62. void reset();
  63. UINT64 cycles;
  64. private:
  65. UINT64 startCycles;
  66. /**
  67. * @brief Queries the CPU for the current number of CPU cycles executed since the
  68. * program was started.
  69. */
  70. static inline UINT64 getNumCycles();
  71. };
  72. /**
  73. * @brief Contains data about a single profiler sample (counting time in milliseconds).
  74. *
  75. * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
  76. * you are profiling, and it gets called 10 times, there will be 10 samples.
  77. */
  78. struct ProfileSample
  79. {
  80. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  81. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  82. { }
  83. double time;
  84. UINT64 numAllocs;
  85. UINT64 numFrees;
  86. };
  87. /**
  88. * @brief Contains data about a single precise profiler sample (counting CPU cycles).
  89. *
  90. * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
  91. * you are profiling, and it gets called 10 times, there will be 10 samples.
  92. */
  93. struct PreciseProfileSample
  94. {
  95. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  96. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  97. { }
  98. UINT64 cycles;
  99. UINT64 numAllocs;
  100. UINT64 numFrees;
  101. };
  102. /**
  103. * @brief Contains basic (time based) profiling data contained in a profiling block.
  104. */
  105. struct ProfileData
  106. {
  107. /**
  108. * @brief Begins a new sample and records current sample state. Previous sample must
  109. * not be active.
  110. */
  111. void beginSample();
  112. /**
  113. * @brief Records current sample state and creates a new sample based on start and end state.
  114. * Adds the sample to the sample list.
  115. */
  116. void endSample();
  117. /**
  118. * @brief Removes the last added sample from the sample list and makes it active again. You must
  119. * call endSample when done as if you called beginSample.
  120. */
  121. void resumeLastSample();
  122. ProfilerVector<ProfileSample> samples;
  123. Timer timer;
  124. UINT64 memAllocs;
  125. UINT64 memFrees;
  126. };
  127. /**
  128. * @brief Contains precise (CPU cycle based) profiling data contained in a profiling block.
  129. */
  130. struct PreciseProfileData
  131. {
  132. /**
  133. * @brief Begins a new sample and records current sample state. Previous sample must
  134. * not be active.
  135. */
  136. void beginSample();
  137. /**
  138. * @brief Records current sample state and creates a new sample based on start and end state.
  139. * Adds the sample to the sample list.
  140. */
  141. void endSample();
  142. /**
  143. * @brief Removes the last added sample from the sample list and makes it active again. You must
  144. * call endSample when done as if you called beginSample.
  145. */
  146. void resumeLastSample();
  147. ProfilerVector<PreciseProfileSample> samples;
  148. TimerPrecise timer;
  149. UINT64 memAllocs;
  150. UINT64 memFrees;
  151. };
  152. /**
  153. * @brief Contains all sampling information about a single named profiling block.
  154. * Each block has its own sampling information and optionally child blocks.
  155. */
  156. struct ProfiledBlock
  157. {
  158. ProfiledBlock();
  159. ~ProfiledBlock();
  160. /**
  161. * @brief Attempts to find a child block with the specified name. Returns
  162. * null if not found.
  163. */
  164. ProfiledBlock* findChild(const ProfilerString& name) const;
  165. ProfilerString name;
  166. ProfileData basic;
  167. PreciseProfileData precise;
  168. ProfilerVector<ProfiledBlock*> children;
  169. };
  /**
   * @brief Kind of CPU sampling in use.
   */
  enum class ActiveSamplingType
  {
      /** Sample using milliseconds. */
      Basic,

      /** Sample using CPU cycles. */
      Precise
  };
  178. /**
  179. * @brief Contains data about the currently active profiling block.
  180. */
  181. struct ActiveBlock
  182. {
  183. ActiveBlock()
  184. :type(ActiveSamplingType::Basic), block(nullptr)
  185. { }
  186. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  187. :type(_type), block(_block)
  188. { }
  189. ActiveSamplingType type;
  190. ProfiledBlock* block;
  191. };
  192. /**
  193. * @brief Contains data about an active profiling thread.
  194. */
  195. struct ThreadInfo
  196. {
  197. ThreadInfo();
  198. /**
  199. * @brief Starts profiling on the thread. New primary profiling block
  200. * is created with the given name.
  201. */
  202. void begin(const ProfilerString& _name);
  203. /**
  204. * @brief Ends profiling on the thread. You should end all samples before calling this,
  205. * but if you don't they will be terminated automatically.
  206. */
  207. void end();
  208. /**
  209. * @brief Deletes all internal profiling data and makes the object ready for another
  210. * iteration. Should be called after end in order to delete any existing data.
  211. */
  212. void reset();
  213. /**
  214. * @brief Gets the primary profiling block used by the thread.
  215. */
  216. ProfiledBlock* getBlock();
  217. /**
  218. * @brief Deletes the provided block.
  219. */
  220. void releaseBlock(ProfiledBlock* block);
  221. static BS_THREADLOCAL ThreadInfo* activeThread;
  222. bool isActive;
  223. ProfiledBlock* rootBlock;
  224. ProfilerStack<ActiveBlock> activeBlocks;
  225. ActiveBlock activeBlock;
  226. };
  227. public:
  228. CPUProfiler();
  229. ~CPUProfiler();
  230. /**
  231. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  232. * are made in that thread.
  233. *
  234. * @param name Name that will allow you to more easily identify the thread.
  235. */
  236. void beginThread(const ProfilerString& name);
  237. /**
  238. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  239. */
  240. void endThread();
  241. /**
  242. * @brief Begins sample measurement. Must be followed by endSample.
  243. *
  244. * @param name Unique name for the sample you can later use to find the sampling data.
  245. */
  246. void beginSample(const ProfilerString& name);
  247. /**
  248. * @brief Ends sample measurement.
  249. *
  250. * @param name Unique name for the sample.
  251. *
  252. * @note Unique name is primarily needed to more easily identify mismatched
  253. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  254. */
  255. void endSample(const ProfilerString& name);
  256. /**
  257. * @brief Begins sample measurement. Must be followed by endSample.
  258. *
  259. * @param name Unique name for the sample you can later use to find the sampling data.
  260. *
  261. * @note This method uses very precise CPU counters to determine variety of data not
  262. * provided by standard beginSample. However due to the way these counters work you should
  263. * not use this method for larger parts of code. It does not consider context switches so if the OS
  264. * decides to switch context between measurements you will get invalid data.
  265. */
  266. void beginSamplePrecise(const ProfilerString& name);
  267. /**
  268. * @brief Ends precise sample measurement.
  269. *
  270. * @param name Unique name for the sample.
  271. *
  272. * @note Unique name is primarily needed to more easily identify mismatched
  273. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  274. */
  275. void endSamplePrecise(const ProfilerString& name);
  276. /**
  277. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  278. */
  279. void reset();
  280. /**
  281. * @brief Generates a report from all previously sampled data.
  282. *
  283. * @note Generating a report will stop all in-progress sampling. You should make sure
  284. * you call endSample* manually beforehand so this doesn't have to happen.
  285. */
  286. CPUProfilerReport generateReport();
  287. private:
  288. /**
  289. * @brief Calculates overhead that the timing and sampling methods themselves introduce
  290. * so we might get more accurate measurements when creating reports.
  291. */
  292. void estimateTimerOverhead();
  293. private:
  294. double mBasicTimerOverhead;
  295. UINT64 mPreciseTimerOverhead;
  296. double mBasicSamplingOverheadMs;
  297. double mPreciseSamplingOverheadMs;
  298. UINT64 mBasicSamplingOverheadCycles;
  299. UINT64 mPreciseSamplingOverheadCycles;
  300. ProfilerVector<ThreadInfo*> mActiveThreads;
  301. BS_MUTEX(mThreadSync);
  302. };
  303. /**
  304. * @brief Profiling entry containing information about a single CPU profiling block
  305. * containing timing information.
  306. */
  307. struct BS_CORE_EXPORT CPUProfilerBasicSamplingEntry
  308. {
  309. struct BS_CORE_EXPORT Data
  310. {
  311. Data();
  312. String name; /**< Name of the profiling block. */
  313. UINT32 numCalls; /**< Number of times the block was entered. */
  314. UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
  315. UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
  316. double avgTimeMs; /**< Average time it took to execute the block, per call. In milliseconds. */
  317. double maxTimeMs; /**< Maximum time of a single call in the block. In milliseconds. */
  318. double totalTimeMs; /**< Total time the block took, across all calls. In milliseconds. */
  319. double avgSelfTimeMs; /**< Average time it took to execute the block, per call. Ignores time used by child blocks. In milliseconds. */
  320. double totalSelfTimeMs; /**< Total time the block took, across all calls. Ignores time used by child blocks. In milliseconds. */
  321. double estimatedSelfOverheadMs; /**< Estimated overhead of profiling methods, only for this exact block. In milliseconds. */
  322. double estimatedOverheadMs; /**< Estimated overhead of profiling methods for this block and all children. In milliseconds. */
  323. float pctOfParent; /**< Percent of parent block time this block took to execute. Ranging [0.0, 1.0]. */
  324. } data;
  325. ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
  326. };
  327. /**
  328. * @brief Profiling entry containing information about a single CPU profiling block
  329. * containing CPU cycle count based information.
  330. */
  331. struct BS_CORE_EXPORT CPUProfilerPreciseSamplingEntry
  332. {
  333. struct BS_CORE_EXPORT Data
  334. {
  335. Data();
  336. String name; /**< Name of the profiling block. */
  337. UINT32 numCalls; /**< Number of times the block was entered. */
  338. UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
  339. UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
  340. UINT64 avgCycles; /**< Average number of cycles it took to execute the block, per call. */
  341. UINT64 maxCycles; /**< Maximum number of cycles of a single call in the block. */
  342. UINT64 totalCycles; /**< Total number of cycles across all calls in the block. */
  343. UINT64 avgSelfCycles; /**< Average number of cycles it took to execute the block, per call. Ignores cycles used by child blocks. */
  344. UINT64 totalSelfCycles; /**< Total number of cycles across all calls in the block. Ignores time used by child blocks. */
  345. UINT64 estimatedSelfOverhead; /**< Estimated overhead of profiling methods, only for this exact block. In cycles. */
  346. UINT64 estimatedOverhead; /**< Estimated overhead of profiling methods for this block and all children. In cycles. */
  347. float pctOfParent; /**< Percent of parent block cycles used by this block. Ranging [0.0, 1.0]. */
  348. } data;
  349. ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
  350. };
  351. /**
  352. * @brief CPU profiling report containing all profiling information for a single profiling session.
  353. */
  354. class BS_CORE_EXPORT CPUProfilerReport
  355. {
  356. public:
  357. CPUProfilerReport();
  358. /**
  359. * @brief Returns root entry for the basic (time based) sampling data. Root entry always contains the
  360. * profiling block associated with the entire thread.
  361. */
  362. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  363. /**
  364. * @brief Returns root entry for the precise (CPU cycle based) sampling data. Root entry always contains the
  365. * profiling block associated with the entire thread.
  366. */
  367. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  368. private:
  369. friend class CPUProfiler;
  370. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  371. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  372. };
  373. }