BsProfilerCPU.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. #pragma once
  2. #include "BsCorePrerequisites.h"
  3. #include "BsModule.h"
  4. namespace BansheeEngine
  5. {
  6. class CPUProfilerReport;
  7. /**
  8. * @brief Provides various performance measuring methods.
  9. *
  10. * @note Thread safe. Matching begin*\end* calls
  11. * must belong to the same thread though.
  12. */
  13. class BS_CORE_EXPORT ProfilerCPU : public Module<ProfilerCPU>
  14. {
  15. /**
  16. * @brief Timer class responsible for tracking elapsed time.
  17. */
  18. class Timer
  19. {
  20. public:
  21. Timer();
  22. /**
  23. * @brief Sets the start time for the timer.
  24. */
  25. void start();
  26. /**
  27. * @brief Stops the timer and calculates the elapsed time
  28. * from start time to now.
  29. */
  30. void stop();
  31. /**
  32. * @brief Resets the elapsed time to zero.
  33. */
  34. void reset();
  35. double time;
  36. private:
  37. double startTime;
  38. /**
  39. * @brief Returns time elapsed since CPU was started in millseconds.
  40. */
  41. static inline double getCurrentTime();
  42. };
  43. /**
  44. * @brief Timer class responsible for tracking number of elapsed CPU cycles.
  45. */
  46. class TimerPrecise
  47. {
  48. public:
  49. TimerPrecise();
  50. /**
  51. * @brief Starts the counter marking the current number of executed
  52. * CPU cycles since CPU was started.
  53. */
  54. void start();
  55. /**
  56. * @brief Ends the counter and calculates the number of CPU cycles between
  57. * now and the start time.
  58. */
  59. void stop();
  60. /**
  61. * @brief Resets the cycle count to zero.
  62. */
  63. void reset();
  64. UINT64 cycles;
  65. private:
  66. UINT64 startCycles;
  67. /**
  68. * @brief Queries the CPU for the current number of CPU cycles executed since the
  69. * program was started.
  70. */
  71. static inline UINT64 getNumCycles();
  72. };
  73. /**
  74. * @brief Contains data about a single profiler sample (counting time in milliseconds).
  75. *
  76. * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
  77. * you are profiling, and it gets called 10 times, there will be 10 samples.
  78. */
  79. struct ProfileSample
  80. {
  81. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  82. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  83. { }
  84. double time;
  85. UINT64 numAllocs;
  86. UINT64 numFrees;
  87. };
  88. /**
  89. * @brief Contains data about a single precise profiler sample (counting CPU cycles).
  90. *
  91. * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
  92. * you are profiling, and it gets called 10 times, there will be 10 samples.
  93. */
  94. struct PreciseProfileSample
  95. {
  96. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  97. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  98. { }
  99. UINT64 cycles;
  100. UINT64 numAllocs;
  101. UINT64 numFrees;
  102. };
  103. /**
  104. * @brief Contains basic (time based) profiling data contained in a profiling block.
  105. */
  106. struct ProfileData
  107. {
  108. /**
  109. * @brief Begins a new sample and records current sample state. Previous sample must
  110. * not be active.
  111. */
  112. void beginSample();
  113. /**
  114. * @brief Records current sample state and creates a new sample based on start and end state.
  115. * Adds the sample to the sample list.
  116. */
  117. void endSample();
  118. /**
  119. * @brief Removes the last added sample from the sample list and makes it active again. You must
  120. * call endSample when done as if you called beginSample.
  121. */
  122. void resumeLastSample();
  123. ProfilerVector<ProfileSample> samples;
  124. Timer timer;
  125. UINT64 memAllocs;
  126. UINT64 memFrees;
  127. };
  128. /**
  129. * @brief Contains precise (CPU cycle based) profiling data contained in a profiling block.
  130. */
  131. struct PreciseProfileData
  132. {
  133. /**
  134. * @brief Begins a new sample and records current sample state. Previous sample must
  135. * not be active.
  136. */
  137. void beginSample();
  138. /**
  139. * @brief Records current sample state and creates a new sample based on start and end state.
  140. * Adds the sample to the sample list.
  141. */
  142. void endSample();
  143. /**
  144. * @brief Removes the last added sample from the sample list and makes it active again. You must
  145. * call endSample when done as if you called beginSample.
  146. */
  147. void resumeLastSample();
  148. ProfilerVector<PreciseProfileSample> samples;
  149. TimerPrecise timer;
  150. UINT64 memAllocs;
  151. UINT64 memFrees;
  152. };
  153. /**
  154. * @brief Contains all sampling information about a single named profiling block.
  155. * Each block has its own sampling information and optionally child blocks.
  156. */
  157. struct ProfiledBlock
  158. {
  159. ProfiledBlock();
  160. ~ProfiledBlock();
  161. /**
  162. * @brief Attempts to find a child block with the specified name. Returns
  163. * null if not found.
  164. */
  165. ProfiledBlock* findChild(const ProfilerString& name) const;
  166. ProfilerString name;
  167. ProfileData basic;
  168. PreciseProfileData precise;
  169. ProfilerVector<ProfiledBlock*> children;
  170. };
  171. /**
  172. * @brief CPU sampling type.
  173. */
  174. enum class ActiveSamplingType
  175. {
  176. Basic, /**< Sample using milliseconds. */
  177. Precise /**< Sample using CPU cycles. */
  178. };
  179. /**
  180. * @brief Contains data about the currently active profiling block.
  181. */
  182. struct ActiveBlock
  183. {
  184. ActiveBlock()
  185. :type(ActiveSamplingType::Basic), block(nullptr)
  186. { }
  187. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  188. :type(_type), block(_block)
  189. { }
  190. ActiveSamplingType type;
  191. ProfiledBlock* block;
  192. };
  193. /**
  194. * @brief Contains data about an active profiling thread.
  195. */
  196. struct ThreadInfo
  197. {
  198. ThreadInfo();
  199. /**
  200. * @brief Starts profiling on the thread. New primary profiling block
  201. * is created with the given name.
  202. */
  203. void begin(const ProfilerString& _name);
  204. /**
  205. * @brief Ends profiling on the thread. You should end all samples before calling this,
  206. * but if you don't they will be terminated automatically.
  207. */
  208. void end();
  209. /**
  210. * @brief Deletes all internal profiling data and makes the object ready for another
  211. * iteration. Should be called after end in order to delete any existing data.
  212. */
  213. void reset();
  214. /**
  215. * @brief Gets the primary profiling block used by the thread.
  216. */
  217. ProfiledBlock* getBlock();
  218. /**
  219. * @brief Deletes the provided block.
  220. */
  221. void releaseBlock(ProfiledBlock* block);
  222. static BS_THREADLOCAL ThreadInfo* activeThread;
  223. bool isActive;
  224. ProfiledBlock* rootBlock;
  225. ProfilerStack<ActiveBlock> activeBlocks;
  226. ActiveBlock activeBlock;
  227. };
  228. public:
  229. ProfilerCPU();
  230. ~ProfilerCPU();
  231. /**
  232. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  233. * are made in that thread.
  234. *
  235. * @param name Name that will allow you to more easily identify the thread.
  236. */
  237. void beginThread(const ProfilerString& name);
  238. /**
  239. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  240. */
  241. void endThread();
  242. /**
  243. * @brief Begins sample measurement. Must be followed by endSample.
  244. *
  245. * @param name Unique name for the sample you can later use to find the sampling data.
  246. */
  247. void beginSample(const ProfilerString& name);
  248. /**
  249. * @brief Ends sample measurement.
  250. *
  251. * @param name Unique name for the sample.
  252. *
  253. * @note Unique name is primarily needed to more easily identify mismatched
  254. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  255. */
  256. void endSample(const ProfilerString& name);
  257. /**
  258. * @brief Begins sample measurement. Must be followed by endSample.
  259. *
  260. * @param name Unique name for the sample you can later use to find the sampling data.
  261. *
  262. * @note This method uses very precise CPU counters to determine variety of data not
  263. * provided by standard beginSample. However due to the way these counters work you should
  264. * not use this method for larger parts of code. It does not consider context switches so if the OS
  265. * decides to switch context between measurements you will get invalid data.
  266. */
  267. void beginSamplePrecise(const ProfilerString& name);
  268. /**
  269. * @brief Ends precise sample measurement.
  270. *
  271. * @param name Unique name for the sample.
  272. *
  273. * @note Unique name is primarily needed to more easily identify mismatched
  274. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  275. */
  276. void endSamplePrecise(const ProfilerString& name);
  277. /**
  278. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  279. */
  280. void reset();
  281. /**
  282. * @brief Generates a report from all previously sampled data.
  283. *
  284. * @note Generating a report will stop all in-progress sampling. You should make sure
  285. * you call endSample* manually beforehand so this doesn't have to happen.
  286. */
  287. CPUProfilerReport generateReport();
  288. private:
  289. /**
  290. * @brief Calculates overhead that the timing and sampling methods themselves introduce
  291. * so we might get more accurate measurements when creating reports.
  292. */
  293. void estimateTimerOverhead();
  294. private:
  295. double mBasicTimerOverhead;
  296. UINT64 mPreciseTimerOverhead;
  297. double mBasicSamplingOverheadMs;
  298. double mPreciseSamplingOverheadMs;
  299. UINT64 mBasicSamplingOverheadCycles;
  300. UINT64 mPreciseSamplingOverheadCycles;
  301. ProfilerVector<ThreadInfo*> mActiveThreads;
  302. BS_MUTEX(mThreadSync);
  303. };
  304. /**
  305. * @brief Profiling entry containing information about a single CPU profiling block
  306. * containing timing information.
  307. */
  308. struct BS_CORE_EXPORT CPUProfilerBasicSamplingEntry
  309. {
  310. struct BS_CORE_EXPORT Data
  311. {
  312. Data();
  313. String name; /**< Name of the profiling block. */
  314. UINT32 numCalls; /**< Number of times the block was entered. */
  315. UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
  316. UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
  317. double avgTimeMs; /**< Average time it took to execute the block, per call. In milliseconds. */
  318. double maxTimeMs; /**< Maximum time of a single call in the block. In milliseconds. */
  319. double totalTimeMs; /**< Total time the block took, across all calls. In milliseconds. */
  320. double avgSelfTimeMs; /**< Average time it took to execute the block, per call. Ignores time used by child blocks. In milliseconds. */
  321. double totalSelfTimeMs; /**< Total time the block took, across all calls. Ignores time used by child blocks. In milliseconds. */
  322. double estimatedSelfOverheadMs; /**< Estimated overhead of profiling methods, only for this exact block. In milliseconds. */
  323. double estimatedOverheadMs; /**< Estimated overhead of profiling methods for this block and all children. In milliseconds. */
  324. float pctOfParent; /**< Percent of parent block time this block took to execute. Ranging [0.0, 1.0]. */
  325. } data;
  326. ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
  327. };
  328. /**
  329. * @brief Profiling entry containing information about a single CPU profiling block
  330. * containing CPU cycle count based information.
  331. */
  332. struct BS_CORE_EXPORT CPUProfilerPreciseSamplingEntry
  333. {
  334. struct BS_CORE_EXPORT Data
  335. {
  336. Data();
  337. String name; /**< Name of the profiling block. */
  338. UINT32 numCalls; /**< Number of times the block was entered. */
  339. UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
  340. UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
  341. UINT64 avgCycles; /**< Average number of cycles it took to execute the block, per call. */
  342. UINT64 maxCycles; /**< Maximum number of cycles of a single call in the block. */
  343. UINT64 totalCycles; /**< Total number of cycles across all calls in the block. */
  344. UINT64 avgSelfCycles; /**< Average number of cycles it took to execute the block, per call. Ignores cycles used by child blocks. */
  345. UINT64 totalSelfCycles; /**< Total number of cycles across all calls in the block. Ignores time used by child blocks. */
  346. UINT64 estimatedSelfOverhead; /**< Estimated overhead of profiling methods, only for this exact block. In cycles. */
  347. UINT64 estimatedOverhead; /**< Estimated overhead of profiling methods for this block and all children. In cycles. */
  348. float pctOfParent; /**< Percent of parent block cycles used by this block. Ranging [0.0, 1.0]. */
  349. } data;
  350. ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
  351. };
  352. /**
  353. * @brief CPU profiling report containing all profiling information for a single profiling session.
  354. */
  355. class BS_CORE_EXPORT CPUProfilerReport
  356. {
  357. public:
  358. CPUProfilerReport();
  359. /**
  360. * @brief Returns root entry for the basic (time based) sampling data. Root entry always contains the
  361. * profiling block associated with the entire thread.
  362. */
  363. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  364. /**
  365. * @brief Returns root entry for the precise (CPU cycle based) sampling data. Root entry always contains the
  366. * profiling block associated with the entire thread.
  367. */
  368. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  369. private:
  370. friend class ProfilerCPU;
  371. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  372. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  373. };
  374. /**
  375. * @brief Quick way to access the CPU profiler.
  376. */
  377. BS_CORE_EXPORT ProfilerCPU& gProfilerCPU();
  /**
   * @brief Shortcut for profiling a single function call. Expands to a
   *        beginSample(name) call on the CPU profiler, followed by @p call,
   *        followed by an endSample(name) call.
   *
   * @param call Statement or expression to execute between the two sample calls.
   * @param name Name of the sample, forwarded unchanged to both beginSample and
   *             endSample. It appears twice in the expansion and is therefore
   *             evaluated twice.
   *
   * @note  The name argument is substituted directly. Wrapping it in token
   *        pasting operators (## name ##) would paste it onto the neighbouring
   *        parentheses, which does not form a valid preprocessing token and is
   *        rejected by conforming preprocessors.
   * @note  The expansion consists of several statements and already ends with a
   *        semicolon, so it must not be used as the unbraced body of an if/else
   *        or a loop.
   */
  #define PROFILE_CALL(call, name) \
    BansheeEngine::gProfilerCPU().beginSample(name); \
    call; \
    BansheeEngine::gProfilerCPU().endSample(name);
  385. }