// CmCPUProfiler.h
  1. #pragma once
  2. #include "CmPrerequisites.h"
  3. namespace BansheeEngine
  4. {
  5. class CPUProfilerReport;
  6. /**
  7. * @brief Provides various performance measuring methods
  8. *
  9. * @note This class is thread safe. Matching begin*\end* calls
  10. * must belong to the same thread though.
  11. */
  12. class CM_EXPORT CPUProfiler
  13. {
  14. class Timer
  15. {
  16. public:
  17. Timer();
  18. void start();
  19. void stop();
  20. void reset();
  21. double time;
  22. private:
  23. double startTime;
  24. static inline double getCurrentTime();
  25. };
  26. class TimerPrecise
  27. {
  28. public:
  29. TimerPrecise();
  30. void start();
  31. void stop();
  32. void reset();
  33. UINT64 cycles;
  34. private:
  35. UINT64 startCycles;
  36. static inline UINT64 getNumCycles();
  37. };
  38. struct ProfileSample
  39. {
  40. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  41. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  42. { }
  43. double time;
  44. UINT64 numAllocs;
  45. UINT64 numFrees;
  46. };
  47. struct PreciseProfileSample
  48. {
  49. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  50. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  51. { }
  52. UINT64 cycles;
  53. UINT64 numAllocs;
  54. UINT64 numFrees;
  55. };
  56. struct ProfileData
  57. {
  58. ProfilerVector<ProfileSample> samples;
  59. Timer timer;
  60. UINT64 memAllocs;
  61. UINT64 memFrees;
  62. void beginSample();
  63. void endSample();
  64. void resumeLastSample();
  65. };
  66. struct PreciseProfileData
  67. {
  68. ProfilerVector<PreciseProfileSample> samples;
  69. TimerPrecise timer;
  70. UINT64 memAllocs;
  71. UINT64 memFrees;
  72. void beginSample();
  73. void endSample();
  74. void resumeLastSample();
  75. };
  76. struct PreciseProfiledBlock;
  77. struct ProfiledBlock;
  78. struct ProfiledBlock
  79. {
  80. ProfiledBlock();
  81. ~ProfiledBlock();
  82. ProfilerString name;
  83. ProfileData basic;
  84. PreciseProfileData precise;
  85. ProfilerVector<ProfiledBlock*> children;
  86. ProfiledBlock* findChild(const ProfilerString& name) const;
  87. };
  88. enum class ActiveSamplingType
  89. {
  90. Basic,
  91. Precise
  92. };
  93. struct ActiveBlock
  94. {
  95. ActiveBlock()
  96. :type(ActiveSamplingType::Basic), block(nullptr)
  97. { }
  98. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  99. :type(_type), block(_block)
  100. { }
  101. ActiveSamplingType type;
  102. ProfiledBlock* block;
  103. };
  104. struct ThreadInfo
  105. {
  106. ThreadInfo();
  107. static CM_THREADLOCAL ThreadInfo* activeThread;
  108. bool isActive;
  109. ProfiledBlock* rootBlock;
  110. ProfilerStack<ActiveBlock> activeBlocks;
  111. ActiveBlock activeBlock;
  112. void begin(const ProfilerString& _name);
  113. void end();
  114. void reset();
  115. ProfiledBlock* getBlock();
  116. void releaseBlock(ProfiledBlock* block);
  117. };
  118. public:
  119. CPUProfiler();
  120. ~CPUProfiler();
  121. /**
  122. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  123. * are made in that thread.
  124. *
  125. * @param name Name that will allow you to more easily identify the thread.
  126. */
  127. void beginThread(const ProfilerString& name);
  128. /**
  129. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  130. */
  131. void endThread();
  132. /**
  133. * @brief Begins sample measurement. Must be followed by endSample.
  134. *
  135. * @param name Unique name for the sample you can later use to find the sampling data.
  136. */
  137. void beginSample(const ProfilerString& name);
  138. /**
  139. * @brief Ends sample measurement.
  140. *
  141. * @param name Unique name for the sample.
  142. *
  143. * @note Unique name is primarily needed to more easily identify mismatched
  144. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  145. */
  146. void endSample(const ProfilerString& name);
  147. /**
  148. * @brief Begins sample measurement. Must be followed by endSample.
  149. *
  150. * @param name Unique name for the sample you can later use to find the sampling data.
  151. *
  152. * @note This method uses very precise CPU counters to determine variety of data not
  153. * provided by standard beginSample. However due to the way these counters work you should
  154. * not use this method for larger parts of code. It does not consider context switches so if the OS
  155. * decides to switch context between measurements you will get invalid data.
  156. */
  157. void beginSamplePrecise(const ProfilerString& name);
  158. /**
  159. * @brief Ends precise sample measurement.
  160. *
  161. * @param name Unique name for the sample.
  162. *
  163. * @note Unique name is primarily needed to more easily identify mismatched
  164. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  165. */
  166. void endSamplePrecise(const ProfilerString& name);
  167. /**
  168. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  169. */
  170. void reset();
  171. /**
  172. * @brief Generates a report from all previously sampled data.
  173. *
  174. * @note Generating a report will stop all in-progress sampling. You should make sure
  175. * you call endSample* manually beforehand so this doesn't have to happen.
  176. */
  177. CPUProfilerReport generateReport();
  178. private:
  179. double mBasicTimerOverhead;
  180. UINT64 mPreciseTimerOverhead;
  181. double mBasicSamplingOverheadMs;
  182. double mPreciseSamplingOverheadMs;
  183. UINT64 mBasicSamplingOverheadCycles;
  184. UINT64 mPreciseSamplingOverheadCycles;
  185. ProfilerVector<ThreadInfo*> mActiveThreads;
  186. CM_MUTEX(mThreadSync);
  187. void estimateTimerOverhead();
  188. };
  189. struct CM_EXPORT CPUProfilerBasicSamplingEntry
  190. {
  191. struct CM_EXPORT Data
  192. {
  193. Data();
  194. String name;
  195. UINT32 numCalls;
  196. UINT64 memAllocs;
  197. UINT64 memFrees;
  198. double avgTimeMs;
  199. double maxTimeMs;
  200. double totalTimeMs;
  201. double avgSelfTimeMs;
  202. double totalSelfTimeMs;
  203. double estimatedSelfOverheadMs;
  204. double estimatedOverheadMs;
  205. float pctOfParent;
  206. } data;
  207. ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
  208. };
  209. struct CM_EXPORT CPUProfilerPreciseSamplingEntry
  210. {
  211. struct CM_EXPORT Data
  212. {
  213. Data();
  214. String name;
  215. UINT32 numCalls;
  216. UINT64 memAllocs;
  217. UINT64 memFrees;
  218. UINT64 avgCycles;
  219. UINT64 maxCycles;
  220. UINT64 totalCycles;
  221. UINT64 avgSelfCycles;
  222. UINT64 totalSelfCycles;
  223. UINT64 estimatedSelfOverhead;
  224. UINT64 estimatedOverhead;
  225. float pctOfParent;
  226. } data;
  227. ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
  228. };
  229. class CM_EXPORT CPUProfilerReport
  230. {
  231. public:
  232. CPUProfilerReport();
  233. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  234. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  235. private:
  236. friend class CPUProfiler;
  237. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  238. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  239. };
  240. }