// CmCPUProfiler.h (6.9 KB)
  1. #pragma once
  2. #include "CmPrerequisites.h"
  3. #include "BsProfilerFwd.h"
  4. namespace BansheeEngine
  5. {
  6. class CPUProfilerReport;
  7. /**
  8. * @brief Provides various performance measuring methods
  9. *
  10. * @note This class is thread safe. Matching begin*\end* calls
  11. * must belong to the same thread though.
  12. */
  13. class CM_EXPORT CPUProfiler
  14. {
  15. class Timer
  16. {
  17. public:
  18. Timer();
  19. void start();
  20. void stop();
  21. void reset();
  22. double time;
  23. private:
  24. double startTime;
  25. static inline double getCurrentTime();
  26. };
  27. class TimerPrecise
  28. {
  29. public:
  30. TimerPrecise();
  31. void start();
  32. void stop();
  33. void reset();
  34. UINT64 cycles;
  35. private:
  36. UINT64 startCycles;
  37. static inline UINT64 getNumCycles();
  38. };
  39. struct ProfileSample
  40. {
  41. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  42. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  43. { }
  44. double time;
  45. UINT64 numAllocs;
  46. UINT64 numFrees;
  47. };
  48. struct PreciseProfileSample
  49. {
  50. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  51. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  52. { }
  53. UINT64 cycles;
  54. UINT64 numAllocs;
  55. UINT64 numFrees;
  56. };
  57. struct ProfileData
  58. {
  59. ProfilerVector<ProfileSample> samples;
  60. Timer timer;
  61. UINT64 memAllocs;
  62. UINT64 memFrees;
  63. void beginSample();
  64. void endSample();
  65. void resumeLastSample();
  66. };
  67. struct PreciseProfileData
  68. {
  69. ProfilerVector<PreciseProfileSample> samples;
  70. TimerPrecise timer;
  71. UINT64 memAllocs;
  72. UINT64 memFrees;
  73. void beginSample();
  74. void endSample();
  75. void resumeLastSample();
  76. };
  77. struct PreciseProfiledBlock;
  78. struct ProfiledBlock;
  79. struct ProfiledBlock
  80. {
  81. ProfiledBlock();
  82. ~ProfiledBlock();
  83. ProfilerString name;
  84. ProfileData basic;
  85. PreciseProfileData precise;
  86. ProfilerVector<ProfiledBlock*> children;
  87. ProfiledBlock* findChild(const ProfilerString& name) const;
  88. };
  89. enum class ActiveSamplingType
  90. {
  91. Basic,
  92. Precise
  93. };
  94. struct ActiveBlock
  95. {
  96. ActiveBlock()
  97. :type(ActiveSamplingType::Basic), block(nullptr)
  98. { }
  99. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  100. :type(_type), block(_block)
  101. { }
  102. ActiveSamplingType type;
  103. ProfiledBlock* block;
  104. };
  105. struct ThreadInfo
  106. {
  107. ThreadInfo();
  108. static CM_THREADLOCAL ThreadInfo* activeThread;
  109. bool isActive;
  110. ProfiledBlock* rootBlock;
  111. ProfilerStack<ActiveBlock> activeBlocks;
  112. ActiveBlock activeBlock;
  113. void begin(const ProfilerString& _name);
  114. void end();
  115. void reset();
  116. ProfiledBlock* getBlock();
  117. void releaseBlock(ProfiledBlock* block);
  118. };
  119. public:
  120. CPUProfiler();
  121. ~CPUProfiler();
  122. /**
  123. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  124. * are made in that thread.
  125. *
  126. * @param name Name that will allow you to more easily identify the thread.
  127. */
  128. void beginThread(const ProfilerString& name);
  129. /**
  130. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  131. */
  132. void endThread();
  133. /**
  134. * @brief Begins sample measurement. Must be followed by endSample.
  135. *
  136. * @param name Unique name for the sample you can later use to find the sampling data.
  137. */
  138. void beginSample(const ProfilerString& name);
  139. /**
  140. * @brief Ends sample measurement.
  141. *
  142. * @param name Unique name for the sample.
  143. *
  144. * @note Unique name is primarily needed to more easily identify mismatched
  145. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  146. */
  147. void endSample(const ProfilerString& name);
  148. /**
  149. * @brief Begins sample measurement. Must be followed by endSample.
  150. *
  151. * @param name Unique name for the sample you can later use to find the sampling data.
  152. *
  153. * @note This method uses very precise CPU counters to determine variety of data not
  154. * provided by standard beginSample. However due to the way these counters work you should
  155. * not use this method for larger parts of code. It does not consider context switches so if the OS
  156. * decides to switch context between measurements you will get invalid data.
  157. */
  158. void beginSamplePrecise(const ProfilerString& name);
  159. /**
  160. * @brief Ends precise sample measurement.
  161. *
  162. * @param name Unique name for the sample.
  163. *
  164. * @note Unique name is primarily needed to more easily identify mismatched
  165. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  166. */
  167. void endSamplePrecise(const ProfilerString& name);
  168. /**
  169. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  170. */
  171. void reset();
  172. /**
  173. * @brief Generates a report from all previously sampled data.
  174. *
  175. * @note Generating a report will stop all in-progress sampling. You should make sure
  176. * you call endSample* manually beforehand so this doesn't have to happen.
  177. */
  178. CPUProfilerReport generateReport();
  179. private:
  180. double mBasicTimerOverhead;
  181. UINT64 mPreciseTimerOverhead;
  182. double mBasicSamplingOverheadMs;
  183. double mPreciseSamplingOverheadMs;
  184. UINT64 mBasicSamplingOverheadCycles;
  185. UINT64 mPreciseSamplingOverheadCycles;
  186. ProfilerVector<ThreadInfo*> mActiveThreads;
  187. CM_MUTEX(mThreadSync);
  188. void estimateTimerOverhead();
  189. };
  190. struct CM_EXPORT CPUProfilerBasicSamplingEntry
  191. {
  192. struct CM_EXPORT Data
  193. {
  194. Data();
  195. String name;
  196. UINT32 numCalls;
  197. UINT64 memAllocs;
  198. UINT64 memFrees;
  199. double avgTimeMs;
  200. double maxTimeMs;
  201. double totalTimeMs;
  202. double avgSelfTimeMs;
  203. double totalSelfTimeMs;
  204. double estimatedSelfOverheadMs;
  205. double estimatedOverheadMs;
  206. float pctOfParent;
  207. } data;
  208. ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
  209. };
  210. struct CM_EXPORT CPUProfilerPreciseSamplingEntry
  211. {
  212. struct CM_EXPORT Data
  213. {
  214. Data();
  215. String name;
  216. UINT32 numCalls;
  217. UINT64 memAllocs;
  218. UINT64 memFrees;
  219. UINT64 avgCycles;
  220. UINT64 maxCycles;
  221. UINT64 totalCycles;
  222. UINT64 avgSelfCycles;
  223. UINT64 totalSelfCycles;
  224. UINT64 estimatedSelfOverhead;
  225. UINT64 estimatedOverhead;
  226. float pctOfParent;
  227. } data;
  228. ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
  229. };
  230. class CM_EXPORT CPUProfilerReport
  231. {
  232. public:
  233. CPUProfilerReport();
  234. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  235. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  236. private:
  237. friend class CPUProfiler;
  238. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  239. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  240. };
  241. }