// CmCPUProfiler.h
#pragma once

#include "CmPrerequisites.h"

namespace BansheeEngine
{
  5. class CPUProfilerReport;
  6. typedef std::basic_string<char, std::char_traits<char>, StdAlloc<char, ProfilerAlloc>> ProfilerString;
  7. template <typename T, typename A = StdAlloc<T, ProfilerAlloc>>
  8. using ProfilerVector = std::vector<T, A>;
  9. template <typename T, typename A = StdAlloc<T, ProfilerAlloc>>
  10. using ProfilerStack = std::stack<T, std::deque<T, A>>;
  11. /**
  12. * @brief Provides various performance measuring methods
  13. *
  14. * @note This class is thread safe. Matching begin*\end* calls
  15. * must belong to the same thread though.
  16. */
  17. class CM_EXPORT CPUProfiler
  18. {
  19. class Timer
  20. {
  21. public:
  22. Timer();
  23. void start();
  24. void stop();
  25. void reset();
  26. double time;
  27. private:
  28. double startTime;
  29. static inline double getCurrentTime();
  30. };
  31. class TimerPrecise
  32. {
  33. public:
  34. TimerPrecise();
  35. void start();
  36. void stop();
  37. void reset();
  38. UINT64 cycles;
  39. private:
  40. UINT64 startCycles;
  41. static inline UINT64 getNumCycles();
  42. };
  43. struct ProfileSample
  44. {
  45. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  46. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  47. { }
  48. double time;
  49. UINT64 numAllocs;
  50. UINT64 numFrees;
  51. };
  52. struct PreciseProfileSample
  53. {
  54. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  55. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  56. { }
  57. UINT64 cycles;
  58. UINT64 numAllocs;
  59. UINT64 numFrees;
  60. };
  61. struct ProfileData
  62. {
  63. ProfilerVector<ProfileSample> samples;
  64. Timer timer;
  65. UINT64 memAllocs;
  66. UINT64 memFrees;
  67. void beginSample();
  68. void endSample();
  69. void resumeLastSample();
  70. };
  71. struct PreciseProfileData
  72. {
  73. ProfilerVector<PreciseProfileSample> samples;
  74. TimerPrecise timer;
  75. UINT64 memAllocs;
  76. UINT64 memFrees;
  77. void beginSample();
  78. void endSample();
  79. void resumeLastSample();
  80. };
  81. struct PreciseProfiledBlock;
  82. struct ProfiledBlock;
  83. struct ProfiledBlock
  84. {
  85. ProfiledBlock();
  86. ~ProfiledBlock();
  87. ProfilerString name;
  88. ProfileData basic;
  89. PreciseProfileData precise;
  90. ProfilerVector<ProfiledBlock*> children;
  91. ProfiledBlock* findChild(const ProfilerString& name) const;
  92. };
  93. enum class ActiveSamplingType
  94. {
  95. Basic,
  96. Precise
  97. };
  98. struct ActiveBlock
  99. {
  100. ActiveBlock()
  101. :type(ActiveSamplingType::Basic), block(nullptr)
  102. { }
  103. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  104. :type(_type), block(_block)
  105. { }
  106. ActiveSamplingType type;
  107. ProfiledBlock* block;
  108. };
  109. struct ThreadInfo
  110. {
  111. ThreadInfo();
  112. static CM_THREADLOCAL ThreadInfo* activeThread;
  113. bool isActive;
  114. ProfiledBlock* rootBlock;
  115. ProfilerStack<ActiveBlock> activeBlocks;
  116. ActiveBlock activeBlock;
  117. void begin(const ProfilerString& _name);
  118. void end();
  119. void reset();
  120. ProfiledBlock* getBlock();
  121. void releaseBlock(ProfiledBlock* block);
  122. };
  123. public:
  124. CPUProfiler();
  125. ~CPUProfiler();
  126. /**
  127. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  128. * are made in that thread.
  129. *
  130. * @param name Name that will allow you to more easily identify the thread.
  131. */
  132. void beginThread(const ProfilerString& name);
  133. /**
  134. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  135. */
  136. void endThread();
  137. /**
  138. * @brief Begins sample measurement. Must be followed by endSample.
  139. *
  140. * @param name Unique name for the sample you can later use to find the sampling data.
  141. */
  142. void beginSample(const ProfilerString& name);
  143. /**
  144. * @brief Ends sample measurement and returns measured data.
  145. *
  146. * @param name Unique name for the sample.
  147. *
  148. * @note Unique name is primarily needed to more easily identify mismatched
  149. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  150. */
  151. void endSample(const ProfilerString& name);
  152. /**
  153. * @brief Begins sample measurement. Must be followed by endSample.
  154. *
  155. * @param name Unique name for the sample you can later use to find the sampling data.
  156. *
  157. * @note This method uses very precise CPU counters to determine variety of data not
  158. * provided by standard beginSample. However due to the way these counters work you should
  159. * not use this method for larger parts of code. It does not consider context switches so if the OS
  160. * decides to switch context between measurements you will get invalid data.
  161. */
  162. void beginSamplePrecise(const ProfilerString& name);
  163. /**
  164. * @brief Ends precise sample measurement and returns measured data.
  165. *
  166. * @param name Unique name for the sample.
  167. *
  168. * @note Unique name is primarily needed to more easily identify mismatched
  169. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  170. */
  171. void endSamplePrecise(const ProfilerString& name);
  172. /**
  173. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  174. */
  175. void reset();
  176. /**
  177. * @brief Generates a report from all previously sampled data.
  178. *
  179. * @note Generating a report will stop all in-progress sampling. You should make sure
  180. * you call endSample* manually beforehand so this doesn't have to happen.
  181. */
  182. CPUProfilerReport generateReport();
  183. private:
  184. double mBasicTimerOverhead;
  185. UINT64 mPreciseTimerOverhead;
  186. double mBasicSamplingOverheadMs;
  187. double mPreciseSamplingOverheadMs;
  188. UINT64 mBasicSamplingOverheadCycles;
  189. UINT64 mPreciseSamplingOverheadCycles;
  190. ProfilerVector<ThreadInfo*> mActiveThreads;
  191. CM_MUTEX(mThreadSync);
  192. void estimateTimerOverhead();
  193. };
  194. struct CM_EXPORT CPUProfilerBasicSamplingEntry
  195. {
  196. struct CM_EXPORT Data
  197. {
  198. Data();
  199. String name;
  200. UINT32 numCalls;
  201. UINT64 memAllocs;
  202. UINT64 memFrees;
  203. double avgTimeMs;
  204. double maxTimeMs;
  205. double totalTimeMs;
  206. double avgSelfTimeMs;
  207. double totalSelfTimeMs;
  208. double estimatedSelfOverheadMs;
  209. double estimatedOverheadMs;
  210. float pctOfParent;
  211. } data;
  212. ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
  213. };
  214. struct CM_EXPORT CPUProfilerPreciseSamplingEntry
  215. {
  216. struct CM_EXPORT Data
  217. {
  218. Data();
  219. String name;
  220. UINT32 numCalls;
  221. UINT64 memAllocs;
  222. UINT64 memFrees;
  223. UINT64 avgCycles;
  224. UINT64 maxCycles;
  225. UINT64 totalCycles;
  226. UINT64 avgSelfCycles;
  227. UINT64 totalSelfCycles;
  228. UINT64 estimatedSelfOverhead;
  229. UINT64 estimatedOverhead;
  230. float pctOfParent;
  231. } data;
  232. ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
  233. };
  234. class CM_EXPORT CPUProfilerReport
  235. {
  236. public:
  237. CPUProfilerReport();
  238. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  239. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  240. private:
  241. friend class CPUProfiler;
  242. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  243. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  244. };
}