// CmCPUProfiler.h
  1. #pragma once
  2. #include "CmPrerequisites.h"
  3. namespace CamelotFramework
  4. {
  5. class CPUProfilerReport;
  6. typedef std::basic_string<char, std::char_traits<char>, StdAlloc<char, ProfilerAlloc>> ProfilerString;
  7. template <typename T, typename A = StdAlloc<T, ProfilerAlloc>>
  8. struct ProfilerVector
  9. {
  10. typedef typename std::vector<T, A> type;
  11. };
  12. template <typename T, typename A = StdAlloc<T, ProfilerAlloc>>
  13. struct ProfilerStack
  14. {
  15. typedef typename std::stack<T, std::deque<T, A>> type;
  16. };
  17. /**
  18. * @brief Provides various performance measuring methods
  19. *
  20. * @note This class is thread safe. Matching begin*\end* calls
  21. * must belong to the same thread though.
  22. */
  23. class CM_EXPORT CPUProfiler
  24. {
  25. class Timer
  26. {
  27. public:
  28. Timer();
  29. void start();
  30. void stop();
  31. void reset();
  32. double time;
  33. private:
  34. double startTime;
  35. static inline double getCurrentTime();
  36. };
  37. class TimerPrecise
  38. {
  39. public:
  40. TimerPrecise();
  41. void start();
  42. void stop();
  43. void reset();
  44. UINT64 cycles;
  45. private:
  46. UINT64 startCycles;
  47. static inline UINT64 getNumCycles();
  48. };
  49. struct ProfileSample
  50. {
  51. ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
  52. :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
  53. { }
  54. double time;
  55. UINT64 numAllocs;
  56. UINT64 numFrees;
  57. };
  58. struct PreciseProfileSample
  59. {
  60. PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
  61. :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
  62. { }
  63. UINT64 cycles;
  64. UINT64 numAllocs;
  65. UINT64 numFrees;
  66. };
  67. struct ProfileData
  68. {
  69. ProfilerVector<ProfileSample>::type samples;
  70. Timer timer;
  71. UINT64 memAllocs;
  72. UINT64 memFrees;
  73. void beginSample();
  74. void endSample();
  75. void resumeLastSample();
  76. };
  77. struct PreciseProfileData
  78. {
  79. ProfilerVector<PreciseProfileSample>::type samples;
  80. TimerPrecise timer;
  81. UINT64 memAllocs;
  82. UINT64 memFrees;
  83. void beginSample();
  84. void endSample();
  85. void resumeLastSample();
  86. };
  87. struct PreciseProfiledBlock;
  88. struct ProfiledBlock;
  89. struct ProfiledBlock
  90. {
  91. ProfiledBlock();
  92. ~ProfiledBlock();
  93. ProfilerString name;
  94. ProfileData basic;
  95. PreciseProfileData precise;
  96. ProfilerVector<ProfiledBlock*>::type children;
  97. ProfiledBlock* findChild(const ProfilerString& name) const;
  98. };
  99. enum class ActiveSamplingType
  100. {
  101. Basic,
  102. Precise
  103. };
  104. struct ActiveBlock
  105. {
  106. ActiveBlock()
  107. :type(ActiveSamplingType::Basic), block(nullptr)
  108. { }
  109. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  110. :type(_type), block(_block)
  111. { }
  112. ActiveSamplingType type;
  113. ProfiledBlock* block;
  114. };
  115. struct ThreadInfo
  116. {
  117. ThreadInfo();
  118. static CM_THREADLOCAL ThreadInfo* activeThread;
  119. bool isActive;
  120. ProfiledBlock* rootBlock;
  121. ProfilerStack<ActiveBlock>::type activeBlocks;
  122. ActiveBlock activeBlock;
  123. void begin(const ProfilerString& _name);
  124. void end();
  125. void reset();
  126. ProfiledBlock* getBlock();
  127. void releaseBlock(ProfiledBlock* block);
  128. };
  129. public:
  130. CPUProfiler();
  131. ~CPUProfiler();
  132. /**
  133. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  134. * are made in that thread.
  135. *
  136. * @param name Name that will allow you to more easily identify the thread.
  137. */
  138. void beginThread(const ProfilerString& name);
  139. /**
  140. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  141. */
  142. void endThread();
  143. /**
  144. * @brief Begins sample measurement. Must be followed by endSample.
  145. *
  146. * @param name Unique name for the sample you can later use to find the sampling data.
  147. */
  148. void beginSample(const ProfilerString& name);
  149. /**
  150. * @brief Ends sample measurement and returns measured data.
  151. *
  152. * @param name Unique name for the sample.
  153. *
  154. * @note Unique name is primarily needed to more easily identify mismatched
  155. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  156. */
  157. void endSample(const ProfilerString& name);
  158. /**
  159. * @brief Begins sample measurement. Must be followed by endSample.
  160. *
  161. * @param name Unique name for the sample you can later use to find the sampling data.
  162. *
  163. * @note This method uses very precise CPU counters to determine variety of data not
  164. * provided by standard beginSample. However due to the way these counters work you should
  165. * not use this method for larger parts of code. It does not consider context switches so if the OS
  166. * decides to switch context between measurements you will get invalid data.
  167. */
  168. void beginSamplePrecise(const ProfilerString& name);
  169. /**
  170. * @brief Ends precise sample measurement and returns measured data.
  171. *
  172. * @param name Unique name for the sample.
  173. *
  174. * @note Unique name is primarily needed to more easily identify mismatched
  175. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  176. */
  177. void endSamplePrecise(const ProfilerString& name);
  178. /**
  179. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  180. */
  181. void reset();
  182. /**
  183. * @brief Generates a report from all previously sampled data.
  184. *
  185. * @note Generating a report will stop all in-progress sampling. You should make sure
  186. * you call endSample* manually beforehand so this doesn't have to happen.
  187. */
  188. CPUProfilerReport generateReport();
  189. private:
  190. double mBasicTimerOverhead;
  191. UINT64 mPreciseTimerOverhead;
  192. double mBasicSamplingOverheadMs;
  193. double mPreciseSamplingOverheadMs;
  194. UINT64 mBasicSamplingOverheadCycles;
  195. UINT64 mPreciseSamplingOverheadCycles;
  196. ProfilerVector<ThreadInfo*>::type mActiveThreads;
  197. CM_MUTEX(mThreadSync);
  198. void estimateTimerOverhead();
  199. };
  200. struct CM_EXPORT CPUProfilerBasicSamplingEntry
  201. {
  202. struct CM_EXPORT Data
  203. {
  204. Data();
  205. String name;
  206. UINT32 numCalls;
  207. UINT64 memAllocs;
  208. UINT64 memFrees;
  209. double avgTimeMs;
  210. double maxTimeMs;
  211. double totalTimeMs;
  212. double avgSelfTimeMs;
  213. double totalSelfTimeMs;
  214. double estimatedSelfOverheadMs;
  215. double estimatedOverheadMs;
  216. float pctOfParent;
  217. } data;
  218. ProfilerVector<CPUProfilerBasicSamplingEntry>::type childEntries;
  219. };
  220. struct CM_EXPORT CPUProfilerPreciseSamplingEntry
  221. {
  222. struct CM_EXPORT Data
  223. {
  224. Data();
  225. String name;
  226. UINT32 numCalls;
  227. UINT64 memAllocs;
  228. UINT64 memFrees;
  229. UINT64 avgCycles;
  230. UINT64 maxCycles;
  231. UINT64 totalCycles;
  232. UINT64 avgSelfCycles;
  233. UINT64 totalSelfCycles;
  234. UINT64 estimatedSelfOverhead;
  235. UINT64 estimatedOverhead;
  236. float pctOfParent;
  237. } data;
  238. ProfilerVector<CPUProfilerPreciseSamplingEntry>::type childEntries;
  239. };
  240. class CM_EXPORT CPUProfilerReport
  241. {
  242. public:
  243. CPUProfilerReport();
  244. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  245. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  246. private:
  247. friend class CPUProfiler;
  248. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  249. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  250. };
  251. }