CmCPUProfiler.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #pragma once
  2. #include "CmPrerequisitesUtil.h"
  3. namespace CamelotFramework
  4. {
  5. class CPUProfilerReport;
  6. // TODO: Add #defines for all profiler methods so we can easily remove them from final version
  7. /**
  8. * @brief Provides various performance measuring methods
  9. *
  10. * @note This class is thread safe. Matching begin*\end* calls
  11. * must belong to the same thread though.
  12. */
  13. class CM_UTILITY_EXPORT CPUProfiler
  14. {
  15. class Timer
  16. {
  17. public:
  18. Timer();
  19. void start();
  20. void stop();
  21. void reset();
  22. double time;
  23. private:
  24. double startTime;
  25. static inline double getCurrentTime();
  26. };
  27. class TimerPrecise
  28. {
  29. public:
  30. TimerPrecise();
  31. void start();
  32. void stop();
  33. void reset();
  34. UINT64 cycles;
  35. private:
  36. UINT64 startCycles;
  37. static inline UINT64 getNumCycles();
  38. };
  39. struct ProfileSample
  40. {
  41. ProfileSample(double _time)
  42. :time(_time)
  43. { }
  44. double time;
  45. };
  46. struct PreciseProfileSample
  47. {
  48. PreciseProfileSample(UINT64 _cycles)
  49. :cycles(_cycles)
  50. { }
  51. UINT64 cycles;
  52. };
  53. struct ProfileData
  54. {
  55. Vector<ProfileSample>::type samples;
  56. Timer timer;
  57. void beginSample();
  58. void endSample();
  59. void resumeLastSample();
  60. };
  61. struct PreciseProfileData
  62. {
  63. // TODO - Add cache misses, branch mispredictions, retired instructions vs. optimal number of cycles (RDPMC instruction on Intel)
  64. Vector<PreciseProfileSample>::type samples;
  65. TimerPrecise timer;
  66. void beginSample();
  67. void endSample();
  68. void resumeLastSample();
  69. };
  70. struct PreciseProfiledBlock;
  71. struct ProfiledBlock;
  72. struct ProfiledBlock
  73. {
  74. ProfiledBlock();
  75. ~ProfiledBlock();
  76. String name;
  77. ProfileData basic;
  78. PreciseProfileData precise;
  79. Vector<ProfiledBlock*>::type children;
  80. ProfiledBlock* findChild(const String& name) const;
  81. };
  82. enum class ActiveSamplingType
  83. {
  84. Basic,
  85. Precise
  86. };
  87. struct ActiveBlock
  88. {
  89. ActiveBlock()
  90. :type(ActiveSamplingType::Basic), block(nullptr)
  91. { }
  92. ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
  93. :type(_type), block(_block)
  94. { }
  95. ActiveSamplingType type;
  96. ProfiledBlock* block;
  97. };
  98. struct ThreadInfo
  99. {
  100. ThreadInfo();
  101. static CM_THREADLOCAL ThreadInfo* activeThread;
  102. bool isActive;
  103. ProfiledBlock* rootBlock;
  104. Stack<ActiveBlock>::type activeBlocks;
  105. ActiveBlock activeBlock;
  106. void begin(const String& _name);
  107. void end();
  108. void reset();
  109. ProfiledBlock* getBlock();
  110. void releaseBlock(ProfiledBlock* block);
  111. };
  112. public:
  113. CPUProfiler();
  114. ~CPUProfiler();
  115. /**
  116. * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
  117. * are made in that thread.
  118. *
  119. * @param name Name that will allow you to more easily identify the thread.
  120. */
  121. void beginThread(const String& name);
  122. /**
  123. * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
  124. */
  125. void endThread();
  126. /**
  127. * @brief Begins sample measurement. Must be followed by endSample.
  128. *
  129. * @param name Unique name for the sample you can later use to find the sampling data.
  130. */
  131. void beginSample(const String& name);
  132. /**
  133. * @brief Ends sample measurement and returns measured data.
  134. *
  135. * @param name Unique name for the sample.
  136. *
  137. * @note Unique name is primarily needed to more easily identify mismatched
  138. * begin/end sample pairs. Otherwise the name in beginSample would be enough.
  139. */
  140. void endSample(const String& name);
  141. /**
  142. * @brief Begins sample measurement. Must be followed by endSample.
  143. *
  144. * @param name Unique name for the sample you can later use to find the sampling data.
  145. *
  146. * @note This method uses very precise CPU counters to determine variety of data not
  147. * provided by standard beginSample. However due to the way these counters work you should
  148. * not use this method for larger parts of code. It does not consider context switches so if the OS
  149. * decides to switch context between measurements you will get invalid data.
  150. */
  151. void beginSamplePrecise(const String& name);
  152. /**
  153. * @brief Ends precise sample measurement and returns measured data.
  154. *
  155. * @param name Unique name for the sample.
  156. *
  157. * @note Unique name is primarily needed to more easily identify mismatched
  158. * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
  159. */
  160. void endSamplePrecise(const String& name);
  161. /**
  162. * @brief Clears all sampling data, and ends any unfinished sampling blocks.
  163. */
  164. void reset();
  165. /**
  166. * @brief Generates a report from all previously sampled data.
  167. *
  168. * @note Generating a report will stop all in-progress sampling. You should make sure
  169. * you call endSample* manually beforehand so this doesn't have to happen.
  170. */
  171. CPUProfilerReport generateReport();
  172. private:
  173. double mBasicTimerOverhead;
  174. UINT64 mPreciseTimerOverhead;
  175. double mBasicSamplingOverheadMs;
  176. double mPreciseSamplingOverheadMs;
  177. UINT64 mBasicSamplingOverheadCycles;
  178. UINT64 mPreciseSamplingOverheadCycles;
  179. Vector<ThreadInfo*>::type mActiveThreads;
  180. CM_MUTEX(mThreadSync);
  181. void estimateTimerOverhead();
  182. };
  183. struct CM_UTILITY_EXPORT CPUProfilerBasicSamplingEntry
  184. {
  185. struct CM_UTILITY_EXPORT Data
  186. {
  187. Data();
  188. String name;
  189. UINT32 numCalls;
  190. double avgTimeMs;
  191. double maxTimeMs;
  192. double totalTimeMs;
  193. double avgSelfTimeMs;
  194. double totalSelfTimeMs;
  195. double estimatedSelfOverheadMs;
  196. double estimatedOverheadMs;
  197. float pctOfParent;
  198. } data;
  199. Vector<CPUProfilerBasicSamplingEntry>::type childEntries;
  200. };
  201. struct CM_UTILITY_EXPORT CPUProfilerPreciseSamplingEntry
  202. {
  203. struct CM_UTILITY_EXPORT Data
  204. {
  205. Data();
  206. String name;
  207. UINT32 numCalls;
  208. UINT64 avgCycles;
  209. UINT64 maxCycles;
  210. UINT64 totalCycles;
  211. UINT64 avgSelfCycles;
  212. UINT64 totalSelfCycles;
  213. UINT64 estimatedSelfOverhead;
  214. UINT64 estimatedOverhead;
  215. float pctOfParent;
  216. } data;
  217. Vector<CPUProfilerPreciseSamplingEntry>::type childEntries;
  218. };
  219. class CM_UTILITY_EXPORT CPUProfilerReport
  220. {
  221. public:
  222. CPUProfilerReport();
  223. const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
  224. const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
  225. private:
  226. friend class CPUProfiler;
  227. CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
  228. CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
  229. };
  230. }