// TracyOpenGL.hpp
  1. #ifndef __TRACYOPENGL_HPP__
  2. #define __TRACYOPENGL_HPP__
  3. #if !defined GL_TIMESTAMP && !defined GL_TIMESTAMP_EXT
  4. # error "You must include OpenGL 3.2 headers before including TracyOpenGL.hpp"
  5. #endif
  6. #if !defined TRACY_ENABLE || defined __APPLE__
  7. #define TracyGpuContext
  8. #define TracyGpuContextName(x,y)
  9. #define TracyGpuNamedZone(x,y,z)
  10. #define TracyGpuNamedZoneC(x,y,z,w)
  11. #define TracyGpuZone(x)
  12. #define TracyGpuZoneC(x,y)
  13. #define TracyGpuZoneTransient(x,y,z)
  14. #define TracyGpuCollect
  15. #define TracyGpuNamedZoneS(x,y,z,w)
  16. #define TracyGpuNamedZoneCS(x,y,z,w,a)
  17. #define TracyGpuZoneS(x,y)
  18. #define TracyGpuZoneCS(x,y,z)
  19. #define TracyGpuZoneTransientS(x,y,z,w)
  20. namespace tracy
  21. {
  22. struct SourceLocationData;
  23. class GpuCtxScope
  24. {
  25. public:
  26. GpuCtxScope( const SourceLocationData*, bool ) {}
  27. GpuCtxScope( const SourceLocationData*, int, bool ) {}
  28. };
  29. }
  30. #else
  31. #include <atomic>
  32. #include <assert.h>
  33. #include <stdlib.h>
  34. #include "Tracy.hpp"
  35. #include "client/TracyProfiler.hpp"
  36. #include "client/TracyCallstack.hpp"
  37. #include "common/TracyAlign.hpp"
  38. #include "common/TracyAlloc.hpp"
  39. #if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT
  40. # define GL_TIMESTAMP GL_TIMESTAMP_EXT
  41. # define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
  42. # define glGetQueryObjectiv glGetQueryObjectivEXT
  43. # define glGetQueryObjectui64v glGetQueryObjectui64vEXT
  44. # define glQueryCounter glQueryCounterEXT
  45. #endif
  46. #define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
  47. #define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
  48. #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
  49. # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
  50. # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
  51. # define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
  52. # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
  53. # define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
  54. #else
  55. # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
  56. # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
  57. # define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
  58. # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
  59. # define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
  60. #endif
  61. #define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
  62. #ifdef TRACY_HAS_CALLSTACK
  63. # define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
  64. # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
  65. # define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
  66. # define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
  67. # define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
  68. #else
  69. # define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
  70. # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
  71. # define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
  72. # define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
  73. # define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
  74. #endif
  75. namespace tracy
  76. {
  77. class GpuCtx
  78. {
  79. friend class GpuCtxScope;
  80. enum { QueryCount = 64 * 1024 };
  81. public:
  82. GpuCtx()
  83. : m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
  84. , m_head( 0 )
  85. , m_tail( 0 )
  86. {
  87. assert( m_context != 255 );
  88. glGenQueries( QueryCount, m_query );
  89. int64_t tgpu;
  90. glGetInteger64v( GL_TIMESTAMP, &tgpu );
  91. int64_t tcpu = Profiler::GetTime();
  92. GLint bits;
  93. glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
  94. const float period = 1.f;
  95. const auto thread = GetThreadHandle();
  96. TracyLfqPrepare( QueueType::GpuNewContext );
  97. MemWrite( &item->gpuNewContext.cpuTime, tcpu );
  98. MemWrite( &item->gpuNewContext.gpuTime, tgpu );
  99. MemWrite( &item->gpuNewContext.thread, thread );
  100. MemWrite( &item->gpuNewContext.period, period );
  101. MemWrite( &item->gpuNewContext.context, m_context );
  102. MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
  103. MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
  104. #ifdef TRACY_ON_DEMAND
  105. GetProfiler().DeferItem( *item );
  106. #endif
  107. TracyLfqCommit;
  108. }
  109. void Name( const char* name, uint16_t len )
  110. {
  111. auto ptr = (char*)tracy_malloc( len );
  112. memcpy( ptr, name, len );
  113. TracyLfqPrepare( QueueType::GpuContextName );
  114. MemWrite( &item->gpuContextNameFat.context, m_context );
  115. MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
  116. MemWrite( &item->gpuContextNameFat.size, len );
  117. #ifdef TRACY_ON_DEMAND
  118. GetProfiler().DeferItem( *item );
  119. #endif
  120. TracyLfqCommit;
  121. }
  122. void Collect()
  123. {
  124. ZoneScopedC( Color::Red4 );
  125. if( m_tail == m_head ) return;
  126. #ifdef TRACY_ON_DEMAND
  127. if( !GetProfiler().IsConnected() )
  128. {
  129. m_head = m_tail = 0;
  130. return;
  131. }
  132. #endif
  133. while( m_tail != m_head )
  134. {
  135. GLint available;
  136. glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
  137. if( !available ) return;
  138. uint64_t time;
  139. glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
  140. TracyLfqPrepare( QueueType::GpuTime );
  141. MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
  142. MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
  143. MemWrite( &item->gpuTime.context, m_context );
  144. TracyLfqCommit;
  145. m_tail = ( m_tail + 1 ) % QueryCount;
  146. }
  147. }
  148. private:
  149. tracy_force_inline unsigned int NextQueryId()
  150. {
  151. const auto id = m_head;
  152. m_head = ( m_head + 1 ) % QueryCount;
  153. assert( m_head != m_tail );
  154. return id;
  155. }
  156. tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
  157. {
  158. return m_query[id];
  159. }
  160. tracy_force_inline uint8_t GetId() const
  161. {
  162. return m_context;
  163. }
  164. unsigned int m_query[QueryCount];
  165. uint8_t m_context;
  166. unsigned int m_head;
  167. unsigned int m_tail;
  168. };
  169. class GpuCtxScope
  170. {
  171. public:
  172. tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
  173. #ifdef TRACY_ON_DEMAND
  174. : m_active( is_active && GetProfiler().IsConnected() )
  175. #else
  176. : m_active( is_active )
  177. #endif
  178. {
  179. if( !m_active ) return;
  180. const auto queryId = GetGpuCtx().ptr->NextQueryId();
  181. glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
  182. TracyLfqPrepare( QueueType::GpuZoneBegin );
  183. MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
  184. memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
  185. MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
  186. MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
  187. MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
  188. TracyLfqCommit;
  189. }
  190. tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
  191. #ifdef TRACY_ON_DEMAND
  192. : m_active( is_active && GetProfiler().IsConnected() )
  193. #else
  194. : m_active( is_active )
  195. #endif
  196. {
  197. if( !m_active ) return;
  198. const auto queryId = GetGpuCtx().ptr->NextQueryId();
  199. glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
  200. GetProfiler().SendCallstack( depth );
  201. const auto thread = GetThreadHandle();
  202. TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
  203. MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
  204. MemWrite( &item->gpuZoneBegin.thread, thread );
  205. MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
  206. MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
  207. MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
  208. TracyLfqCommit;
  209. }
  210. tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
  211. #ifdef TRACY_ON_DEMAND
  212. : m_active( is_active && GetProfiler().IsConnected() )
  213. #else
  214. : m_active( is_active )
  215. #endif
  216. {
  217. if( !m_active ) return;
  218. const auto queryId = GetGpuCtx().ptr->NextQueryId();
  219. glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
  220. TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
  221. const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
  222. MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
  223. memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
  224. MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
  225. MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
  226. MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
  227. TracyLfqCommit;
  228. }
  229. tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
  230. #ifdef TRACY_ON_DEMAND
  231. : m_active( is_active && GetProfiler().IsConnected() )
  232. #else
  233. : m_active( is_active )
  234. #endif
  235. {
  236. if( !m_active ) return;
  237. const auto queryId = GetGpuCtx().ptr->NextQueryId();
  238. glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
  239. GetProfiler().SendCallstack( depth );
  240. const auto thread = GetThreadHandle();
  241. TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
  242. const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
  243. MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
  244. MemWrite( &item->gpuZoneBegin.thread, thread );
  245. MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
  246. MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
  247. MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
  248. TracyLfqCommit;
  249. }
  250. tracy_force_inline ~GpuCtxScope()
  251. {
  252. if( !m_active ) return;
  253. const auto queryId = GetGpuCtx().ptr->NextQueryId();
  254. glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
  255. TracyLfqPrepare( QueueType::GpuZoneEnd );
  256. MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
  257. memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
  258. MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
  259. MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
  260. TracyLfqCommit;
  261. }
  262. private:
  263. const bool m_active;
  264. };
  265. }
  266. #endif
  267. #endif