TracyProfiler.cpp 112 KB


  1. #ifdef TRACY_ENABLE
  2. #ifdef _WIN32
  3. # ifndef NOMINMAX
  4. # define NOMINMAX
  5. # endif
  6. # include <winsock2.h>
  7. # include <windows.h>
  8. # include <tlhelp32.h>
  9. # include <inttypes.h>
  10. # include <intrin.h>
  11. #else
  12. # include <sys/time.h>
  13. # include <sys/param.h>
  14. #endif
  15. #ifdef __CYGWIN__
  16. # include <windows.h>
  17. # include <unistd.h>
  18. # include <tlhelp32.h>
  19. #endif
  20. #ifdef _GNU_SOURCE
  21. # include <errno.h>
  22. #endif
  23. #ifdef __linux__
  24. # include <dirent.h>
  25. # include <signal.h>
  26. # include <pthread.h>
  27. # include <sys/types.h>
  28. # include <sys/syscall.h>
  29. #endif
  30. #if defined __APPLE__ || defined BSD
  31. # include <sys/types.h>
  32. # include <sys/sysctl.h>
  33. #endif
  34. #if defined __APPLE__
  35. # include "TargetConditionals.h"
  36. # include <mach-o/dyld.h>
  37. #endif
  38. #ifdef __ANDROID__
  39. # include <sys/mman.h>
  40. # include <stdio.h>
  41. # include <stdint.h>
  42. # include <algorithm>
  43. # include <vector>
  44. #endif
  45. #include <algorithm>
  46. #include <assert.h>
  47. #include <atomic>
  48. #include <chrono>
  49. #include <limits>
  50. #include <new>
  51. #include <stdlib.h>
  52. #include <string.h>
  53. #include <sys/stat.h>
  54. #include <thread>
  55. #include "../common/TracyAlign.hpp"
  56. #include "../common/TracySocket.hpp"
  57. #include "../common/TracySystem.hpp"
  58. #include "../common/tracy_lz4.hpp"
  59. #include "tracy_rpmalloc.hpp"
  60. #include "TracyCallstack.hpp"
  61. #include "TracyDxt1.hpp"
  62. #include "TracyScoped.hpp"
  63. #include "TracyProfiler.hpp"
  64. #include "TracyThread.hpp"
  65. #include "TracyArmCpuTable.hpp"
  66. #include "TracySysTrace.hpp"
  67. #include "../TracyC.h"
  68. #ifdef TRACY_PORT
  69. # ifndef TRACY_DATA_PORT
  70. # define TRACY_DATA_PORT TRACY_PORT
  71. # endif
  72. # ifndef TRACY_BROADCAST_PORT
  73. # define TRACY_BROADCAST_PORT TRACY_PORT
  74. # endif
  75. #endif
  76. #ifdef __APPLE__
  77. # define TRACY_DELAYED_INIT
  78. #else
  79. # ifdef __GNUC__
  80. # define init_order( val ) __attribute__ ((init_priority(val)))
  81. # else
  82. # define init_order(x)
  83. # endif
  84. #endif
  85. #if defined _WIN32 || defined __CYGWIN__
  86. # include <lmcons.h>
  87. extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
  88. extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD );
  89. #else
  90. # include <unistd.h>
  91. # include <limits.h>
  92. #endif
  93. #if defined __linux__
  94. # include <sys/sysinfo.h>
  95. # include <sys/utsname.h>
  96. #endif
  97. #if !defined _WIN32 && !defined __CYGWIN__ && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  98. # include <cpuid.h>
  99. #endif
  100. #if !( ( ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ )
  101. # include <mutex>
  102. #endif
  103. namespace tracy
  104. {
  105. namespace
  106. {
  107. # if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
  108. BOOL CALLBACK InitOnceCallback( PINIT_ONCE /*initOnce*/, PVOID /*Parameter*/, PVOID* /*Context*/)
  109. {
  110. rpmalloc_initialize();
  111. return TRUE;
  112. }
  113. INIT_ONCE InitOnce = INIT_ONCE_STATIC_INIT;
  114. # elif defined __linux__
  115. void InitOnceCallback()
  116. {
  117. rpmalloc_initialize();
  118. }
  119. pthread_once_t once_control = PTHREAD_ONCE_INIT;
  120. # else
  121. void InitOnceCallback()
  122. {
  123. rpmalloc_initialize();
  124. }
  125. std::once_flag once_flag;
  126. # endif
  127. }
  128. struct RPMallocInit
  129. {
  130. RPMallocInit()
  131. {
  132. # if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
  133. InitOnceExecuteOnce( &InitOnce, InitOnceCallback, nullptr, nullptr );
  134. # elif defined __linux__
  135. pthread_once( &once_control, InitOnceCallback );
  136. # else
  137. std::call_once( once_flag, InitOnceCallback );
  138. # endif
  139. rpmalloc_thread_initialize();
  140. }
  141. };
  142. #ifndef TRACY_DELAYED_INIT
  143. struct InitTimeWrapper
  144. {
  145. int64_t val;
  146. };
  147. struct ProducerWrapper
  148. {
  149. tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
  150. };
  151. struct ThreadHandleWrapper
  152. {
  153. uint64_t val;
  154. };
  155. #endif
  156. #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
  157. static inline void CpuId( uint32_t* regs, uint32_t leaf )
  158. {
  159. memset(regs, 0, sizeof(uint32_t) * 4);
  160. #if defined _WIN32 || defined __CYGWIN__
  161. __cpuidex( (int*)regs, leaf, 0 );
  162. #else
  163. __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 );
  164. #endif
  165. }
  166. static void InitFailure( const char* msg )
  167. {
  168. #if defined _WIN32 || defined __CYGWIN__
  169. bool hasConsole = false;
  170. bool reopen = false;
  171. const auto attached = AttachConsole( ATTACH_PARENT_PROCESS );
  172. if( attached )
  173. {
  174. hasConsole = true;
  175. reopen = true;
  176. }
  177. else
  178. {
  179. const auto err = GetLastError();
  180. if( err == ERROR_ACCESS_DENIED )
  181. {
  182. hasConsole = true;
  183. }
  184. }
  185. if( hasConsole )
  186. {
  187. fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
  188. if( reopen )
  189. {
  190. freopen( "CONOUT$", "w", stderr );
  191. fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
  192. }
  193. }
  194. else
  195. {
  196. MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP );
  197. }
  198. #else
  199. fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
  200. #endif
  201. exit( 0 );
  202. }
  203. static int64_t SetupHwTimer()
  204. {
  205. #if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK
  206. uint32_t regs[4];
  207. CpuId( regs, 1 );
  208. if( !( regs[3] & ( 1 << 4 ) ) ) InitFailure( "CPU doesn't support RDTSC instruction." );
  209. CpuId( regs, 0x80000007 );
  210. if( !( regs[3] & ( 1 << 8 ) ) )
  211. {
  212. const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" );
  213. if( !noCheck || noCheck[0] != '1' )
  214. {
  215. #if defined _WIN32 || defined __CYGWIN__
  216. InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." );
  217. #else
  218. InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." );
  219. #endif
  220. }
  221. }
  222. #endif
  223. return Profiler::GetTime();
  224. }
  225. #else
  226. static int64_t SetupHwTimer()
  227. {
  228. return Profiler::GetTime();
  229. }
  230. #endif
  231. static const char* GetProcessName()
  232. {
  233. const char* processName = "unknown";
  234. #ifdef _WIN32
  235. static char buf[_MAX_PATH];
  236. GetModuleFileNameA( nullptr, buf, _MAX_PATH );
  237. const char* ptr = buf;
  238. while( *ptr != '\0' ) ptr++;
  239. while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--;
  240. if( ptr > buf ) ptr++;
  241. processName = ptr;
  242. #elif defined __ANDROID__
  243. # if __ANDROID_API__ >= 21
  244. auto buf = getprogname();
  245. if( buf ) processName = buf;
  246. # endif
  247. #elif defined _GNU_SOURCE || defined __CYGWIN__
  248. processName = program_invocation_short_name;
  249. #elif defined __APPLE__ || defined BSD
  250. auto buf = getprogname();
  251. if( buf ) processName = buf;
  252. #endif
  253. return processName;
  254. }
  255. static const char* GetProcessExecutablePath()
  256. {
  257. #ifdef _WIN32
  258. static char buf[_MAX_PATH];
  259. GetModuleFileNameA( nullptr, buf, _MAX_PATH );
  260. return buf;
  261. #elif defined __ANDROID__
  262. return nullptr;
  263. #elif defined _GNU_SOURCE || defined __CYGWIN__
  264. return program_invocation_name;
  265. #elif defined __APPLE__
  266. static char buf[1024];
  267. uint32_t size = 1024;
  268. _NSGetExecutablePath( buf, &size );
  269. return buf;
  270. #elif defined __DragonFly__
  271. static char buf[1024];
  272. readlink( "/proc/curproc/file", buf, 1024 );
  273. return buf;
  274. #elif defined __FreeBSD__
  275. static char buf[1024];
  276. int mib[4];
  277. mib[0] = CTL_KERN;
  278. mib[1] = KERN_PROC;
  279. mib[2] = KERN_PROC_PATHNAME;
  280. mib[3] = -1;
  281. size_t cb = 1024;
  282. sysctl( mib, 4, buf, &cb, nullptr, 0 );
  283. return buf;
  284. #elif defined __NetBSD__
  285. static char buf[1024];
  286. readlink( "/proc/curproc/exe", buf, 1024 );
  287. return buf;
  288. #else
  289. return nullptr;
  290. #endif
  291. }
  292. #if defined __linux__ && defined __ARM_ARCH
  293. static uint32_t GetHex( char*& ptr, int skip )
  294. {
  295. uint32_t ret;
  296. ptr += skip;
  297. char* end;
  298. if( ptr[0] == '0' && ptr[1] == 'x' )
  299. {
  300. ptr += 2;
  301. ret = strtol( ptr, &end, 16 );
  302. }
  303. else
  304. {
  305. ret = strtol( ptr, &end, 10 );
  306. }
  307. ptr = end;
  308. return ret;
  309. }
  310. #endif
  311. static const char* GetHostInfo()
  312. {
  313. static char buf[1024];
  314. auto ptr = buf;
  315. #if defined _WIN32 || defined __CYGWIN__
  316. t_RtlGetVersion RtlGetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" );
  317. if( !RtlGetVersion )
  318. {
  319. # ifdef __CYGWIN__
  320. ptr += sprintf( ptr, "OS: Windows (Cygwin)\n" );
  321. # elif defined __MINGW32__
  322. ptr += sprintf( ptr, "OS: Windows (MingW)\n" );
  323. # else
  324. ptr += sprintf( ptr, "OS: Windows\n" );
  325. # endif
  326. }
  327. else
  328. {
  329. RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) };
  330. RtlGetVersion( &ver );
  331. # ifdef __CYGWIN__
  332. ptr += sprintf( ptr, "OS: Windows %i.%i.%i (Cygwin)\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber );
  333. # elif defined __MINGW32__
  334. ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber );
  335. # else
  336. ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber );
  337. # endif
  338. }
  339. #elif defined __linux__
  340. struct utsname utsName;
  341. uname( &utsName );
  342. # if defined __ANDROID__
  343. ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release );
  344. # else
  345. ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release );
  346. # endif
  347. #elif defined __APPLE__
  348. # if TARGET_OS_IPHONE == 1
  349. ptr += sprintf( ptr, "OS: Darwin (iOS)\n" );
  350. # elif TARGET_OS_MAC == 1
  351. ptr += sprintf( ptr, "OS: Darwin (OSX)\n" );
  352. # else
  353. ptr += sprintf( ptr, "OS: Darwin (unknown)\n" );
  354. # endif
  355. #elif defined __DragonFly__
  356. ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" );
  357. #elif defined __FreeBSD__
  358. ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" );
  359. #elif defined __NetBSD__
  360. ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" );
  361. #elif defined __OpenBSD__
  362. ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" );
  363. #else
  364. ptr += sprintf( ptr, "OS: unknown\n" );
  365. #endif
  366. #if defined _MSC_VER
  367. # if defined __clang__
  368. ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
  369. # else
  370. ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER );
  371. # endif
  372. #elif defined __clang__
  373. ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
  374. #elif defined __GNUC__
  375. ptr += sprintf( ptr, "Compiler: gcc %i.%i\n", __GNUC__, __GNUC_MINOR__ );
  376. #else
  377. ptr += sprintf( ptr, "Compiler: unknown\n" );
  378. #endif
  379. #if defined _WIN32 || defined __CYGWIN__
  380. # ifndef __CYGWIN__
  381. InitWinSock();
  382. # endif
  383. char hostname[512];
  384. gethostname( hostname, 512 );
  385. DWORD userSz = UNLEN+1;
  386. char user[UNLEN+1];
  387. GetUserNameA( user, &userSz );
  388. ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
  389. #else
  390. char hostname[_POSIX_HOST_NAME_MAX]{};
  391. char user[_POSIX_LOGIN_NAME_MAX]{};
  392. gethostname( hostname, _POSIX_HOST_NAME_MAX );
  393. # if defined __ANDROID__
  394. const auto login = getlogin();
  395. if( login )
  396. {
  397. strcpy( user, login );
  398. }
  399. else
  400. {
  401. memcpy( user, "(?)", 4 );
  402. }
  403. # else
  404. getlogin_r( user, _POSIX_LOGIN_NAME_MAX );
  405. # endif
  406. ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
  407. #endif
  408. #if defined __i386 || defined _M_IX86
  409. ptr += sprintf( ptr, "Arch: x86\n" );
  410. #elif defined __x86_64__ || defined _M_X64
  411. ptr += sprintf( ptr, "Arch: x64\n" );
  412. #elif defined __aarch64__
  413. ptr += sprintf( ptr, "Arch: ARM64\n" );
  414. #elif defined __ARM_ARCH
  415. ptr += sprintf( ptr, "Arch: ARM\n" );
  416. #else
  417. ptr += sprintf( ptr, "Arch: unknown\n" );
  418. #endif
  419. #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
  420. uint32_t regs[4];
  421. char cpuModel[4*4*3];
  422. auto modelPtr = cpuModel;
  423. for( uint32_t i=0x80000002; i<0x80000005; ++i )
  424. {
  425. CpuId( regs, i );
  426. memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs );
  427. }
  428. ptr += sprintf( ptr, "CPU: %s\n", cpuModel );
  429. #elif defined __linux__ && defined __ARM_ARCH
  430. bool cpuFound = false;
  431. FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" );
  432. if( fcpuinfo )
  433. {
  434. enum { BufSize = 4*1024 };
  435. char buf[BufSize];
  436. const auto sz = fread( buf, 1, BufSize, fcpuinfo );
  437. fclose( fcpuinfo );
  438. const auto end = buf + sz;
  439. auto cptr = buf;
  440. uint32_t impl = 0;
  441. uint32_t var = 0;
  442. uint32_t part = 0;
  443. uint32_t rev = 0;
  444. while( end - cptr > 20 )
  445. {
  446. while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 )
  447. {
  448. cptr += 4;
  449. while( end - cptr > 20 && *cptr != '\n' ) cptr++;
  450. cptr++;
  451. }
  452. if( end - cptr <= 20 ) break;
  453. cptr += 4;
  454. if( memcmp( cptr, "implementer\t: ", 14 ) == 0 )
  455. {
  456. if( impl != 0 ) break;
  457. impl = GetHex( cptr, 14 );
  458. }
  459. else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 );
  460. else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 );
  461. else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 );
  462. while( *cptr != '\n' && *cptr != '\0' ) cptr++;
  463. cptr++;
  464. }
  465. if( impl != 0 || var != 0 || part != 0 || rev != 0 )
  466. {
  467. cpuFound = true;
  468. ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev );
  469. }
  470. }
  471. if( !cpuFound )
  472. {
  473. ptr += sprintf( ptr, "CPU: unknown\n" );
  474. }
  475. #elif defined __APPLE__ && TARGET_OS_IPHONE == 1
  476. {
  477. size_t sz;
  478. sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 );
  479. auto str = (char*)tracy_malloc( sz );
  480. sysctlbyname( "hw.machine", str, &sz, nullptr, 0 );
  481. ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) );
  482. tracy_free( str );
  483. }
  484. #else
  485. ptr += sprintf( ptr, "CPU: unknown\n" );
  486. #endif
  487. ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() );
  488. #if defined _WIN32 || defined __CYGWIN__
  489. MEMORYSTATUSEX statex;
  490. statex.dwLength = sizeof( statex );
  491. GlobalMemoryStatusEx( &statex );
  492. # ifdef _MSC_VER
  493. ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 );
  494. # else
  495. ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 );
  496. # endif
  497. #elif defined __linux__
  498. struct sysinfo sysInfo;
  499. sysinfo( &sysInfo );
  500. ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 );
  501. #elif defined __APPLE__
  502. size_t memSize;
  503. size_t sz = sizeof( memSize );
  504. sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 );
  505. ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
  506. #elif defined BSD
  507. size_t memSize;
  508. size_t sz = sizeof( memSize );
  509. sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 );
  510. ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
  511. #else
  512. ptr += sprintf( ptr, "RAM: unknown\n" );
  513. #endif
  514. return buf;
  515. }
  516. static uint64_t GetPid()
  517. {
  518. #if defined _WIN32 || defined __CYGWIN__
  519. return uint64_t( GetCurrentProcessId() );
  520. #else
  521. return uint64_t( getpid() );
  522. #endif
  523. }
  524. static void AckServerQuery()
  525. {
  526. TracyLfqPrepare( QueueType::AckServerQueryNoop );
  527. TracyLfqCommit;
  528. }
  529. static void AckSourceCodeNotAvailable()
  530. {
  531. TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable );
  532. TracyLfqCommit;
  533. }
  534. static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port )
  535. {
  536. static BroadcastMessage msg;
  537. msg.broadcastVersion = BroadcastVersion;
  538. msg.protocolVersion = ProtocolVersion;
  539. msg.listenPort = port;
  540. memcpy( msg.programName, procname, pnsz );
  541. memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
  542. len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 );
  543. return msg;
  544. }
  545. #if defined _WIN32 || defined __CYGWIN__
  546. static DWORD s_profilerThreadId = 0;
  547. static char s_crashText[1024];
  548. LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
  549. {
  550. if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH;
  551. const unsigned ec = pExp->ExceptionRecord->ExceptionCode;
  552. auto msgPtr = s_crashText;
  553. switch( ec )
  554. {
  555. case EXCEPTION_ACCESS_VIOLATION:
  556. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). ", ec );
  557. switch( pExp->ExceptionRecord->ExceptionInformation[0] )
  558. {
  559. case 0:
  560. msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
  561. break;
  562. case 1:
  563. msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
  564. break;
  565. case 8:
  566. msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
  567. break;
  568. default:
  569. break;
  570. }
  571. break;
  572. case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
  573. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec );
  574. break;
  575. case EXCEPTION_DATATYPE_MISALIGNMENT:
  576. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec );
  577. break;
  578. case EXCEPTION_FLT_DIVIDE_BY_ZERO:
  579. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec );
  580. break;
  581. case EXCEPTION_ILLEGAL_INSTRUCTION:
  582. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec );
  583. break;
  584. case EXCEPTION_IN_PAGE_ERROR:
  585. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec );
  586. break;
  587. case EXCEPTION_INT_DIVIDE_BY_ZERO:
  588. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec );
  589. break;
  590. case EXCEPTION_PRIV_INSTRUCTION:
  591. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec );
  592. break;
  593. case EXCEPTION_STACK_OVERFLOW:
  594. msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). ", ec );
  595. break;
  596. default:
  597. return EXCEPTION_CONTINUE_SEARCH;
  598. }
  599. {
  600. GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
  601. TracyLfqPrepare( QueueType::CrashReport );
  602. item->crashReport.time = Profiler::GetTime();
  603. item->crashReport.text = (uint64_t)s_crashText;
  604. TracyLfqCommit;
  605. }
  606. HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 );
  607. if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH;
  608. THREADENTRY32 te = { sizeof( te ) };
  609. if( !Thread32First( h, &te ) )
  610. {
  611. CloseHandle( h );
  612. return EXCEPTION_CONTINUE_SEARCH;
  613. }
  614. const auto pid = GetCurrentProcessId();
  615. const auto tid = GetCurrentThreadId();
  616. do
  617. {
  618. if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId )
  619. {
  620. HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID );
  621. if( th != INVALID_HANDLE_VALUE )
  622. {
  623. SuspendThread( th );
  624. CloseHandle( th );
  625. }
  626. }
  627. }
  628. while( Thread32Next( h, &te ) );
  629. CloseHandle( h );
  630. {
  631. TracyLfqPrepare( QueueType::Crash );
  632. TracyLfqCommit;
  633. }
  634. std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
  635. GetProfiler().RequestShutdown();
  636. while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };
  637. TerminateProcess( GetCurrentProcess(), 1 );
  638. return EXCEPTION_CONTINUE_SEARCH;
  639. }
  640. #endif
  641. #ifdef __linux__
  642. static long s_profilerTid = 0;
  643. static char s_crashText[1024];
  644. static std::atomic<bool> s_alreadyCrashed( false );
  645. static void ThreadFreezer( int /*signal*/ )
  646. {
  647. for(;;) sleep( 1000 );
  648. }
  649. static inline void HexPrint( char*& ptr, uint64_t val )
  650. {
  651. if( val == 0 )
  652. {
  653. *ptr++ = '0';
  654. return;
  655. }
  656. static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
  657. char buf[16];
  658. auto bptr = buf;
  659. do
  660. {
  661. *bptr++ = HexTable[val%16];
  662. val /= 16;
  663. }
  664. while( val > 0 );
  665. do
  666. {
  667. *ptr++ = *--bptr;
  668. }
  669. while( bptr != buf );
  670. }
  671. static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
  672. {
  673. bool expected = false;
  674. if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal );
  675. auto msgPtr = s_crashText;
  676. switch( signal )
  677. {
  678. case SIGILL:
  679. strcpy( msgPtr, "Illegal Instruction.\n" );
  680. while( *msgPtr ) msgPtr++;
  681. switch( info->si_code )
  682. {
  683. case ILL_ILLOPC:
  684. strcpy( msgPtr, "Illegal opcode.\n" );
  685. break;
  686. case ILL_ILLOPN:
  687. strcpy( msgPtr, "Illegal operand.\n" );
  688. break;
  689. case ILL_ILLADR:
  690. strcpy( msgPtr, "Illegal addressing mode.\n" );
  691. break;
  692. case ILL_ILLTRP:
  693. strcpy( msgPtr, "Illegal trap.\n" );
  694. break;
  695. case ILL_PRVOPC:
  696. strcpy( msgPtr, "Privileged opcode.\n" );
  697. break;
  698. case ILL_PRVREG:
  699. strcpy( msgPtr, "Privileged register.\n" );
  700. break;
  701. case ILL_COPROC:
  702. strcpy( msgPtr, "Coprocessor error.\n" );
  703. break;
  704. case ILL_BADSTK:
  705. strcpy( msgPtr, "Internal stack error.\n" );
  706. break;
  707. default:
  708. break;
  709. }
  710. break;
  711. case SIGFPE:
  712. strcpy( msgPtr, "Floating-point exception.\n" );
  713. while( *msgPtr ) msgPtr++;
  714. switch( info->si_code )
  715. {
  716. case FPE_INTDIV:
  717. strcpy( msgPtr, "Integer divide by zero.\n" );
  718. break;
  719. case FPE_INTOVF:
  720. strcpy( msgPtr, "Integer overflow.\n" );
  721. break;
  722. case FPE_FLTDIV:
  723. strcpy( msgPtr, "Floating-point divide by zero.\n" );
  724. break;
  725. case FPE_FLTOVF:
  726. strcpy( msgPtr, "Floating-point overflow.\n" );
  727. break;
  728. case FPE_FLTUND:
  729. strcpy( msgPtr, "Floating-point underflow.\n" );
  730. break;
  731. case FPE_FLTRES:
  732. strcpy( msgPtr, "Floating-point inexact result.\n" );
  733. break;
  734. case FPE_FLTINV:
  735. strcpy( msgPtr, "Floating-point invalid operation.\n" );
  736. break;
  737. case FPE_FLTSUB:
  738. strcpy( msgPtr, "Subscript out of range.\n" );
  739. break;
  740. default:
  741. break;
  742. }
  743. break;
  744. case SIGSEGV:
  745. strcpy( msgPtr, "Invalid memory reference.\n" );
  746. while( *msgPtr ) msgPtr++;
  747. switch( info->si_code )
  748. {
  749. case SEGV_MAPERR:
  750. strcpy( msgPtr, "Address not mapped to object.\n" );
  751. break;
  752. case SEGV_ACCERR:
  753. strcpy( msgPtr, "Invalid permissions for mapped object.\n" );
  754. break;
  755. # ifdef SEGV_BNDERR
  756. case SEGV_BNDERR:
  757. strcpy( msgPtr, "Failed address bound checks.\n" );
  758. break;
  759. # endif
  760. # ifdef SEGV_PKUERR
  761. case SEGV_PKUERR:
  762. strcpy( msgPtr, "Access was denied by memory protection keys.\n" );
  763. break;
  764. # endif
  765. default:
  766. break;
  767. }
  768. break;
  769. case SIGPIPE:
  770. strcpy( msgPtr, "Broken pipe.\n" );
  771. while( *msgPtr ) msgPtr++;
  772. break;
  773. case SIGBUS:
  774. strcpy( msgPtr, "Bus error.\n" );
  775. while( *msgPtr ) msgPtr++;
  776. switch( info->si_code )
  777. {
  778. case BUS_ADRALN:
  779. strcpy( msgPtr, "Invalid address alignment.\n" );
  780. break;
  781. case BUS_ADRERR:
  782. strcpy( msgPtr, "Nonexistent physical address.\n" );
  783. break;
  784. case BUS_OBJERR:
  785. strcpy( msgPtr, "Object-specific hardware error.\n" );
  786. break;
  787. # ifdef BUS_MCEERR_AR
  788. case BUS_MCEERR_AR:
  789. strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" );
  790. break;
  791. # endif
  792. # ifdef BUS_MCEERR_AO
  793. case BUS_MCEERR_AO:
  794. strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" );
  795. break;
  796. # endif
  797. default:
  798. break;
  799. }
  800. break;
  801. default:
  802. abort();
  803. }
  804. while( *msgPtr ) msgPtr++;
  805. if( signal != SIGPIPE )
  806. {
  807. strcpy( msgPtr, "Fault address: 0x" );
  808. while( *msgPtr ) msgPtr++;
  809. HexPrint( msgPtr, uint64_t( info->si_addr ) );
  810. *msgPtr++ = '\n';
  811. }
  812. {
  813. GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
  814. TracyLfqPrepare( QueueType::CrashReport );
  815. item->crashReport.time = Profiler::GetTime();
  816. item->crashReport.text = (uint64_t)s_crashText;
  817. TracyLfqCommit;
  818. }
  819. DIR* dp = opendir( "/proc/self/task" );
  820. if( !dp ) abort();
  821. const auto selfTid = syscall( SYS_gettid );
  822. struct dirent* ep;
  823. while( ( ep = readdir( dp ) ) != nullptr )
  824. {
  825. if( ep->d_name[0] == '.' ) continue;
  826. int tid = atoi( ep->d_name );
  827. if( tid != selfTid && tid != s_profilerTid )
  828. {
  829. syscall( SYS_tkill, tid, SIGPWR );
  830. }
  831. }
  832. closedir( dp );
  833. {
  834. TracyLfqPrepare( QueueType::Crash );
  835. TracyLfqCommit;
  836. }
  837. std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
  838. GetProfiler().RequestShutdown();
  839. while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };
  840. abort();
  841. }
  842. #endif
  843. enum { QueuePrealloc = 256 * 1024 };
  844. static Profiler* s_instance = nullptr;
  845. static Thread* s_thread;
  846. static Thread* s_compressThread;
  847. #ifdef TRACY_HAS_SYSTEM_TRACING
  848. static Thread* s_sysTraceThread = nullptr;
  849. #endif
  850. TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; }
  851. TRACY_API int64_t GetFrequencyQpc()
  852. {
  853. #if defined _WIN32 || defined __CYGWIN__
  854. LARGE_INTEGER t;
  855. QueryPerformanceFrequency( &t );
  856. return t.QuadPart;
  857. #else
  858. return 0;
  859. #endif
  860. }
  861. #ifdef TRACY_DELAYED_INIT
  862. struct ThreadNameData;
  863. TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
  864. TRACY_API void InitRPMallocThread();
  865. void InitRPMallocThread()
  866. {
  867. RPMallocInit rpinit;
  868. rpmalloc_thread_initialize();
  869. }
  870. struct ProfilerData
  871. {
  872. int64_t initTime = SetupHwTimer();
  873. RPMallocInit rpmalloc_init;
  874. moodycamel::ConcurrentQueue<QueueItem> queue;
  875. Profiler profiler;
  876. std::atomic<uint32_t> lockCounter { 0 };
  877. std::atomic<uint8_t> gpuCtxCounter { 0 };
  878. std::atomic<ThreadNameData*> threadNameData { nullptr };
  879. };
  880. struct ProducerWrapper
  881. {
  882. ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {}
  883. moodycamel::ProducerToken detail;
  884. tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
  885. };
  886. struct ProfilerThreadData
  887. {
  888. ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
  889. RPMallocInit rpmalloc_init;
  890. ProducerWrapper token;
  891. GpuCtxWrapper gpuCtx;
  892. # ifdef TRACY_ON_DEMAND
  893. LuaZoneState luaZoneState;
  894. # endif
  895. };
  896. # ifdef TRACY_MANUAL_LIFETIME
  897. ProfilerData* s_profilerData = nullptr;
  898. TRACY_API void StartupProfiler()
  899. {
  900. s_profilerData = new ProfilerData;
  901. s_profilerData->profiler.SpawnWorkerThreads();
  902. }
  903. static ProfilerData& GetProfilerData()
  904. {
  905. assert(s_profilerData);
  906. return *s_profilerData;
  907. }
  908. TRACY_API void ShutdownProfiler()
  909. {
  910. delete s_profilerData;
  911. s_profilerData = nullptr;
  912. rpmalloc_finalize();
  913. }
  914. # else
  915. static std::atomic<int> profilerDataLock { 0 };
  916. static std::atomic<ProfilerData*> profilerData { nullptr };
  917. static ProfilerData& GetProfilerData()
  918. {
  919. auto ptr = profilerData.load( std::memory_order_acquire );
  920. if( !ptr )
  921. {
  922. int expected = 0;
  923. while( !profilerDataLock.compare_exchange_strong( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; }
  924. ptr = profilerData.load( std::memory_order_acquire );
  925. if( !ptr )
  926. {
  927. ptr = (ProfilerData*)malloc( sizeof( ProfilerData ) );
  928. new (ptr) ProfilerData();
  929. profilerData.store( ptr, std::memory_order_release );
  930. }
  931. profilerDataLock.store( 0, std::memory_order_release );
  932. }
  933. return *ptr;
  934. }
  935. # endif
  936. static ProfilerThreadData& GetProfilerThreadData()
  937. {
  938. thread_local ProfilerThreadData data( GetProfilerData() );
  939. return data;
  940. }
  941. TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; }
  942. TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; }
  943. TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return GetProfilerData().queue; }
  944. TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
  945. TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
  946. TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
  947. TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; }
  948. TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
  949. std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; }
  950. # ifdef TRACY_ON_DEMAND
  951. TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; }
  952. # endif
  953. # ifndef TRACY_MANUAL_LIFETIME
  954. namespace
  955. {
  956. const auto& __profiler_init = GetProfiler();
  957. }
  958. # endif
  959. #else
  960. TRACY_API void InitRPMallocThread()
  961. {
  962. rpmalloc_thread_initialize();
  963. }
  964. // MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
  965. // 1a. But s_queue is needed for initialization of variables in point 2.
  966. extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
  967. thread_local RPMallocInit init_order(106) s_rpmalloc_thread_init;
  968. // 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
  969. thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
  970. thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
  971. thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };
  972. # ifdef _MSC_VER
  973. // 1. Initialize these static variables before all other variables.
  974. # pragma warning( disable : 4075 )
  975. # pragma init_seg( ".CRT$XCB" )
  976. # endif
  977. static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
  978. static RPMallocInit init_order(102) s_rpmalloc_init;
  979. moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
  980. std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
  981. std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
  982. thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr };
  983. struct ThreadNameData;
  984. static std::atomic<ThreadNameData*> init_order(104) s_threadNameDataInstance( nullptr );
  985. std::atomic<ThreadNameData*>& s_threadNameData = s_threadNameDataInstance;
  986. # ifdef TRACY_ON_DEMAND
  987. thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false };
  988. # endif
  989. static Profiler init_order(105) s_profiler;
  990. TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
  991. TRACY_API Profiler& GetProfiler() { return s_profiler; }
  992. TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
  993. TRACY_API int64_t GetInitTime() { return s_initTime.val; }
  994. TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
  995. TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
  996. TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
  997. # ifdef __CYGWIN__
  998. // Hackfix for cygwin reporting memory frees without matching allocations. WTF?
  999. TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
  1000. # else
  1001. TRACY_API uint64_t GetThreadHandle() { return s_threadHandle.val; }
  1002. # endif
  1003. std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }
  1004. # ifdef TRACY_ON_DEMAND
  1005. TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
  1006. # endif
  1007. #endif
  1008. Profiler::Profiler()
  1009. : m_timeBegin( 0 )
  1010. , m_mainThread( detail::GetThreadHandleImpl() )
  1011. , m_epoch( std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count() )
  1012. , m_shutdown( false )
  1013. , m_shutdownManual( false )
  1014. , m_shutdownFinished( false )
  1015. , m_sock( nullptr )
  1016. , m_broadcast( nullptr )
  1017. , m_noExit( false )
  1018. , m_userPort( 0 )
  1019. , m_zoneId( 1 )
  1020. , m_samplingPeriod( 0 )
  1021. , m_stream( LZ4_createStream() )
  1022. , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
  1023. , m_bufferOffset( 0 )
  1024. , m_bufferStart( 0 )
  1025. , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
  1026. , m_serialQueue( 1024*1024 )
  1027. , m_serialDequeue( 1024*1024 )
  1028. , m_fiQueue( 16 )
  1029. , m_fiDequeue( 16 )
  1030. , m_frameCount( 0 )
  1031. , m_isConnected( false )
  1032. #ifdef TRACY_ON_DEMAND
  1033. , m_connectionId( 0 )
  1034. , m_deferredQueue( 64*1024 )
  1035. #endif
  1036. , m_paramCallback( nullptr )
  1037. , m_queryData( nullptr )
  1038. {
  1039. assert( !s_instance );
  1040. s_instance = this;
  1041. #ifndef TRACY_DELAYED_INIT
  1042. # ifdef _MSC_VER
  1043. // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
  1044. s_token_detail = moodycamel::ProducerToken( s_queue );
  1045. s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
  1046. s_threadHandle = ThreadHandleWrapper { m_mainThread };
  1047. # endif
  1048. #endif
  1049. CalibrateTimer();
  1050. CalibrateDelay();
  1051. ReportTopology();
  1052. #ifndef TRACY_NO_EXIT
  1053. const char* noExitEnv = getenv( "TRACY_NO_EXIT" );
  1054. if( noExitEnv && noExitEnv[0] == '1' )
  1055. {
  1056. m_noExit = true;
  1057. }
  1058. #endif
  1059. const char* userPort = getenv( "TRACY_PORT" );
  1060. if( userPort )
  1061. {
  1062. m_userPort = atoi( userPort );
  1063. }
  1064. #if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME)
  1065. SpawnWorkerThreads();
  1066. #endif
  1067. }
  1068. void Profiler::SpawnWorkerThreads()
  1069. {
  1070. s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
  1071. new(s_thread) Thread( LaunchWorker, this );
  1072. s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) );
  1073. new(s_compressThread) Thread( LaunchCompressWorker, this );
  1074. #ifdef TRACY_HAS_SYSTEM_TRACING
  1075. if( SysTraceStart( m_samplingPeriod ) )
  1076. {
  1077. s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) );
  1078. new(s_sysTraceThread) Thread( SysTraceWorker, nullptr );
  1079. std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) );
  1080. }
  1081. #endif
  1082. #if defined _WIN32 || defined __CYGWIN__
  1083. s_profilerThreadId = GetThreadId( s_thread->Handle() );
  1084. AddVectoredExceptionHandler( 1, CrashFilter );
  1085. #endif
  1086. #ifdef __linux__
  1087. struct sigaction threadFreezer = {};
  1088. threadFreezer.sa_handler = ThreadFreezer;
  1089. sigaction( SIGPWR, &threadFreezer, nullptr );
  1090. struct sigaction crashHandler = {};
  1091. crashHandler.sa_sigaction = CrashHandler;
  1092. crashHandler.sa_flags = SA_SIGINFO;
  1093. sigaction( SIGILL, &crashHandler, nullptr );
  1094. sigaction( SIGFPE, &crashHandler, nullptr );
  1095. sigaction( SIGSEGV, &crashHandler, nullptr );
  1096. sigaction( SIGPIPE, &crashHandler, nullptr );
  1097. sigaction( SIGBUS, &crashHandler, nullptr );
  1098. #endif
  1099. #ifdef TRACY_HAS_CALLSTACK
  1100. InitCallstack();
  1101. #endif
  1102. m_timeBegin.store( GetTime(), std::memory_order_relaxed );
  1103. }
  1104. Profiler::~Profiler()
  1105. {
  1106. m_shutdown.store( true, std::memory_order_relaxed );
  1107. #ifdef TRACY_HAS_SYSTEM_TRACING
  1108. if( s_sysTraceThread )
  1109. {
  1110. SysTraceStop();
  1111. s_sysTraceThread->~Thread();
  1112. tracy_free( s_sysTraceThread );
  1113. }
  1114. #endif
  1115. s_compressThread->~Thread();
  1116. tracy_free( s_compressThread );
  1117. s_thread->~Thread();
  1118. tracy_free( s_thread );
  1119. tracy_free( m_lz4Buf );
  1120. tracy_free( m_buffer );
  1121. LZ4_freeStream( (LZ4_stream_t*)m_stream );
  1122. if( m_sock )
  1123. {
  1124. m_sock->~Socket();
  1125. tracy_free( m_sock );
  1126. }
  1127. if( m_broadcast )
  1128. {
  1129. m_broadcast->~UdpBroadcast();
  1130. tracy_free( m_broadcast );
  1131. }
  1132. assert( s_instance );
  1133. s_instance = nullptr;
  1134. }
  1135. bool Profiler::ShouldExit()
  1136. {
  1137. return s_instance->m_shutdown.load( std::memory_order_relaxed );
  1138. }
  1139. void Profiler::Worker()
  1140. {
  1141. #ifdef __linux__
  1142. s_profilerTid = syscall( SYS_gettid );
  1143. #endif
  1144. ThreadExitHandler threadExitHandler;
  1145. SetThreadName( "Tracy Profiler" );
  1146. #ifdef TRACY_DATA_PORT
  1147. const bool dataPortSearch = false;
  1148. auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT;
  1149. #else
  1150. const bool dataPortSearch = m_userPort == 0;
  1151. auto dataPort = m_userPort != 0 ? m_userPort : 8086;
  1152. #endif
  1153. #ifdef TRACY_BROADCAST_PORT
  1154. const auto broadcastPort = TRACY_BROADCAST_PORT;
  1155. #else
  1156. const auto broadcastPort = 8086;
  1157. #endif
  1158. while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1159. rpmalloc_thread_initialize();
  1160. m_exectime = 0;
  1161. const auto execname = GetProcessExecutablePath();
  1162. if( execname )
  1163. {
  1164. struct stat st;
  1165. if( stat( execname, &st ) == 0 )
  1166. {
  1167. m_exectime = (uint64_t)st.st_mtime;
  1168. }
  1169. }
  1170. const auto procname = GetProcessName();
  1171. const auto pnsz = std::min<size_t>( strlen( procname ), WelcomeMessageProgramNameSize - 1 );
  1172. const auto hostinfo = GetHostInfo();
  1173. const auto hisz = std::min<size_t>( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 );
  1174. const uint64_t pid = GetPid();
  1175. #ifdef TRACY_ON_DEMAND
  1176. uint8_t onDemand = 1;
  1177. #else
  1178. uint8_t onDemand = 0;
  1179. #endif
  1180. #ifdef __APPLE__
  1181. uint8_t isApple = 1;
  1182. #else
  1183. uint8_t isApple = 0;
  1184. #endif
  1185. #if defined __i386 || defined _M_IX86
  1186. uint8_t cpuArch = CpuArchX86;
  1187. #elif defined __x86_64__ || defined _M_X64
  1188. uint8_t cpuArch = CpuArchX64;
  1189. #elif defined __aarch64__
  1190. uint8_t cpuArch = CpuArchArm64;
  1191. #elif defined __ARM_ARCH
  1192. uint8_t cpuArch = CpuArchArm32;
  1193. #else
  1194. uint8_t cpuArch = CpuArchUnknown;
  1195. #endif
  1196. #ifdef TRACY_NO_CODE_TRANSFER
  1197. uint8_t codeTransfer = 0;
  1198. #else
  1199. uint8_t codeTransfer = 1;
  1200. #endif
  1201. #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
  1202. uint32_t regs[4];
  1203. char manufacturer[12];
  1204. CpuId( regs, 0 );
  1205. memcpy( manufacturer, regs+1, 4 );
  1206. memcpy( manufacturer+4, regs+3, 4 );
  1207. memcpy( manufacturer+8, regs+2, 4 );
  1208. CpuId( regs, 1 );
  1209. uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 );
  1210. #else
  1211. const char manufacturer[12] = {};
  1212. uint32_t cpuId = 0;
  1213. #endif
  1214. WelcomeMessage welcome;
  1215. MemWrite( &welcome.timerMul, m_timerMul );
  1216. MemWrite( &welcome.initBegin, GetInitTime() );
  1217. MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) );
  1218. MemWrite( &welcome.delay, m_delay );
  1219. MemWrite( &welcome.resolution, m_resolution );
  1220. MemWrite( &welcome.epoch, m_epoch );
  1221. MemWrite( &welcome.exectime, m_exectime );
  1222. MemWrite( &welcome.pid, pid );
  1223. MemWrite( &welcome.samplingPeriod, m_samplingPeriod );
  1224. MemWrite( &welcome.onDemand, onDemand );
  1225. MemWrite( &welcome.isApple, isApple );
  1226. MemWrite( &welcome.cpuArch, cpuArch );
  1227. MemWrite( &welcome.codeTransfer, codeTransfer );
  1228. memcpy( welcome.cpuManufacturer, manufacturer, 12 );
  1229. MemWrite( &welcome.cpuId, cpuId );
  1230. memcpy( welcome.programName, procname, pnsz );
  1231. memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
  1232. memcpy( welcome.hostInfo, hostinfo, hisz );
  1233. memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz );
  1234. moodycamel::ConsumerToken token( GetQueue() );
  1235. ListenSocket listen;
  1236. bool isListening = false;
  1237. if( !dataPortSearch )
  1238. {
  1239. isListening = listen.Listen( dataPort, 4 );
  1240. }
  1241. else
  1242. {
  1243. for( uint32_t i=0; i<20; i++ )
  1244. {
  1245. if( listen.Listen( dataPort+i, 4 ) )
  1246. {
  1247. dataPort += i;
  1248. isListening = true;
  1249. break;
  1250. }
  1251. }
  1252. }
  1253. if( !isListening )
  1254. {
  1255. for(;;)
  1256. {
  1257. if( ShouldExit() )
  1258. {
  1259. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1260. return;
  1261. }
  1262. ClearQueues( token );
  1263. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1264. }
  1265. }
  1266. #ifndef TRACY_NO_BROADCAST
  1267. m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) );
  1268. new(m_broadcast) UdpBroadcast();
  1269. # ifdef TRACY_ONLY_LOCALHOST
  1270. const char* addr = "127.255.255.255";
  1271. # else
  1272. const char* addr = "255.255.255.255";
  1273. # endif
  1274. if( !m_broadcast->Open( addr, broadcastPort ) )
  1275. {
  1276. m_broadcast->~UdpBroadcast();
  1277. tracy_free( m_broadcast );
  1278. m_broadcast = nullptr;
  1279. }
  1280. #endif
  1281. int broadcastLen = 0;
  1282. auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort );
  1283. uint64_t lastBroadcast = 0;
  1284. // Connections loop.
  1285. // Each iteration of the loop handles whole connection. Multiple iterations will only
  1286. // happen in the on-demand mode or when handshake fails.
  1287. for(;;)
  1288. {
  1289. // Wait for incoming connection
  1290. for(;;)
  1291. {
  1292. #ifndef TRACY_NO_EXIT
  1293. if( !m_noExit && ShouldExit() )
  1294. {
  1295. if( m_broadcast )
  1296. {
  1297. broadcastMsg.activeTime = -1;
  1298. m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
  1299. }
  1300. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1301. return;
  1302. }
  1303. #endif
  1304. m_sock = listen.Accept();
  1305. if( m_sock ) break;
  1306. #ifndef TRACY_ON_DEMAND
  1307. ProcessSysTime();
  1308. #endif
  1309. if( m_broadcast )
  1310. {
  1311. const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
  1312. if( t - lastBroadcast > 3000000000 ) // 3s
  1313. {
  1314. lastBroadcast = t;
  1315. const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
  1316. broadcastMsg.activeTime = int32_t( ts - m_epoch );
  1317. assert( broadcastMsg.activeTime >= 0 );
  1318. m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
  1319. }
  1320. }
  1321. }
  1322. if( m_broadcast )
  1323. {
  1324. lastBroadcast = 0;
  1325. broadcastMsg.activeTime = -1;
  1326. m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
  1327. }
  1328. // Handshake
  1329. {
  1330. char shibboleth[HandshakeShibbolethSize];
  1331. auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 );
  1332. if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
  1333. {
  1334. m_sock->~Socket();
  1335. tracy_free( m_sock );
  1336. m_sock = nullptr;
  1337. continue;
  1338. }
  1339. uint32_t protocolVersion;
  1340. res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 );
  1341. if( !res )
  1342. {
  1343. m_sock->~Socket();
  1344. tracy_free( m_sock );
  1345. m_sock = nullptr;
  1346. continue;
  1347. }
  1348. if( protocolVersion != ProtocolVersion )
  1349. {
  1350. HandshakeStatus status = HandshakeProtocolMismatch;
  1351. m_sock->Send( &status, sizeof( status ) );
  1352. m_sock->~Socket();
  1353. tracy_free( m_sock );
  1354. m_sock = nullptr;
  1355. continue;
  1356. }
  1357. }
  1358. #ifdef TRACY_ON_DEMAND
  1359. const auto currentTime = GetTime();
  1360. ClearQueues( token );
  1361. m_connectionId.fetch_add( 1, std::memory_order_release );
  1362. #endif
  1363. m_isConnected.store( true, std::memory_order_release );
  1364. HandshakeStatus handshake = HandshakeWelcome;
  1365. m_sock->Send( &handshake, sizeof( handshake ) );
  1366. LZ4_resetStream( (LZ4_stream_t*)m_stream );
  1367. m_sock->Send( &welcome, sizeof( welcome ) );
  1368. m_threadCtx = 0;
  1369. m_refTimeSerial = 0;
  1370. m_refTimeCtx = 0;
  1371. m_refTimeGpu = 0;
  1372. #ifdef TRACY_ON_DEMAND
  1373. OnDemandPayloadMessage onDemand;
  1374. onDemand.frames = m_frameCount.load( std::memory_order_relaxed );
  1375. onDemand.currentTime = currentTime;
  1376. m_sock->Send( &onDemand, sizeof( onDemand ) );
  1377. m_deferredLock.lock();
  1378. for( auto& item : m_deferredQueue )
  1379. {
  1380. uint64_t ptr;
  1381. uint16_t size;
  1382. const auto idx = MemRead<uint8_t>( &item.hdr.idx );
  1383. switch( (QueueType)idx )
  1384. {
  1385. case QueueType::MessageAppInfo:
  1386. ptr = MemRead<uint64_t>( &item.messageFat.text );
  1387. size = MemRead<uint16_t>( &item.messageFat.size );
  1388. SendSingleString( (const char*)ptr, size );
  1389. break;
  1390. case QueueType::LockName:
  1391. ptr = MemRead<uint64_t>( &item.lockNameFat.name );
  1392. size = MemRead<uint16_t>( &item.lockNameFat.size );
  1393. SendSingleString( (const char*)ptr, size );
  1394. break;
  1395. default:
  1396. break;
  1397. }
  1398. AppendData( &item, QueueDataSize[idx] );
  1399. }
  1400. m_deferredLock.unlock();
  1401. #endif
  1402. // Main communications loop
  1403. int keepAlive = 0;
  1404. for(;;)
  1405. {
  1406. ProcessSysTime();
  1407. const auto status = Dequeue( token );
  1408. const auto serialStatus = DequeueSerial();
  1409. if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
  1410. {
  1411. break;
  1412. }
  1413. else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
  1414. {
  1415. if( ShouldExit() ) break;
  1416. if( m_bufferOffset != m_bufferStart )
  1417. {
  1418. if( !CommitData() ) break;
  1419. }
  1420. if( keepAlive == 500 )
  1421. {
  1422. QueueItem ka;
  1423. ka.hdr.type = QueueType::KeepAlive;
  1424. AppendData( &ka, QueueDataSize[ka.hdr.idx] );
  1425. if( !CommitData() ) break;
  1426. keepAlive = 0;
  1427. }
  1428. else
  1429. {
  1430. keepAlive++;
  1431. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1432. }
  1433. }
  1434. else
  1435. {
  1436. keepAlive = 0;
  1437. }
  1438. bool connActive = true;
  1439. while( m_sock->HasData() && connActive )
  1440. {
  1441. connActive = HandleServerQuery();
  1442. }
  1443. if( !connActive ) break;
  1444. }
  1445. if( ShouldExit() ) break;
  1446. m_isConnected.store( false, std::memory_order_release );
  1447. #ifdef TRACY_ON_DEMAND
  1448. m_bufferOffset = 0;
  1449. m_bufferStart = 0;
  1450. #endif
  1451. m_sock->~Socket();
  1452. tracy_free( m_sock );
  1453. m_sock = nullptr;
  1454. #ifndef TRACY_ON_DEMAND
  1455. // Client is no longer available here. Accept incoming connections, but reject handshake.
  1456. for(;;)
  1457. {
  1458. if( ShouldExit() )
  1459. {
  1460. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1461. return;
  1462. }
  1463. ClearQueues( token );
  1464. m_sock = listen.Accept();
  1465. if( m_sock )
  1466. {
  1467. char shibboleth[HandshakeShibbolethSize];
  1468. auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 );
  1469. if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
  1470. {
  1471. m_sock->~Socket();
  1472. tracy_free( m_sock );
  1473. m_sock = nullptr;
  1474. continue;
  1475. }
  1476. uint32_t protocolVersion;
  1477. res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 );
  1478. if( !res )
  1479. {
  1480. m_sock->~Socket();
  1481. tracy_free( m_sock );
  1482. m_sock = nullptr;
  1483. continue;
  1484. }
  1485. HandshakeStatus status = HandshakeNotAvailable;
  1486. m_sock->Send( &status, sizeof( status ) );
  1487. m_sock->~Socket();
  1488. tracy_free( m_sock );
  1489. }
  1490. }
  1491. #endif
  1492. }
  1493. // End of connections loop
  1494. // Client is exiting. Send items remaining in queues.
  1495. for(;;)
  1496. {
  1497. const auto status = Dequeue( token );
  1498. const auto serialStatus = DequeueSerial();
  1499. if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
  1500. {
  1501. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1502. return;
  1503. }
  1504. else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
  1505. {
  1506. if( m_bufferOffset != m_bufferStart ) CommitData();
  1507. break;
  1508. }
  1509. while( m_sock->HasData() )
  1510. {
  1511. if( !HandleServerQuery() )
  1512. {
  1513. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1514. return;
  1515. }
  1516. }
  1517. }
  1518. // Send client termination notice to the server
  1519. QueueItem terminate;
  1520. MemWrite( &terminate.hdr.type, QueueType::Terminate );
  1521. if( !SendData( (const char*)&terminate, 1 ) )
  1522. {
  1523. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1524. return;
  1525. }
  1526. // Handle remaining server queries
  1527. for(;;)
  1528. {
  1529. if( m_sock->HasData() )
  1530. {
  1531. while( m_sock->HasData() )
  1532. {
  1533. if( !HandleServerQuery() )
  1534. {
  1535. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1536. return;
  1537. }
  1538. }
  1539. while( Dequeue( token ) == DequeueStatus::DataDequeued ) {}
  1540. while( DequeueSerial() == DequeueStatus::DataDequeued ) {}
  1541. if( m_bufferOffset != m_bufferStart )
  1542. {
  1543. if( !CommitData() )
  1544. {
  1545. m_shutdownFinished.store( true, std::memory_order_relaxed );
  1546. return;
  1547. }
  1548. }
  1549. }
  1550. else
  1551. {
  1552. if( m_bufferOffset != m_bufferStart ) CommitData();
  1553. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1554. }
  1555. }
  1556. }
  1557. void Profiler::CompressWorker()
  1558. {
  1559. ThreadExitHandler threadExitHandler;
  1560. SetThreadName( "Tracy DXT1" );
  1561. while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1562. rpmalloc_thread_initialize();
  1563. for(;;)
  1564. {
  1565. const auto shouldExit = ShouldExit();
  1566. {
  1567. bool lockHeld = true;
  1568. while( !m_fiLock.try_lock() )
  1569. {
  1570. if( m_shutdownManual.load( std::memory_order_relaxed ) )
  1571. {
  1572. lockHeld = false;
  1573. break;
  1574. }
  1575. }
  1576. if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue );
  1577. if( lockHeld )
  1578. {
  1579. m_fiLock.unlock();
  1580. }
  1581. }
  1582. const auto sz = m_fiDequeue.size();
  1583. if( sz > 0 )
  1584. {
  1585. auto fi = m_fiDequeue.data();
  1586. auto end = fi + sz;
  1587. while( fi != end )
  1588. {
  1589. const auto w = fi->w;
  1590. const auto h = fi->h;
  1591. const auto csz = size_t( w * h / 2 );
  1592. auto etc1buf = (char*)tracy_malloc( csz );
  1593. CompressImageDxt1( (const char*)fi->image, etc1buf, w, h );
  1594. tracy_free( fi->image );
  1595. TracyLfqPrepare( QueueType::FrameImage );
  1596. MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf );
  1597. MemWrite( &item->frameImageFat.frame, fi->frame );
  1598. MemWrite( &item->frameImageFat.w, w );
  1599. MemWrite( &item->frameImageFat.h, h );
  1600. uint8_t flip = fi->flip;
  1601. MemWrite( &item->frameImageFat.flip, flip );
  1602. TracyLfqCommit;
  1603. fi++;
  1604. }
  1605. m_fiDequeue.clear();
  1606. }
  1607. else
  1608. {
  1609. std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) );
  1610. }
  1611. if( shouldExit )
  1612. {
  1613. return;
  1614. }
  1615. }
  1616. }
  1617. static void FreeAssociatedMemory( const QueueItem& item )
  1618. {
  1619. if( item.hdr.idx >= (int)QueueType::Terminate ) return;
  1620. uint64_t ptr;
  1621. switch( item.hdr.type )
  1622. {
  1623. case QueueType::ZoneText:
  1624. case QueueType::ZoneName:
  1625. ptr = MemRead<uint64_t>( &item.zoneTextFat.text );
  1626. tracy_free( (void*)ptr );
  1627. break;
  1628. case QueueType::MessageColor:
  1629. case QueueType::MessageColorCallstack:
  1630. ptr = MemRead<uint64_t>( &item.messageColorFat.text );
  1631. tracy_free( (void*)ptr );
  1632. break;
  1633. case QueueType::Message:
  1634. case QueueType::MessageCallstack:
  1635. #ifndef TRACY_ON_DEMAND
  1636. case QueueType::MessageAppInfo:
  1637. #endif
  1638. ptr = MemRead<uint64_t>( &item.messageFat.text );
  1639. tracy_free( (void*)ptr );
  1640. break;
  1641. case QueueType::ZoneBeginAllocSrcLoc:
  1642. case QueueType::ZoneBeginAllocSrcLocCallstack:
  1643. ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc );
  1644. tracy_free( (void*)ptr );
  1645. break;
  1646. case QueueType::GpuZoneBeginAllocSrcLoc:
  1647. case QueueType::GpuZoneBeginAllocSrcLocCallstack:
  1648. case QueueType::GpuZoneBeginAllocSrcLocSerial:
  1649. case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
  1650. ptr = MemRead<uint64_t>( &item.gpuZoneBegin.srcloc );
  1651. tracy_free( (void*)ptr );
  1652. break;
  1653. case QueueType::CallstackSerial:
  1654. case QueueType::Callstack:
  1655. ptr = MemRead<uint64_t>( &item.callstackFat.ptr );
  1656. tracy_free( (void*)ptr );
  1657. break;
  1658. case QueueType::CallstackAlloc:
  1659. ptr = MemRead<uint64_t>( &item.callstackAllocFat.nativePtr );
  1660. tracy_free( (void*)ptr );
  1661. ptr = MemRead<uint64_t>( &item.callstackAllocFat.ptr );
  1662. tracy_free( (void*)ptr );
  1663. break;
  1664. case QueueType::CallstackSample:
  1665. ptr = MemRead<uint64_t>( &item.callstackSampleFat.ptr );
  1666. tracy_free( (void*)ptr );
  1667. break;
  1668. case QueueType::FrameImage:
  1669. ptr = MemRead<uint64_t>( &item.frameImageFat.image );
  1670. tracy_free( (void*)ptr );
  1671. break;
  1672. #ifndef TRACY_ON_DEMAND
  1673. case QueueType::LockName:
  1674. ptr = MemRead<uint64_t>( &item.lockNameFat.name );
  1675. tracy_free( (void*)ptr );
  1676. break;
  1677. case QueueType::GpuContextName:
  1678. ptr = MemRead<uint64_t>( &item.gpuContextNameFat.ptr );
  1679. tracy_free( (void*)ptr );
  1680. break;
  1681. #endif
  1682. #ifdef TRACY_ON_DEMAND
  1683. case QueueType::MessageAppInfo:
  1684. case QueueType::GpuContextName:
  1685. // Don't free memory associated with deferred messages.
  1686. break;
  1687. #endif
  1688. default:
  1689. break;
  1690. }
  1691. }
  1692. void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
  1693. {
  1694. for(;;)
  1695. {
  1696. const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } );
  1697. if( sz == 0 ) break;
  1698. }
  1699. ClearSerial();
  1700. }
  1701. void Profiler::ClearSerial()
  1702. {
  1703. bool lockHeld = true;
  1704. while( !m_serialLock.try_lock() )
  1705. {
  1706. if( m_shutdownManual.load( std::memory_order_relaxed ) )
  1707. {
  1708. lockHeld = false;
  1709. break;
  1710. }
  1711. }
  1712. for( auto& v : m_serialQueue ) FreeAssociatedMemory( v );
  1713. m_serialQueue.clear();
  1714. if( lockHeld )
  1715. {
  1716. m_serialLock.unlock();
  1717. }
  1718. for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
  1719. m_serialDequeue.clear();
  1720. }
  1721. Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
  1722. {
  1723. bool connectionLost = false;
  1724. const auto sz = GetQueue().try_dequeue_bulk_single( token,
  1725. [this, &connectionLost] ( const uint64_t& threadId )
  1726. {
  1727. if( threadId != m_threadCtx )
  1728. {
  1729. QueueItem item;
  1730. MemWrite( &item.hdr.type, QueueType::ThreadContext );
  1731. MemWrite( &item.threadCtx.thread, threadId );
  1732. if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) connectionLost = true;
  1733. m_threadCtx = threadId;
  1734. m_refTimeThread = 0;
  1735. }
  1736. },
  1737. [this, &connectionLost] ( QueueItem* item, size_t sz )
  1738. {
  1739. if( connectionLost ) return;
  1740. assert( sz > 0 );
  1741. int64_t refThread = m_refTimeThread;
  1742. int64_t refCtx = m_refTimeCtx;
  1743. int64_t refGpu = m_refTimeGpu;
  1744. while( sz-- > 0 )
  1745. {
  1746. uint64_t ptr;
  1747. uint16_t size;
  1748. auto idx = MemRead<uint8_t>( &item->hdr.idx );
  1749. if( idx < (int)QueueType::Terminate )
  1750. {
  1751. switch( (QueueType)idx )
  1752. {
  1753. case QueueType::ZoneText:
  1754. case QueueType::ZoneName:
  1755. ptr = MemRead<uint64_t>( &item->zoneTextFat.text );
  1756. size = MemRead<uint16_t>( &item->zoneTextFat.size );
  1757. SendSingleString( (const char*)ptr, size );
  1758. tracy_free( (void*)ptr );
  1759. break;
  1760. case QueueType::Message:
  1761. case QueueType::MessageCallstack:
  1762. ptr = MemRead<uint64_t>( &item->messageFat.text );
  1763. size = MemRead<uint16_t>( &item->messageFat.size );
  1764. SendSingleString( (const char*)ptr, size );
  1765. tracy_free( (void*)ptr );
  1766. break;
  1767. case QueueType::MessageColor:
  1768. case QueueType::MessageColorCallstack:
  1769. ptr = MemRead<uint64_t>( &item->messageColorFat.text );
  1770. size = MemRead<uint16_t>( &item->messageColorFat.size );
  1771. SendSingleString( (const char*)ptr, size );
  1772. tracy_free( (void*)ptr );
  1773. break;
  1774. case QueueType::MessageAppInfo:
  1775. ptr = MemRead<uint64_t>( &item->messageFat.text );
  1776. size = MemRead<uint16_t>( &item->messageFat.size );
  1777. SendSingleString( (const char*)ptr, size );
  1778. #ifndef TRACY_ON_DEMAND
  1779. tracy_free( (void*)ptr );
  1780. #endif
  1781. break;
  1782. case QueueType::ZoneBeginAllocSrcLoc:
  1783. case QueueType::ZoneBeginAllocSrcLocCallstack:
  1784. {
  1785. int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
  1786. int64_t dt = t - refThread;
  1787. refThread = t;
  1788. MemWrite( &item->zoneBegin.time, dt );
  1789. ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
  1790. SendSourceLocationPayload( ptr );
  1791. tracy_free( (void*)ptr );
  1792. break;
  1793. }
  1794. case QueueType::Callstack:
  1795. ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
  1796. SendCallstackPayload( ptr );
  1797. tracy_free( (void*)ptr );
  1798. break;
  1799. case QueueType::CallstackAlloc:
  1800. ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr );
  1801. if( ptr != 0 )
  1802. {
  1803. CutCallstack( (void*)ptr, "lua_pcall" );
  1804. SendCallstackPayload( ptr );
  1805. tracy_free( (void*)ptr );
  1806. }
  1807. ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr );
  1808. SendCallstackAlloc( ptr );
  1809. tracy_free( (void*)ptr );
  1810. break;
  1811. case QueueType::CallstackSample:
  1812. {
  1813. ptr = MemRead<uint64_t>( &item->callstackSampleFat.ptr );
  1814. SendCallstackPayload64( ptr );
  1815. tracy_free( (void*)ptr );
  1816. int64_t t = MemRead<int64_t>( &item->callstackSampleFat.time );
  1817. int64_t dt = t - refCtx;
  1818. refCtx = t;
  1819. MemWrite( &item->callstackSampleFat.time, dt );
  1820. break;
  1821. }
  1822. case QueueType::FrameImage:
  1823. {
  1824. ptr = MemRead<uint64_t>( &item->frameImageFat.image );
  1825. const auto w = MemRead<uint16_t>( &item->frameImageFat.w );
  1826. const auto h = MemRead<uint16_t>( &item->frameImageFat.h );
  1827. const auto csz = size_t( w * h / 2 );
  1828. SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
  1829. tracy_free( (void*)ptr );
  1830. break;
  1831. }
  1832. case QueueType::ZoneBegin:
  1833. case QueueType::ZoneBeginCallstack:
  1834. {
  1835. int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
  1836. int64_t dt = t - refThread;
  1837. refThread = t;
  1838. MemWrite( &item->zoneBegin.time, dt );
  1839. break;
  1840. }
  1841. case QueueType::ZoneEnd:
  1842. {
  1843. int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
  1844. int64_t dt = t - refThread;
  1845. refThread = t;
  1846. MemWrite( &item->zoneEnd.time, dt );
  1847. break;
  1848. }
  1849. case QueueType::GpuZoneBegin:
  1850. case QueueType::GpuZoneBeginCallstack:
  1851. {
  1852. int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
  1853. int64_t dt = t - refThread;
  1854. refThread = t;
  1855. MemWrite( &item->gpuZoneBegin.cpuTime, dt );
  1856. break;
  1857. }
  1858. case QueueType::GpuZoneBeginAllocSrcLoc:
  1859. case QueueType::GpuZoneBeginAllocSrcLocCallstack:
  1860. {
  1861. int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
  1862. int64_t dt = t - refThread;
  1863. refThread = t;
  1864. MemWrite( &item->gpuZoneBegin.cpuTime, dt );
  1865. ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
  1866. SendSourceLocationPayload( ptr );
  1867. tracy_free( (void*)ptr );
  1868. break;
  1869. }
  1870. case QueueType::GpuZoneEnd:
  1871. {
  1872. int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
  1873. int64_t dt = t - refThread;
  1874. refThread = t;
  1875. MemWrite( &item->gpuZoneEnd.cpuTime, dt );
  1876. break;
  1877. }
  1878. case QueueType::GpuContextName:
  1879. {
  1880. ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr );
  1881. uint16_t size = MemRead<uint16_t>( &item->gpuContextNameFat.size );
  1882. SendSingleString( (const char*)ptr, size );
  1883. #ifndef TRACY_ON_DEMAND
  1884. tracy_free( (void*)ptr );
  1885. #endif
  1886. break;
  1887. }
  1888. case QueueType::PlotData:
  1889. {
  1890. int64_t t = MemRead<int64_t>( &item->plotData.time );
  1891. int64_t dt = t - refThread;
  1892. refThread = t;
  1893. MemWrite( &item->plotData.time, dt );
  1894. break;
  1895. }
  1896. case QueueType::ContextSwitch:
  1897. {
  1898. int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
  1899. int64_t dt = t - refCtx;
  1900. refCtx = t;
  1901. MemWrite( &item->contextSwitch.time, dt );
  1902. break;
  1903. }
  1904. case QueueType::ThreadWakeup:
  1905. {
  1906. int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
  1907. int64_t dt = t - refCtx;
  1908. refCtx = t;
  1909. MemWrite( &item->threadWakeup.time, dt );
  1910. break;
  1911. }
  1912. case QueueType::GpuTime:
  1913. {
  1914. int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
  1915. int64_t dt = t - refGpu;
  1916. refGpu = t;
  1917. MemWrite( &item->gpuTime.gpuTime, dt );
  1918. break;
  1919. }
  1920. default:
  1921. assert( false );
  1922. break;
  1923. }
  1924. }
  1925. if( !AppendData( item++, QueueDataSize[idx] ) )
  1926. {
  1927. connectionLost = true;
  1928. m_refTimeThread = refThread;
  1929. m_refTimeCtx = refCtx;
  1930. m_refTimeGpu = refGpu;
  1931. return;
  1932. }
  1933. }
  1934. m_refTimeThread = refThread;
  1935. m_refTimeCtx = refCtx;
  1936. m_refTimeGpu = refGpu;
  1937. }
  1938. );
  1939. if( connectionLost ) return DequeueStatus::ConnectionLost;
  1940. return sz > 0 ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
  1941. }
  1942. Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
  1943. {
  1944. const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {},
  1945. [this, &timeStop] ( QueueItem* item, size_t sz )
  1946. {
  1947. assert( sz > 0 );
  1948. int64_t refCtx = m_refTimeCtx;
  1949. while( sz-- > 0 )
  1950. {
  1951. FreeAssociatedMemory( *item );
  1952. if( timeStop < 0 ) return;
  1953. const auto idx = MemRead<uint8_t>( &item->hdr.idx );
  1954. if( idx == (uint8_t)QueueType::ContextSwitch )
  1955. {
  1956. const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
  1957. if( csTime > timeStop )
  1958. {
  1959. timeStop = -1;
  1960. m_refTimeCtx = refCtx;
  1961. return;
  1962. }
  1963. int64_t dt = csTime - refCtx;
  1964. refCtx = csTime;
  1965. MemWrite( &item->contextSwitch.time, dt );
  1966. if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) )
  1967. {
  1968. timeStop = -2;
  1969. m_refTimeCtx = refCtx;
  1970. return;
  1971. }
  1972. }
  1973. else if( idx == (uint8_t)QueueType::ThreadWakeup )
  1974. {
  1975. const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
  1976. if( csTime > timeStop )
  1977. {
  1978. timeStop = -1;
  1979. m_refTimeCtx = refCtx;
  1980. return;
  1981. }
  1982. int64_t dt = csTime - refCtx;
  1983. refCtx = csTime;
  1984. MemWrite( &item->threadWakeup.time, dt );
  1985. if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) )
  1986. {
  1987. timeStop = -2;
  1988. m_refTimeCtx = refCtx;
  1989. return;
  1990. }
  1991. }
  1992. item++;
  1993. }
  1994. m_refTimeCtx = refCtx;
  1995. }
  1996. );
  1997. if( timeStop == -2 ) return DequeueStatus::ConnectionLost;
  1998. return ( timeStop == -1 || sz > 0 ) ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
  1999. }
  2000. Profiler::DequeueStatus Profiler::DequeueSerial()
  2001. {
  2002. {
  2003. bool lockHeld = true;
  2004. while( !m_serialLock.try_lock() )
  2005. {
  2006. if( m_shutdownManual.load( std::memory_order_relaxed ) )
  2007. {
  2008. lockHeld = false;
  2009. break;
  2010. }
  2011. }
  2012. if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue );
  2013. if( lockHeld )
  2014. {
  2015. m_serialLock.unlock();
  2016. }
  2017. }
  2018. const auto sz = m_serialDequeue.size();
  2019. if( sz > 0 )
  2020. {
  2021. int64_t refSerial = m_refTimeSerial;
  2022. int64_t refGpu = m_refTimeGpu;
  2023. auto item = m_serialDequeue.data();
  2024. auto end = item + sz;
  2025. while( item != end )
  2026. {
  2027. uint64_t ptr;
  2028. auto idx = MemRead<uint8_t>( &item->hdr.idx );
  2029. if( idx < (int)QueueType::Terminate )
  2030. {
  2031. switch( (QueueType)idx )
  2032. {
  2033. case QueueType::CallstackSerial:
  2034. ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
  2035. SendCallstackPayload( ptr );
  2036. tracy_free( (void*)ptr );
  2037. break;
  2038. case QueueType::LockWait:
  2039. case QueueType::LockSharedWait:
  2040. {
  2041. int64_t t = MemRead<int64_t>( &item->lockWait.time );
  2042. int64_t dt = t - refSerial;
  2043. refSerial = t;
  2044. MemWrite( &item->lockWait.time, dt );
  2045. break;
  2046. }
  2047. case QueueType::LockObtain:
  2048. case QueueType::LockSharedObtain:
  2049. {
  2050. int64_t t = MemRead<int64_t>( &item->lockObtain.time );
  2051. int64_t dt = t - refSerial;
  2052. refSerial = t;
  2053. MemWrite( &item->lockObtain.time, dt );
  2054. break;
  2055. }
  2056. case QueueType::LockRelease:
  2057. case QueueType::LockSharedRelease:
  2058. {
  2059. int64_t t = MemRead<int64_t>( &item->lockRelease.time );
  2060. int64_t dt = t - refSerial;
  2061. refSerial = t;
  2062. MemWrite( &item->lockRelease.time, dt );
  2063. break;
  2064. }
  2065. case QueueType::LockName:
  2066. {
  2067. ptr = MemRead<uint64_t>( &item->lockNameFat.name );
  2068. uint16_t size = MemRead<uint16_t>( &item->lockNameFat.size );
  2069. SendSingleString( (const char*)ptr, size );
  2070. #ifndef TRACY_ON_DEMAND
  2071. tracy_free( (void*)ptr );
  2072. #endif
  2073. break;
  2074. }
  2075. case QueueType::MemAlloc:
  2076. case QueueType::MemAllocNamed:
  2077. case QueueType::MemAllocCallstack:
  2078. case QueueType::MemAllocCallstackNamed:
  2079. {
  2080. int64_t t = MemRead<int64_t>( &item->memAlloc.time );
  2081. int64_t dt = t - refSerial;
  2082. refSerial = t;
  2083. MemWrite( &item->memAlloc.time, dt );
  2084. break;
  2085. }
  2086. case QueueType::MemFree:
  2087. case QueueType::MemFreeNamed:
  2088. case QueueType::MemFreeCallstack:
  2089. case QueueType::MemFreeCallstackNamed:
  2090. {
  2091. int64_t t = MemRead<int64_t>( &item->memFree.time );
  2092. int64_t dt = t - refSerial;
  2093. refSerial = t;
  2094. MemWrite( &item->memFree.time, dt );
  2095. break;
  2096. }
  2097. case QueueType::GpuZoneBeginSerial:
  2098. case QueueType::GpuZoneBeginCallstackSerial:
  2099. {
  2100. int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
  2101. int64_t dt = t - refSerial;
  2102. refSerial = t;
  2103. MemWrite( &item->gpuZoneBegin.cpuTime, dt );
  2104. break;
  2105. }
  2106. case QueueType::GpuZoneBeginAllocSrcLocSerial:
  2107. case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
  2108. {
  2109. int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
  2110. int64_t dt = t - refSerial;
  2111. refSerial = t;
  2112. MemWrite( &item->gpuZoneBegin.cpuTime, dt );
  2113. ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
  2114. SendSourceLocationPayload( ptr );
  2115. tracy_free( (void*)ptr );
  2116. break;
  2117. }
  2118. case QueueType::GpuZoneEndSerial:
  2119. {
  2120. int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
  2121. int64_t dt = t - refSerial;
  2122. refSerial = t;
  2123. MemWrite( &item->gpuZoneEnd.cpuTime, dt );
  2124. break;
  2125. }
  2126. case QueueType::GpuTime:
  2127. {
  2128. int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
  2129. int64_t dt = t - refGpu;
  2130. refGpu = t;
  2131. MemWrite( &item->gpuTime.gpuTime, dt );
  2132. break;
  2133. }
  2134. case QueueType::GpuContextName:
  2135. {
  2136. ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr );
  2137. uint16_t size = MemRead<uint16_t>( &item->gpuContextNameFat.size );
  2138. SendSingleString( (const char*)ptr, size );
  2139. #ifndef TRACY_ON_DEMAND
  2140. tracy_free( (void*)ptr );
  2141. #endif
  2142. break;
  2143. }
  2144. default:
  2145. assert( false );
  2146. break;
  2147. }
  2148. }
  2149. if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
  2150. item++;
  2151. }
  2152. m_refTimeSerial = refSerial;
  2153. m_refTimeGpu = refGpu;
  2154. m_serialDequeue.clear();
  2155. }
  2156. else
  2157. {
  2158. return DequeueStatus::QueueEmpty;
  2159. }
  2160. return DequeueStatus::DataDequeued;
  2161. }
  2162. bool Profiler::CommitData()
  2163. {
  2164. bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart );
  2165. if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
  2166. m_bufferStart = m_bufferOffset;
  2167. return ret;
  2168. }
  2169. bool Profiler::SendData( const char* data, size_t len )
  2170. {
  2171. const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 );
  2172. memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) );
  2173. return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
  2174. }
  2175. void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type )
  2176. {
  2177. assert( type == QueueType::StringData ||
  2178. type == QueueType::ThreadName ||
  2179. type == QueueType::PlotName ||
  2180. type == QueueType::FrameName ||
  2181. type == QueueType::ExternalName ||
  2182. type == QueueType::ExternalThreadName );
  2183. QueueItem item;
  2184. MemWrite( &item.hdr.type, type );
  2185. MemWrite( &item.stringTransfer.ptr, str );
  2186. assert( len <= std::numeric_limits<uint16_t>::max() );
  2187. auto l16 = uint16_t( len );
  2188. NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 );
  2189. AppendDataUnsafe( &item, QueueDataSize[(int)type] );
  2190. AppendDataUnsafe( &l16, sizeof( l16 ) );
  2191. AppendDataUnsafe( ptr, l16 );
  2192. }
  2193. void Profiler::SendSingleString( const char* ptr, size_t len )
  2194. {
  2195. QueueItem item;
  2196. MemWrite( &item.hdr.type, QueueType::SingleStringData );
  2197. assert( len <= std::numeric_limits<uint16_t>::max() );
  2198. auto l16 = uint16_t( len );
  2199. NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 );
  2200. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] );
  2201. AppendDataUnsafe( &l16, sizeof( l16 ) );
  2202. AppendDataUnsafe( ptr, l16 );
  2203. }
  2204. void Profiler::SendSecondString( const char* ptr, size_t len )
  2205. {
  2206. QueueItem item;
  2207. MemWrite( &item.hdr.type, QueueType::SecondStringData );
  2208. assert( len <= std::numeric_limits<uint16_t>::max() );
  2209. auto l16 = uint16_t( len );
  2210. NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 );
  2211. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] );
  2212. AppendDataUnsafe( &l16, sizeof( l16 ) );
  2213. AppendDataUnsafe( ptr, l16 );
  2214. }
  2215. void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type )
  2216. {
  2217. assert( type == QueueType::FrameImageData ||
  2218. type == QueueType::SymbolCode ||
  2219. type == QueueType::SourceCode );
  2220. QueueItem item;
  2221. MemWrite( &item.hdr.type, type );
  2222. MemWrite( &item.stringTransfer.ptr, str );
  2223. assert( len <= std::numeric_limits<uint32_t>::max() );
  2224. assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize );
  2225. auto l32 = uint32_t( len );
  2226. NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 );
  2227. AppendDataUnsafe( &item, QueueDataSize[(int)type] );
  2228. AppendDataUnsafe( &l32, sizeof( l32 ) );
  2229. AppendDataUnsafe( ptr, l32 );
  2230. }
  2231. void Profiler::SendSourceLocation( uint64_t ptr )
  2232. {
  2233. auto srcloc = (const SourceLocationData*)ptr;
  2234. QueueItem item;
  2235. MemWrite( &item.hdr.type, QueueType::SourceLocation );
  2236. MemWrite( &item.srcloc.name, (uint64_t)srcloc->name );
  2237. MemWrite( &item.srcloc.file, (uint64_t)srcloc->file );
  2238. MemWrite( &item.srcloc.function, (uint64_t)srcloc->function );
  2239. MemWrite( &item.srcloc.line, srcloc->line );
  2240. MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color ) & 0xFF ) );
  2241. MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) );
  2242. MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) );
  2243. AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] );
  2244. }
  2245. void Profiler::SendSourceLocationPayload( uint64_t _ptr )
  2246. {
  2247. auto ptr = (const char*)_ptr;
  2248. QueueItem item;
  2249. MemWrite( &item.hdr.type, QueueType::SourceLocationPayload );
  2250. MemWrite( &item.stringTransfer.ptr, _ptr );
  2251. uint16_t len;
  2252. memcpy( &len, ptr, sizeof( len ) );
  2253. assert( len > 2 );
  2254. len -= 2;
  2255. ptr += 2;
  2256. NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len );
  2257. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] );
  2258. AppendDataUnsafe( &len, sizeof( len ) );
  2259. AppendDataUnsafe( ptr, len );
  2260. }
  2261. void Profiler::SendCallstackPayload( uint64_t _ptr )
  2262. {
  2263. auto ptr = (uintptr_t*)_ptr;
  2264. QueueItem item;
  2265. MemWrite( &item.hdr.type, QueueType::CallstackPayload );
  2266. MemWrite( &item.stringTransfer.ptr, _ptr );
  2267. const auto sz = *ptr++;
  2268. const auto len = sz * sizeof( uint64_t );
  2269. const auto l16 = uint16_t( len );
  2270. NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 );
  2271. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
  2272. AppendDataUnsafe( &l16, sizeof( l16 ) );
  2273. if( compile_time_condition<sizeof( uintptr_t ) == sizeof( uint64_t )>::value )
  2274. {
  2275. AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
  2276. }
  2277. else
  2278. {
  2279. for( uintptr_t i=0; i<sz; i++ )
  2280. {
  2281. const auto val = uint64_t( *ptr++ );
  2282. AppendDataUnsafe( &val, sizeof( uint64_t ) );
  2283. }
  2284. }
  2285. }
  2286. void Profiler::SendCallstackPayload64( uint64_t _ptr )
  2287. {
  2288. auto ptr = (uint64_t*)_ptr;
  2289. QueueItem item;
  2290. MemWrite( &item.hdr.type, QueueType::CallstackPayload );
  2291. MemWrite( &item.stringTransfer.ptr, _ptr );
  2292. const auto sz = *ptr++;
  2293. const auto len = sz * sizeof( uint64_t );
  2294. const auto l16 = uint16_t( len );
  2295. NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 );
  2296. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
  2297. AppendDataUnsafe( &l16, sizeof( l16 ) );
  2298. AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
  2299. }
  2300. void Profiler::SendCallstackAlloc( uint64_t _ptr )
  2301. {
  2302. auto ptr = (const char*)_ptr;
  2303. QueueItem item;
  2304. MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
  2305. MemWrite( &item.stringTransfer.ptr, _ptr );
  2306. uint16_t len;
  2307. memcpy( &len, ptr, 2 );
  2308. ptr += 2;
  2309. NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len );
  2310. AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
  2311. AppendDataUnsafe( &len, sizeof( len ) );
  2312. AppendDataUnsafe( ptr, len );
  2313. }
  2314. void Profiler::SendCallstackFrame( uint64_t ptr )
  2315. {
  2316. #ifdef TRACY_HAS_CALLSTACK
  2317. const auto frameData = DecodeCallstackPtr( ptr );
  2318. {
  2319. SendSingleString( frameData.imageName );
  2320. QueueItem item;
  2321. MemWrite( &item.hdr.type, QueueType::CallstackFrameSize );
  2322. MemWrite( &item.callstackFrameSize.ptr, ptr );
  2323. MemWrite( &item.callstackFrameSize.size, frameData.size );
  2324. AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] );
  2325. }
  2326. for( uint8_t i=0; i<frameData.size; i++ )
  2327. {
  2328. const auto& frame = frameData.data[i];
  2329. SendSingleString( frame.name );
  2330. SendSecondString( frame.file );
  2331. QueueItem item;
  2332. MemWrite( &item.hdr.type, QueueType::CallstackFrame );
  2333. MemWrite( &item.callstackFrame.line, frame.line );
  2334. MemWrite( &item.callstackFrame.symAddr, frame.symAddr );
  2335. MemWrite( &item.callstackFrame.symLen, frame.symLen );
  2336. AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] );
  2337. tracy_free( (void*)frame.name );
  2338. tracy_free( (void*)frame.file );
  2339. }
  2340. #endif
  2341. }
  2342. bool Profiler::HandleServerQuery()
  2343. {
  2344. ServerQueryPacket payload;
  2345. if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false;
  2346. uint8_t type;
  2347. uint64_t ptr;
  2348. uint32_t extra;
  2349. memcpy( &type, &payload.type, sizeof( payload.type ) );
  2350. memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) );
  2351. memcpy( &extra, &payload.extra, sizeof( payload.extra ) );
  2352. switch( type )
  2353. {
  2354. case ServerQueryString:
  2355. SendString( ptr, (const char*)ptr, QueueType::StringData );
  2356. break;
  2357. case ServerQueryThreadString:
  2358. if( ptr == m_mainThread )
  2359. {
  2360. SendString( ptr, "Main thread", 11, QueueType::ThreadName );
  2361. }
  2362. else
  2363. {
  2364. SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName );
  2365. }
  2366. break;
  2367. case ServerQuerySourceLocation:
  2368. SendSourceLocation( ptr );
  2369. break;
  2370. case ServerQueryPlotName:
  2371. SendString( ptr, (const char*)ptr, QueueType::PlotName );
  2372. break;
  2373. case ServerQueryTerminate:
  2374. return false;
  2375. case ServerQueryCallstackFrame:
  2376. SendCallstackFrame( ptr );
  2377. break;
  2378. case ServerQueryFrameName:
  2379. SendString( ptr, (const char*)ptr, QueueType::FrameName );
  2380. break;
  2381. case ServerQueryDisconnect:
  2382. HandleDisconnect();
  2383. return false;
  2384. #ifdef TRACY_HAS_SYSTEM_TRACING
  2385. case ServerQueryExternalName:
  2386. SysTraceSendExternalName( ptr );
  2387. break;
  2388. #endif
  2389. case ServerQueryParameter:
  2390. HandleParameter( ptr );
  2391. break;
  2392. case ServerQuerySymbol:
  2393. HandleSymbolQuery( ptr );
  2394. break;
  2395. #ifndef TRACY_NO_CODE_TRANSFER
  2396. case ServerQuerySymbolCode:
  2397. HandleSymbolCodeQuery( ptr, extra );
  2398. break;
  2399. #endif
  2400. case ServerQueryCodeLocation:
  2401. SendCodeLocation( ptr );
  2402. break;
  2403. case ServerQuerySourceCode:
  2404. HandleSourceCodeQuery();
  2405. break;
  2406. case ServerQueryDataTransfer:
  2407. assert( !m_queryData );
  2408. m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 );
  2409. AckServerQuery();
  2410. break;
  2411. case ServerQueryDataTransferPart:
  2412. memcpy( m_queryDataPtr, &ptr, 8 );
  2413. memcpy( m_queryDataPtr+8, &extra, 4 );
  2414. m_queryDataPtr += 12;
  2415. AckServerQuery();
  2416. break;
  2417. default:
  2418. assert( false );
  2419. break;
  2420. }
  2421. return true;
  2422. }
  2423. void Profiler::HandleDisconnect()
  2424. {
  2425. moodycamel::ConsumerToken token( GetQueue() );
  2426. #ifdef TRACY_HAS_SYSTEM_TRACING
  2427. if( s_sysTraceThread )
  2428. {
  2429. auto timestamp = GetTime();
  2430. for(;;)
  2431. {
  2432. const auto status = DequeueContextSwitches( token, timestamp );
  2433. if( status == DequeueStatus::ConnectionLost )
  2434. {
  2435. return;
  2436. }
  2437. else if( status == DequeueStatus::QueueEmpty )
  2438. {
  2439. if( m_bufferOffset != m_bufferStart )
  2440. {
  2441. if( !CommitData() ) return;
  2442. }
  2443. }
  2444. if( timestamp < 0 )
  2445. {
  2446. if( m_bufferOffset != m_bufferStart )
  2447. {
  2448. if( !CommitData() ) return;
  2449. }
  2450. break;
  2451. }
  2452. ClearSerial();
  2453. if( m_sock->HasData() )
  2454. {
  2455. while( m_sock->HasData() )
  2456. {
  2457. if( !HandleServerQuery() ) return;
  2458. }
  2459. if( m_bufferOffset != m_bufferStart )
  2460. {
  2461. if( !CommitData() ) return;
  2462. }
  2463. }
  2464. else
  2465. {
  2466. if( m_bufferOffset != m_bufferStart )
  2467. {
  2468. if( !CommitData() ) return;
  2469. }
  2470. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  2471. }
  2472. }
  2473. }
  2474. #endif
  2475. QueueItem terminate;
  2476. MemWrite( &terminate.hdr.type, QueueType::Terminate );
  2477. if( !SendData( (const char*)&terminate, 1 ) ) return;
  2478. for(;;)
  2479. {
  2480. ClearQueues( token );
  2481. if( m_sock->HasData() )
  2482. {
  2483. while( m_sock->HasData() )
  2484. {
  2485. if( !HandleServerQuery() ) return;
  2486. }
  2487. if( m_bufferOffset != m_bufferStart )
  2488. {
  2489. if( !CommitData() ) return;
  2490. }
  2491. }
  2492. else
  2493. {
  2494. if( m_bufferOffset != m_bufferStart )
  2495. {
  2496. if( !CommitData() ) return;
  2497. }
  2498. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  2499. }
  2500. }
  2501. }
  2502. void Profiler::CalibrateTimer()
  2503. {
  2504. #ifdef TRACY_HW_TIMER
  2505. std::atomic_signal_fence( std::memory_order_acq_rel );
  2506. const auto t0 = std::chrono::high_resolution_clock::now();
  2507. const auto r0 = GetTime();
  2508. std::atomic_signal_fence( std::memory_order_acq_rel );
  2509. std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) );
  2510. std::atomic_signal_fence( std::memory_order_acq_rel );
  2511. const auto t1 = std::chrono::high_resolution_clock::now();
  2512. const auto r1 = GetTime();
  2513. std::atomic_signal_fence( std::memory_order_acq_rel );
  2514. const auto dt = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
  2515. const auto dr = r1 - r0;
  2516. m_timerMul = double( dt ) / double( dr );
  2517. #else
  2518. m_timerMul = 1.;
  2519. #endif
  2520. }
  2521. void Profiler::CalibrateDelay()
  2522. {
  2523. constexpr int Iterations = 50000;
  2524. auto mindiff = std::numeric_limits<int64_t>::max();
  2525. for( int i=0; i<Iterations * 10; i++ )
  2526. {
  2527. const auto t0i = GetTime();
  2528. const auto t1i = GetTime();
  2529. const auto dti = t1i - t0i;
  2530. if( dti > 0 && dti < mindiff ) mindiff = dti;
  2531. }
  2532. m_resolution = mindiff;
  2533. #ifdef TRACY_DELAYED_INIT
  2534. m_delay = m_resolution;
  2535. #else
  2536. constexpr int Events = Iterations * 2; // start + end
  2537. static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
  2538. static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
  2539. const auto t0 = GetTime();
  2540. for( int i=0; i<Iterations; i++ )
  2541. {
  2542. {
  2543. TracyLfqPrepare( QueueType::ZoneBegin );
  2544. MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
  2545. MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
  2546. TracyLfqCommit;
  2547. }
  2548. {
  2549. TracyLfqPrepare( QueueType::ZoneEnd );
  2550. MemWrite( &item->zoneEnd.time, GetTime() );
  2551. TracyLfqCommit;
  2552. }
  2553. }
  2554. const auto t1 = GetTime();
  2555. const auto dt = t1 - t0;
  2556. m_delay = dt / Events;
  2557. moodycamel::ConsumerToken token( GetQueue() );
  2558. int left = Events;
  2559. while( left != 0 )
  2560. {
  2561. const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} );
  2562. assert( sz > 0 );
  2563. left -= (int)sz;
  2564. }
  2565. assert( GetQueue().size_approx() == 0 );
  2566. #endif
  2567. }
  2568. void Profiler::ReportTopology()
  2569. {
  2570. #ifndef TRACY_DELAYED_INIT
  2571. struct CpuData
  2572. {
  2573. uint32_t package;
  2574. uint32_t core;
  2575. uint32_t thread;
  2576. };
  2577. #if defined _WIN32 || defined __CYGWIN__
  2578. t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" );
  2579. if( !_GetLogicalProcessorInformationEx ) return;
  2580. DWORD psz = 0;
  2581. _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz );
  2582. auto packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz );
  2583. auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz );
  2584. assert( res );
  2585. DWORD csz = 0;
  2586. _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz );
  2587. auto coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz );
  2588. res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz );
  2589. assert( res );
  2590. SYSTEM_INFO sysinfo;
  2591. GetSystemInfo( &sysinfo );
  2592. const uint32_t numcpus = sysinfo.dwNumberOfProcessors;
  2593. auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus );
  2594. for( uint32_t i=0; i<numcpus; i++ ) cpuData[i].thread = i;
  2595. int idx = 0;
  2596. auto ptr = packageInfo;
  2597. while( (char*)ptr < ((char*)packageInfo) + psz )
  2598. {
  2599. assert( ptr->Relationship == RelationProcessorPackage );
  2600. // FIXME account for GroupCount
  2601. auto mask = ptr->Processor.GroupMask[0].Mask;
  2602. int core = 0;
  2603. while( mask != 0 )
  2604. {
  2605. if( mask & 1 ) cpuData[core].package = idx;
  2606. core++;
  2607. mask >>= 1;
  2608. }
  2609. ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
  2610. idx++;
  2611. }
  2612. idx = 0;
  2613. ptr = coreInfo;
  2614. while( (char*)ptr < ((char*)coreInfo) + csz )
  2615. {
  2616. assert( ptr->Relationship == RelationProcessorCore );
  2617. // FIXME account for GroupCount
  2618. auto mask = ptr->Processor.GroupMask[0].Mask;
  2619. int core = 0;
  2620. while( mask != 0 )
  2621. {
  2622. if( mask & 1 ) cpuData[core].core = idx;
  2623. core++;
  2624. mask >>= 1;
  2625. }
  2626. ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
  2627. idx++;
  2628. }
  2629. for( uint32_t i=0; i<numcpus; i++ )
  2630. {
  2631. auto& data = cpuData[i];
  2632. TracyLfqPrepare( QueueType::CpuTopology );
  2633. MemWrite( &item->cpuTopology.package, data.package );
  2634. MemWrite( &item->cpuTopology.core, data.core );
  2635. MemWrite( &item->cpuTopology.thread, data.thread );
  2636. #ifdef TRACY_ON_DEMAND
  2637. DeferItem( *item );
  2638. #endif
  2639. TracyLfqCommit;
  2640. }
  2641. tracy_free( cpuData );
  2642. tracy_free( coreInfo );
  2643. tracy_free( packageInfo );
  2644. #elif defined __linux__
  2645. const int numcpus = std::thread::hardware_concurrency();
  2646. auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus );
  2647. memset( cpuData, 0, sizeof( CpuData ) * numcpus );
  2648. const char* basePath = "/sys/devices/system/cpu/cpu";
  2649. for( int i=0; i<numcpus; i++ )
  2650. {
  2651. char path[1024];
  2652. sprintf( path, "%s%i/topology/physical_package_id", basePath, i );
  2653. char buf[1024];
  2654. FILE* f = fopen( path, "rb" );
  2655. if( !f )
  2656. {
  2657. tracy_free( cpuData );
  2658. return;
  2659. }
  2660. auto read = fread( buf, 1, 1024, f );
  2661. buf[read] = '\0';
  2662. fclose( f );
  2663. cpuData[i].package = uint32_t( atoi( buf ) );
  2664. cpuData[i].thread = i;
  2665. sprintf( path, "%s%i/topology/core_id", basePath, i );
  2666. f = fopen( path, "rb" );
  2667. read = fread( buf, 1, 1024, f );
  2668. buf[read] = '\0';
  2669. fclose( f );
  2670. cpuData[i].core = uint32_t( atoi( buf ) );
  2671. }
  2672. for( int i=0; i<numcpus; i++ )
  2673. {
  2674. auto& data = cpuData[i];
  2675. TracyLfqPrepare( QueueType::CpuTopology );
  2676. MemWrite( &item->cpuTopology.package, data.package );
  2677. MemWrite( &item->cpuTopology.core, data.core );
  2678. MemWrite( &item->cpuTopology.thread, data.thread );
  2679. #ifdef TRACY_ON_DEMAND
  2680. DeferItem( *item );
  2681. #endif
  2682. TracyLfqCommit;
  2683. }
  2684. tracy_free( cpuData );
  2685. #endif
  2686. #endif
  2687. }
  2688. void Profiler::SendCallstack( int depth, const char* skipBefore )
  2689. {
  2690. #ifdef TRACY_HAS_CALLSTACK
  2691. TracyLfqPrepare( QueueType::Callstack );
  2692. auto ptr = Callstack( depth );
  2693. CutCallstack( ptr, skipBefore );
  2694. MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
  2695. TracyLfqCommit;
  2696. #endif
  2697. }
  2698. void Profiler::CutCallstack( void* callstack, const char* skipBefore )
  2699. {
  2700. #ifdef TRACY_HAS_CALLSTACK
  2701. auto data = (uintptr_t*)callstack;
  2702. const auto sz = *data++;
  2703. uintptr_t i;
  2704. for( i=0; i<sz; i++ )
  2705. {
  2706. auto name = DecodeCallstackPtrFast( uint64_t( data[i] ) );
  2707. const bool found = strcmp( name, skipBefore ) == 0;
  2708. if( found )
  2709. {
  2710. i++;
  2711. break;
  2712. }
  2713. }
  2714. if( i != sz )
  2715. {
  2716. memmove( data, data + i, ( sz - i ) * sizeof( uintptr_t* ) );
  2717. *--data = sz - i;
  2718. }
  2719. #endif
  2720. }
  #ifdef TRACY_HAS_SYSTIME
  // Queues a SysTimeReport item at most once every 100 ms, and only while the
  // profiler is not shutting down. A negative value from m_sysTime.Get() is
  // skipped without updating the last-report time, so the next call retries.
  void Profiler::ProcessSysTime()
  {
      if( m_shutdown.load( std::memory_order_relaxed ) ) return;

      // Convert explicitly to nanoseconds: the tick period of
      // high_resolution_clock is implementation-defined, while the threshold
      // below is expressed in nanoseconds.
      auto t = std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
      if( t - m_sysTimeLast > 100000000 )    // 100 ms
      {
          auto sysTime = m_sysTime.Get();
          if( sysTime >= 0 )
          {
              m_sysTimeLast = t;

              TracyLfqPrepare( QueueType::SysTimeReport );
              MemWrite( &item->sysTime.time, GetTime() );
              MemWrite( &item->sysTime.sysTime, sysTime );
              TracyLfqCommit;
          }
      }
  }
  #endif
  2740. void Profiler::HandleParameter( uint64_t payload )
  2741. {
  2742. assert( m_paramCallback );
  2743. const auto idx = uint32_t( payload >> 32 );
  2744. const auto val = int32_t( payload & 0xFFFFFFFF );
  2745. m_paramCallback( idx, val );
  2746. AckServerQuery();
  2747. }
  2748. #ifdef __ANDROID__
  2749. // Implementation helpers of EnsureReadable(address).
  2750. // This is so far only needed on Android, where it is common for libraries to be mapped
  2751. // with only executable, not readable, permissions. Typical example (line from /proc/self/maps):
  2752. /*
  2753. 746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so
  2754. */
  2755. // See https://github.com/wolfpld/tracy/issues/125 .
// To work around this, we parse /proc/self/maps and use mprotect to set the read permission
// on any mapping that contains a symbol address hit by HandleSymbolCodeQuery.
  2758. namespace {
  2759. // Holds some information about a single memory mapping.
  2760. struct MappingInfo {
  2761. // Start of address range. Inclusive.
  2762. uintptr_t start_address;
  2763. // End of address range. Exclusive, so the mapping is the half-open interval
  2764. // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps.
  2765. uintptr_t end_address;
  2766. // Read/Write/Executable permissions.
  2767. bool perm_r, perm_w, perm_x;
  2768. };
  2769. } // anonymous namespace
  2770. // Internal implementation helper for LookUpMapping(address).
  2771. //
  2772. // Parses /proc/self/maps returning a vector<MappingInfo>.
  2773. // /proc/self/maps is assumed to be sorted by ascending address, so the resulting
  2774. // vector is sorted by ascending address too.
  2775. static std::vector<MappingInfo> ParseMappings()
  2776. {
  2777. std::vector<MappingInfo> result;
  2778. FILE* file = fopen( "/proc/self/maps", "r" );
  2779. if( !file ) return result;
  2780. char line[1024];
  2781. while( fgets( line, sizeof( line ), file ) )
  2782. {
  2783. uintptr_t start_addr;
  2784. uintptr_t end_addr;
  2785. if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue;
  2786. char* first_space = strchr( line, ' ' );
  2787. if( !first_space ) continue;
  2788. char* perm = first_space + 1;
  2789. char* second_space = strchr( perm, ' ' );
  2790. if( !second_space || second_space - perm != 4 ) continue;
  2791. result.emplace_back();
  2792. auto& mapping = result.back();
  2793. mapping.start_address = start_addr;
  2794. mapping.end_address = end_addr;
  2795. mapping.perm_r = perm[0] == 'r';
  2796. mapping.perm_w = perm[1] == 'w';
  2797. mapping.perm_x = perm[2] == 'x';
  2798. }
  2799. fclose( file );
  2800. return result;
  2801. }
  2802. // Internal implementation helper for LookUpMapping(address).
  2803. //
  2804. // Takes as input an `address` and a known vector `mappings`, assumed to be
  2805. // sorted by increasing addresses, as /proc/self/maps seems to be.
  2806. // Returns a pointer to the MappingInfo describing the mapping that this
  2807. // address belongs to, or nullptr if the address isn't in `mappings`.
  2808. static MappingInfo* LookUpMapping(std::vector<MappingInfo>& mappings, uintptr_t address)
  2809. {
  2810. // Comparison function for std::lower_bound. Returns true if all addresses in `m1`
  2811. // are lower than `addr`.
  2812. auto Compare = []( const MappingInfo& m1, uintptr_t addr ) {
  2813. // '<=' because the address ranges are half-open intervals, [start, end).
  2814. return m1.end_address <= addr;
  2815. };
  2816. auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare );
  2817. if( iter == mappings.end() || iter->start_address > address) {
  2818. return nullptr;
  2819. }
  2820. return &*iter;
  2821. }
  2822. // Internal implementation helper for EnsureReadable(address).
  2823. //
  2824. // Takes as input an `address` and returns a pointer to a MappingInfo
  2825. // describing the mapping that this address belongs to, or nullptr if
  2826. // the address isn't in any known mapping.
  2827. //
  2828. // This function is stateful and not reentrant (assumes to be called from
  2829. // only one thread). It holds a vector of mappings parsed from /proc/self/maps.
  2830. //
  2831. // Attempts to react to mappings changes by re-parsing /proc/self/maps.
  2832. static MappingInfo* LookUpMapping(uintptr_t address)
  2833. {
  2834. // Static state managed by this function. Not constant, we mutate that state as
  2835. // we turn some mappings readable. Initially parsed once here, updated as needed below.
  2836. static std::vector<MappingInfo> s_mappings = ParseMappings();
  2837. MappingInfo* mapping = LookUpMapping( s_mappings, address );
  2838. if( mapping ) return mapping;
  2839. // This address isn't in any known mapping. Try parsing again, maybe
  2840. // mappings changed.
  2841. s_mappings = ParseMappings();
  2842. return LookUpMapping( s_mappings, address );
  2843. }
  2844. // Internal implementation helper for EnsureReadable(address).
  2845. //
  2846. // Attempts to make the specified `mapping` readable if it isn't already.
  2847. // Returns true if and only if the mapping is readable.
  2848. static bool EnsureReadable( MappingInfo& mapping )
  2849. {
  2850. if( mapping.perm_r )
  2851. {
  2852. // The mapping is already readable.
  2853. return true;
  2854. }
  2855. int prot = PROT_READ;
  2856. if( mapping.perm_w ) prot |= PROT_WRITE;
  2857. if( mapping.perm_x ) prot |= PROT_EXEC;
  2858. if( mprotect( reinterpret_cast<void*>( mapping.start_address ),
  2859. mapping.end_address - mapping.start_address, prot ) == -1 )
  2860. {
  2861. // Failed to make the mapping readable. Shouldn't happen, hasn't
  2862. // been observed yet. If it happened in practice, we should consider
  2863. // adding a bool to MappingInfo to track this to avoid retrying mprotect
  2864. // everytime on such mappings.
  2865. return false;
  2866. }
  2867. // The mapping is now readable. Update `mapping` so the next call will be fast.
  2868. mapping.perm_r = true;
  2869. return true;
  2870. }
  2871. // Attempts to set the read permission on the entire mapping containing the
  2872. // specified address. Returns true if and only if the mapping is now readable.
  2873. static bool EnsureReadable( uintptr_t address )
  2874. {
  2875. MappingInfo* mapping = LookUpMapping(address);
  2876. return mapping && EnsureReadable( *mapping );
  2877. }
  2878. #endif // defined __ANDROID__
  2879. void Profiler::HandleSymbolQuery( uint64_t symbol )
  2880. {
  2881. #ifdef TRACY_HAS_CALLSTACK
  2882. #ifdef __ANDROID__
  2883. // On Android it's common for code to be in mappings that are only executable
  2884. // but not readable.
  2885. if( !EnsureReadable( symbol ) )
  2886. {
  2887. return;
  2888. }
  2889. #endif
  2890. const auto sym = DecodeSymbolAddress( symbol );
  2891. SendSingleString( sym.file );
  2892. QueueItem item;
  2893. MemWrite( &item.hdr.type, QueueType::SymbolInformation );
  2894. MemWrite( &item.symbolInformation.line, sym.line );
  2895. MemWrite( &item.symbolInformation.symAddr, symbol );
  2896. AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] );
  2897. if( sym.needFree ) tracy_free( (void*)sym.file );
  2898. #endif
  2899. }
  2900. void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size )
  2901. {
  2902. #ifdef __ANDROID__
  2903. // On Android it's common for code to be in mappings that are only executable
  2904. // but not readable.
  2905. if( !EnsureReadable( symbol ) )
  2906. {
  2907. return;
  2908. }
  2909. #endif
  2910. SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode );
  2911. }
  2912. void Profiler::HandleSourceCodeQuery()
  2913. {
  2914. assert( m_exectime != 0 );
  2915. assert( m_queryData );
  2916. struct stat st;
  2917. if( stat( m_queryData, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) )
  2918. {
  2919. FILE* f = fopen( m_queryData, "rb" );
  2920. tracy_free( m_queryData );
  2921. if( f )
  2922. {
  2923. auto ptr = (char*)tracy_malloc( st.st_size );
  2924. auto rd = fread( ptr, 1, st.st_size, f );
  2925. fclose( f );
  2926. if( rd == st.st_size )
  2927. {
  2928. SendLongString( (uint64_t)ptr, ptr, rd, QueueType::SourceCode );
  2929. }
  2930. else
  2931. {
  2932. AckSourceCodeNotAvailable();
  2933. }
  2934. tracy_free( ptr );
  2935. }
  2936. else
  2937. {
  2938. AckSourceCodeNotAvailable();
  2939. }
  2940. }
  2941. else
  2942. {
  2943. tracy_free( m_queryData );
  2944. AckSourceCodeNotAvailable();
  2945. }
  2946. m_queryData = nullptr;
  2947. }
  2948. void Profiler::SendCodeLocation( uint64_t ptr )
  2949. {
  2950. #ifdef TRACY_HAS_CALLSTACK
  2951. const auto sym = DecodeCodeAddress( ptr );
  2952. SendSingleString( sym.file );
  2953. QueueItem item;
  2954. MemWrite( &item.hdr.type, QueueType::CodeInformation );
  2955. MemWrite( &item.codeInformation.ptr, ptr );
  2956. MemWrite( &item.codeInformation.line, sym.line );
  2957. AppendData( &item, QueueDataSize[(int)QueueType::CodeInformation] );
  2958. if( sym.needFree ) tracy_free( (void*)sym.file );
  2959. #endif
  2960. }
  2961. #if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC
  2962. int64_t Profiler::GetTimeQpc()
  2963. {
  2964. LARGE_INTEGER t;
  2965. QueryPerformanceCounter( &t );
  2966. return t.QuadPart;
  2967. }
  2968. #endif
  2969. }
  2970. #ifdef __cplusplus
  2971. extern "C" {
  2972. #endif
  2973. TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active )
  2974. {
  2975. ___tracy_c_zone_context ctx;
  2976. #ifdef TRACY_ON_DEMAND
  2977. ctx.active = active && tracy::GetProfiler().IsConnected();
  2978. #else
  2979. ctx.active = active;
  2980. #endif
  2981. if( !ctx.active ) return ctx;
  2982. const auto id = tracy::GetProfiler().GetNextZoneId();
  2983. ctx.id = id;
  2984. #ifndef TRACY_NO_VERIFY
  2985. {
  2986. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  2987. tracy::MemWrite( &item->zoneValidation.id, id );
  2988. TracyLfqCommitC;
  2989. }
  2990. #endif
  2991. {
  2992. TracyLfqPrepareC( tracy::QueueType::ZoneBegin );
  2993. tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
  2994. tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
  2995. TracyLfqCommitC;
  2996. }
  2997. return ctx;
  2998. }
  2999. TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active )
  3000. {
  3001. ___tracy_c_zone_context ctx;
  3002. #ifdef TRACY_ON_DEMAND
  3003. ctx.active = active && tracy::GetProfiler().IsConnected();
  3004. #else
  3005. ctx.active = active;
  3006. #endif
  3007. if( !ctx.active ) return ctx;
  3008. const auto id = tracy::GetProfiler().GetNextZoneId();
  3009. ctx.id = id;
  3010. #ifndef TRACY_NO_VERIFY
  3011. {
  3012. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3013. tracy::MemWrite( &item->zoneValidation.id, id );
  3014. TracyLfqCommitC;
  3015. }
  3016. #endif
  3017. tracy::GetProfiler().SendCallstack( depth );
  3018. {
  3019. TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack );
  3020. tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
  3021. tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
  3022. TracyLfqCommitC;
  3023. }
  3024. return ctx;
  3025. }
  3026. TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active )
  3027. {
  3028. ___tracy_c_zone_context ctx;
  3029. #ifdef TRACY_ON_DEMAND
  3030. ctx.active = active && tracy::GetProfiler().IsConnected();
  3031. #else
  3032. ctx.active = active;
  3033. #endif
  3034. if( !ctx.active )
  3035. {
  3036. tracy::tracy_free( (void*)srcloc );
  3037. return ctx;
  3038. }
  3039. const auto id = tracy::GetProfiler().GetNextZoneId();
  3040. ctx.id = id;
  3041. #ifndef TRACY_NO_VERIFY
  3042. {
  3043. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3044. tracy::MemWrite( &item->zoneValidation.id, id );
  3045. TracyLfqCommitC;
  3046. }
  3047. #endif
  3048. {
  3049. TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc );
  3050. tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
  3051. tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
  3052. TracyLfqCommitC;
  3053. }
  3054. return ctx;
  3055. }
  3056. TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active )
  3057. {
  3058. ___tracy_c_zone_context ctx;
  3059. #ifdef TRACY_ON_DEMAND
  3060. ctx.active = active && tracy::GetProfiler().IsConnected();
  3061. #else
  3062. ctx.active = active;
  3063. #endif
  3064. if( !ctx.active )
  3065. {
  3066. tracy::tracy_free( (void*)srcloc );
  3067. return ctx;
  3068. }
  3069. const auto id = tracy::GetProfiler().GetNextZoneId();
  3070. ctx.id = id;
  3071. #ifndef TRACY_NO_VERIFY
  3072. {
  3073. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3074. tracy::MemWrite( &item->zoneValidation.id, id );
  3075. TracyLfqCommitC;
  3076. }
  3077. #endif
  3078. tracy::GetProfiler().SendCallstack( depth );
  3079. {
  3080. TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
  3081. tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
  3082. tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
  3083. TracyLfqCommitC;
  3084. }
  3085. return ctx;
  3086. }
  3087. TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
  3088. {
  3089. if( !ctx.active ) return;
  3090. #ifndef TRACY_NO_VERIFY
  3091. {
  3092. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3093. tracy::MemWrite( &item->zoneValidation.id, ctx.id );
  3094. TracyLfqCommitC;
  3095. }
  3096. #endif
  3097. {
  3098. TracyLfqPrepareC( tracy::QueueType::ZoneEnd );
  3099. tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
  3100. TracyLfqCommitC;
  3101. }
  3102. }
  3103. TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
  3104. {
  3105. assert( size < std::numeric_limits<uint16_t>::max() );
  3106. if( !ctx.active ) return;
  3107. auto ptr = (char*)tracy::tracy_malloc( size );
  3108. memcpy( ptr, txt, size );
  3109. #ifndef TRACY_NO_VERIFY
  3110. {
  3111. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3112. tracy::MemWrite( &item->zoneValidation.id, ctx.id );
  3113. TracyLfqCommitC;
  3114. }
  3115. #endif
  3116. {
  3117. TracyLfqPrepareC( tracy::QueueType::ZoneText );
  3118. tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
  3119. tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
  3120. TracyLfqCommitC;
  3121. }
  3122. }
  3123. TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
  3124. {
  3125. assert( size < std::numeric_limits<uint16_t>::max() );
  3126. if( !ctx.active ) return;
  3127. auto ptr = (char*)tracy::tracy_malloc( size );
  3128. memcpy( ptr, txt, size );
  3129. #ifndef TRACY_NO_VERIFY
  3130. {
  3131. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3132. tracy::MemWrite( &item->zoneValidation.id, ctx.id );
  3133. TracyLfqCommitC;
  3134. }
  3135. #endif
  3136. {
  3137. TracyLfqPrepareC( tracy::QueueType::ZoneName );
  3138. tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
  3139. tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
  3140. TracyLfqCommitC;
  3141. }
  3142. }
  3143. TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) {
  3144. if( !ctx.active ) return;
  3145. #ifndef TRACY_NO_VERIFY
  3146. {
  3147. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3148. tracy::MemWrite( &item->zoneValidation.id, ctx.id );
  3149. TracyLfqCommitC;
  3150. }
  3151. #endif
  3152. {
  3153. TracyLfqPrepareC( tracy::QueueType::ZoneColor );
  3154. tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) );
  3155. tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
  3156. tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
  3157. TracyLfqCommitC;
  3158. }
  3159. }
  3160. TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value )
  3161. {
  3162. if( !ctx.active ) return;
  3163. #ifndef TRACY_NO_VERIFY
  3164. {
  3165. TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
  3166. tracy::MemWrite( &item->zoneValidation.id, ctx.id );
  3167. TracyLfqCommitC;
  3168. }
  3169. #endif
  3170. {
  3171. TracyLfqPrepareC( tracy::QueueType::ZoneValue );
  3172. tracy::MemWrite( &item->zoneValue.value, value );
  3173. TracyLfqCommitC;
  3174. }
  3175. }
  3176. TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); }
  3177. TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); }
  3178. TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); }
  3179. TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); }
  3180. TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); }
  3181. TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); }
  3182. TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); }
  3183. TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); }
  3184. TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
  3185. TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
  3186. TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }
  3187. TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); }
  3188. TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
  3189. TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
  3190. TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
  3191. TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
  3192. TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
  3193. TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }
  3194. TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) {
  3195. return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz );
  3196. }
  3197. TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) {
  3198. return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
  3199. }
  3200. // thread_locals are not initialized on thread creation. At least on GNU/Linux. Instead they are
  3201. // initialized on their first ODR-use. This means that the allocator is not automagically
  3202. // initialized every time a thread is created. As thus, expose to the C API users a simple API to
  3203. // call every time they create a thread. Here we can then put all sorts of per-thread
  3204. // initialization.
  3205. TRACY_API void ___tracy_init_thread(void) {
  3206. #ifdef TRACY_DELAYED_INIT
  3207. (void)tracy::GetProfilerThreadData();
  3208. #else
  3209. (void)tracy::s_rpmalloc_thread_init;
  3210. #endif
  3211. }
  3212. #ifdef __cplusplus
  3213. }
  3214. #endif
  3215. #endif