TracySysTrace.cpp 43 KB


  1. #include "TracySysTrace.hpp"
  2. #ifdef TRACY_HAS_SYSTEM_TRACING
  3. # if defined _WIN32 || defined __CYGWIN__
  4. # ifndef NOMINMAX
  5. # define NOMINMAX
  6. # endif
  7. # define INITGUID
  8. # include <assert.h>
  9. # include <string.h>
  10. # include <windows.h>
  11. # include <dbghelp.h>
  12. # include <evntrace.h>
  13. # include <evntcons.h>
  14. # include <psapi.h>
  15. # include <winternl.h>
  16. # include "../common/TracyAlloc.hpp"
  17. # include "../common/TracySystem.hpp"
  18. # include "TracyProfiler.hpp"
  19. # include "TracyThread.hpp"
  20. namespace tracy
  21. {
  22. static const GUID PerfInfoGuid = { 0xce1dbfb4, 0x137e, 0x4da6, { 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc } };
  23. static const GUID DxgKrnlGuid = { 0x802ec45a, 0x1e99, 0x4b83, { 0x99, 0x20, 0x87, 0xc9, 0x82, 0x77, 0xba, 0x9d } };
  24. static TRACEHANDLE s_traceHandle;
  25. static TRACEHANDLE s_traceHandle2;
  26. static EVENT_TRACE_PROPERTIES* s_prop;
  27. static DWORD s_pid;
  28. static EVENT_TRACE_PROPERTIES* s_propVsync;
  29. static TRACEHANDLE s_traceHandleVsync;
  30. static TRACEHANDLE s_traceHandleVsync2;
  31. Thread* s_threadVsync = nullptr;
  32. struct CSwitch
  33. {
  34. uint32_t newThreadId;
  35. uint32_t oldThreadId;
  36. int8_t newThreadPriority;
  37. int8_t oldThreadPriority;
  38. uint8_t previousCState;
  39. int8_t spareByte;
  40. int8_t oldThreadWaitReason;
  41. int8_t oldThreadWaitMode;
  42. int8_t oldThreadState;
  43. int8_t oldThreadWaitIdealProcessor;
  44. uint32_t newThreadWaitTime;
  45. uint32_t reserved;
  46. };
  47. struct ReadyThread
  48. {
  49. uint32_t threadId;
  50. int8_t adjustReason;
  51. int8_t adjustIncrement;
  52. int8_t flag;
  53. int8_t reserverd;
  54. };
  55. struct ThreadTrace
  56. {
  57. uint32_t processId;
  58. uint32_t threadId;
  59. uint32_t stackBase;
  60. uint32_t stackLimit;
  61. uint32_t userStackBase;
  62. uint32_t userStackLimit;
  63. uint32_t startAddr;
  64. uint32_t win32StartAddr;
  65. uint32_t tebBase;
  66. uint32_t subProcessTag;
  67. };
  68. struct StackWalkEvent
  69. {
  70. uint64_t eventTimeStamp;
  71. uint32_t stackProcess;
  72. uint32_t stackThread;
  73. uint64_t stack[192];
  74. };
  75. struct VSyncInfo
  76. {
  77. void* dxgAdapter;
  78. uint32_t vidPnTargetId;
  79. uint64_t scannedPhysicalAddress;
  80. uint32_t vidPnSourceId;
  81. uint32_t frameNumber;
  82. int64_t frameQpcTime;
  83. void* hFlipDevice;
  84. uint32_t flipType;
  85. uint64_t flipFenceId;
  86. };
  87. #ifdef __CYGWIN__
  88. extern "C" typedef DWORD (WINAPI *t_GetProcessIdOfThread)( HANDLE );
  89. extern "C" typedef DWORD (WINAPI *t_GetProcessImageFileNameA)( HANDLE, LPSTR, DWORD );
  90. extern "C" ULONG WMIAPI TraceSetInformation(TRACEHANDLE SessionHandle, TRACE_INFO_CLASS InformationClass, PVOID TraceInformation, ULONG InformationLength);
  91. t_GetProcessIdOfThread GetProcessIdOfThread = (t_GetProcessIdOfThread)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetProcessIdOfThread" );
  92. t_GetProcessImageFileNameA GetProcessImageFileNameA = (t_GetProcessImageFileNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetProcessImageFileNameA" );
  93. #endif
  94. extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG );
  95. extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD );
  96. extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD );
  97. extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD );
  98. extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
  99. t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" );
  100. t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" );
  101. t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" );
  102. t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" );
  103. static t_GetThreadDescription _GetThreadDescription = 0;
  104. void WINAPI EventRecordCallback( PEVENT_RECORD record )
  105. {
  106. #ifdef TRACY_ON_DEMAND
  107. if( !GetProfiler().IsConnected() ) return;
  108. #endif
  109. const auto& hdr = record->EventHeader;
  110. switch( hdr.ProviderId.Data1 )
  111. {
  112. case 0x3d6fa8d1: // Thread Guid
  113. if( hdr.EventDescriptor.Opcode == 36 )
  114. {
  115. const auto cswitch = (const CSwitch*)record->UserData;
  116. TracyLfqPrepare( QueueType::ContextSwitch );
  117. MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
  118. memcpy( &item->contextSwitch.oldThread, &cswitch->oldThreadId, sizeof( cswitch->oldThreadId ) );
  119. memcpy( &item->contextSwitch.newThread, &cswitch->newThreadId, sizeof( cswitch->newThreadId ) );
  120. memset( ((char*)&item->contextSwitch.oldThread)+4, 0, 4 );
  121. memset( ((char*)&item->contextSwitch.newThread)+4, 0, 4 );
  122. MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
  123. MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
  124. MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
  125. TracyLfqCommit;
  126. }
  127. else if( hdr.EventDescriptor.Opcode == 50 )
  128. {
  129. const auto rt = (const ReadyThread*)record->UserData;
  130. TracyLfqPrepare( QueueType::ThreadWakeup );
  131. MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
  132. memcpy( &item->threadWakeup.thread, &rt->threadId, sizeof( rt->threadId ) );
  133. memset( ((char*)&item->threadWakeup.thread)+4, 0, 4 );
  134. TracyLfqCommit;
  135. }
  136. else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
  137. {
  138. const auto tt = (const ThreadTrace*)record->UserData;
  139. uint64_t tid = tt->threadId;
  140. if( tid == 0 ) return;
  141. uint64_t pid = tt->processId;
  142. TracyLfqPrepare( QueueType::TidToPid );
  143. MemWrite( &item->tidToPid.tid, tid );
  144. MemWrite( &item->tidToPid.pid, pid );
  145. TracyLfqCommit;
  146. }
  147. break;
  148. case 0xdef2fe46: // StackWalk Guid
  149. if( hdr.EventDescriptor.Opcode == 32 )
  150. {
  151. const auto sw = (const StackWalkEvent*)record->UserData;
  152. if( sw->stackProcess == s_pid && ( sw->stack[0] & 0x8000000000000000 ) == 0 )
  153. {
  154. const uint64_t sz = ( record->UserDataLength - 16 ) / 8;
  155. if( sz > 0 )
  156. {
  157. auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) );
  158. memcpy( trace, &sz, sizeof( uint64_t ) );
  159. memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
  160. TracyLfqPrepare( QueueType::CallstackSample );
  161. MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
  162. MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread );
  163. MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
  164. TracyLfqCommit;
  165. }
  166. }
  167. }
  168. break;
  169. default:
  170. break;
  171. }
  172. }
  173. static constexpr const char* VsyncName[] = {
  174. "[0] Vsync",
  175. "[1] Vsync",
  176. "[2] Vsync",
  177. "[3] Vsync",
  178. "[4] Vsync",
  179. "[5] Vsync",
  180. "[6] Vsync",
  181. "[7] Vsync",
  182. "Vsync"
  183. };
  184. static uint32_t VsyncTarget[8] = {};
  185. void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
  186. {
  187. #ifdef TRACY_ON_DEMAND
  188. if( !GetProfiler().IsConnected() ) return;
  189. #endif
  190. const auto& hdr = record->EventHeader;
  191. assert( hdr.ProviderId.Data1 == 0x802EC45A );
  192. assert( hdr.EventDescriptor.Id == 0x0011 );
  193. const auto vs = (const VSyncInfo*)record->UserData;
  194. int idx = 0;
  195. do
  196. {
  197. if( VsyncTarget[idx] == 0 )
  198. {
  199. VsyncTarget[idx] = vs->vidPnTargetId;
  200. break;
  201. }
  202. else if( VsyncTarget[idx] == vs->vidPnTargetId )
  203. {
  204. break;
  205. }
  206. }
  207. while( ++idx < 8 );
  208. TracyLfqPrepare( QueueType::FrameMarkMsg );
  209. MemWrite( &item->frameMark.time, hdr.TimeStamp.QuadPart );
  210. MemWrite( &item->frameMark.name, uint64_t( VsyncName[idx] ) );
  211. TracyLfqCommit;
  212. }
  213. static void SetupVsync()
  214. {
  215. #if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE
  216. const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH;
  217. s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
  218. memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
  219. s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
  220. s_propVsync->Wnode.BufferSize = psz;
  221. #ifdef TRACY_TIMER_QPC
  222. s_propVsync->Wnode.ClientContext = 1;
  223. #else
  224. s_propVsync->Wnode.ClientContext = 3;
  225. #endif
  226. s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
  227. strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" );
  228. auto backup = tracy_malloc( psz );
  229. memcpy( backup, s_propVsync, psz );
  230. const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP );
  231. if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
  232. {
  233. tracy_free( backup );
  234. tracy_free( s_propVsync );
  235. return;
  236. }
  237. memcpy( s_propVsync, backup, psz );
  238. tracy_free( backup );
  239. const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync );
  240. if( startStatus != ERROR_SUCCESS )
  241. {
  242. tracy_free( s_propVsync );
  243. return;
  244. }
  245. EVENT_FILTER_EVENT_ID fe = {};
  246. fe.FilterIn = TRUE;
  247. fe.Count = 1;
  248. fe.Events[0] = 0x0011; // VSyncDPC_Info
  249. EVENT_FILTER_DESCRIPTOR desc = {};
  250. desc.Ptr = (ULONGLONG)&fe;
  251. desc.Size = sizeof( fe );
  252. desc.Type = EVENT_FILTER_TYPE_EVENT_ID;
  253. ENABLE_TRACE_PARAMETERS params = {};
  254. params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2;
  255. params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0;
  256. params.SourceId = s_propVsync->Wnode.Guid;
  257. params.EnableFilterDesc = &desc;
  258. params.FilterDescCount = 1;
  259. uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base
  260. if( EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, &params ) != ERROR_SUCCESS )
  261. {
  262. tracy_free( s_propVsync );
  263. return;
  264. }
  265. char loggerName[MAX_PATH];
  266. strcpy( loggerName, "TracyVsync" );
  267. EVENT_TRACE_LOGFILEA log = {};
  268. log.LoggerName = loggerName;
  269. log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
  270. log.EventRecordCallback = EventRecordCallbackVsync;
  271. s_traceHandleVsync2 = OpenTraceA( &log );
  272. if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
  273. {
  274. CloseTrace( s_traceHandleVsync );
  275. tracy_free( s_propVsync );
  276. return;
  277. }
  278. s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) );
  279. new(s_threadVsync) Thread( [] (void*) {
  280. ThreadExitHandler threadExitHandler;
  281. SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
  282. SetThreadName( "Tracy Vsync" );
  283. ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr );
  284. }, nullptr );
  285. #endif
  286. }
  287. bool SysTraceStart( int64_t& samplingPeriod )
  288. {
  289. if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
  290. s_pid = GetCurrentProcessId();
  291. #if defined _WIN64
  292. constexpr bool isOs64Bit = true;
  293. #else
  294. BOOL _iswow64;
  295. IsWow64Process( GetCurrentProcess(), &_iswow64 );
  296. const bool isOs64Bit = _iswow64;
  297. #endif
  298. TOKEN_PRIVILEGES priv = {};
  299. priv.PrivilegeCount = 1;
  300. priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
  301. if( LookupPrivilegeValue( nullptr, SE_SYSTEM_PROFILE_NAME, &priv.Privileges[0].Luid ) == 0 ) return false;
  302. HANDLE pt;
  303. if( OpenProcessToken( GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &pt ) == 0 ) return false;
  304. const auto adjust = AdjustTokenPrivileges( pt, FALSE, &priv, 0, nullptr, nullptr );
  305. CloseHandle( pt );
  306. if( adjust == 0 ) return false;
  307. const auto status = GetLastError();
  308. if( status != ERROR_SUCCESS ) return false;
  309. if( isOs64Bit )
  310. {
  311. TRACE_PROFILE_INTERVAL interval = {};
  312. interval.Interval = 1250; // 8 kHz
  313. const auto intervalStatus = TraceSetInformation( 0, TraceSampledProfileIntervalInfo, &interval, sizeof( interval ) );
  314. if( intervalStatus != ERROR_SUCCESS ) return false;
  315. samplingPeriod = 125*1000;
  316. }
  317. const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + sizeof( KERNEL_LOGGER_NAME );
  318. s_prop = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
  319. memset( s_prop, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
  320. ULONG flags = 0;
  321. #ifndef TRACY_NO_CONTEXT_SWITCH
  322. flags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_DISPATCHER | EVENT_TRACE_FLAG_THREAD;
  323. #endif
  324. #ifndef TRACY_NO_SAMPLING
  325. if( isOs64Bit ) flags |= EVENT_TRACE_FLAG_PROFILE;
  326. #endif
  327. s_prop->EnableFlags = flags;
  328. s_prop->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
  329. s_prop->Wnode.BufferSize = psz;
  330. s_prop->Wnode.Flags = WNODE_FLAG_TRACED_GUID;
  331. #ifdef TRACY_TIMER_QPC
  332. s_prop->Wnode.ClientContext = 1;
  333. #else
  334. s_prop->Wnode.ClientContext = 3;
  335. #endif
  336. s_prop->Wnode.Guid = SystemTraceControlGuid;
  337. s_prop->BufferSize = 1024;
  338. s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4;
  339. s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6;
  340. s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
  341. memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
  342. auto backup = tracy_malloc( psz );
  343. memcpy( backup, s_prop, psz );
  344. const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
  345. if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
  346. {
  347. tracy_free( backup );
  348. tracy_free( s_prop );
  349. return false;
  350. }
  351. memcpy( s_prop, backup, psz );
  352. tracy_free( backup );
  353. const auto startStatus = StartTrace( &s_traceHandle, KERNEL_LOGGER_NAME, s_prop );
  354. if( startStatus != ERROR_SUCCESS )
  355. {
  356. tracy_free( s_prop );
  357. return false;
  358. }
  359. if( isOs64Bit )
  360. {
  361. CLASSIC_EVENT_ID stackId;
  362. stackId.EventGuid = PerfInfoGuid;
  363. stackId.Type = 46;
  364. const auto stackStatus = TraceSetInformation( s_traceHandle, TraceStackTracingInfo, &stackId, sizeof( stackId ) );
  365. if( stackStatus != ERROR_SUCCESS )
  366. {
  367. tracy_free( s_prop );
  368. return false;
  369. }
  370. }
  371. #ifdef UNICODE
  372. WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )];
  373. #else
  374. char KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )];
  375. #endif
  376. memcpy( KernelLoggerName, KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
  377. EVENT_TRACE_LOGFILE log = {};
  378. log.LoggerName = KernelLoggerName;
  379. log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
  380. log.EventRecordCallback = EventRecordCallback;
  381. s_traceHandle2 = OpenTrace( &log );
  382. if( s_traceHandle2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
  383. {
  384. CloseTrace( s_traceHandle );
  385. tracy_free( s_prop );
  386. return false;
  387. }
  388. #ifndef TRACY_NO_VSYNC_CAPTURE
  389. SetupVsync();
  390. #endif
  391. return true;
  392. }
  393. void SysTraceStop()
  394. {
  395. if( s_threadVsync )
  396. {
  397. CloseTrace( s_traceHandleVsync2 );
  398. CloseTrace( s_traceHandleVsync );
  399. s_threadVsync->~Thread();
  400. tracy_free( s_threadVsync );
  401. }
  402. CloseTrace( s_traceHandle2 );
  403. CloseTrace( s_traceHandle );
  404. }
  405. void SysTraceWorker( void* ptr )
  406. {
  407. ThreadExitHandler threadExitHandler;
  408. SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
  409. SetThreadName( "Tracy SysTrace" );
  410. ProcessTrace( &s_traceHandle2, 1, 0, 0 );
  411. ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
  412. tracy_free( s_prop );
  413. }
  414. void SysTraceSendExternalName( uint64_t thread )
  415. {
  416. bool threadSent = false;
  417. auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) );
  418. if( hnd == 0 )
  419. {
  420. hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) );
  421. }
  422. if( hnd != 0 )
  423. {
  424. PWSTR tmp;
  425. _GetThreadDescription( hnd, &tmp );
  426. char buf[256];
  427. if( tmp )
  428. {
  429. auto ret = wcstombs( buf, tmp, 256 );
  430. if( ret != 0 )
  431. {
  432. GetProfiler().SendString( thread, buf, ret, QueueType::ExternalThreadName );
  433. threadSent = true;
  434. }
  435. }
  436. const auto pid = GetProcessIdOfThread( hnd );
  437. if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA )
  438. {
  439. void* ptr;
  440. ULONG retlen;
  441. auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( &ptr ), &retlen );
  442. if( status == 0 )
  443. {
  444. const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
  445. if( phnd != INVALID_HANDLE_VALUE )
  446. {
  447. HMODULE modules[1024];
  448. DWORD needed;
  449. if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
  450. {
  451. const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
  452. for( DWORD i=0; i<sz; i++ )
  453. {
  454. MODULEINFO info;
  455. if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
  456. {
  457. if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
  458. {
  459. char buf2[1024];
  460. const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
  461. if( modlen != 0 )
  462. {
  463. GetProfiler().SendString( thread, buf2, modlen, QueueType::ExternalThreadName );
  464. threadSent = true;
  465. }
  466. }
  467. }
  468. }
  469. }
  470. CloseHandle( phnd );
  471. }
  472. }
  473. }
  474. CloseHandle( hnd );
  475. if( !threadSent )
  476. {
  477. GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
  478. threadSent = true;
  479. }
  480. if( pid != 0 )
  481. {
  482. {
  483. uint64_t _pid = pid;
  484. TracyLfqPrepare( QueueType::TidToPid );
  485. MemWrite( &item->tidToPid.tid, thread );
  486. MemWrite( &item->tidToPid.pid, _pid );
  487. TracyLfqCommit;
  488. }
  489. if( pid == 4 )
  490. {
  491. GetProfiler().SendString( thread, "System", 6, QueueType::ExternalName );
  492. return;
  493. }
  494. else
  495. {
  496. const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid );
  497. if( phnd != INVALID_HANDLE_VALUE )
  498. {
  499. char buf2[1024];
  500. const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 );
  501. CloseHandle( phnd );
  502. if( sz != 0 )
  503. {
  504. auto ptr = buf2 + sz - 1;
  505. while( ptr > buf2 && *ptr != '\\' ) ptr--;
  506. if( *ptr == '\\' ) ptr++;
  507. GetProfiler().SendString( thread, ptr, QueueType::ExternalName );
  508. return;
  509. }
  510. }
  511. }
  512. }
  513. }
  514. if( !threadSent )
  515. {
  516. GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
  517. }
  518. GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
  519. }
  520. }
  521. # elif defined __linux__
  522. # include <sys/types.h>
  523. # include <sys/stat.h>
  524. # include <sys/wait.h>
  525. # include <fcntl.h>
  526. # include <inttypes.h>
  527. # include <limits>
  528. # include <poll.h>
  529. # include <stdio.h>
  530. # include <stdlib.h>
  531. # include <string.h>
  532. # include <unistd.h>
  533. # include <atomic>
  534. # include <thread>
  535. # include <linux/perf_event.h>
  536. # include <linux/version.h>
  537. # include <sys/mman.h>
  538. # include <sys/ioctl.h>
  539. # include <sys/syscall.h>
  540. # include "TracyProfiler.hpp"
  541. # include "TracyRingBuffer.hpp"
  542. # include "TracyThread.hpp"
  543. # ifdef __ANDROID__
  544. # include "TracySysTracePayload.hpp"
  545. # endif
  546. namespace tracy
  547. {
  548. static const char BasePath[] = "/sys/kernel/debug/tracing/";
  549. static const char TracingOn[] = "tracing_on";
  550. static const char CurrentTracer[] = "current_tracer";
  551. static const char TraceOptions[] = "trace_options";
  552. static const char TraceClock[] = "trace_clock";
  553. static const char SchedSwitch[] = "events/sched/sched_switch/enable";
  554. static const char SchedWakeup[] = "events/sched/sched_wakeup/enable";
  555. static const char BufferSizeKb[] = "buffer_size_kb";
  556. static const char TracePipe[] = "trace_pipe";
  557. static std::atomic<bool> traceActive { false };
  558. static Thread* s_threadSampling = nullptr;
  559. static int s_numCpus = 0;
  560. static constexpr size_t RingBufSize = 64*1024;
  561. static RingBuffer<RingBufSize>* s_ring = nullptr;
  562. static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags )
  563. {
  564. return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
  565. }
  566. static void SetupSampling( int64_t& samplingPeriod )
  567. {
  568. #ifndef CLOCK_MONOTONIC_RAW
  569. return;
  570. #endif
  571. samplingPeriod = 100*1000;
  572. s_numCpus = (int)std::thread::hardware_concurrency();
  573. s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus );
  574. perf_event_attr pe = {};
  575. pe.type = PERF_TYPE_SOFTWARE;
  576. pe.size = sizeof( perf_event_attr );
  577. pe.config = PERF_COUNT_SW_CPU_CLOCK;
  578. pe.sample_freq = 10000;
  579. pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN;
  580. #if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
  581. pe.sample_max_stack = 127;
  582. #endif
  583. pe.exclude_callchain_kernel = 1;
  584. pe.disabled = 1;
  585. pe.freq = 1;
  586. #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  587. pe.use_clockid = 1;
  588. pe.clockid = CLOCK_MONOTONIC_RAW;
  589. #endif
  590. for( int i=0; i<s_numCpus; i++ )
  591. {
  592. const int fd = perf_event_open( &pe, -1, i, -1, 0 );
  593. if( fd == -1 )
  594. {
  595. for( int j=0; j<i; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
  596. tracy_free( s_ring );
  597. return;
  598. }
  599. new( s_ring+i ) RingBuffer<RingBufSize>( fd );
  600. }
  601. s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
  602. new(s_threadSampling) Thread( [] (void*) {
  603. ThreadExitHandler threadExitHandler;
  604. SetThreadName( "Tracy Sampling" );
  605. sched_param sp = { 5 };
  606. pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
  607. uint32_t currentPid = (uint32_t)getpid();
  608. #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  609. for( int i=0; i<s_numCpus; i++ )
  610. {
  611. if( !s_ring[i].CheckTscCaps() )
  612. {
  613. for( int j=0; j<s_numCpus; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
  614. tracy_free( s_ring );
  615. const char* err = "Tracy Profiler: sampling is disabled due to non-native scheduler clock. Are you running under a VM?";
  616. Profiler::MessageAppInfo( err, strlen( err ) );
  617. return;
  618. }
  619. }
  620. #endif
  621. for( int i=0; i<s_numCpus; i++ ) s_ring[i].Enable();
  622. for(;;)
  623. {
  624. bool hadData = false;
  625. for( int i=0; i<s_numCpus; i++ )
  626. {
  627. if( !traceActive.load( std::memory_order_relaxed ) ) break;
  628. if( !s_ring[i].HasData() ) continue;
  629. hadData = true;
  630. perf_event_header hdr;
  631. s_ring[i].Read( &hdr, 0, sizeof( perf_event_header ) );
  632. if( hdr.type == PERF_RECORD_SAMPLE )
  633. {
  634. uint32_t pid, tid;
  635. uint64_t t0;
  636. uint64_t cnt;
  637. auto offset = sizeof( perf_event_header );
  638. s_ring[i].Read( &pid, offset, sizeof( uint32_t ) );
  639. if( pid == currentPid )
  640. {
  641. offset += sizeof( uint32_t );
  642. s_ring[i].Read( &tid, offset, sizeof( uint32_t ) );
  643. offset += sizeof( uint32_t );
  644. s_ring[i].Read( &t0, offset, sizeof( uint64_t ) );
  645. offset += sizeof( uint64_t );
  646. s_ring[i].Read( &cnt, offset, sizeof( uint64_t ) );
  647. offset += sizeof( uint64_t );
  648. auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) );
  649. s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt );
  650. // remove non-canonical pointers
  651. do
  652. {
  653. const auto test = (int64_t)trace[cnt];
  654. const auto m1 = test >> 63;
  655. const auto m2 = test >> 47;
  656. if( m1 == m2 ) break;
  657. }
  658. while( --cnt > 0 );
  659. for( uint64_t j=1; j<cnt; j++ )
  660. {
  661. const auto test = (int64_t)trace[j];
  662. const auto m1 = test >> 63;
  663. const auto m2 = test >> 47;
  664. if( m1 != m2 ) trace[j] = 0;
  665. }
  666. // skip kernel frames
  667. uint64_t j;
  668. for( j=0; j<cnt; j++ )
  669. {
  670. if( (int64_t)trace[j+1] >= 0 ) break;
  671. }
  672. if( j == cnt )
  673. {
  674. tracy_free( trace );
  675. }
  676. else
  677. {
  678. if( j > 0 )
  679. {
  680. cnt -= j;
  681. memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt );
  682. }
  683. memcpy( trace, &cnt, sizeof( uint64_t ) );
  684. #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  685. t0 = s_ring[i].ConvertTimeToTsc( t0 );
  686. #endif
  687. TracyLfqPrepare( QueueType::CallstackSample );
  688. MemWrite( &item->callstackSampleFat.time, t0 );
  689. MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
  690. MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
  691. TracyLfqCommit;
  692. }
  693. }
  694. }
  695. s_ring[i].Advance( hdr.size );
  696. }
  697. if( !traceActive.load( std::memory_order_relaxed) ) break;
  698. if( !hadData )
  699. {
  700. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  701. }
  702. }
  703. for( int i=0; i<s_numCpus; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
  704. tracy_free( s_ring );
  705. }, nullptr );
  706. }
  707. #ifdef __ANDROID__
  708. static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz )
  709. {
  710. // Explanation for "su root sh -c": there are 2 flavors of "su" in circulation
  711. // on Android. The default Android su has the following syntax to run a command
  712. // as root:
  713. // su root 'command'
  714. // and 'command' is exec'd not passed to a shell, so if shell interpretation is
  715. // wanted, one needs to do:
  716. // su root sh -c 'command'
  717. // Besides that default Android 'su' command, some Android devices use a different
  718. // su with a command-line interface closer to the familiar util-linux su found
  719. // on Linux distributions. Fortunately, both the util-linux su and the one
  720. // in https://github.com/topjohnwu/Magisk seem to be happy with the above
  721. // `su root sh -c 'command'` command line syntax.
  722. char tmp[256];
  723. sprintf( tmp, "su root sh -c 'echo \"%s\" > %s%s'", val, BasePath, path );
  724. return system( tmp ) == 0;
  725. }
  726. #else
  727. static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz )
  728. {
  729. char tmp[256];
  730. memcpy( tmp, BasePath, sizeof( BasePath ) - 1 );
  731. memcpy( tmp + sizeof( BasePath ) - 1, path, psz );
  732. int fd = open( tmp, O_WRONLY );
  733. if( fd < 0 ) return false;
  734. for(;;)
  735. {
  736. ssize_t cnt = write( fd, val, vsz );
  737. if( cnt == (ssize_t)vsz )
  738. {
  739. close( fd );
  740. return true;
  741. }
  742. if( cnt < 0 )
  743. {
  744. close( fd );
  745. return false;
  746. }
  747. vsz -= cnt;
  748. val += cnt;
  749. }
  750. }
  751. #endif
  752. #ifdef __ANDROID__
  753. void SysTraceInjectPayload()
  754. {
  755. int pipefd[2];
  756. if( pipe( pipefd ) == 0 )
  757. {
  758. const auto pid = fork();
  759. if( pid == 0 )
  760. {
  761. // child
  762. close( pipefd[1] );
  763. if( dup2( pipefd[0], STDIN_FILENO ) >= 0 )
  764. {
  765. close( pipefd[0] );
  766. execlp( "su", "su", "root", "sh", "-c", "cat > /data/tracy_systrace", (char*)nullptr );
  767. exit( 1 );
  768. }
  769. }
  770. else if( pid > 0 )
  771. {
  772. // parent
  773. close( pipefd[0] );
  774. #ifdef __aarch64__
  775. write( pipefd[1], tracy_systrace_aarch64_data, tracy_systrace_aarch64_size );
  776. #else
  777. write( pipefd[1], tracy_systrace_armv7_data, tracy_systrace_armv7_size );
  778. #endif
  779. close( pipefd[1] );
  780. waitpid( pid, nullptr, 0 );
  781. system( "su root sh -c 'chmod 700 /data/tracy_systrace'" );
  782. }
  783. }
  784. }
  785. #endif
  786. bool SysTraceStart( int64_t& samplingPeriod )
  787. {
  788. #ifndef CLOCK_MONOTONIC_RAW
  789. return false;
  790. #endif
  791. if( !TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ) ) return false;
  792. if( !TraceWrite( CurrentTracer, sizeof( CurrentTracer ), "nop", 4 ) ) return false;
  793. TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-cmd", 13 );
  794. TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-tgid", 14 );
  795. TraceWrite( TraceOptions, sizeof( TraceOptions ), "noirq-info", 11 );
  796. TraceWrite( TraceOptions, sizeof( TraceOptions ), "noannotate", 11 );
  797. #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  798. if( !TraceWrite( TraceClock, sizeof( TraceClock ), "x86-tsc", 8 ) ) return false;
  799. #else
  800. if( !TraceWrite( TraceClock, sizeof( TraceClock ), "mono_raw", 9 ) ) return false;
  801. #endif
  802. if( !TraceWrite( SchedSwitch, sizeof( SchedSwitch ), "1", 2 ) ) return false;
  803. if( !TraceWrite( SchedWakeup, sizeof( SchedWakeup ), "1", 2 ) ) return false;
  804. if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "4096", 5 ) ) return false;
  805. #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
  806. SysTraceInjectPayload();
  807. #endif
  808. if( !TraceWrite( TracingOn, sizeof( TracingOn ), "1", 2 ) ) return false;
  809. traceActive.store( true, std::memory_order_relaxed );
  810. SetupSampling( samplingPeriod );
  811. return true;
  812. }
  813. void SysTraceStop()
  814. {
  815. TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 );
  816. traceActive.store( false, std::memory_order_relaxed );
  817. if( s_threadSampling )
  818. {
  819. s_threadSampling->~Thread();
  820. tracy_free( s_threadSampling );
  821. }
  822. }
  // Parses an unsigned decimal number starting at `data`.
  // On return `data` points at the first character that is not a decimal digit.
  // The first character is required to be a digit (checked by assert only).
  static uint64_t ReadNumber( const char*& data )
  {
      const char* cursor = data;
      assert( *cursor >= '0' && *cursor <= '9' );
      uint64_t result = *cursor - '0';
      ++cursor;
      // Casting to uint8_t folds every non-digit (including characters below '0')
      // into a value above 9, so a single comparison ends the scan.
      for( uint8_t digit = uint8_t( *cursor - '0' ); digit <= 9; digit = uint8_t( *++cursor - '0' ) )
      {
          result = result * 10 + digit;
      }
      data = cursor;
      return result;
  }
  // Translates a one-letter state (HandleTraceLine passes the character that
  // follows the "prev_state" token) into a numeric code in the range 101..109.
  // Any letter that is not listed yields 100.
  static uint8_t ReadState( char state )
  {
      struct StateCode
      {
          char letter;
          uint8_t code;
      };
      static const StateCode known[] = {
          { 'D', 101 },
          { 'I', 102 },
          { 'R', 103 },
          { 'S', 104 },
          { 'T', 105 },
          { 't', 106 },
          { 'W', 107 },
          { 'X', 108 },
          { 'Z', 109 },
      };
      for( const auto& entry : known )
      {
          if( entry.letter == state ) return entry.code;
      }
      return 100;
  }
  854. #if defined __ANDROID__ && defined __ANDROID_API__ && __ANDROID_API__ < 18
  855. /*-
  856. * Copyright (c) 2011 The NetBSD Foundation, Inc.
  857. * All rights reserved.
  858. *
  859. * This code is derived from software contributed to The NetBSD Foundation
  860. * by Christos Zoulas.
  861. *
  862. * Redistribution and use in source and binary forms, with or without
  863. * modification, are permitted provided that the following conditions
  864. * are met:
  865. * 1. Redistributions of source code must retain the above copyright
  866. * notice, this list of conditions and the following disclaimer.
  867. * 2. Redistributions in binary form must reproduce the above copyright
  868. * notice, this list of conditions and the following disclaimer in the
  869. * documentation and/or other materials provided with the distribution.
  870. *
  871. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  872. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  873. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  874. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  875. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  876. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  877. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  878. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  879. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  880. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  881. * POSSIBILITY OF SUCH DAMAGE.
  882. */
  // Fallback getdelim() for platforms whose libc lacks it.
  //
  // Reads characters from `fp` into the malloc()-owned buffer `*buf` (capacity
  // `*bufsiz`) up to and including `delimiter`. The buffer is allocated when
  // `*buf` is NULL or `*bufsiz` is 0, and doubled with realloc() when it fills up;
  // `*buf` / `*bufsiz` are updated accordingly.
  //
  // Returns the number of characters stored (terminating NUL not counted), or -1
  // on allocation failure, read error, or end of file with nothing read.
  // The result is NUL-terminated on every successful return, including a final
  // line that ends at EOF without a delimiter.
  ssize_t getdelim(char **buf, size_t *bufsiz, int delimiter, FILE *fp)
  {
      if (*buf == NULL || *bufsiz == 0) {
          *bufsiz = BUFSIZ;
          if ((*buf = (char*)malloc(*bufsiz)) == NULL)
              return -1;
      }

      char *ptr = *buf;
      char *eptr = *buf + *bufsiz;
      for (;;) {
          // Guarantee room for the next character plus the terminating NUL
          // *before* storing anything, so even a 1-byte caller buffer is safe.
          if (eptr - ptr < 2) {
              const size_t nbufsiz = *bufsiz * 2;
              const ssize_t used = ptr - *buf;
              char *nbuf = (char*)realloc(*buf, nbufsiz);
              if (nbuf == NULL)
                  return -1;
              *buf = nbuf;
              *bufsiz = nbufsiz;
              eptr = nbuf + nbufsiz;
              ptr = nbuf + used;
          }

          const int c = fgetc(fp);
          if (c == EOF) {
              // A read error, or EOF before any character, is reported as -1.
              if (!feof(fp) || ptr == *buf)
                  return -1;
              // Partial last line: terminate it like any other result.
              *ptr = '\0';
              return ptr - *buf;
          }

          *ptr++ = (char)c;
          if (c == delimiter) {
              *ptr = '\0';
              return ptr - *buf;
          }
      }
  }
  // Fallback getline(): reads one '\n'-delimited line by delegating to getdelim().
  // Arguments and return value are passed through unchanged.
  ssize_t getline(char **buf, size_t *bufsiz, FILE *fp)
  {
      const int lineFeed = '\n';
      return getdelim(buf, bufsiz, lineFeed, fp);
  }
  921. #endif
  922. static void HandleTraceLine( const char* line )
  923. {
  924. line += 23;
  925. while( *line != '[' ) line++;
  926. line++;
  927. const auto cpu = (uint8_t)ReadNumber( line );
  928. line++; // ']'
  929. while( *line == ' ' ) line++;
  930. #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
  931. const auto time = ReadNumber( line );
  932. #else
  933. const auto ts = ReadNumber( line );
  934. line++; // '.'
  935. const auto tus = ReadNumber( line );
  936. const auto time = ts * 1000000000ll + tus * 1000ll;
  937. #endif
  938. line += 2; // ': '
  939. if( memcmp( line, "sched_switch", 12 ) == 0 )
  940. {
  941. line += 14;
  942. while( memcmp( line, "prev_pid", 8 ) != 0 ) line++;
  943. line += 9;
  944. const auto oldPid = ReadNumber( line );
  945. line++;
  946. while( memcmp( line, "prev_state", 10 ) != 0 ) line++;
  947. line += 11;
  948. const auto oldState = (uint8_t)ReadState( *line );
  949. line += 5;
  950. while( memcmp( line, "next_pid", 8 ) != 0 ) line++;
  951. line += 9;
  952. const auto newPid = ReadNumber( line );
  953. uint8_t reason = 100;
  954. TracyLfqPrepare( QueueType::ContextSwitch );
  955. MemWrite( &item->contextSwitch.time, time );
  956. MemWrite( &item->contextSwitch.oldThread, oldPid );
  957. MemWrite( &item->contextSwitch.newThread, newPid );
  958. MemWrite( &item->contextSwitch.cpu, cpu );
  959. MemWrite( &item->contextSwitch.reason, reason );
  960. MemWrite( &item->contextSwitch.state, oldState );
  961. TracyLfqCommit;
  962. }
  963. else if( memcmp( line, "sched_wakeup", 12 ) == 0 )
  964. {
  965. line += 14;
  966. while( memcmp( line, "pid=", 4 ) != 0 ) line++;
  967. line += 4;
  968. const auto pid = ReadNumber( line );
  969. TracyLfqPrepare( QueueType::ThreadWakeup );
  970. MemWrite( &item->threadWakeup.time, time );
  971. MemWrite( &item->threadWakeup.thread, pid );
  972. TracyLfqCommit;
  973. }
  974. }
  975. #ifdef __ANDROID__
  976. static void ProcessTraceLines( int fd )
  977. {
  978. // Linux pipe buffer is 64KB, additional 1KB is for unfinished lines
  979. char* buf = (char*)tracy_malloc( (64+1)*1024 );
  980. char* line = buf;
  981. for(;;)
  982. {
  983. if( !traceActive.load( std::memory_order_relaxed ) ) break;
  984. const auto rd = read( fd, line, 64*1024 );
  985. if( rd <= 0 ) break;
  986. #ifdef TRACY_ON_DEMAND
  987. if( !GetProfiler().IsConnected() )
  988. {
  989. if( rd < 64*1024 )
  990. {
  991. assert( line[rd-1] == '\n' );
  992. line = buf;
  993. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  994. }
  995. else
  996. {
  997. const auto end = line + rd;
  998. line = end - 1;
  999. while( line > buf && *line != '\n' ) line--;
  1000. if( line > buf )
  1001. {
  1002. line++;
  1003. const auto lsz = end - line;
  1004. memmove( buf, line, lsz );
  1005. line = buf + lsz;
  1006. }
  1007. }
  1008. continue;
  1009. }
  1010. #endif
  1011. const auto end = line + rd;
  1012. line = buf;
  1013. for(;;)
  1014. {
  1015. auto next = (char*)memchr( line, '\n', end - line );
  1016. if( !next )
  1017. {
  1018. const auto lsz = end - line;
  1019. memmove( buf, line, lsz );
  1020. line = buf + lsz;
  1021. break;
  1022. }
  1023. HandleTraceLine( line );
  1024. line = ++next;
  1025. }
  1026. if( rd < 64*1024 )
  1027. {
  1028. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1029. }
  1030. }
  1031. tracy_free( buf );
  1032. }
  1033. void SysTraceWorker( void* ptr )
  1034. {
  1035. ThreadExitHandler threadExitHandler;
  1036. SetThreadName( "Tracy SysTrace" );
  1037. int pipefd[2];
  1038. if( pipe( pipefd ) == 0 )
  1039. {
  1040. const auto pid = fork();
  1041. if( pid == 0 )
  1042. {
  1043. // child
  1044. close( pipefd[0] );
  1045. dup2( open( "/dev/null", O_WRONLY ), STDERR_FILENO );
  1046. if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 )
  1047. {
  1048. close( pipefd[1] );
  1049. sched_param sp = { 4 };
  1050. pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
  1051. #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
  1052. execlp( "su", "su", "root", "sh", "-c", "/data/tracy_systrace", (char*)nullptr );
  1053. #endif
  1054. execlp( "su", "su", "root", "sh", "-c", "cat /sys/kernel/debug/tracing/trace_pipe", (char*)nullptr );
  1055. exit( 1 );
  1056. }
  1057. }
  1058. else if( pid > 0 )
  1059. {
  1060. // parent
  1061. close( pipefd[1] );
  1062. sched_param sp = { 5 };
  1063. pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
  1064. ProcessTraceLines( pipefd[0] );
  1065. close( pipefd[0] );
  1066. waitpid( pid, nullptr, 0 );
  1067. }
  1068. }
  1069. }
  1070. #else
  1071. static void ProcessTraceLines( int fd )
  1072. {
  1073. char* buf = (char*)tracy_malloc( 64*1024 );
  1074. struct pollfd pfd;
  1075. pfd.fd = fd;
  1076. pfd.events = POLLIN | POLLERR;
  1077. for(;;)
  1078. {
  1079. while( poll( &pfd, 1, 0 ) <= 0 )
  1080. {
  1081. if( !traceActive.load( std::memory_order_relaxed ) ) break;
  1082. std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
  1083. }
  1084. const auto rd = read( fd, buf, 64*1024 );
  1085. if( rd <= 0 ) break;
  1086. #ifdef TRACY_ON_DEMAND
  1087. if( !GetProfiler().IsConnected() ) continue;
  1088. #endif
  1089. auto line = buf;
  1090. const auto end = buf + rd;
  1091. for(;;)
  1092. {
  1093. auto next = (char*)memchr( line, '\n', end - line );
  1094. if( !next ) break;
  1095. HandleTraceLine( line );
  1096. line = ++next;
  1097. }
  1098. }
  1099. tracy_free( buf );
  1100. }
  1101. void SysTraceWorker( void* ptr )
  1102. {
  1103. ThreadExitHandler threadExitHandler;
  1104. SetThreadName( "Tracy SysTrace" );
  1105. char tmp[256];
  1106. memcpy( tmp, BasePath, sizeof( BasePath ) - 1 );
  1107. memcpy( tmp + sizeof( BasePath ) - 1, TracePipe, sizeof( TracePipe ) );
  1108. int fd = open( tmp, O_RDONLY );
  1109. if( fd < 0 ) return;
  1110. sched_param sp = { 5 };
  1111. pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
  1112. ProcessTraceLines( fd );
  1113. close( fd );
  1114. }
  1115. #endif
  1116. void SysTraceSendExternalName( uint64_t thread )
  1117. {
  1118. FILE* f;
  1119. char fn[256];
  1120. sprintf( fn, "/proc/%" PRIu64 "/comm", thread );
  1121. f = fopen( fn, "rb" );
  1122. if( f )
  1123. {
  1124. char buf[256];
  1125. const auto sz = fread( buf, 1, 256, f );
  1126. if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0';
  1127. GetProfiler().SendString( thread, buf, QueueType::ExternalThreadName );
  1128. fclose( f );
  1129. }
  1130. else
  1131. {
  1132. GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
  1133. }
  1134. sprintf( fn, "/proc/%" PRIu64 "/status", thread );
  1135. f = fopen( fn, "rb" );
  1136. if( f )
  1137. {
  1138. int pid = -1;
  1139. size_t lsz = 1024;
  1140. auto line = (char*)tracy_malloc( lsz );
  1141. for(;;)
  1142. {
  1143. auto rd = getline( &line, &lsz, f );
  1144. if( rd <= 0 ) break;
  1145. if( memcmp( "Tgid:\t", line, 6 ) == 0 )
  1146. {
  1147. pid = atoi( line + 6 );
  1148. break;
  1149. }
  1150. }
  1151. tracy_free( line );
  1152. fclose( f );
  1153. if( pid >= 0 )
  1154. {
  1155. {
  1156. uint64_t _pid = pid;
  1157. TracyLfqPrepare( QueueType::TidToPid );
  1158. MemWrite( &item->tidToPid.tid, thread );
  1159. MemWrite( &item->tidToPid.pid, _pid );
  1160. TracyLfqCommit;
  1161. }
  1162. sprintf( fn, "/proc/%i/comm", pid );
  1163. f = fopen( fn, "rb" );
  1164. if( f )
  1165. {
  1166. char buf[256];
  1167. const auto sz = fread( buf, 1, 256, f );
  1168. if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0';
  1169. GetProfiler().SendString( thread, buf, QueueType::ExternalName );
  1170. fclose( f );
  1171. return;
  1172. }
  1173. }
  1174. }
  1175. GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
  1176. }
  1177. }
  1178. # endif
  1179. #endif