eathread_callstack_kettle.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (c) Electronic Arts Inc. All rights reserved.
  3. ///////////////////////////////////////////////////////////////////////////////
  4. #include <EABase/eabase.h>
  5. #include <eathread/eathread.h>
  6. #include <eathread/eathread_atomic.h>
  7. #include <eathread/eathread_callstack.h>
  8. #include <eathread/eathread_callstack_context.h>
  9. #include <eathread/eathread_storage.h>
  10. #include <string.h>
  11. #include <sys/signal.h>
  12. #include <machine/signal.h>
  13. #include <sdk_version.h>
  14. #include <unistd.h>
  15. // EATHREAD_PTHREAD_SIGACTION_SUPPORTED
  16. //
  17. // Defined as 0 or 1.
  18. //
  19. #if !defined(EATHREAD_PTHREAD_SIGACTION_SUPPORTED)
  20. //#if EATHREAD_SCEDBG_ENABLED || defined(EA_DEBUG)
  21. // #define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 1
  22. //#else
  23. // #define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 0
  24. //#endif
  25. // Disabling due to syscall crashing on SDK 1.6.
  26. #define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 0
  27. #endif
  28. #if EATHREAD_PTHREAD_SIGACTION_SUPPORTED
  29. // Until Sony provides a declaration for this or an alternative scheme, we declare this ourselves.
  30. __BEGIN_DECLS
// User-level applications pass integer arguments in the register sequence %rdi, %rsi, %rdx, %rcx, %r8 and %r9.
  32. // The kernel interface uses %rdi, %rsi, %rdx, %r10, %r8 and %r9, which is what matters to us below.
  33. // http://www.ibm.com/developerworks/library/l-ia/index.html
  34. // A system-call is done via the syscall instruction. The kernel destroys registers %rcx and %r11.
  35. // The number of the syscall has to be passed in register %rax.
  36. // System-calls are limited to six arguments, no argument is passed directly on the stack.
  37. // Returning from the syscall, register %rax contains the result of the system-call. A value in the range between -4095 and -1 indicates an error, it is -errno.
  38. // Only values of class INTEGER or class MEMORY are passed to the kernel.
  39. // Relevant BSD source code: https://bitbucket.org/freebsd/freebsd-head/src/36b017c6a0f817439d40abfd790238dfa13e2be3/lib/libthr/thread?at=default
  40. // The BSD pthread struct: https://bitbucket.org/freebsd/freebsd-head/src/36b017c6a0f817439d40abfd790238dfa13e2be3/lib/libthr/thread/thr_private.h?at=default
  41. // Some NetBSD pthread source: http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libpthread/pthread.c?rev=1.134&content-type=text/x-cvsweb-markup&only_with_tag=MAIN
  42. static int sigaction(int sig, const struct sigaction * __restrict act, struct sigaction * __restrict oact)
  43. {
  44. int result;
  45. __asm__ __volatile__(
  46. "mov %%rcx, %%r10\n\t"
  47. "syscall\n\t"
  48. : "=a"(result) : "a"(416), "D"(sig), "S"(act), "d"(oact));
  49. return result;
  50. }
  51. // #define SYS_thr_kill 433
  52. // typedef long thread_t
  53. // pthread_t is an opaque typedef for struct pthread. struct pthread looks like so:
  54. // struct pthread {
  55. // long tid; // Kernel thread id.
  56. // . . . // Many other members.
  57. // }
  58. // Thus you can directly reinterpret_cast pthread to a pointer to a kernel thread id.
  59. #if !defined(GetTidFromPthread)
  60. #define GetTidFromPthread(pthreadId) *reinterpret_cast<long*>(pthreadId)
  61. #endif
  62. static int thr_kill(long thread, int sig)
  63. {
  64. int result;
  65. __asm__ __volatile__(
  66. "mov %%rcx, %%r10\n\t"
  67. "syscall\n\t"
  68. : "=a"(result) : "a"(433), "D"(thread), "S"(sig));
  69. return result;
  70. }
  71. static int pthread_kill(pthread_t pthreadId, int sig)
  72. {
  73. long tid = GetTidFromPthread(pthreadId);
  74. thr_kill(tid, sig);
  75. return 0;
  76. }
  77. const size_t kBacktraceSignalHandlerIgnoreCount = 2; // It's unclear what this value should be. On one machine it was 4, but on another it was 2. Going with a lower number is more conservative. Possibly a debug/opt thing?
  78. __END_DECLS
  79. #endif
  80. // Sony may remove this header in the future, so we use the clang __has_include feature to detect if and when that occurs.
  81. // NOTE: Use of unwind.h is disabled on PS4 due to syscall hangs in the kernel
// experienced by Frostbite when overloading user_malloc to generate a
  83. // callstack. In addition, Sony recommends the use of __builtin_frame_address
  84. // / __builtin_return_address over _Unwind_Backtrace as it is more performant
  85. // due to the frame pointers being included by default in all builds.
// Forum thread stating that the performance of __builtin_frame_address is better:
  87. // https://ps4.scedev.net/forums/thread/2267/
  88. // Open support ticket for syscall hang:
  89. // https://ps4.scedev.net/forums/thread/52687/
  90. #if __has_include(<unwind.h>) && !defined(EA_PLATFORM_SONY)
  91. #include <unwind.h>
  92. #if !defined(EA_HAVE_UNWIND_H)
  93. #define EA_HAVE_UNWIND_H 1
  94. #endif
  95. #else
  96. #if !defined(EA_NO_HAVE_UNWIND_H)
  97. #define EA_NO_HAVE_UNWIND_H 1
  98. #endif
  99. #endif
  100. namespace EA
  101. {
  102. namespace Thread
  103. {
  104. ///////////////////////////////////////////////////////////////////////////////
  105. // InitCallstack
  106. //
  107. EATHREADLIB_API void InitCallstack()
  108. {
  109. // Nothing needed.
  110. }
  111. ///////////////////////////////////////////////////////////////////////////////
  112. // ShutdownCallstack
  113. //
  114. EATHREADLIB_API void ShutdownCallstack()
  115. {
  116. // Nothing needed.
  117. }
  118. EATHREADLIB_API void GetInstructionPointer(void*& p)
  119. {
  120. p = __builtin_return_address(0);
  121. }
  122. #if defined(EA_HAVE_UNWIND_H)
  123. // This is a callback function which libunwind calls, once per callstack entry.
  124. struct UnwindCallbackContext
  125. {
  126. void** mpReturnAddressArray;
  127. size_t mReturnAddressArrayCapacity;
  128. size_t mReturnAddressArrayIndex;
  129. };
  130. static _Unwind_Reason_Code UnwindCallback(_Unwind_Context* pUnwindContext, void* pUnwindCallbackContextVoid)
  131. {
  132. UnwindCallbackContext* pUnwindCallbackContext = (UnwindCallbackContext*)pUnwindCallbackContextVoid;
  133. if(pUnwindCallbackContext->mReturnAddressArrayIndex < pUnwindCallbackContext->mReturnAddressArrayCapacity)
  134. {
  135. uintptr_t ip = _Unwind_GetIP(pUnwindContext);
  136. pUnwindCallbackContext->mpReturnAddressArray[pUnwindCallbackContext->mReturnAddressArrayIndex++] = (void*)ip;
  137. return _URC_NO_REASON;
  138. }
  139. return _URC_NORMAL_STOP;
  140. }
  141. #endif
  142. #if EATHREAD_PTHREAD_SIGACTION_SUPPORTED
  143. namespace Local
  144. {
  145. enum EAThreadBacktraceState
  146. {
  147. // Positive thread lwp ids are here implicitly.
  148. EATHREAD_BACKTRACE_STATE_NONE = -1,
  149. EATHREAD_BACKTRACE_STATE_DUMPING = -2,
  150. EATHREAD_BACKTRACE_STATE_DONE = -3,
  151. EATHREAD_BACKTRACE_STATE_CANCEL = -4
  152. };
  153. struct ThreadBacktraceState
  154. {
  155. EA::Thread::AtomicInt32 mState; // One of enum EAThreadBacktraceState or (initially) the thread id of the thread we are targeting.
  156. void** mCallstack; // Output param
  157. size_t mCallstackCapacity; // Input param, refers to array capacity of mCallstack.
  158. size_t mCallstackCount; // Output param
  159. ScePthread mPthread; // Output param
  160. ThreadBacktraceState() : mState(EATHREAD_BACKTRACE_STATE_NONE), mCallstackCapacity(0), mCallstackCount(0), mPthread(NULL){}
  161. };
  162. static ScePthreadMutex gThreadBacktraceMutex = SCE_PTHREAD_MUTEX_INITIALIZER;
  163. static ThreadBacktraceState gThreadBacktraceState; // Protected by gThreadBacktraceMutex.
  164. static void gThreadBacktraceSignalHandler(int /*sigNum*/, siginfo_t* /*pSigInfo*/, void* pSigContextVoid)
  165. {
  166. int32_t lwpSelf = *(int32_t*)scePthreadSelf();
  167. if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_DUMPING, lwpSelf))
  168. {
  169. gThreadBacktraceState.mPthread = scePthreadSelf();
  170. if(gThreadBacktraceState.mCallstackCapacity)
  171. {
  172. gThreadBacktraceState.mCallstackCount = GetCallstack(gThreadBacktraceState.mCallstack, gThreadBacktraceState.mCallstackCapacity, (const CallstackContext*)NULL);
  173. // At this point we need to remove the top N entries and insert an entry for where the thread's instruction pointer is.
  174. // We originally had code like the following, but it's returning a signal
  175. // handling address now that we are using our own pthread_kill function:
  176. //if(gThreadBacktraceState.mCallstackCount >= kBacktraceSignalHandlerIgnoreCount) // This should always be true.
  177. //{
  178. // gThreadBacktraceState.mCallstackCount -= (kBacktraceSignalHandlerIgnoreCount - 1);
  179. // memmove(&gThreadBacktraceState.mCallstack[1], &gThreadBacktraceState.mCallstack[kBacktraceSignalHandlerIgnoreCount], (gThreadBacktraceState.mCallstackCount - 1) * sizeof(void*));
  180. //}
  181. //else
  182. // gThreadBacktraceState.mCallstackCount = 1;
  183. //gThreadBacktraceState.mCallstack[0] = pSigContextVoid ? reinterpret_cast<void*>(reinterpret_cast<sigcontext*>((uintptr_t)pSigContextVoid + 48)->sc_rip) : NULL;
  184. // New code that's working for our own pthread_kill function usage:
  185. if(gThreadBacktraceState.mCallstackCount >= kBacktraceSignalHandlerIgnoreCount) // This should always be true.
  186. {
  187. gThreadBacktraceState.mCallstackCount -= kBacktraceSignalHandlerIgnoreCount;
  188. memmove(&gThreadBacktraceState.mCallstack[0], &gThreadBacktraceState.mCallstack[kBacktraceSignalHandlerIgnoreCount], gThreadBacktraceState.mCallstackCount * sizeof(void*));
  189. }
  190. }
  191. else
  192. gThreadBacktraceState.mCallstackCount = 0;
  193. gThreadBacktraceState.mState.SetValue(EATHREAD_BACKTRACE_STATE_DONE);
  194. }
  195. // else this thread received an unexpected SIGURG. This can happen if it was so delayed that
  196. // we timed out waiting for it to happen and moved on.
  197. }
  198. }
  199. #endif
  200. /// GetCallstack
  201. ///
  202. /// This is a version of GetCallstack which gets the callstack of a thread based on its thread id as opposed to
  203. /// its register state. It works by injecting a signal handler into the given thread and reading the self callstack
  204. /// then exiting from the signal handler. The GetCallstack function sets this up, generates the signal for the
  205. /// other thread, then waits for it to complete. It uses the SIGURG signal for this.
  206. ///
  207. /// Primary causes of failure:
  208. /// The target thread has SIGURG explicitly ignored.
  209. /// The target thread somehow is getting too little CPU time to respond to the signal.
  210. ///
  211. /// To do: Change this function to take a ThreadInfo as a last parameter instead of pthread_t. And have the
  212. /// ThreadInfo return additional basic thread information. Or maybe even change this function to be a
  213. /// GetThreadInfo function instead of GetCallstack.
  214. ///
  215. EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, EA::Thread::ThreadId& pthread)
  216. {
  217. size_t callstackCount = 0;
  218. #if EATHREAD_PTHREAD_SIGACTION_SUPPORTED
  219. using namespace Local;
  220. if(pthread)
  221. {
  222. ScePthread pthreadSelf = scePthreadSelf();
  223. int32_t lwp = *(int32_t*)pthread;
  224. int32_t lwpSelf = *(int32_t*)pthreadSelf;
  225. if(lwp == lwpSelf) // This function can be called only for a thread other than self.
  226. callstackCount = GetCallstack(pReturnAddressArray, nReturnAddressArrayCapacity, (const CallstackContext*)NULL);
  227. else
  228. {
  229. struct sigaction act; memset(&act, 0, sizeof(act));
  230. struct sigaction oact; memset(&oact, 0, sizeof(oact));
  231. act.sa_sigaction = gThreadBacktraceSignalHandler;
  232. act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
  233. scePthreadMutexLock(&gThreadBacktraceMutex);
  234. if(sigaction(SIGURG, &act, &oact) == 0)
  235. {
  236. gThreadBacktraceState.mCallstack = pReturnAddressArray;
  237. gThreadBacktraceState.mCallstackCapacity = nReturnAddressArrayCapacity;
  238. gThreadBacktraceState.mState.SetValue(lwp);
  239. // Signal the specific thread that we want to dump.
  240. int32_t stateTemp = lwp;
  241. if(pthread_kill(pthread, SIGURG) == 0)
  242. {
  243. // Wait for the other thread to start dumping the stack, or time out.
  244. for(int waitMS = 200; waitMS; waitMS--)
  245. {
  246. stateTemp = gThreadBacktraceState.mState.GetValue();
  247. if(stateTemp != lwp)
  248. break;
  249. usleep(1000); // This sleep gives the OS the opportunity to execute the target thread, even if it's of a lower priority than this thread.
  250. }
  251. }
  252. // else apparently failed to send SIGURG to the thread, or the thread was paused in a way that it couldn't receive it.
  253. if(stateTemp == lwp) // If the operation timed out or seemingly never started...
  254. {
  255. if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_CANCEL, lwp)) // If the backtrace still didn't start, and we were able to stop it by setting the state to cancel...
  256. stateTemp = EATHREAD_BACKTRACE_STATE_CANCEL;
  257. else
  258. stateTemp = gThreadBacktraceState.mState.GetValue(); // It looks like the backtrace thread did in fact get a late start and is now executing
  259. }
  260. // Wait indefinitely for the dump to finish or be canceled.
  261. // We cannot apply a timeout here because the other thread is accessing state that
  262. // is owned by this thread.
  263. for(int waitMS = 100; (stateTemp == EATHREAD_BACKTRACE_STATE_DUMPING) && waitMS; waitMS--) // If the thread is (still) busy writing it out its callstack...
  264. {
  265. usleep(1000);
  266. stateTemp = gThreadBacktraceState.mState.GetValue();
  267. }
  268. if(stateTemp == EATHREAD_BACKTRACE_STATE_DONE)
  269. callstackCount = gThreadBacktraceState.mCallstackCount;
  270. // Else give up on it. It's OK to just fall through.
  271. // Restore the original SIGURG handler.
  272. sigaction(SIGURG, &oact, NULL);
  273. }
  274. scePthreadMutexUnlock(&gThreadBacktraceMutex);
  275. }
  276. }
  277. #endif
  278. return callstackCount;
  279. }
  280. ///////////////////////////////////////////////////////////////////////////////
  281. // GetCallstack
  282. //
  283. EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
  284. {
  285. #if defined(EA_HAVE_UNWIND_H)
  286. // libunwind can only read the stack from the current thread.
  287. // However, we can accomplish this for another thread by injecting a signal handler into that thread.
  288. // See the EAThreadBacktrace() function source code above.
  289. if(pContext == NULL) // If reading the current thread's context...
  290. {
  291. UnwindCallbackContext context = { pReturnAddressArray, nReturnAddressArrayCapacity, 0 };
  292. _Unwind_Backtrace(&UnwindCallback, &context);
  293. return context.mReturnAddressArrayIndex;
  294. }
  295. // We don't yet have a means to read another thread's context.
  296. return 0;
  297. #else
  298. // This platform doesn't use glibc and so the backtrace() function isn't available.
  299. // For debug builds we can follow the stack frame manually, as stack frames are usually available in debug builds.
  300. EA_UNUSED(pReturnAddressArray);
  301. EA_UNUSED(nReturnAddressArrayCapacity);
  302. size_t index = 0;
  303. void** sp = nullptr;
  304. void** new_sp = nullptr;
  305. const uintptr_t kPtrSanityCheckLimit = 1*1024*1024;
  306. if (pContext == NULL)
  307. {
  308. // Arguments are passed in registers on x86-64, so we can't just offset from &pReturnAddressArray.
  309. sp = (void**)__builtin_frame_address(0);
  310. }
  311. else
  312. {
  313. // On kettle it's not recommended to omit the frame pointer so we check that RBP is sane before use since
  314. // it could have been omitted. From Sony Docs:
  315. // "[omit frame pointer] will inhibit unwinding and ... the option may also increase code size since the
  316. // encoding for stack-based addressing is often 1 byte longer then RBP-based (frame pointer) addressing.
  317. // With PlayStation®4 Clang, frame pointer omission may not lead to improved performance.
  318. // Performance analysis and code profiling are recommended before using this option"
  319. sp = (void**)((pContext->mRBP - pContext->mRSP) > kPtrSanityCheckLimit ? pContext->mRSP : pContext->mRBP);
  320. pReturnAddressArray[index++] = (void*)pContext->mRIP;
  321. }
  322. for(int count = 0; sp && (index < nReturnAddressArrayCapacity); sp = new_sp, ++count)
  323. {
  324. if(count > 0 || index != 0) // We skip the current frame if we haven't set it already above
  325. pReturnAddressArray[index++] = *(sp + 1);
  326. new_sp = (void**)*sp;
  327. if((new_sp < sp) || (new_sp > (sp + kPtrSanityCheckLimit)))
  328. break;
  329. }
  330. return index;
  331. #endif
  332. }
  333. ///////////////////////////////////////////////////////////////////////////////
  334. // GetCallstackContext
  335. //
  336. EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
  337. {
  338. ScePthread self = scePthreadSelf();
  339. ScePthread pthread_Id = (ScePthread)threadId; // Requires that ScePthread is a pointer or integral type.
  340. if(scePthreadEqual(pthread_Id, self))
  341. {
  342. void* pInstruction;
  343. // This is some crazy GCC code that happens to work:
  344. pInstruction = ({ __label__ label; label: &&label; });
  345. context.mRIP = (uint64_t)pInstruction;
  346. context.mRSP = (uint64_t)__builtin_frame_address(1);
  347. context.mRBP = 0;
  348. }
  349. else
  350. {
  351. // There is currently no way to do this.
  352. memset(&context, 0, sizeof(context));
  353. return false;
  354. }
  355. return true;
  356. }
  357. ///////////////////////////////////////////////////////////////////////////////
  358. // GetCallstackContextSysThreadId
  359. //
  360. EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
  361. {
  362. // Assuming we are using pthreads, sysThreadId == threadId.
  363. return GetCallstackContext(context, sysThreadId);
  364. }
  365. ///////////////////////////////////////////////////////////////////////////////
  366. // GetCallstackContext
  367. //
  368. EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
  369. {
  370. context.mRIP = pContext->Rip;
  371. context.mRSP = pContext->Rsp;
  372. context.mRBP = pContext->Rbp;
  373. }
  374. ///////////////////////////////////////////////////////////////////////////////
  375. // GetModuleFromAddress
  376. //
  377. EATHREADLIB_API size_t GetModuleFromAddress(const void* /*address*/, char* pModuleName, size_t /*moduleNameCapacity*/)
  378. {
  379. // Not currently implemented for the given platform.
  380. pModuleName[0] = 0;
  381. return 0;
  382. }
  383. ///////////////////////////////////////////////////////////////////////////////
  384. // GetModuleHandleFromAddress
  385. //
  386. EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* /*pAddress*/)
  387. {
  388. // Not currently implemented for the given platform.
  389. return 0;
  390. }
  391. EA::Thread::ThreadLocalStorage sStackBase;
  392. ///////////////////////////////////////////////////////////////////////////////
  393. // SetStackBase
  394. //
  395. EATHREADLIB_API void SetStackBase(void* pStackBase)
  396. {
  397. if(pStackBase)
  398. sStackBase.SetValue(pStackBase);
  399. else
  400. {
  401. pStackBase = __builtin_frame_address(0);
  402. if(pStackBase)
  403. SetStackBase(pStackBase);
  404. // Else failure; do nothing.
  405. }
  406. }
  407. ///////////////////////////////////////////////////////////////////////////////
  408. // GetStackBase
  409. //
  410. EATHREADLIB_API void* GetStackBase()
  411. {
  412. void* pBase;
  413. if(GetPthreadStackInfo(&pBase, NULL))
  414. return pBase;
  415. // Else we require the user to have set this previously, usually via a call
  416. // to SetStackBase() in the start function of this currently executing
  417. // thread (or main for the main thread).
  418. pBase = sStackBase.GetValue();
  419. if(pBase == NULL)
  420. pBase = (void*)(((uintptr_t)&pBase + 4095) & ~4095); // Make a guess, round up to next 4096.
  421. return pBase;
  422. }
  423. ///////////////////////////////////////////////////////////////////////////////
  424. // GetStackLimit
  425. //
  426. EATHREADLIB_API void* GetStackLimit()
  427. {
  428. void* pLimit;
  429. if(GetPthreadStackInfo(NULL, &pLimit))
  430. return pLimit;
  431. pLimit = __builtin_frame_address(0);
  432. return (void*)((uintptr_t)pLimit & ~4095); // Round down to nearest page.
  433. }
  434. } // namespace Thread
  435. } // namespace EA