PIXEventsCommon.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. /*==========================================================================;
  2. *
  3. * Copyright (C) Microsoft Corporation. All Rights Reserved.
  4. *
  5. * File: PIXEventsCommon.h
  6. * Content: PIX include file
  7. * Don't include this file directly - use pix3.h
  8. *
  9. ****************************************************************************/
  10. #pragma once
  11. #ifndef _PIXEventsCommon_H_
  12. #define _PIXEventsCommon_H_
  13. #if defined(_AMD64_) || defined(_X86_)
  14. #include <emmintrin.h>
  15. #endif // _AMD64_ || _X86_
  16. enum PIXEventType
  17. {
  18. PIXEvent_EndEvent = 0x000,
  19. PIXEvent_BeginEvent_VarArgs = 0x001,
  20. PIXEvent_BeginEvent_NoArgs = 0x002,
  21. PIXEvent_SetMarker_VarArgs = 0x007,
  22. PIXEvent_SetMarker_NoArgs = 0x008,
  23. PIXEvent_EndEvent_OnContext = 0x010,
  24. PIXEvent_BeginEvent_OnContext_VarArgs = 0x011,
  25. PIXEvent_BeginEvent_OnContext_NoArgs = 0x012,
  26. PIXEvent_SetMarker_OnContext_VarArgs = 0x017,
  27. PIXEvent_SetMarker_OnContext_NoArgs = 0x018,
  28. };
  29. static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;
  30. //this is used to make sure SSE string copy always will end 16-byte write in the current block
  31. //this way only a check if destination < limit can be performed, instead of destination < limit - 1
  32. //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve
  33. //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block
  34. //on next iteration check destination will be greater than limit
  35. //this is used as well for fixed size UMD events and PIXEndEvent since these require less space
  36. //than other variable length user events and do not need big reserved space
  37. static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
  38. static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
  39. static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;
  40. //Bits 7-19 (13 bits)
  41. static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80;
  42. //Bits 10-19 (10 bits)
  43. static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00;
  44. static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF;
  45. static const UINT64 PIXEventsTypeBitShift = 10;
  46. //Bits 20-63 (44 bits)
  47. static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000;
  48. static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
  49. static const UINT64 PIXEventsTimestampBitShift = 20;
  50. inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
  51. {
  52. return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) |
  53. (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift);
  54. }
  55. //Bits 60-63 (4)
  56. static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F;
  57. static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000;
  58. static const UINT64 PIXEventsStringAlignmentBitShift = 60;
  59. //Bits 55-59 (5)
  60. static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
  61. static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000;
  62. static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55;
  63. //Bit 54
  64. static const UINT64 PIXEventsStringIsANSIWriteMask = 0x0000000000000001;
  65. static const UINT64 PIXEventsStringIsANSIReadMask = 0x0040000000000000;
  66. static const UINT64 PIXEventsStringIsANSIBitShift = 54;
  67. //Bit 53
  68. static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001;
  69. static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000;
  70. static const UINT64 PIXEventsStringIsShortcutBitShift = 53;
  71. inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
  72. {
  73. return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) |
  74. ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) |
  75. (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) |
  76. (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift);
  77. }
  78. template<UINT alignment, class T>
  79. inline bool PIXIsPointerAligned(T* pointer)
  80. {
  81. return !(((UINT64)pointer) & (alignment - 1));
  82. }
  83. template<class T>
  84. inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
  85. {
  86. if (destination < limit)
  87. {
  88. *((T*)destination) = argument;
  89. ++destination;
  90. }
  91. }
  92. //floats must be cast to double during writing the data to be properly printed later when reading the data
  93. //this is needed because when float is passed to varargs function it's cast to double
  94. template<>
  95. inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
  96. {
  97. if (destination < limit)
  98. {
  99. *((double*)destination) = (double)(argument);
  100. ++destination;
  101. }
  102. }
  103. //char has to be cast to a longer signed integer type
  104. //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
  105. template<>
  106. inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
  107. {
  108. if (destination < limit)
  109. {
  110. *((INT64*)destination) = (INT64)(argument);
  111. ++destination;
  112. }
  113. }
  114. //unsigned char has to be cast to a longer unsigned integer type
  115. //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
  116. template<>
  117. inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
  118. {
  119. if (destination < limit)
  120. {
  121. *destination = (UINT64)(argument);
  122. ++destination;
  123. }
  124. }
  125. //bool has to be cast to an integer since it's not explicitly supported by string format routines
  126. //there's no format specifier for bool type, but it should work with integer format specifiers
  127. template<>
  128. inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
  129. {
  130. if (destination < limit)
  131. {
  132. *destination = (UINT64)(argument);
  133. ++destination;
  134. }
  135. }
  136. inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
  137. {
  138. *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
  139. while (destination < limit)
  140. {
  141. UINT64 c = argument[0];
  142. if (!c)
  143. {
  144. *destination++ = 0;
  145. return;
  146. }
  147. UINT64 x = c;
  148. c = argument[1];
  149. if (!c)
  150. {
  151. *destination++ = x;
  152. return;
  153. }
  154. x |= c << 8;
  155. c = argument[2];
  156. if (!c)
  157. {
  158. *destination++ = x;
  159. return;
  160. }
  161. x |= c << 16;
  162. c = argument[3];
  163. if (!c)
  164. {
  165. *destination++ = x;
  166. return;
  167. }
  168. x |= c << 24;
  169. c = argument[4];
  170. if (!c)
  171. {
  172. *destination++ = x;
  173. return;
  174. }
  175. x |= c << 32;
  176. c = argument[5];
  177. if (!c)
  178. {
  179. *destination++ = x;
  180. return;
  181. }
  182. x |= c << 40;
  183. c = argument[6];
  184. if (!c)
  185. {
  186. *destination++ = x;
  187. return;
  188. }
  189. x |= c << 48;
  190. c = argument[7];
  191. if (!c)
  192. {
  193. *destination++ = x;
  194. return;
  195. }
  196. x |= c << 56;
  197. *destination++ = x;
  198. argument += 8;
  199. }
  200. }
  201. inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
  202. {
  203. if (PIXIsPointerAligned<8>(argument))
  204. {
  205. *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
  206. UINT64* source = (UINT64*)argument;
  207. while (destination < limit)
  208. {
  209. UINT64 qword = *source++;
  210. *destination++ = qword;
  211. //check if any of the characters is a terminating zero
  212. if (!((qword & 0xFF00000000000000) &&
  213. (qword & 0xFF000000000000) &&
  214. (qword & 0xFF0000000000) &&
  215. (qword & 0xFF00000000) &&
  216. (qword & 0xFF000000) &&
  217. (qword & 0xFF0000) &&
  218. (qword & 0xFF00) &&
  219. (qword & 0xFF)))
  220. {
  221. break;
  222. }
  223. }
  224. }
  225. else
  226. {
  227. PIXCopyEventArgumentSlowest(destination, limit, argument);
  228. }
  229. }
  230. template<>
  231. inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
  232. {
  233. if (destination < limit)
  234. {
  235. if (argument != nullptr)
  236. {
  237. #if defined(_AMD64_) || defined(_X86_)
  238. if (PIXIsPointerAligned<16>(argument))
  239. {
  240. *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
  241. __m128i zero = _mm_setzero_si128();
  242. if (PIXIsPointerAligned<16>(destination))
  243. {
  244. while (destination < limit)
  245. {
  246. __m128i mem = _mm_load_si128((__m128i*)argument);
  247. _mm_store_si128((__m128i*)destination, mem);
  248. //check if any of the characters is a terminating zero
  249. __m128i res = _mm_cmpeq_epi8(mem, zero);
  250. destination += 2;
  251. if (_mm_movemask_epi8(res))
  252. break;
  253. argument += 16;
  254. }
  255. }
  256. else
  257. {
  258. while (destination < limit)
  259. {
  260. __m128i mem = _mm_load_si128((__m128i*)argument);
  261. _mm_storeu_si128((__m128i*)destination, mem);
  262. //check if any of the characters is a terminating zero
  263. __m128i res = _mm_cmpeq_epi8(mem, zero);
  264. destination += 2;
  265. if (_mm_movemask_epi8(res))
  266. break;
  267. argument += 16;
  268. }
  269. }
  270. }
  271. else
  272. #endif // _AMD64_ || _X86_
  273. {
  274. PIXCopyEventArgumentSlow(destination, limit, argument);
  275. }
  276. }
  277. else
  278. {
  279. *destination++ = 0ull;
  280. }
  281. }
  282. }
  283. template<>
  284. inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
  285. {
  286. PIXCopyEventArgument(destination, limit, (PCSTR)argument);
  287. }
  288. inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
  289. {
  290. *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
  291. while (destination < limit)
  292. {
  293. UINT64 c = argument[0];
  294. if (!c)
  295. {
  296. *destination++ = 0;
  297. return;
  298. }
  299. UINT64 x = c;
  300. c = argument[1];
  301. if (!c)
  302. {
  303. *destination++ = x;
  304. return;
  305. }
  306. x |= c << 16;
  307. c = argument[2];
  308. if (!c)
  309. {
  310. *destination++ = x;
  311. return;
  312. }
  313. x |= c << 32;
  314. c = argument[3];
  315. if (!c)
  316. {
  317. *destination++ = x;
  318. return;
  319. }
  320. x |= c << 48;
  321. *destination++ = x;
  322. argument += 4;
  323. }
  324. }
  325. inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
  326. {
  327. if (PIXIsPointerAligned<8>(argument))
  328. {
  329. *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
  330. UINT64* source = (UINT64*)argument;
  331. while (destination < limit)
  332. {
  333. UINT64 qword = *source++;
  334. *destination++ = qword;
  335. //check if any of the characters is a terminating zero
  336. //TODO: check if reversed condition is faster
  337. if (!((qword & 0xFFFF000000000000) &&
  338. (qword & 0xFFFF00000000) &&
  339. (qword & 0xFFFF0000) &&
  340. (qword & 0xFFFF)))
  341. {
  342. break;
  343. }
  344. }
  345. }
  346. else
  347. {
  348. PIXCopyEventArgumentSlowest(destination, limit, argument);
  349. }
  350. }
  351. template<>
  352. inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
  353. {
  354. if (destination < limit)
  355. {
  356. if (argument != nullptr)
  357. {
  358. #if defined(_AMD64_) || defined(_X86_)
  359. if (PIXIsPointerAligned<16>(argument))
  360. {
  361. *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
  362. __m128i zero = _mm_setzero_si128();
  363. if (PIXIsPointerAligned<16>(destination))
  364. {
  365. while (destination < limit)
  366. {
  367. __m128i mem = _mm_load_si128((__m128i*)argument);
  368. _mm_store_si128((__m128i*)destination, mem);
  369. //check if any of the characters is a terminating zero
  370. __m128i res = _mm_cmpeq_epi16(mem, zero);
  371. destination += 2;
  372. if (_mm_movemask_epi8(res))
  373. break;
  374. argument += 8;
  375. }
  376. }
  377. else
  378. {
  379. while (destination < limit)
  380. {
  381. __m128i mem = _mm_load_si128((__m128i*)argument);
  382. _mm_storeu_si128((__m128i*)destination, mem);
  383. //check if any of the characters is a terminating zero
  384. __m128i res = _mm_cmpeq_epi16(mem, zero);
  385. destination += 2;
  386. if (_mm_movemask_epi8(res))
  387. break;
  388. argument += 8;
  389. }
  390. }
  391. }
  392. else
  393. #endif // _AMD64_ || _X86_
  394. {
  395. PIXCopyEventArgumentSlow(destination, limit, argument);
  396. }
  397. }
  398. else
  399. {
  400. *destination++ = 0ull;
  401. }
  402. }
  403. }
  404. template<>
  405. inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
  406. {
  407. PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
  408. };
  409. #if defined(__d3d12_x_h__) || defined(__d3d12_h__)
  410. inline void PIXSetMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
  411. {
  412. commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
  413. }
  414. inline void PIXSetMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
  415. {
  416. commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
  417. }
  418. inline void PIXBeginEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
  419. {
  420. commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
  421. }
  422. inline void PIXBeginEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
  423. {
  424. commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
  425. }
  426. inline void PIXEndEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
  427. {
  428. commandList->EndEvent();
  429. }
  430. inline void PIXEndEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
  431. {
  432. commandQueue->EndEvent();
  433. }
  434. #endif //__d3d12_x_h__
  435. template<class T> struct PIXInferScopedEventType { typedef T Type; };
  436. template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
  437. template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
  438. template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
  439. template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
  440. template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
  441. template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
  442. template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
  443. template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
  444. template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
  445. template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
  446. template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
  447. #endif //_PIXEventsCommon_H_