Просмотр исходного кода

3rdparty: update {bx,bimg,bgfx}

Daniele Bartolini 8 лет назад
Родитель
Коммит
405bb69e2d
100 изменённых файлов: 12818 добавлено и 1119 удалено
  1. 4 4
      3rdparty/bgfx/.travis.yml
  2. 483 0
      3rdparty/bgfx/3rdparty/dxsdk/include/PIXEventsCommon.h
  3. 10748 0
      3rdparty/bgfx/3rdparty/dxsdk/include/PIXEventsGenerated.h
  4. 116 0
      3rdparty/bgfx/3rdparty/dxsdk/include/pix3.h
  5. 53 0
      3rdparty/bgfx/3rdparty/dxsdk/include/pix3_win.h
  6. 9 2
      3rdparty/bgfx/3rdparty/fcpp/cpp1.c
  7. 1 1
      3rdparty/bgfx/3rdparty/fcpp/cpp3.c
  8. 24 1
      3rdparty/bgfx/3rdparty/fcpp/cpp6.c
  9. 2 1
      3rdparty/bgfx/3rdparty/fcpp/cppadd.h
  10. 64 53
      3rdparty/bgfx/3rdparty/forsyth-too/forsythtriangleorderoptimizer.cpp
  11. 21 17
      3rdparty/bgfx/3rdparty/forsyth-too/forsythtriangleorderoptimizer.h
  12. 175 146
      3rdparty/bgfx/3rdparty/glsl-optimizer/src/glsl/glcpp/glcpp-lex.c
  13. 4 0
      3rdparty/bgfx/3rdparty/glsl-optimizer/src/glsl/glsl_optimizer.cpp
  14. 0 7
      3rdparty/bgfx/3rdparty/glslang/SPIRV/GLSL.ext.AMD.h
  15. 39 0
      3rdparty/bgfx/3rdparty/glslang/SPIRV/GLSL.ext.EXT.h
  16. 115 64
      3rdparty/bgfx/3rdparty/glslang/SPIRV/GlslangToSpv.cpp
  17. 88 67
      3rdparty/bgfx/3rdparty/glslang/SPIRV/SpvBuilder.cpp
  18. 11 6
      3rdparty/bgfx/3rdparty/glslang/SPIRV/SpvBuilder.h
  19. 6 0
      3rdparty/bgfx/3rdparty/glslang/SPIRV/doc.cpp
  20. 52 5
      3rdparty/bgfx/3rdparty/glslang/SPIRV/spirv.hpp
  21. 1 4
      3rdparty/bgfx/3rdparty/glslang/SPIRV/spvIR.h
  22. 10 2
      3rdparty/bgfx/3rdparty/glslang/StandAlone/StandAlone.cpp
  23. 6 0
      3rdparty/bgfx/3rdparty/glslang/Test/310AofA.vert
  24. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/310implicitSizeArrayError.vert
  25. 6 0
      3rdparty/bgfx/3rdparty/glslang/Test/440.vert
  26. 8 0
      3rdparty/bgfx/3rdparty/glslang/Test/450.vert
  27. 15 0
      3rdparty/bgfx/3rdparty/glslang/Test/460.frag
  28. 1 9
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.aliasOpaque.frag.out
  29. 1 8
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaque.frag.out
  30. 1 5
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaqueInit.vert.out
  31. 1 6
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaqueInitMix.vert.out
  32. 1 21
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenSubset.frag.out
  33. 1 18
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenSubset2.frag.out
  34. 26 53
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.partialFlattenLocal.vert.out
  35. 1 15
      3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.partialFlattenMixed.vert.out
  36. 4 2
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/120.frag.out
  37. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/120.vert.out
  38. 19 0
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/310AofA.vert.out
  39. 4 4
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/310implicitSizeArrayError.vert.out
  40. 3 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/330.frag.out
  41. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/400.geom.out
  42. 44 39
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/440.vert.out
  43. 7 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/450.vert.out
  44. 48 2
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/460.frag.out
  45. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glsl.entryPointRename.vert.bad.out
  46. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glsl.entryPointRename.vert.out
  47. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glspv.version.frag.out
  48. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.PointSize.geom.out
  49. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.PointSize.vert.out
  50. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.aliasOpaque.frag.out
  51. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.amend.frag.out
  52. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.flatten.frag.out
  53. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.frag.out
  54. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.implicit-size.frag.out
  55. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.multidim.frag.out
  56. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.assoc.frag.out
  57. 3 3
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attribute.expression.comp.out
  58. 3 3
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attribute.frag.out
  59. 71 54
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attributeC11.frag.out
  60. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attributeGlobalBuffer.frag.out
  61. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.basic.comp.out
  62. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.basic.geom.out
  63. 177 207
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.boolConv.vert.out
  64. 255 201
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.buffer.frag.out
  65. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.calculatelod.dx10.frag.out
  66. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.cast.frag.out
  67. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.cbuffer-identifier.vert.out
  68. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.charLit.vert.out
  69. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clip.frag.out
  70. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.frag.out
  71. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.geom.out
  72. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.vert.out
  73. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.frag.out
  74. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.geom.out
  75. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.vert.out
  76. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.frag.out
  77. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.geom.out
  78. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.vert.out
  79. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.frag.out
  80. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.geom.out
  81. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.vert.out
  82. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-5.frag.out
  83. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-5.vert.out
  84. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-6.frag.out
  85. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-6.vert.out
  86. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-7.frag.out
  87. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-7.vert.out
  88. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-8.frag.out
  89. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-8.vert.out
  90. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-9.frag.out
  91. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-9.vert.out
  92. 33 34
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.color.hull.tesc.out
  93. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.comparison.vec.frag.out
  94. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.conditional.frag.out
  95. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constantbuffer.frag.out
  96. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructArray.vert.out
  97. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructexpr.frag.out
  98. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructimat.frag.out
  99. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.dashI.vert.out
  100. 1 1
      3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.deadFunctionMissingBody.vert.out

+ 4 - 4
3rdparty/bgfx/.travis.yml

@@ -11,8 +11,8 @@ addons:
     sources:
     - ubuntu-toolchain-r-test
     packages:
-    - gcc-4.8
-    - g++-4.8
+    - gcc-5
+    - g++-5
     - clang
 
 before_script:
@@ -20,7 +20,7 @@ before_script:
   - git clone --depth 1 https://github.com/bkaradzic/bimg ../bimg
 
 script:
-  - if [ "$TRAVIS_OS_NAME" == "linux" ]; then make build CXX="g++-4.8" CC="gcc-4.8"; fi
+  - if [ "$TRAVIS_OS_NAME" == "linux" ]; then make build CXX="g++-5" CC="gcc-5"; fi
   - if [ "$TRAVIS_OS_NAME" == "osx" ]; then make build; fi
 
 branches:
@@ -30,4 +30,4 @@ branches:
 notifications:
   email: false
 
-osx_image: xcode7.3
+osx_image: xcode9.3

+ 483 - 0
3rdparty/bgfx/3rdparty/dxsdk/include/PIXEventsCommon.h

@@ -0,0 +1,483 @@
+/*==========================================================================;
+*
+*  Copyright (C) Microsoft Corporation.  All Rights Reserved.
+*
+*  File:       PIXEventsCommon.h
+*  Content:    PIX include file
+*              Don't include this file directly - use pix3.h
+*
+****************************************************************************/
+#pragma once
+
+#ifndef _PIXEventsCommon_H_
+#define _PIXEventsCommon_H_
+
+#if defined(_AMD64_) || defined(_X86_)
+#include <emmintrin.h>
+#endif // _AMD64_ || _X86_
+
+// Identifiers for the kinds of PIX event records.
+// Each *_OnContext value equals the corresponding plain value plus 0x010.
+enum PIXEventType
+{
+    PIXEvent_EndEvent                       = 0x000,
+    PIXEvent_BeginEvent_VarArgs             = 0x001,
+    PIXEvent_BeginEvent_NoArgs              = 0x002,
+    PIXEvent_SetMarker_VarArgs              = 0x007,
+    PIXEvent_SetMarker_NoArgs               = 0x008,
+
+    PIXEvent_EndEvent_OnContext             = 0x010,
+    PIXEvent_BeginEvent_OnContext_VarArgs   = 0x011,
+    PIXEvent_BeginEvent_OnContext_NoArgs    = 0x012,
+    PIXEvent_SetMarker_OnContext_VarArgs    = 0x017,
+    PIXEvent_SetMarker_OnContext_NoArgs     = 0x018,
+};
+
+static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;
+//Reserved tail space: ensures that an SSE string copy always finishes its 16-byte write inside the current block.
+//Thanks to it, copy loops only have to test destination < limit instead of destination < limit - 1:
+//destination and limit are both UINT64* while SSE writes 16-byte chunks, so 8 bytes are kept in reserve.
+//Even if an SSE store overwrites 8 extra bytes, those bytes still belong to the correct block,
+//and on the next iteration check destination will be greater than limit.
+//The same reserve is also used for fixed-size UMD events and PIXEndEvent, since these require less space
+//than other variable-length user events and do not need a big reserved space.
+static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
+static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;
+
+//Block end marker: bits 7-19 set (13 bits)
+static const UINT64 PIXEventsBlockEndMarker     = 0x00000000000FFF80;
+
+//Event type field: bits 10-19 (10 bits).
+//The write mask is applied before shifting left; the read mask selects the field in an encoded value.
+static const UINT64 PIXEventsTypeReadMask       = 0x00000000000FFC00;
+static const UINT64 PIXEventsTypeWriteMask      = 0x00000000000003FF;
+static const UINT64 PIXEventsTypeBitShift       = 10;
+
+//Timestamp field: bits 20-63 (44 bits)
+static const UINT64 PIXEventsTimestampReadMask  = 0xFFFFFFFFFFF00000;
+static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
+static const UINT64 PIXEventsTimestampBitShift  = 20;
+
+//Packs a timestamp and an event type into a single 64-bit event header.
+//The timestamp is truncated by PIXEventsTimestampWriteMask and the event type by
+//PIXEventsTypeWriteMask before each is shifted into its field; all other bits are zero.
+inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
+{
+    const UINT64 timestampField = (timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift;
+    const UINT64 typeField = ((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift;
+    return timestampField | typeField;
+}
+
+//Fields of the 64-bit string-info header. For every field the write mask is applied
+//to the raw value before shifting left; the read mask selects the field in an encoded header.
+
+//String alignment: bits 60-63 (4 bits)
+static const UINT64 PIXEventsStringAlignmentWriteMask     = 0x000000000000000F;
+static const UINT64 PIXEventsStringAlignmentReadMask      = 0xF000000000000000;
+static const UINT64 PIXEventsStringAlignmentBitShift      = 60;
+
+//Copy chunk size: bits 55-59 (5 bits)
+static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
+static const UINT64 PIXEventsStringCopyChunkSizeReadMask  = 0x0F80000000000000;
+static const UINT64 PIXEventsStringCopyChunkSizeBitShift  = 55;
+
+//ANSI flag: bit 54
+static const UINT64 PIXEventsStringIsANSIWriteMask        = 0x0000000000000001;
+static const UINT64 PIXEventsStringIsANSIReadMask         = 0x0040000000000000;
+static const UINT64 PIXEventsStringIsANSIBitShift         = 54;
+
+//Shortcut flag: bit 53
+static const UINT64 PIXEventsStringIsShortcutWriteMask    = 0x0000000000000001;
+static const UINT64 PIXEventsStringIsShortcutReadMask     = 0x0020000000000000;
+static const UINT64 PIXEventsStringIsShortcutBitShift     = 53;
+
+//Builds the 64-bit header that precedes a string in the event buffer.
+//Each input is truncated by its write mask and shifted into its own field;
+//the four fields are then OR-ed together.
+inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
+{
+    const UINT64 alignmentField = (alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift;
+    const UINT64 chunkSizeField = (copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift;
+    const UINT64 ansiField = ((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift;
+    const UINT64 shortcutField = ((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift;
+    return alignmentField | chunkSizeField | ansiField | shortcutField;
+}
+
+//Returns true when none of the address bits selected by (alignment - 1) are set.
+//This is an alignment test only when alignment is a power of two.
+template<UINT alignment, class T>
+inline bool PIXIsPointerAligned(T* pointer)
+{
+    const UINT64 address = (UINT64)pointer;
+    return (address & (alignment - 1)) == 0;
+}
+
+//Generic case: stores the argument at the start of the next qword slot and
+//advances destination by one qword. Does nothing once destination has reached limit.
+template<class T>
+inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
+{
+    if (destination >= limit)
+    {
+        return;
+    }
+
+    T* slot = (T*)destination;
+    *slot = argument;
+    ++destination;
+}
+
+//A float is widened to double when written so that it prints correctly when the data is read back.
+//This mirrors varargs calls, where a float argument is converted to double.
+template<>
+inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
+{
+    if (destination >= limit)
+    {
+        return;
+    }
+
+    const double widened = (double)(argument);
+    *((double*)destination) = widened;
+    ++destination;
+}
+
+//A char is widened to a longer signed integer type before being stored.
+//This is because printf does not correctly ignore the upper bits of an unsigned long long
+//when given a char format specifier.
+template<>
+inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
+{
+    if (destination >= limit)
+    {
+        return;
+    }
+
+    const INT64 widened = (INT64)(argument);
+    *((INT64*)destination) = widened;
+    ++destination;
+}
+
+//An unsigned char is widened to a longer unsigned integer type before being stored.
+//This is because printf does not correctly ignore the upper bits of an unsigned long long
+//when given a char format specifier.
+template<>
+inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
+{
+    if (destination >= limit)
+    {
+        return;
+    }
+
+    const UINT64 widened = (UINT64)(argument);
+    *destination = widened;
+    ++destination;
+}
+
+//A bool is stored as an integer because string format routines do not support it explicitly:
+//there is no format specifier for bool, but it should work with the integer format specifiers.
+template<>
+inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
+{
+    if (destination >= limit)
+    {
+        return;
+    }
+
+    const UINT64 widened = (UINT64)(argument);
+    *destination = widened;
+    ++destination;
+}
+
+//Byte-by-byte copy of a narrow string into the event buffer; makes no assumption
+//about the alignment of the source.
+//Writes a string-info header (chunk size 8, ANSI), then packs up to eight characters
+//into each qword, first character in the lowest byte. The qword that holds the
+//terminator is zero-padded above it; a string whose length is a multiple of eight is
+//followed by an all-zero qword. Copying stops early, without a terminator, once
+//destination reaches limit.
+//The header is written without a bounds check: the caller must ensure destination < limit.
+inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
+{
+    *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
+    while (destination < limit)
+    {
+        UINT64 packed = 0;
+        for (UINT i = 0; i < 8; ++i)
+        {
+            //go through unsigned char: where char is signed, a byte >= 0x80 would otherwise
+            //be sign-extended to 64 bits and set every higher byte of the packed qword
+            const UINT64 c = (unsigned char)argument[i];
+            if (!c)
+            {
+                *destination++ = packed;
+                return;
+            }
+            packed |= c << (8 * i);
+        }
+        *destination++ = packed;
+        argument += 8;
+    }
+}
+
+//Copies a narrow string into the event buffer without SSE.
+//An 8-byte-aligned source is copied one qword at a time, after a string-info header
+//(chunk size 8, ANSI); any other source is handed to PIXCopyEventArgumentSlowest.
+//The qword containing the terminator is copied whole, and copying also stops once
+//destination reaches limit.
+inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
+{
+    if (!PIXIsPointerAligned<8>(argument))
+    {
+        PIXCopyEventArgumentSlowest(destination, limit, argument);
+        return;
+    }
+
+    *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
+    const UINT64* chunk = (const UINT64*)argument;
+    while (destination < limit)
+    {
+        const UINT64 qword = *chunk++;
+        *destination++ = qword;
+        //stop as soon as any of the eight bytes is a terminating zero
+        if (!(qword & 0xFF00000000000000) ||
+            !(qword & 0xFF000000000000) ||
+            !(qword & 0xFF0000000000) ||
+            !(qword & 0xFF00000000) ||
+            !(qword & 0xFF000000) ||
+            !(qword & 0xFF0000) ||
+            !(qword & 0xFF00) ||
+            !(qword & 0xFF))
+        {
+            break;
+        }
+    }
+}
+
+//Specialization for narrow strings.
+//Nothing is written when destination has already reached limit.
+//A null pointer is recorded as a single zero qword.
+//On x86/x64 a 16-byte-aligned source is copied with SSE in 16-byte chunks, after a
+//string-info header with chunk size 16; every other source goes through PIXCopyEventArgumentSlow.
+template<>
+inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
+{
+    if (destination < limit)
+    {
+        if (argument != nullptr)
+        {
+#if defined(_AMD64_) || defined(_X86_)
+            if (PIXIsPointerAligned<16>(argument))
+            {
+                *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
+                __m128i zero = _mm_setzero_si128();
+                //the two loops below differ only in the store: aligned vs. unaligned destination
+                if (PIXIsPointerAligned<16>(destination))
+                {
+                    while (destination < limit)
+                    {
+                        __m128i mem = _mm_load_si128((__m128i*)argument);
+                        //the chunk is stored in full before the terminator test, so the bytes
+                        //following the terminator within the chunk are copied as well
+                        //NOTE(review): with destination == limit - 1 this store extends 8 bytes
+                        //past limit; presumably covered by the reserved tail space - confirm
+                        _mm_store_si128((__m128i*)destination, mem);
+                        //check if any of the characters is a terminating zero
+                        __m128i res = _mm_cmpeq_epi8(mem, zero);
+                        destination += 2;
+                        if (_mm_movemask_epi8(res))
+                            break;
+                        argument += 16;
+                    }
+                }
+                else
+                {
+                    while (destination < limit)
+                    {
+                        __m128i mem = _mm_load_si128((__m128i*)argument);
+                        _mm_storeu_si128((__m128i*)destination, mem);
+                        //check if any of the characters is a terminating zero
+                        __m128i res = _mm_cmpeq_epi8(mem, zero);
+                        destination += 2;
+                        if (_mm_movemask_epi8(res))
+                            break;
+                        argument += 16;
+                    }
+                }
+            }
+            else
+#endif // _AMD64_ || _X86_
+            {
+                PIXCopyEventArgumentSlow(destination, limit, argument);
+            }
+        }
+        else
+        {
+            //null string: a single zero qword
+            *destination++ = 0ull;
+        }
+    }
+}
+
+//Mutable narrow strings are recorded exactly like constant ones:
+//the call is forwarded to the PCSTR specialization.
+template<>
+inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
+{
+    PCSTR constArgument = argument;
+    PIXCopyEventArgument(destination, limit, constArgument);
+}
+
+//Character-by-character copy of a wide string into the event buffer; makes no
+//assumption about the alignment of the source.
+//Writes a string-info header (chunk size 8, not ANSI), then packs up to four
+//characters into each qword, first character in the lowest 16 bits. The qword that
+//holds the terminator is zero-padded above it. Copying stops early, without a
+//terminator, once destination reaches limit.
+//The header is written without a bounds check.
+inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
+{
+    *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
+    while (destination < limit)
+    {
+        UINT64 packed = 0;
+        for (UINT i = 0; i < 4; ++i)
+        {
+            const UINT64 c = argument[i];
+            if (!c)
+            {
+                *destination++ = packed;
+                return;
+            }
+            packed |= c << (16 * i);
+        }
+        *destination++ = packed;
+        argument += 4;
+    }
+}
+
+//Copies a wide string into the event buffer without SSE.
+//An 8-byte-aligned source is copied one qword (four characters) at a time, after a
+//string-info header (chunk size 8, not ANSI); any other source is handed to
+//PIXCopyEventArgumentSlowest. The qword containing the terminator is copied whole,
+//and copying also stops once destination reaches limit.
+inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
+{
+    if (!PIXIsPointerAligned<8>(argument))
+    {
+        PIXCopyEventArgumentSlowest(destination, limit, argument);
+        return;
+    }
+
+    *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
+    const UINT64* chunk = (const UINT64*)argument;
+    while (destination < limit)
+    {
+        const UINT64 qword = *chunk++;
+        *destination++ = qword;
+        //stop as soon as any of the four characters is a terminating zero
+        //TODO (carried over): measure which form of this condition is faster
+        if (!(qword & 0xFFFF000000000000) ||
+            !(qword & 0xFFFF00000000) ||
+            !(qword & 0xFFFF0000) ||
+            !(qword & 0xFFFF))
+        {
+            break;
+        }
+    }
+}
+
+//Specialization for wide strings.
+//Nothing is written when destination has already reached limit.
+//A null pointer is recorded as a single zero qword.
+//On x86/x64 a 16-byte-aligned source is copied with SSE in 16-byte chunks (eight
+//characters), after a string-info header with chunk size 16; every other source goes
+//through PIXCopyEventArgumentSlow.
+template<>
+inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
+{
+    if (destination < limit)
+    {
+        if (argument != nullptr)
+        {
+#if defined(_AMD64_) || defined(_X86_)
+            if (PIXIsPointerAligned<16>(argument))
+            {
+                *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
+                __m128i zero = _mm_setzero_si128();
+                //the two loops below differ only in the store: aligned vs. unaligned destination
+                if (PIXIsPointerAligned<16>(destination))
+                {
+                    while (destination < limit)
+                    {
+                        __m128i mem = _mm_load_si128((__m128i*)argument);
+                        //the chunk is stored in full before the terminator test, so the characters
+                        //following the terminator within the chunk are copied as well
+                        //NOTE(review): with destination == limit - 1 this store extends 8 bytes
+                        //past limit; presumably covered by the reserved tail space - confirm
+                        _mm_store_si128((__m128i*)destination, mem);
+                        //check if any of the characters is a terminating zero
+                        //(16-bit lanes, so a zero byte inside a non-zero character does not match)
+                        __m128i res = _mm_cmpeq_epi16(mem, zero);
+                        destination += 2;
+                        if (_mm_movemask_epi8(res))
+                            break;
+                        //8 wide characters = 16 bytes
+                        argument += 8;
+                    }
+                }
+                else
+                {
+                    while (destination < limit)
+                    {
+                        __m128i mem = _mm_load_si128((__m128i*)argument);
+                        _mm_storeu_si128((__m128i*)destination, mem);
+                        //check if any of the characters is a terminating zero
+                        __m128i res = _mm_cmpeq_epi16(mem, zero);
+                        destination += 2;
+                        if (_mm_movemask_epi8(res))
+                            break;
+                        argument += 8;
+                    }
+                }
+            }
+            else
+#endif // _AMD64_ || _X86_
+            {
+                PIXCopyEventArgumentSlow(destination, limit, argument);
+            }
+        }
+        else
+        {
+            //null string: a single zero qword
+            *destination++ = 0ull;
+        }
+    }
+}
+
+//Mutable wide strings are recorded exactly like constant ones:
+//the call is forwarded to the PCWSTR specialization.
+template<>
+inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
+{
+    PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
+}
+
+//Thin wrappers over the D3D12 event methods, overloaded for command lists and command queues.
+//Compiled only when one of the D3D12 header guard macros below is defined.
+#if defined(__d3d12_x_h__) || defined(__d3d12_h__)
+
+//Forwards `size` bytes of encoded event data to SetMarker with the D3D12_EVENT_METADATA type.
+inline void PIXSetMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
+{
+    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
+}
+
+inline void PIXSetMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
+{
+    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
+}
+
+//Forwards `size` bytes of encoded event data to BeginEvent with the D3D12_EVENT_METADATA type.
+inline void PIXBeginEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
+{
+    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
+}
+
+inline void PIXBeginEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
+{
+    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
+}
+//Forwards to EndEvent on the given context.
+inline void PIXEndEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
+{
+    commandList->EndEvent();
+}
+
+inline void PIXEndEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
+{
+    commandQueue->EndEvent();
+}
+
+#endif //__d3d12_x_h__ || __d3d12_h__
+
+//Type trait mapping an argument type to a nested `Type`:
+//  - T, const T       -> T
+//  - T*, T* const     -> T (the pointed-to type)
+//  - UINT64, INT64, UINT, INT (plain or const) -> void
+//NOTE(review): presumably used to tell a context pointer apart from an integer
+//first argument (such as a color) in the scoped-event helpers - confirm against pix3.h.
+template<class T> struct PIXInferScopedEventType { typedef T Type; };
+template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
+template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
+template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
+template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
+template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
+template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
+template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
+template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
+template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
+template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
+template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
+#endif //_PIXEventsCommon_H_

+ 10748 - 0
3rdparty/bgfx/3rdparty/dxsdk/include/PIXEventsGenerated.h

@@ -0,0 +1,10748 @@
+//This is a generated file.
+#pragma once
+
+#ifndef _PIXEventsGenerated_H_
+#define _PIXEventsGenerated_H_
+
+#ifndef _PIX3_H_
+#error "Don't include this file directly - use pix3.h"
+#endif
+
+#include "PIXEventsCommon.h"
+
+//__declspec(noinline) is specified to stop the compiler from making bad inlining decisions
+//inline has to be specified for functions fully defined in a header due to the one definition rule
+//supported context types for TContext are ID3D11DeviceContextX, ID3D11ComputeContextX and ID3D11DmaEngineContextX
+
+// Writes a BeginEvent (NoArgs) record - header word, color, format string - into the
+// buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_NoArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1 -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1 and a2 -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a3 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a4 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a5 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a6 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a7 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a8 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a9 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a10 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a11 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a12 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a13 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a14 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a15 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a BeginEvent (VarArgs) record - header word, color, format string, then a1..a16 in order -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (NoArgs) record - header word, color, format string -
+// into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_NoArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1 - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1 and a2 - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a3 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a4 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a5 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a6 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a7 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PCWSTR overload: writes a BeginEvent (VarArgs) record - header word, color, format string,
+// then a1..a8 in order - into the buffer described by PIXGetThreadInfo(). Nothing is written when
+// PIXEventsReplaceBlock(false) returns 0 or the write position is not below the limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (timestamp == 0)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Mark the end of the block at the new write position, then store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 9 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 10 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 11 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 12 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 13 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 14 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+    PIXCopyEventArgument(writePos, writeLimit, a14);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 15 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+    PIXCopyEventArgument(writePos, writeLimit, a14);
+    PIXCopyEventArgument(writePos, writeLimit, a15);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Wide-string (PCWSTR) begin-event writer taking 16 arguments. Does nothing
+// unless PIXEventsReplaceBlock(false) returns a non-zero timestamp and the
+// calling thread's destination pointer is still below its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void PIXBeginEventAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->limit;
+    if (writePos >= writeLimit)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+    PIXCopyEventArgument(writePos, writeLimit, a14);
+    PIXCopyEventArgument(writePos, writeLimit, a15);
+    PIXCopyEventArgument(writePos, writeLimit, a16);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and no extra
+// arguments. Writes directly while the thread's destination is below
+// biasedLimit; otherwise forwards to PIXBeginEventAllocate, unless
+// biasedLimit is null, in which case nothing is recorded.
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_NoArgs);
+    *writePos++ = color;
+
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 1 argument.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 2 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 3 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 4 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 5 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 6 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 7 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 8 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 9 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 10 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 11 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 12 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 13 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a begin event with a narrow (PCSTR) format string and 14 arguments.
+// Writes directly while the thread's destination is below biasedLimit;
+// otherwise forwards to PIXBeginEventAllocate, unless biasedLimit is null,
+// in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeLimit = info->biasedLimit;
+
+    if (writePos >= writeLimit)
+    {
+        if (writeLimit != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    // Extend the biased limit by PIXEventsSafeFastCopySpaceQwords for the copies below.
+    writeLimit += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+
+    // Event header: encoded timestamp/type word, then the color.
+    *writePos++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *writePos++ = color;
+
+    // Payload: the format string followed by each argument in order.
+    PIXCopyEventArgument(writePos, writeLimit, formatString);
+    PIXCopyEventArgument(writePos, writeLimit, a1);
+    PIXCopyEventArgument(writePos, writeLimit, a2);
+    PIXCopyEventArgument(writePos, writeLimit, a3);
+    PIXCopyEventArgument(writePos, writeLimit, a4);
+    PIXCopyEventArgument(writePos, writeLimit, a5);
+    PIXCopyEventArgument(writePos, writeLimit, a6);
+    PIXCopyEventArgument(writePos, writeLimit, a7);
+    PIXCopyEventArgument(writePos, writeLimit, a8);
+    PIXCopyEventArgument(writePos, writeLimit, a9);
+    PIXCopyEventArgument(writePos, writeLimit, a10);
+    PIXCopyEventArgument(writePos, writeLimit, a11);
+    PIXCopyEventArgument(writePos, writeLimit, a12);
+    PIXCopyEventArgument(writePos, writeLimit, a13);
+    PIXCopyEventArgument(writePos, writeLimit, a14);
+
+    // Write the end marker at the new position and store that position back.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// PIXBeginEvent overload: PCSTR format string plus 15 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCSTR format string plus 16 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXBeginEvent(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string with no extra arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_NoArgs info word, the color and the format
+// string, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_NoArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 1 argument.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and the argument, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 2 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 3 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 4 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 5 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 6 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 7 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 8 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 9 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 10 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 11 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 12 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 13 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 14 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 15 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXBeginEvent overload: PCWSTR format string plus 16 arguments.
+// When the write pointer obtained from PIXGetThreadInfo() is below biasedLimit,
+// writes the PIXEvent_BeginEvent_VarArgs info word, the color, the format
+// string and every argument in order, followed by PIXEventsBlockEndMarker.
+// Otherwise, if biasedLimit is non-null, forwards to PIXBeginEventAllocate.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXBeginEvent(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // Write pointer is not below biasedLimit: use the allocating variant,
+        // unless biasedLimit is null, in which case nothing is done.
+        if (end != nullptr)
+        {
+            PIXBeginEventAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords.
+    end += PIXEventsSafeFastCopySpaceQwords;
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate overload: PCSTR format string with no extra arguments.
+// Calls PIXEventsReplaceBlock(false) and uses its return value as the event
+// time; a zero return means nothing is written. Then, if the write pointer
+// from PIXGetThreadInfo() is below limit, writes the PIXEvent_SetMarker_NoArgs
+// info word, the color and the format string, followed by
+// PIXEventsBlockEndMarker.
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        // No room below limit: leave the thread info untouched.
+        return;
+    }
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_NoArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate overload: PCSTR format string plus 1 argument.
+// Calls PIXEventsReplaceBlock(false) and uses its return value as the event
+// time; a zero return means nothing is written. Then, if the write pointer
+// from PIXGetThreadInfo() is below limit, writes the PIXEvent_SetMarker_VarArgs
+// info word, the color, the format string and the argument, followed by
+// PIXEventsBlockEndMarker.
+template<class T1>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        // No room below limit: leave the thread info untouched.
+        return;
+    }
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate overload: PCSTR format string plus 2 arguments.
+// Calls PIXEventsReplaceBlock(false) and uses its return value as the event
+// time; a zero return means nothing is written. Then, if the write pointer
+// from PIXGetThreadInfo() is below limit, writes the PIXEvent_SetMarker_VarArgs
+// info word, the color, the format string and every argument in order,
+// followed by PIXEventsBlockEndMarker.
+template<class T1, class T2>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        // No room below limit: leave the thread info untouched.
+        return;
+    }
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate overload: PCSTR format string plus 3 arguments.
+// Calls PIXEventsReplaceBlock(false) and uses its return value as the event
+// time; a zero return means nothing is written. Then, if the write pointer
+// from PIXGetThreadInfo() is below limit, writes the PIXEvent_SetMarker_VarArgs
+// info word, the color, the format string and every argument in order,
+// followed by PIXEventsBlockEndMarker.
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        // No room below limit: leave the thread info untouched.
+        return;
+    }
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate overload: PCSTR format string plus 4 arguments.
+// Calls PIXEventsReplaceBlock(false) and uses its return value as the event
+// time; a zero return means nothing is written. Then, if the write pointer
+// from PIXGetThreadInfo() is below limit, writes the PIXEvent_SetMarker_VarArgs
+// info word, the color, the format string and every argument in order,
+// followed by PIXEventsBlockEndMarker.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        // No room below limit: leave the thread info untouched.
+        return;
+    }
+
+    // First two qwords: value from PIXEncodeEventInfo, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Write the end marker without advancing, then store the pointer back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 5 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 6 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 7 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 8 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 9 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 10 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 11 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 12 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+    PIXCopyEventArgument(writePos, writeEnd, a12);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 13 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+    PIXCopyEventArgument(writePos, writeEnd, a12);
+    PIXCopyEventArgument(writePos, writeEnd, a13);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 14 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+    PIXCopyEventArgument(writePos, writeEnd, a12);
+    PIXCopyEventArgument(writePos, writeEnd, a13);
+    PIXCopyEventArgument(writePos, writeEnd, a14);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 15 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+    PIXCopyEventArgument(writePos, writeEnd, a12);
+    PIXCopyEventArgument(writePos, writeEnd, a13);
+    PIXCopyEventArgument(writePos, writeEnd, a14);
+    PIXCopyEventArgument(writePos, writeEnd, a15);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCSTR format string and 16 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+    PIXCopyEventArgument(writePos, writeEnd, a12);
+    PIXCopyEventArgument(writePos, writeEnd, a13);
+    PIXCopyEventArgument(writePos, writeEnd, a14);
+    PIXCopyEventArgument(writePos, writeEnd, a15);
+    PIXCopyEventArgument(writePos, writeEnd, a16);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying only a PCWSTR format string (no arguments),
+// tagged PIXEvent_SetMarker_NoArgs. Nothing is written when
+// PIXEventsReplaceBlock(false) returns zero or when the write position from
+// PIXGetThreadInfo() has already reached its limit.
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_NoArgs);
+    *writePos++ = color;
+
+    // The format string is the only payload for this overload.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 1 argument.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then the single argument.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 2 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 3 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 4 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 5 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 6 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 7 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 8 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 9 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 10 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// Records a SetMarker event carrying a PCWSTR format string and 11 arguments.
+// Nothing is written when PIXEventsReplaceBlock(false) returns zero or when the
+// write position from PIXGetThreadInfo() has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return; // zero timestamp: nothing is recorded
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* writePos = info->destination;
+    UINT64* writeEnd = info->limit;
+    if (writePos >= writeEnd)
+    {
+        return; // write position is already at (or past) the limit
+    }
+
+    // Event header word, followed by the marker color.
+    *writePos++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_VarArgs);
+    *writePos++ = color;
+
+    // Format string first, then every argument in call order.
+    PIXCopyEventArgument(writePos, writeEnd, formatString);
+    PIXCopyEventArgument(writePos, writeEnd, a1);
+    PIXCopyEventArgument(writePos, writeEnd, a2);
+    PIXCopyEventArgument(writePos, writeEnd, a3);
+    PIXCopyEventArgument(writePos, writeEnd, a4);
+    PIXCopyEventArgument(writePos, writeEnd, a5);
+    PIXCopyEventArgument(writePos, writeEnd, a6);
+    PIXCopyEventArgument(writePos, writeEnd, a7);
+    PIXCopyEventArgument(writePos, writeEnd, a8);
+    PIXCopyEventArgument(writePos, writeEnd, a9);
+    PIXCopyEventArgument(writePos, writeEnd, a10);
+    PIXCopyEventArgument(writePos, writeEnd, a11);
+
+    // Terminate the block and store the new write position on the thread info.
+    *writePos = PIXEventsBlockEndMarker;
+    info->destination = writePos;
+}
+
+// PIXSetMarkerAllocate: wide-string format with 12 arguments.
+// Requests a timestamp from PIXEventsReplaceBlock(false); a zero result means nothing is written.
+// Otherwise, if the thread's destination is below threadInfo->limit, a PIXEvent_SetMarker_VarArgs
+// record (header, color, format string, a1..a12) is emitted, PIXEventsBlockEndMarker is stored at
+// the cursor, and the cursor is saved back into the thread info.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string first, then each argument in declaration order.
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate: wide-string format with 13 arguments.
+// Requests a timestamp from PIXEventsReplaceBlock(false); a zero result means nothing is written.
+// Otherwise, if the thread's destination is below threadInfo->limit, a PIXEvent_SetMarker_VarArgs
+// record (header, color, format string, a1..a13) is emitted, PIXEventsBlockEndMarker is stored at
+// the cursor, and the cursor is saved back into the thread info.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string first, then each argument in declaration order.
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate: wide-string format with 14 arguments.
+// Requests a timestamp from PIXEventsReplaceBlock(false); a zero result means nothing is written.
+// Otherwise, if the thread's destination is below threadInfo->limit, a PIXEvent_SetMarker_VarArgs
+// record (header, color, format string, a1..a14) is emitted, PIXEventsBlockEndMarker is stored at
+// the cursor, and the cursor is saved back into the thread info.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string first, then each argument in declaration order.
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate: wide-string format with 15 arguments.
+// Requests a timestamp from PIXEventsReplaceBlock(false); a zero result means nothing is written.
+// Otherwise, if the thread's destination is below threadInfo->limit, a PIXEvent_SetMarker_VarArgs
+// record (header, color, format string, a1..a15) is emitted, PIXEventsBlockEndMarker is stored at
+// the cursor, and the cursor is saved back into the thread info.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string first, then each argument in declaration order.
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarkerAllocate: wide-string format with 16 arguments.
+// Requests a timestamp from PIXEventsReplaceBlock(false); a zero result means nothing is written.
+// Otherwise, if the thread's destination is below threadInfo->limit, a PIXEvent_SetMarker_VarArgs
+// record (header, color, format string, a1..a16) is emitted, PIXEventsBlockEndMarker is stored at
+// the cursor, and the cursor is saved back into the thread info.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void PIXSetMarkerAllocate(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Event header: encoded timestamp/type followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string first, then each argument in declaration order.
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string, no extra arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_NoArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_NoArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 1 argument.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 2 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 3 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 4 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 5 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 6 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 7 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 8 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 9 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 10 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 11 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 12 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 13 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 14 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 15 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// PIXSetMarker: narrow (PCSTR) format string with 16 arguments.
+// When the thread's destination is below biasedLimit, a PIXEvent_SetMarker_VarArgs record is
+// written in place. Otherwise the call is forwarded to PIXSetMarkerAllocate, unless biasedLimit
+// is null, in which case nothing happens.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXSetMarker(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+    if (cursor >= end)
+    {
+        // Not below the biased limit: forward only when the limit is non-null.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    // Extend the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // The end marker is written at the cursor; the cursor is not advanced past it.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, no arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_NoArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 1 argument) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 2 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 3 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 4 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 5 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 6 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 7 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 8 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 9 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 10 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 11 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 12 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 13 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 14 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 15 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes a SetMarker record (PCWSTR format string, 16 arguments) into the buffer
+// described by PIXGetThreadInfo(). When the write cursor is not below biasedLimit
+// the call is forwarded to PIXSetMarkerAllocate if biasedLimit is non-null, and
+// nothing is written otherwise.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXSetMarker(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* const info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (!(cursor < end))
+    {
+        // No room for the in-place write; with a null limit nothing is recorded.
+        if (end != nullptr)
+        {
+            PIXSetMarkerAllocate(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    const UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Terminate the block and store the new write position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and no arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_NoArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 1 argument.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 2 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 3 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 4 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 5 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 6 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 7 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// Begins a PIX event on `context` with a PCSTR format string and 8 arguments.
+// Forwards to PIXBeginCPUEventOnContext first, then encodes the event into a
+// stack buffer (timestamp field 0) and passes the encoded bytes to
+// PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // The copy limit stops PIXEventsReservedTailSpaceQwords short of the buffer end.
+    UINT64* end = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    *cursor = 0ull;
+
+    // Reported size is the byte distance from the buffer start to the cursor,
+    // so the trailing zero qword written above is not counted.
+    const UINT usedBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), usedBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 9 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 10 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 11 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 12 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 13 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 14 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+    PIXCopyEventArgument(cursor, writeLimit, a14);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 15 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+    PIXCopyEventArgument(cursor, writeLimit, a14);
+    PIXCopyEventArgument(cursor, writeLimit, a15);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: narrow (PCSTR) format string plus 16 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+    PIXCopyEventArgument(cursor, writeLimit, a14);
+    PIXCopyEventArgument(cursor, writeLimit, a15);
+    PIXCopyEventArgument(cursor, writeLimit, a16);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string with no format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record (tagged PIXEvent_BeginEvent_NoArgs) and passes it to PIXBeginEventOnContext.
+template<class TContext>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_NoArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 1 format argument.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 2 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 3 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 4 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 5 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 6 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 7 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 8 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 9 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 10 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 11 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 12 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 13 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload: wide (PCWSTR) format string plus 14 format arguments.
+// Calls PIXBeginCPUEventOnContext first, then packs the event into a stack
+// record and passes that record to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+
+    // The first two qwords are the encoded event info and the color.
+    record[0] = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    record[1] = color;
+
+    UINT64* cursor = record + 2;
+    // Limit handed to PIXCopyEventArgument: end of the record minus the reserved tail qwords.
+    UINT64* writeLimit = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+
+    PIXCopyEventArgument(cursor, writeLimit, formatString);
+    PIXCopyEventArgument(cursor, writeLimit, a1);
+    PIXCopyEventArgument(cursor, writeLimit, a2);
+    PIXCopyEventArgument(cursor, writeLimit, a3);
+    PIXCopyEventArgument(cursor, writeLimit, a4);
+    PIXCopyEventArgument(cursor, writeLimit, a5);
+    PIXCopyEventArgument(cursor, writeLimit, a6);
+    PIXCopyEventArgument(cursor, writeLimit, a7);
+    PIXCopyEventArgument(cursor, writeLimit, a8);
+    PIXCopyEventArgument(cursor, writeLimit, a9);
+    PIXCopyEventArgument(cursor, writeLimit, a10);
+    PIXCopyEventArgument(cursor, writeLimit, a11);
+    PIXCopyEventArgument(cursor, writeLimit, a12);
+    PIXCopyEventArgument(cursor, writeLimit, a13);
+    PIXCopyEventArgument(cursor, writeLimit, a14);
+
+    // A zero qword is stored at the cursor; it is not counted in the size below.
+    *cursor = 0ull;
+
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload for a PCWSTR format string and 15 arguments: forwards to
+// PIXBeginCPUEventOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a15, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXBeginEvent overload for a PCWSTR format string and 16 arguments: forwards to
+// PIXBeginCPUEventOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXBeginEventOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXBeginEvent(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXBeginCPUEventOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_BeginEvent_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a16, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+    PIXCopyEventArgument(cursor, bound, a16);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXBeginEventOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and no arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color and format string
+// into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info (NoArgs variant), then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_NoArgs);
+    *cursor++ = color;
+
+    // Payload: the format string only.
+    PIXCopyEventArgument(cursor, bound, formatString);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 1 argument: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// argument into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 2 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a2, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 3 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a3, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 4 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a4, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 5 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a5, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 6 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a6, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 7 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a7, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 8 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a8, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 9 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a9, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 10 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a10, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 11 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a11, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 12 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a12, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 13 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a13, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 14 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a14, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 15 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a15, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCSTR format string and 16 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a16, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+    PIXCopyEventArgument(cursor, bound, a16);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and no arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color and format string
+// into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info (NoArgs variant), then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_NoArgs);
+    *cursor++ = color;
+
+    // Payload: the format string only.
+    PIXCopyEventArgument(cursor, bound, formatString);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and 1 argument: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// argument into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and 2 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a2, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and 3 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a3, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and 4 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a4, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload for a PCWSTR format string and 5 arguments: forwards to
+// PIXSetCPUMarkerOnContext, then packs event info, color, format string and
+// arguments into a local record that is passed to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5);
+
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    // Bound handed to PIXCopyEventArgument; sits PIXEventsReservedTailSpaceQwords before the array end.
+    UINT64* bound = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header qwords: encoded event info, then color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string followed by a1..a5, in order.
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+
+    // Zero the qword at the cursor; the byte count below does not include it.
+    *cursor = 0ull;
+    UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus six arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus seven arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus eight arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus nine arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus ten arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus eleven arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus twelve arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+    PIXCopyEventArgument(cursor, recordEnd, a12);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus thirteen arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+    PIXCopyEventArgument(cursor, recordEnd, a12);
+    PIXCopyEventArgument(cursor, recordEnd, a13);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus fourteen arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+    PIXCopyEventArgument(cursor, recordEnd, a12);
+    PIXCopyEventArgument(cursor, recordEnd, a13);
+    PIXCopyEventArgument(cursor, recordEnd, a14);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus fifteen arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+    PIXCopyEventArgument(cursor, recordEnd, a12);
+    PIXCopyEventArgument(cursor, recordEnd, a13);
+    PIXCopyEventArgument(cursor, recordEnd, a14);
+    PIXCopyEventArgument(cursor, recordEnd, a15);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// PIXSetMarker overload: wide-character format string plus sixteen arguments.
+// Forwards to PIXSetCPUMarkerOnContext, then builds a zero-terminated record on the stack and passes it to PIXSetMarkerOnContext.
+template<class TContext, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void PIXSetMarker(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXSetCPUMarkerOnContext(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+
+    // Local record storage; the usable end leaves PIXEventsReservedTailSpaceQwords untouched.
+    UINT64 record[PIXEventsGraphicsRecordSpaceQwords];
+    UINT64* recordEnd = record + PIXEventsGraphicsRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
+    UINT64* cursor = record;
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(0, PIXEvent_SetMarker_VarArgs);
+    *cursor++ = color;
+
+    // Payload: format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, recordEnd, formatString);
+    PIXCopyEventArgument(cursor, recordEnd, a1);
+    PIXCopyEventArgument(cursor, recordEnd, a2);
+    PIXCopyEventArgument(cursor, recordEnd, a3);
+    PIXCopyEventArgument(cursor, recordEnd, a4);
+    PIXCopyEventArgument(cursor, recordEnd, a5);
+    PIXCopyEventArgument(cursor, recordEnd, a6);
+    PIXCopyEventArgument(cursor, recordEnd, a7);
+    PIXCopyEventArgument(cursor, recordEnd, a8);
+    PIXCopyEventArgument(cursor, recordEnd, a9);
+    PIXCopyEventArgument(cursor, recordEnd, a10);
+    PIXCopyEventArgument(cursor, recordEnd, a11);
+    PIXCopyEventArgument(cursor, recordEnd, a12);
+    PIXCopyEventArgument(cursor, recordEnd, a13);
+    PIXCopyEventArgument(cursor, recordEnd, a14);
+    PIXCopyEventArgument(cursor, recordEnd, a15);
+    PIXCopyEventArgument(cursor, recordEnd, a16);
+
+    // Zero qword terminates the record; the reported byte size stops just before it.
+    *cursor = 0ull;
+    const UINT recordBytes = static_cast<UINT>(reinterpret_cast<BYTE*>(cursor) - reinterpret_cast<BYTE*>(record));
+    PIXSetMarkerOnContext(context, static_cast<void*>(record), recordBytes);
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_NoArgs event (color, context, narrow format string)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_NoArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, then the format string.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, one argument)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, two arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, three arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, four arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, five arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, six arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, seven arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, eight arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+    PIXCopyEventArgument(cursor, blockEnd, a8);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, nine arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+    PIXCopyEventArgument(cursor, blockEnd, a8);
+    PIXCopyEventArgument(cursor, blockEnd, a9);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, ten arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+    PIXCopyEventArgument(cursor, blockEnd, a8);
+    PIXCopyEventArgument(cursor, blockEnd, a9);
+    PIXCopyEventArgument(cursor, blockEnd, a10);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, eleven arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+    PIXCopyEventArgument(cursor, blockEnd, a8);
+    PIXCopyEventArgument(cursor, blockEnd, a9);
+    PIXCopyEventArgument(cursor, blockEnd, a10);
+    PIXCopyEventArgument(cursor, blockEnd, a11);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Writes a PIXEvent_SetMarker_OnContext_VarArgs event (color, context, narrow format string, twelve arguments)
+// into the buffer described by PIXGetThreadInfo(), after calling PIXEventsReplaceBlock(false).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return; // zero result: nothing is written
+    }
+
+    PIXEventsThreadInfo* threadInfo = PIXGetThreadInfo();
+    UINT64* cursor = threadInfo->destination;
+    UINT64* blockEnd = threadInfo->limit;
+    if (cursor >= blockEnd)
+    {
+        return; // write position is already at or past the limit
+    }
+
+    // Header: encoded event info, then the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context pointer, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, blockEnd, context);
+    PIXCopyEventArgument(cursor, blockEnd, formatString);
+    PIXCopyEventArgument(cursor, blockEnd, a1);
+    PIXCopyEventArgument(cursor, blockEnd, a2);
+    PIXCopyEventArgument(cursor, blockEnd, a3);
+    PIXCopyEventArgument(cursor, blockEnd, a4);
+    PIXCopyEventArgument(cursor, blockEnd, a5);
+    PIXCopyEventArgument(cursor, blockEnd, a6);
+    PIXCopyEventArgument(cursor, blockEnd, a7);
+    PIXCopyEventArgument(cursor, blockEnd, a8);
+    PIXCopyEventArgument(cursor, blockEnd, a9);
+    PIXCopyEventArgument(cursor, blockEnd, a10);
+    PIXCopyEventArgument(cursor, blockEnd, a11);
+    PIXCopyEventArgument(cursor, blockEnd, a12);
+
+    // Place the end marker at the new write position and publish that position.
+    *cursor = PIXEventsBlockEndMarker;
+    threadInfo->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCSTR format string plus 13 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCSTR format string plus 14 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCSTR format string plus 15 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCSTR format string plus 16 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string with no format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info (NoArgs event type), then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_NoArgs);
+    *cursor++ = color;
+
+    // Payload: context, then the format string.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 1 format argument.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the format argument.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 2 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 3 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 4 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 5 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 6 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 7 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 8 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 9 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 10 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 11 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 12 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 13 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 14 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 15 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line SetMarker-on-context writer: PCWSTR format string plus 16 format arguments.
+// Writes nothing when PIXEventsReplaceBlock(false) yields 0 or the thread's
+// write position has already reached its limit.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline)  inline void MakeCPUSetMarkerForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 eventTime = PIXEventsReplaceBlock(false);
+    if (!eventTime)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    // Header words: encoded event info, then the marker color.
+    *cursor++ = PIXEncodeEventInfo(eventTime, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then each format argument in order.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Write the end marker and store the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// no extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_NoArgs);
+    *cursor++ = color;
+
+    // Payload: context, then the format string.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 1 extra argument into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 2 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 3 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 4 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 5 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 6 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 7 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 8 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 9 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 10 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 11 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 12 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 13 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 14 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 15 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a narrow (PCSTR) format string and
+// 16 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+    PIXCopyEventArgument(cursor, bound, a16);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a wide (PCWSTR) format string and
+// no extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_NoArgs);
+    *cursor++ = color;
+
+    // Payload: context, then the format string.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a wide (PCWSTR) format string and
+// 1 extra argument into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a wide (PCWSTR) format string and
+// 2 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a wide (PCWSTR) format string and
+// 3 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Records a SetMarker-on-context event with a wide (PCWSTR) format string and
+// 4 extra arguments into the buffer described by PIXGetThreadInfo(). When the
+// write position has reached biasedLimit, the work is handed to
+// MakeCPUSetMarkerForContextAllocate, unless biasedLimit is null.
+template<class T1, class T2, class T3, class T4>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* cursor = thread->destination;
+    UINT64* bound = thread->biasedLimit;
+
+    if (cursor >= bound)
+    {
+        // No room on this path; with a null biasedLimit nothing is recorded.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    // Widen the copy bound by PIXEventsSafeFastCopySpaceQwords before writing.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Event header, then color.
+    UINT64 time = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(time, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload: context, format string, then the arguments in call order.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+
+    // Terminate the block and publish the new write position.
+    *cursor = PIXEventsBlockEndMarker;
+    thread->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 5 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 6 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 7 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 8 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 9 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 10 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 11 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 12 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 13 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 14 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 15 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Emits a SetMarker_OnContext_VarArgs event (wide format string plus 16 arguments)
+// into the buffer described by PIXGetThreadInfo(). If the write position is not
+// below biasedLimit, the work is handed to MakeCPUSetMarkerForContextAllocate,
+// unless biasedLimit is null, in which case nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void MakeCPUSetMarkerForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->biasedLimit;
+
+    if (!(cursor < bound))
+    {
+        // No room on the fast path; a null bound means there is nothing to fall back to.
+        if (bound != nullptr)
+        {
+            MakeCPUSetMarkerForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    // Extend the bound by the fast-copy slack before copying arguments.
+    bound += PIXEventsSafeFastCopySpaceQwords;
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_SetMarker_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+    PIXCopyEventArgument(cursor, bound, a10);
+    PIXCopyEventArgument(cursor, bound, a11);
+    PIXCopyEventArgument(cursor, bound, a12);
+    PIXCopyEventArgument(cursor, bound, a13);
+    PIXCopyEventArgument(cursor, bound, a14);
+    PIXCopyEventArgument(cursor, bound, a15);
+    PIXCopyEventArgument(cursor, bound, a16);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+
+// Out-of-line path for a BeginEvent_OnContext_NoArgs event (narrow format string,
+// no extra arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_NoArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, then the format string.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 1 argument). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 2 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 3 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 4 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 5 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 6 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 7 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 8 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for a BeginEvent_OnContext_VarArgs event (narrow format string
+// plus 9 arguments). Calls PIXEventsReplaceBlock(false) first; the event is
+// written only if that returns a nonzero timestamp and the write position is
+// below the thread info's limit. Otherwise nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* bound = info->limit;
+    if (!(cursor < bound))
+    {
+        return;
+    }
+
+    // Record header: encoded timestamp + event type, followed by the color.
+    *cursor++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *cursor++ = color;
+
+    // Payload, in order: context, format string, then each argument.
+    PIXCopyEventArgument(cursor, bound, context);
+    PIXCopyEventArgument(cursor, bound, formatString);
+    PIXCopyEventArgument(cursor, bound, a1);
+    PIXCopyEventArgument(cursor, bound, a2);
+    PIXCopyEventArgument(cursor, bound, a3);
+    PIXCopyEventArgument(cursor, bound, a4);
+    PIXCopyEventArgument(cursor, bound, a5);
+    PIXCopyEventArgument(cursor, bound, a6);
+    PIXCopyEventArgument(cursor, bound, a7);
+    PIXCopyEventArgument(cursor, bound, a8);
+    PIXCopyEventArgument(cursor, bound, a9);
+
+    // Write the end marker at the new position and store that position back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 10 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a10.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 11 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a11.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 12 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a12.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 13 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a13.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 14 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a14.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 15 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a15.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    // Non-inlined BeginEvent-on-context writer: narrow (PCSTR) format string, 16 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a16.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) string, no format arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type (NoArgs variant), then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_NoArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, then the string.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 1 argument.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 2 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a2.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 3 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a3.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 4 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a4.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 5 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a5.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 6 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a6.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 7 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a7.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 8 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a8.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 9 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a9.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 10 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a10.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 11 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a11.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 12 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a12.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 13 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a13.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    // Non-inlined BeginEvent-on-context writer: wide (PCWSTR) format string, 14 arguments.
+    // A zero result from PIXEventsReplaceBlock(false) means nothing is recorded.
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (!(cursor < end))
+    {
+        return;
+    }
+
+    // Two-word header: encoded timestamp/event type, then the color.
+    cursor[0] = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    cursor[1] = color;
+    cursor += 2;
+
+    // Payload order: context, format string, then a1..a14.
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Write the end marker at the cursor (without advancing) and store the cursor back.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line BeginEvent-on-context writer: PCWSTR format string plus 15 arguments.
+// PIXEventsReplaceBlock(false) supplies the timestamp; a zero result means nothing is written.
+// Otherwise the encoded header, color, context, format string and arguments are appended at the
+// thread info's destination pointer (bounded by its limit), PIXEventsBlockEndMarker is stored at
+// the resulting position, and that position is saved back without stepping past the marker.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->limit;
+    if (!(writePtr < writeEnd))
+    {
+        return;
+    }
+
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+    PIXCopyEventArgument(writePtr, writeEnd, a14);
+    PIXCopyEventArgument(writePtr, writeEnd, a15);
+
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Out-of-line BeginEvent-on-context writer: PCWSTR format string plus 16 arguments.
+// PIXEventsReplaceBlock(false) supplies the timestamp; a zero result means nothing is written.
+// Otherwise the encoded header, color, context, format string and arguments are appended at the
+// thread info's destination pointer (bounded by its limit), PIXEventsBlockEndMarker is stored at
+// the resulting position, and that position is saved back without stepping past the marker.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+__declspec(noinline) inline void MakeCPUBeginEventForContextAllocate(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(false);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->limit;
+    if (!(writePtr < writeEnd))
+    {
+        return;
+    }
+
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+    PIXCopyEventArgument(writePtr, writeEnd, a14);
+    PIXCopyEventArgument(writePtr, writeEnd, a15);
+    PIXCopyEventArgument(writePtr, writeEnd, a16);
+
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and no arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_NoArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 1 argument.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 2 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 3 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 4 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 5 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 6 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 7 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 8 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 9 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 10 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 11 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 12 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 13 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 14 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+    PIXCopyEventArgument(writePtr, writeEnd, a14);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 15 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+    PIXCopyEventArgument(writePtr, writeEnd, a14);
+    PIXCopyEventArgument(writePtr, writeEnd, a15);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCSTR format string and 16 arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+    PIXCopyEventArgument(writePtr, writeEnd, a2);
+    PIXCopyEventArgument(writePtr, writeEnd, a3);
+    PIXCopyEventArgument(writePtr, writeEnd, a4);
+    PIXCopyEventArgument(writePtr, writeEnd, a5);
+    PIXCopyEventArgument(writePtr, writeEnd, a6);
+    PIXCopyEventArgument(writePtr, writeEnd, a7);
+    PIXCopyEventArgument(writePtr, writeEnd, a8);
+    PIXCopyEventArgument(writePtr, writeEnd, a9);
+    PIXCopyEventArgument(writePtr, writeEnd, a10);
+    PIXCopyEventArgument(writePtr, writeEnd, a11);
+    PIXCopyEventArgument(writePtr, writeEnd, a12);
+    PIXCopyEventArgument(writePtr, writeEnd, a13);
+    PIXCopyEventArgument(writePtr, writeEnd, a14);
+    PIXCopyEventArgument(writePtr, writeEnd, a15);
+    PIXCopyEventArgument(writePtr, writeEnd, a16);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCWSTR format string and no arguments.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_NoArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Records a BeginEvent-on-context entry with a PCWSTR format string and 1 argument.
+// When the thread's destination is below biasedLimit the event is written in place, with the
+// bound extended by PIXEventsSafeFastCopySpaceQwords. Otherwise, if biasedLimit is non-null,
+// the call is forwarded to MakeCPUBeginEventForContextAllocate; if neither holds nothing is recorded.
+template<class T1>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1)
+{
+    PIXEventsThreadInfo* thread = PIXGetThreadInfo();
+    UINT64* writePtr = thread->destination;
+    UINT64* writeEnd = thread->biasedLimit;
+    if (!(writePtr < writeEnd))
+    {
+        // No room on the in-place path: hand off unless there is no limit at all.
+        if (writeEnd != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1);
+        }
+        return;
+    }
+
+    writeEnd += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *writePtr++ = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    *writePtr++ = color;
+
+    PIXCopyEventArgument(writePtr, writeEnd, context);
+    PIXCopyEventArgument(writePtr, writeEnd, formatString);
+    PIXCopyEventArgument(writePtr, writeEnd, a1);
+
+    // The end marker is stored at the saved position, not before it.
+    *writePtr = PIXEventsBlockEndMarker;
+    thread->destination = writePtr;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with two arguments.
+// Writes the encoded VarArgs event header, color, context, format string, a1
+// and a2 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 3 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a3 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 4 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a4 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 5 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a5 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 6 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a6 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 7 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a7 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 8 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a8 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 9 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a9 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 10 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a10 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 11 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a11 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 12 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a12 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 13 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a13 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 14 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a14 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 15 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a15 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Begin-event-on-context overload for a PCWSTR format string with 16 arguments.
+// Writes the encoded VarArgs event header, color, context, format string and
+// a1..a16 at the position taken from PIXGetThreadInfo(), then the end marker.
+// If that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+inline void MakeCPUBeginEventForContext(UINT64 color, PVOID context, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUBeginEventForContextAllocate(color, context, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_BeginEvent_OnContext_VarArgs);
+    ++cursor;
+    *cursor = color;
+    ++cursor;
+
+    PIXCopyEventArgument(cursor, end, context);
+    PIXCopyEventArgument(cursor, end, formatString);
+    PIXCopyEventArgument(cursor, end, a1);
+    PIXCopyEventArgument(cursor, end, a2);
+    PIXCopyEventArgument(cursor, end, a3);
+    PIXCopyEventArgument(cursor, end, a4);
+    PIXCopyEventArgument(cursor, end, a5);
+    PIXCopyEventArgument(cursor, end, a6);
+    PIXCopyEventArgument(cursor, end, a7);
+    PIXCopyEventArgument(cursor, end, a8);
+    PIXCopyEventArgument(cursor, end, a9);
+    PIXCopyEventArgument(cursor, end, a10);
+    PIXCopyEventArgument(cursor, end, a11);
+    PIXCopyEventArgument(cursor, end, a12);
+    PIXCopyEventArgument(cursor, end, a13);
+    PIXCopyEventArgument(cursor, end, a14);
+    PIXCopyEventArgument(cursor, end, a15);
+    PIXCopyEventArgument(cursor, end, a16);
+
+    // Terminate the block and publish the advanced write position.
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+
+// Out-of-line path for PIXEndEvent(): calls PIXEventsReplaceBlock(true) and,
+// when it returns a non-zero time and the write position is below the limit,
+// writes an encoded PIXEvent_EndEvent entry followed by the block end marker.
+__declspec(noinline) inline void PIXEndEventAllocate()
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(true);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_EndEvent);
+    ++cursor;
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes an encoded PIXEvent_EndEvent entry and the block end marker at the
+// write position taken from PIXGetThreadInfo(). If that position is not below
+// the limit, PIXEndEventAllocate() is called instead (skipped when the limit
+// is null).
+inline void PIXEndEvent()
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            PIXEndEventAllocate();
+        }
+        return;
+    }
+
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_EndEvent);
+    ++cursor;
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Out-of-line path for MakeCPUEndEventForContext(): calls
+// PIXEventsReplaceBlock(true) and, when it returns a non-zero time and the
+// write position is below the limit, writes an encoded
+// PIXEvent_EndEvent_OnContext entry, the context and the block end marker.
+__declspec(noinline) inline void MakeCPUEndEventForContextAllocate(PVOID context)
+{
+    UINT64 timestamp = PIXEventsReplaceBlock(true);
+    if (!timestamp)
+    {
+        return;
+    }
+
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->limit;
+    if (cursor >= end)
+    {
+        return;
+    }
+
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_EndEvent_OnContext);
+    ++cursor;
+    PIXCopyEventArgument(cursor, end, context);
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+// Writes an encoded PIXEvent_EndEvent_OnContext entry, the context and the
+// block end marker at the write position taken from PIXGetThreadInfo(). If
+// that position is not below biasedLimit, the allocating variant is called
+// instead (skipped when biasedLimit is null).
+inline void MakeCPUEndEventForContext(PVOID context)
+{
+    PIXEventsThreadInfo* info = PIXGetThreadInfo();
+    UINT64* cursor = info->destination;
+    UINT64* end = info->biasedLimit;
+
+    if (cursor >= end)
+    {
+        if (end != nullptr)
+        {
+            MakeCPUEndEventForContextAllocate(context);
+        }
+        return;
+    }
+
+    end += PIXEventsSafeFastCopySpaceQwords;
+    UINT64 timestamp = PIXGetTimestampCounter();
+    *cursor = PIXEncodeEventInfo(timestamp, PIXEvent_EndEvent_OnContext);
+    ++cursor;
+    PIXCopyEventArgument(cursor, end, context);
+    *cursor = PIXEventsBlockEndMarker;
+    info->destination = cursor;
+}
+
+template<class TContext> // Context overload of PIXEndEvent: forwards the same context pointer to two helpers, in order.
+inline void PIXEndEvent(TContext* context)
+{
+    PIXEndCPUEventOnContext(context); // called first; defined outside this section
+    PIXEndEventOnContext(context); // called second with the same context; defined outside this section
+}
+
+template<class TContext>
+class PIXScopedEventObject
+{
+private:
+    TContext* m_context;
+
+public:
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString);
+    }
+
+    template<class T1>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1);
+    }
+
+    template<class T1, class T2>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2);
+    }
+
+    template<class T1, class T2, class T3>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3);
+    }
+
+    template<class T1, class T2, class T3, class T4>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+    }
+
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString);
+    }
+
+    template<class T1>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1);
+    }
+
+    template<class T1, class T2>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2);
+    }
+
+    template<class T1, class T2, class T3>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3);
+    }
+
+    template<class T1, class T2, class T3, class T4>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+    PIXScopedEventObject(TContext* context, UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+        : m_context(context)
+    {
+        PIXBeginEvent(context, color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+    }
+
+    ~PIXScopedEventObject()
+    {
+        PIXEndEvent(m_context);
+    }
+};
+
+template<>
+class PIXScopedEventObject<void>  // Context-free specialization: each constructor begins an event through the PIXBeginEvent overload that takes no context argument; the destructor ends it.
+{
+public:
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString)  // PCSTR format string overloads: 0 to 16 extra arguments, forwarded by value
+    {
+        PIXBeginEvent(color, formatString);
+    }
+
+    template<class T1>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1)
+    {
+        PIXBeginEvent(color, formatString, a1);
+    }
+
+    template<class T1, class T2>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2)
+    {
+        PIXBeginEvent(color, formatString, a1, a2);
+    }
+
+    template<class T1, class T2, class T3>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3);
+    }
+
+    template<class T1, class T2, class T3, class T4>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+    PIXScopedEventObject(UINT64 color, _In_ PCSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+    }
+
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString)  // PCWSTR format string overloads: same 0 to 16 argument ladder as the PCSTR set above
+    {
+        PIXBeginEvent(color, formatString);
+    }
+
+    template<class T1>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1)
+    {
+        PIXBeginEvent(color, formatString, a1);
+    }
+
+    template<class T1, class T2>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2)
+    {
+        PIXBeginEvent(color, formatString, a1, a2);
+    }
+
+    template<class T1, class T2, class T3>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3);
+    }
+
+    template<class T1, class T2, class T3, class T4>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15);
+    }
+
+    template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, class T12, class T13, class T14, class T15, class T16>
+    PIXScopedEventObject(UINT64 color, _In_ PCWSTR formatString, T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9, T10 a10, T11 a11, T12 a12, T13 a13, T14 a14, T15 a15, T16 a16)
+    {
+        PIXBeginEvent(color, formatString, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16);
+    }
+
+    ~PIXScopedEventObject()
+    {
+        PIXEndEvent();  // ends the event begun by whichever constructor ran; no context is passed
+    }
+};
+
+#define PIXConcatenate(a, b) a ## b  // pastes the two tokens exactly as written
+#define PIXGetScopedEventVariableName(a, b) PIXConcatenate(a, b)  // extra indirection so macro arguments such as __LINE__ are expanded before pasting
+#define PIXScopedEvent(context, ...) PIXScopedEventObject<PIXInferScopedEventType<decltype(context)>::Type> PIXGetScopedEventVariableName(pixEvent, __LINE__)(context, __VA_ARGS__)  // declares a local PIXScopedEventObject named pixEvent<line number>, constructed from (context, args...)
+
+#endif

+ 116 - 0
3rdparty/bgfx/3rdparty/dxsdk/include/pix3.h

@@ -0,0 +1,116 @@
+/*==========================================================================;
+ *
+ *  Copyright (C) Microsoft Corporation.  All Rights Reserved.
+ *
+ *  File:       pix3.h
+ *  Content:    PIX include file
+ *
+ ****************************************************************************/
+#pragma once
+
+#ifndef _PIX3_H_
+#define _PIX3_H_
+
+#include <sal.h>
+
+#ifndef __cplusplus
+#error "Only C++ files can include pix.h. C is not supported."
+#endif
+
+#if defined(XBOX) || defined(_XBOX_ONE) || defined(_DURANGO)
+#include "pix3_xbox.h"
+#else
+#include "pix3_win.h"
+#endif
+
+//
+// The PIX event/marker APIs compile to nothing on retail builds and on x86 builds
+//
+#if (!defined(USE_PIX)) && ((defined(_DEBUG) || DBG || (defined(PROFILE) && !defined(FASTCAP)) || defined(PROFILE_BUILD)) && !defined(i386) && defined(_AMD64_) && !defined(_PREFAST_))
+#define USE_PIX
+#endif
+
+#if defined(USE_PIX) && !defined(_AMD64_) && !defined(USE_PIX_ON_ALL_ARCHITECTURES)
+#pragma message("Warning: Pix markers are only supported on AMD64")
+#endif
+
+// These flags are used by both PIXBeginCapture and PIXGetCaptureState
+#define PIX_CAPTURE_TIMING                  (1 << 0)
+#define PIX_CAPTURE_GPU                     (1 << 1)
+#define PIX_CAPTURE_FUNCTION_SUMMARY        (1 << 2)
+#define PIX_CAPTURE_FUNCTION_DETAILS        (1 << 3)
+#define PIX_CAPTURE_CALLGRAPH               (1 << 4)
+#define PIX_CAPTURE_INSTRUCTION_TRACE       (1 << 5)
+#define PIX_CAPTURE_SYSTEM_MONITOR_COUNTERS (1 << 6)
+#define PIX_CAPTURE_VIDEO                   (1 << 7)
+#define PIX_CAPTURE_AUDIO                   (1 << 8)
+
+typedef union PIXCaptureParameters  // capture settings passed (by pointer) to PIXBeginCapture; a union, so the two members share storage
+{
+    struct GpuCaptureParameters
+    {
+        PVOID reserved;
+    } GpuCaptureParameters;
+
+    struct TimingCaptureParameters  // NOTE(review): presumably the member read for PIX_CAPTURE_TIMING captures - confirm against the PIX documentation
+    {
+        BOOL CaptureCallstacks;
+        PWSTR FileName;
+    } TimingCaptureParameters;
+
+} PIXCaptureParameters, *PPIXCaptureParameters;
+
+
+
+#if defined (USE_PIX) && (defined(_AMD64_) || defined(USE_PIX_ON_ALL_ARCHITECTURES))
+
+#include "PIXEventsCommon.h"
+#include "PIXEventsGenerated.h"
+
+// Starts a programmatically controlled capture.
+// captureFlags uses the PIX_CAPTURE_* family of flags to specify the type of capture to take
+extern "C" HRESULT WINAPI PIXBeginCapture(DWORD captureFlags, _In_opt_ const PPIXCaptureParameters captureParameters);
+
+// Stops a programmatically controlled capture
+//  If discard == TRUE, the captured data is discarded
+//  If discard == FALSE, the captured data is saved
+extern "C" HRESULT WINAPI PIXEndCapture(BOOL discard);
+
+extern "C" DWORD WINAPI PIXGetCaptureState();
+
+extern "C" void WINAPI PIXReportCounter(_In_ PCWSTR name, float value);
+
+#else
+
+// Eliminate these APIs when not using PIX: every function below is an inline stub with an empty body (the two capture calls return S_OK, PIXGetCaptureState returns 0).
+inline HRESULT PIXBeginCapture(DWORD, _In_opt_ const PIXCaptureParameters*) { return S_OK; }
+inline HRESULT PIXEndCapture(BOOL) { return S_OK; }
+inline DWORD PIXGetCaptureState() { return 0; }
+inline void PIXReportCounter(_In_ PCWSTR, float) {}
+
+inline void PIXBeginEvent(UINT64, _In_ PCSTR, ...) {}
+inline void PIXBeginEvent(UINT64, _In_ PCWSTR, ...) {}
+inline void PIXBeginEvent(void*, UINT64, _In_ PCSTR, ...) {}
+inline void PIXBeginEvent(void*, UINT64, _In_ PCWSTR, ...) {}
+inline void PIXEndEvent() {}
+inline void PIXEndEvent(void*) {}
+inline void PIXSetMarker(UINT64, _In_ PCSTR, ...) {}
+inline void PIXSetMarker(UINT64, _In_ PCWSTR, ...) {}
+inline void PIXSetMarker(void*, UINT64, _In_ PCSTR, ...) {}
+inline void PIXSetMarker(void*, UINT64, _In_ PCWSTR, ...) {}
+inline void PIXScopedEvent(UINT64, _In_ PCSTR, ...) {}
+inline void PIXScopedEvent(UINT64, _In_ PCWSTR, ...) {}
+inline void PIXScopedEvent(void*, UINT64, _In_ PCSTR, ...) {}
+inline void PIXScopedEvent(void*, UINT64, _In_ PCWSTR, ...) {}
+
+#endif // USE_PIX
+
+// Use these functions to specify colors to pass as metadata to a PIX event/marker API.
+// Use PIX_COLOR() to specify a particular color for an event.
+// Or, use PIX_COLOR_INDEX() to specify a set of unique event categories, and let PIX choose
+// the colors to represent each category.
+// Packs a color: 0xff in bits 24-31, then r in bits 16-23, g in bits 8-15 and b in bits 0-7.
+inline UINT PIX_COLOR(BYTE r, BYTE g, BYTE b)
+{
+    return 0xff000000 | (r << 16) | (g << 8) | b;
+}
+// Returns the category index i unchanged, converted to UINT.
+inline UINT PIX_COLOR_INDEX(BYTE i)
+{
+    return i;
+}
+const UINT PIX_COLOR_DEFAULT = PIX_COLOR_INDEX(0);
+
+#endif // _PIX3_H_

+ 53 - 0
3rdparty/bgfx/3rdparty/dxsdk/include/pix3_win.h

@@ -0,0 +1,53 @@
+/*==========================================================================;
+ *
+ *  Copyright (C) Microsoft Corporation.  All Rights Reserved.
+ *
+ *  File:       PIX3_win.h
+ *  Content:    PIX include file
+ *              Don't include this file directly - use pix3.h
+ *
+ ****************************************************************************/
+
+#pragma once
+
+#ifndef _PIX3_H_
+#error "Don't include this file directly - use pix3.h"
+#endif
+
+#ifndef _PIX3_WIN_H_
+#define _PIX3_WIN_H_
+
+struct PIXEventsBlockInfo  // empty type; in this header it is only used as the pointee of PIXEventsThreadInfo::block
+{
+};
+
+struct PIXEventsThreadInfo  // NOTE(review): looks like per-thread event-recording state (cursor and bounds over UINT64 words) - confirm against PIXEventsCommon.h
+{
+    PIXEventsBlockInfo* block;
+    UINT64* biasedLimit;
+    UINT64* destination;
+    UINT64* limit;
+    UINT64 id;
+};
+
+// The following defines denote the different metadata values that have been used
+// by tools to denote how to parse pix marker event data. The first two values
+// are legacy values.
+#define WINPIX_EVENT_UNICODE_VERSION 0
+#define WINPIX_EVENT_ANSI_VERSION 1
+#define WINPIX_EVENT_PIX3BLOB_VERSION 2
+
+#define D3D12_EVENT_METADATA WINPIX_EVENT_PIX3BLOB_VERSION
+
+// Reads the performance counter via QueryPerformanceCounter and returns its
+// QuadPart value. The call's return value is not checked; the counter starts
+// zero-initialized.
+__forceinline UINT64 PIXGetTimestampCounter()
+{
+    LARGE_INTEGER counter = {};
+    QueryPerformanceCounter(&counter);
+    return counter.QuadPart;
+}
+
+#define PIXSetCPUMarkerOnContext(context, metadata, ...) MakeCPUSetMarkerForContext(metadata, context, __VA_ARGS__)  // note: metadata and context swap positions in the forwarded call
+#define PIXBeginCPUEventOnContext(context, metadata, ...) MakeCPUBeginEventForContext(metadata, context, __VA_ARGS__)  // same argument swap as above
+#define PIXEndCPUEventOnContext(context) MakeCPUEndEventForContext(context)
+
+#endif //_PIX3_WIN_H_

+ 9 - 2
3rdparty/bgfx/3rdparty/fcpp/cpp1.c

@@ -42,6 +42,7 @@ int fppPreProcess(struct fppTag *tags)
 {
   size_t i=0;
   ReturnCode ret;       /* cpp return code */
+  int retVal;           /* fppPreProcess return code */
   struct Global *global;
 
   global=(struct Global *)malloc(sizeof(struct Global));
@@ -144,10 +145,16 @@ int fppPreProcess(struct fppTag *tags)
   }
   fflush(stdout);
 // BK -  fclose(stdout);
+  delalldefines(global);
 
+  retVal = IO_NORMAL;
   if (global->errors > 0 && !global->eflag)
-    return(IO_ERROR);
-  return(IO_NORMAL);       /* No errors or -E option set   */
+    retVal = IO_ERROR;
+  free(global->tokenbuf);
+  free(global->functionname);
+  free(global->spacebuf);
+  free(global);
+  return retVal;       /* No errors or -E option set   */
 }
 
 INLINE FILE_LOCAL

+ 1 - 1
3rdparty/bgfx/3rdparty/fcpp/cpp3.c

@@ -363,7 +363,7 @@ ReturnCode initdefines(struct Global *global)
   return(FPP_OK);
 }
 
-void deldefines(struct Global *global)
+void delbuiltindefines(struct Global *global)
 {
   /*
    * Delete the built-in #define's.

+ 24 - 1
3rdparty/bgfx/3rdparty/fcpp/cpp6.c

@@ -619,12 +619,35 @@ DEFBUF *defendel(struct Global *global,
 }
 
 
+void delalldefines(struct Global *global)
+{
+  /*
+   * Walk every bucket of the symbol table, release each entry's
+   * replacement text and the entry itself, then clear the bucket.
+   */
+
+  DEFBUF **bucket;
+  DEFBUF *entry;
+  DEFBUF *next;
+
+  for (bucket = global->symtab; bucket < &global->symtab[SBSIZE]; bucket++) {
+    for (entry = *bucket; entry != (DEFBUF *)NULL; entry = next) {
+      next = entry->link;            /* Save the link before freeing */
+      free(entry->repl);             /* Free the replacement         */
+      free((char *)entry);           /* Free the symbol              */
+    }
+    *bucket = NULL;
+  }
+}
+
+
 void outdefines(struct Global *global)
 {
   DEFBUF *dp;
   DEFBUF **syp;
 
-  deldefines(global);                   /* Delete built-in #defines     */
+  delbuiltindefines(global);                   /* Delete built-in #defines     */
   for (syp = global->symtab; syp < &global->symtab[SBSIZE]; syp++) {
     if ((dp = *syp) != (DEFBUF *) NULL) {
       do {

+ 2 - 1
3rdparty/bgfx/3rdparty/fcpp/cppadd.h

@@ -407,7 +407,8 @@ void dumpadef(char *, register DEFBUF *);
 #endif
 ReturnCode openfile(struct Global *,char *);
 int cget(struct Global *);
-void deldefines(struct Global *);
+void delbuiltindefines(struct Global *);
+void delalldefines(struct Global *);
 char *Getmem(struct Global *, int);
 ReturnCode openinclude(struct Global *, char *, int);
 ReturnCode expstuff(struct Global *, char *, char *);

+ 64 - 53
3rdparty/bgfx/3rdparty/forsyth-too/forsythtriangleorderoptimizer.cpp

@@ -14,18 +14,14 @@
 //  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //-----------------------------------------------------------------------------
 
+#include <stdint.h>
 #include <assert.h>
 #include <math.h>
 #include <vector>
-#include <limits>
 #include <algorithm>
 
 namespace Forsyth
 {
-    typedef unsigned int uint;
-    typedef unsigned short uint16;
-    typedef unsigned char byte;
-
     //-----------------------------------------------------------------------------
     //  OptimizeFaces
     //-----------------------------------------------------------------------------
@@ -36,13 +32,27 @@ namespace Forsyth
     //          the number of indices in the list
     //      vertexCount
     //          the number of vertices; every index in indexList, after
     //          vertexBaseIndex is subtracted, must be less than this value
+    //      vertexBaseIndex
+    //          starting vertex index subtracted from each index in indexList to
+    //          allow safe operation on multiple objects in a single index buffer
     //      newIndexList
     //          a pointer to a preallocated buffer the same size as indexList to
     //          hold the optimized index list
     //      lruCacheSize
     //          the size of the simulated post-transform cache (max:64)
     //-----------------------------------------------------------------------------
-    void OptimizeFaces(const uint16* indexList, uint indexCount, uint vertexCount, uint16* newIndexList, uint16 lruCacheSize);
+    template<class IndexT>
+    void OptimizeFacesImpl(const IndexT* indexList, uint32_t indexCount, uint32_t vertexCount, IndexT vertexBaseIndex, IndexT* newIndexList, uint16_t lruCacheSize);
+
+    // 16-bit index overload: forwards all arguments to OptimizeFacesImpl<uint16_t>.
+    void OptimizeFaces(
+        const uint16_t* indexList,
+        uint32_t indexCount,
+        uint32_t vertexCount,
+        uint16_t vertexBaseIndex,
+        uint16_t* newIndexList,
+        uint16_t lruCacheSize)
+    {
+        OptimizeFacesImpl<uint16_t>(indexList, indexCount, vertexCount, vertexBaseIndex, newIndexList, lruCacheSize);
+    }
+
+    // 32-bit index overload: forwards all arguments to OptimizeFacesImpl<uint32_t>.
+    void OptimizeFaces(
+        const uint32_t* indexList,
+        uint32_t indexCount,
+        uint32_t vertexCount,
+        uint32_t vertexBaseIndex,
+        uint32_t* newIndexList,
+        uint16_t lruCacheSize)
+    {
+        OptimizeFacesImpl<uint32_t>(indexList, indexCount, vertexCount, vertexBaseIndex, newIndexList, lruCacheSize);
+    }
 
     namespace
     {
@@ -82,7 +92,7 @@ namespace Forsyth
             return score;
         }
 
-        float ComputeVertexValenceScore(uint numActiveFaces)
+        float ComputeVertexValenceScore(uint32_t numActiveFaces)
         {
             const float FindVertexScore_ValenceBoostScale = 2.0f;
             const float FindVertexScore_ValenceBoostPower = 0.5f;
@@ -100,7 +110,7 @@ namespace Forsyth
 
 
         const int kMaxVertexCacheSize = 64;
-        const uint kMaxPrecomputedVertexValenceScores = 64;
+        const uint32_t kMaxPrecomputedVertexValenceScores = 64;
         float s_vertexCacheScores[kMaxVertexCacheSize+1][kMaxVertexCacheSize];
         float s_vertexValenceScores[kMaxPrecomputedVertexValenceScores];
 
@@ -114,7 +124,7 @@ namespace Forsyth
                 }
             }
 
-            for (uint valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
+            for (uint32_t valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
             {
                 s_vertexValenceScores[valence] = ComputeVertexValenceScore(valence);
             }
@@ -133,7 +143,7 @@ namespace Forsyth
 //            return s_vertexValenceScores[numActiveTris];
 //        }
 
-        float FindVertexScore(uint numActiveFaces, uint cachePosition, uint vertexCacheSize)
+        float FindVertexScore(uint32_t numActiveFaces, uint32_t cachePosition, uint32_t vertexCacheSize)
         {
             assert(s_vertexScoresComputed); (void)s_vertexScoresComputed;
 
@@ -163,37 +173,38 @@ namespace Forsyth
 
         struct OptimizeVertexData
         {
-            float   score;
-            uint    activeFaceListStart;
-            uint    activeFaceListSize;
-            uint16  cachePos0;
-            uint16  cachePos1;
+            float score;
+            uint32_t activeFaceListStart;
+            uint32_t activeFaceListSize;
+            uint16_t cachePos0;
+            uint16_t cachePos1;
             OptimizeVertexData() : score(0.f), activeFaceListStart(0), activeFaceListSize(0), cachePos0(0), cachePos1(0) { }
         };
     }
 
-    void OptimizeFaces(const uint16* indexList, uint indexCount, uint vertexCount, uint16* newIndexList, uint16 lruCacheSize)
+    template<class IndexT>
+    void OptimizeFacesImpl(const IndexT* indexList, uint32_t indexCount, uint32_t vertexCount, IndexT vertexBaseIndex, IndexT* newIndexList, uint16_t lruCacheSize)
     {
-        std::vector<OptimizeVertexData> vertexDataList;
+        std::vector< OptimizeVertexData > vertexDataList;
         vertexDataList.resize(vertexCount);
 
         // compute face count per vertex
-        for (uint i=0; i<indexCount; ++i)
+        for (uint32_t i=0; i<indexCount; ++i)
         {
-            uint16 index = indexList[i];
-            assert(index < vertexCount);
+            IndexT index = indexList[i] - vertexBaseIndex;
+            assert((index >= 0) && (index < vertexCount));
             OptimizeVertexData& vertexData = vertexDataList[index];
             vertexData.activeFaceListSize++;
         }
 
-        std::vector<uint> activeFaceList;
+        std::vector<uint32_t> activeFaceList;
 
-        const uint16 kEvictedCacheIndex = std::numeric_limits<uint16>::max();
+        const IndexT kEvictedCacheIndex = std::numeric_limits<uint16_t>::max();
 
         {
             // allocate face list per vertex
-            uint curActiveFaceListPos = 0;
-            for (uint i=0; i<vertexCount; ++i)
+            uint32_t curActiveFaceListPos = 0;
+            for (uint32_t i=0; i<vertexCount; ++i)
             {
                 OptimizeVertexData& vertexData = vertexDataList[i];
                 vertexData.cachePos0 = kEvictedCacheIndex;
@@ -207,45 +218,45 @@ namespace Forsyth
         }
 
         // fill out face list per vertex
-        for (uint i=0; i<indexCount; i+=3)
+        for (uint32_t i=0; i<indexCount; i+=3)
         {
-            for (uint j=0; j<3; ++j)
+            for (uint32_t j=0; j<3; ++j)
             {
-                uint16 index = indexList[i+j];
+                IndexT index = indexList[i+j] - vertexBaseIndex;
                 OptimizeVertexData& vertexData = vertexDataList[index];
                 activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize] = i;
                 vertexData.activeFaceListSize++;
             }
         }
 
-        std::vector<byte> processedFaceList;
+        std::vector<uint8_t> processedFaceList;
         processedFaceList.resize(indexCount);
 
-        uint16 vertexCacheBuffer[(kMaxVertexCacheSize+3)*2];
-        uint16* cache0 = vertexCacheBuffer;
-        uint16* cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3);
-        uint16 entriesInCache0 = 0;
+        IndexT vertexCacheBuffer[(kMaxVertexCacheSize+3)*2];
+        IndexT* cache0 = vertexCacheBuffer;
+        IndexT* cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3);
+        IndexT entriesInCache0 = 0;
 
-        uint bestFace = 0;
+        uint32_t bestFace = 0;
         float bestScore = -1.f;
 
         const float maxValenceScore = FindVertexScore(1, kEvictedCacheIndex, lruCacheSize) * 3.f;
 
-        for (uint i = 0; i < indexCount; i += 3)
+        for (uint32_t i = 0; i < indexCount; i += 3)
         {
             if (bestScore < 0.f)
             {
                 // no verts in the cache are used by any unprocessed faces so
                 // search all unprocessed faces for a new starting point
-                for (uint j = 0; j < indexCount; j += 3)
+                for (uint32_t j = 0; j < indexCount; j += 3)
                 {
                     if (processedFaceList[j] == 0)
                     {
-                        uint face = j;
+                        uint32_t face = j;
                         float faceScore = 0.f;
-                        for (uint k=0; k<3; ++k)
+                        for (uint32_t k=0; k<3; ++k)
                         {
-                            uint16 index = indexList[face+k];
+                            IndexT index = indexList[face+k] - vertexBaseIndex;
                             OptimizeVertexData& vertexData = vertexDataList[index];
                             assert(vertexData.activeFaceListSize > 0);
                             assert(vertexData.cachePos0 >= lruCacheSize);
@@ -269,13 +280,13 @@ namespace Forsyth
             }
 
             processedFaceList[bestFace] = 1;
-            uint16 entriesInCache1 = 0;
+            uint16_t entriesInCache1 = 0;
 
             // add bestFace to LRU cache and to newIndexList
-            for (uint v = 0; v < 3; ++v)
+            for (uint32_t v = 0; v < 3; ++v)
             {
-                uint16 index = indexList[bestFace+v];
-                newIndexList[i+v] = index;
+                IndexT index = indexList[bestFace+v] - vertexBaseIndex;
+                newIndexList[i+v] = index + vertexBaseIndex;
 
                 OptimizeVertexData& vertexData = vertexDataList[index];
 
@@ -292,9 +303,9 @@ namespace Forsyth
                 }
 
                 assert(vertexData.activeFaceListSize > 0);
-                uint* begin = &activeFaceList[vertexData.activeFaceListStart];
-                uint* end = &(activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize - 1]) + 1;
-                uint* it = std::find(begin, end, bestFace);
+                uint32_t* begin = &activeFaceList[vertexData.activeFaceListStart];
+                uint32_t* end = &(activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize - 1]) + 1;
+                uint32_t* it = std::find(begin, end, bestFace);
                 assert(it != end);
                 std::swap(*it, *(end-1));
                 --vertexData.activeFaceListSize;
@@ -303,9 +314,9 @@ namespace Forsyth
             }
 
             // move the rest of the old verts in the cache down and compute their new scores
-            for (uint c0 = 0; c0 < entriesInCache0; ++c0)
+            for (uint32_t c0 = 0; c0 < entriesInCache0; ++c0)
             {
-                uint16 index = cache0[c0];
+                IndexT index = cache0[c0];
                 OptimizeVertexData& vertexData = vertexDataList[index];
 
                 if (vertexData.cachePos1 >= entriesInCache1)
@@ -318,19 +329,19 @@ namespace Forsyth
 
             // find the best scoring triangle in the current cache (including up to 3 that were just evicted)
             bestScore = -1.f;
-            for (uint c1 = 0; c1 < entriesInCache1; ++c1)
+            for (uint32_t c1 = 0; c1 < entriesInCache1; ++c1)
             {
-                uint16 index = cache1[c1];
+                IndexT index = cache1[c1];
                 OptimizeVertexData& vertexData = vertexDataList[index];
                 vertexData.cachePos0 = vertexData.cachePos1;
                 vertexData.cachePos1 = kEvictedCacheIndex;
-                for (uint j=0; j<vertexData.activeFaceListSize; ++j)
+                for (uint32_t j=0; j<vertexData.activeFaceListSize; ++j)
                 {
-                    uint face = activeFaceList[vertexData.activeFaceListStart+j];
+                    uint32_t face = activeFaceList[vertexData.activeFaceListStart+j];
                     float faceScore = 0.f;
-                    for (uint v=0; v<3; v++)
+                    for (uint32_t v=0; v<3; v++)
                     {
-                        uint16 faceIndex = indexList[face+v];
+                        IndexT faceIndex = indexList[face+v] - vertexBaseIndex;
                         OptimizeVertexData& faceVertexData = vertexDataList[faceIndex];
                         faceScore += faceVertexData.score;
                     }

+ 21 - 17
3rdparty/bgfx/3rdparty/forsyth-too/forsythtriangleorderoptimizer.h

@@ -21,23 +21,27 @@
 
 namespace Forsyth
 {
-	//-----------------------------------------------------------------------------
-	//  OptimizeFaces
-	//-----------------------------------------------------------------------------
-	//  Parameters:
-	//      indexList
-	//          input index list
-	//      indexCount
-	//          the number of indices in the list
-	//      vertexCount
-	//          the largest index value in indexList
-	//      newIndexList
-	//          a pointer to a preallocated buffer the same size as indexList to
-	//          hold the optimized index list
-	//      lruCacheSize
-	//          the size of the simulated post-transform cache (max:64)
-	//-----------------------------------------------------------------------------
-	void OptimizeFaces(const uint16_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint16_t* newIndexList, uint16_t lruCacheSize);
+    //-----------------------------------------------------------------------------
+    //  OptimizeFaces
+    //-----------------------------------------------------------------------------
+    //  Parameters:
+    //      indexList
+    //          input index list
+    //      indexCount
+    //          the number of indices in the list
+    //      vertexCount
+    //          number of vertices; (index - vertexBaseIndex) must be < vertexCount
+    //      vertexBaseIndex
+    //          starting vertex index subtracted from each index in indexList to
+    //          allow safe operation on multiple objects in a single index buffer
+    //      newIndexList
+    //          a pointer to a preallocated buffer the same size as indexList to
+    //          hold the optimized index list
+    //      lruCacheSize
+    //          the size of the simulated post-transform cache (max:64)
+    //-----------------------------------------------------------------------------
+    void OptimizeFaces(const uint16_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint16_t vertexBaseIndex, uint16_t* newIndexList, uint16_t lruCacheSize);
+    void OptimizeFaces(const uint32_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint32_t vertexBaseIndex, uint32_t* newIndexList, uint16_t lruCacheSize);
 
 } // namespace Forsyth
 

Разница между файлами не показана из-за своего большого размера
+ 175 - 146
3rdparty/bgfx/3rdparty/glsl-optimizer/src/glsl/glcpp/glcpp-lex.c


+ 4 - 0
3rdparty/bgfx/3rdparty/glsl-optimizer/src/glsl/glsl_optimizer.cpp

@@ -170,6 +170,10 @@ struct glslopt_shader
 	{
 		for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
 			ralloc_free(whole_program->_LinkedShaders[i]);
+		for(GLuint i =0;i< whole_program->NumShaders;i++)
+			ralloc_free(whole_program->Shaders[i]);
+		ralloc_free(whole_program->Shaders);
+		ralloc_free(whole_program->InfoLog);
 		ralloc_free(whole_program);
 		ralloc_free(rawOutput);
 		ralloc_free(optimizedOutput);

+ 0 - 7
3rdparty/bgfx/3rdparty/glslang/SPIRV/GLSL.ext.AMD.h

@@ -104,14 +104,7 @@ static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"
 // SPV_AMD_shader_image_load_store_lod
 static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod";
 
-static const Capability CapabilityImageReadWriteLodAMD = static_cast<Capability>(5015);
-
 // SPV_AMD_shader_fragment_mask
 static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask";
 
-static const Capability CapabilityFragmentMaskAMD = static_cast<Capability>(5010);
-
-static const Op OpFragmentMaskFetchAMD = static_cast<Op>(5011);
-static const Op OpFragmentFetchAMD     = static_cast<Op>(5012);
-
 #endif  // #ifndef GLSLextAMD_H

+ 39 - 0
3rdparty/bgfx/3rdparty/glslang/SPIRV/GLSL.ext.EXT.h

@@ -0,0 +1,39 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+#ifndef GLSLextEXT_H
+#define GLSLextEXT_H
+
+enum BuiltIn;
+enum Op;
+enum Capability;
+
+static const int GLSLextEXTVersion = 100;
+static const int GLSLextEXTRevision = 1;
+
+static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered";
+
+#endif  // #ifndef GLSLextEXT_H

+ 115 - 64
3rdparty/bgfx/3rdparty/glslang/SPIRV/GlslangToSpv.cpp

@@ -44,6 +44,7 @@
 namespace spv {
     #include "GLSL.std.450.h"
     #include "GLSL.ext.KHR.h"
+    #include "GLSL.ext.EXT.h"
 #ifdef AMD_EXTENSIONS
     #include "GLSL.ext.AMD.h"
 #endif
@@ -106,7 +107,8 @@ private:
 //
 class TGlslangToSpvTraverser : public glslang::TIntermTraverser {
 public:
-    TGlslangToSpvTraverser(const glslang::TIntermediate*, spv::SpvBuildLogger* logger, glslang::SpvOptions& options);
+    TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate*, spv::SpvBuildLogger* logger,
+        glslang::SpvOptions& options);
     virtual ~TGlslangToSpvTraverser() { }
 
     bool visitAggregate(glslang::TVisit, glslang::TIntermAggregate*);
@@ -127,8 +129,9 @@ protected:
     spv::Decoration TranslateAuxiliaryStorageDecoration(const glslang::TQualifier& qualifier);
     spv::BuiltIn TranslateBuiltInDecoration(glslang::TBuiltInVariable, bool memberDeclaration);
     spv::ImageFormat TranslateImageFormat(const glslang::TType& type);
-    spv::SelectionControlMask TranslateSelectionControl(glslang::TSelectionControl) const;
-    spv::LoopControlMask TranslateLoopControl(glslang::TLoopControl) const;
+    spv::SelectionControlMask TranslateSelectionControl(const glslang::TIntermSelection&) const;
+    spv::SelectionControlMask TranslateSwitchControl(const glslang::TIntermSwitch&) const;
+    spv::LoopControlMask TranslateLoopControl(const glslang::TIntermLoop&, unsigned int& dependencyLength) const;
     spv::StorageClass TranslateStorageClass(const glslang::TType&);
     spv::Id createSpvVariable(const glslang::TIntermSymbol*);
     spv::Id getSampledType(const glslang::TSampler&);
@@ -185,7 +188,9 @@ protected:
     bool isTrivialLeaf(const glslang::TIntermTyped* node);
     bool isTrivial(const glslang::TIntermTyped* node);
     spv::Id createShortCircuit(glslang::TOperator, glslang::TIntermTyped& left, glslang::TIntermTyped& right);
+#ifdef AMD_EXTENSIONS
     spv::Id getExtBuiltins(const char* name);
+#endif
 
     glslang::SpvOptions& options;
     spv::Function* shaderEntry;
@@ -646,6 +651,10 @@ spv::BuiltIn TGlslangToSpvTraverser::TranslateBuiltInDecoration(glslang::TBuiltI
             builder.addCapability(spv::CapabilityPerViewAttributesNV);
         }
         return spv::BuiltInViewportMaskPerViewNV;
+    case glslang::EbvFragFullyCoveredNV:
+        builder.addExtension(spv::E_SPV_EXT_fragment_fully_covered);
+        builder.addCapability(spv::CapabilityFragmentFullyCoveredEXT);
+        return spv::BuiltInFullyCoveredEXT;
 #endif 
     default:
         return spv::BuiltInMax;
@@ -740,26 +749,42 @@ spv::ImageFormat TGlslangToSpvTraverser::TranslateImageFormat(const glslang::TTy
     }
 }
 
-spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSelectionControl(glslang::TSelectionControl selectionControl) const
+spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSelectionControl(const glslang::TIntermSelection& selectionNode) const
 {
-    switch (selectionControl) {
-    case glslang::ESelectionControlNone:        return spv::SelectionControlMaskNone;
-    case glslang::ESelectionControlFlatten:     return spv::SelectionControlFlattenMask;
-    case glslang::ESelectionControlDontFlatten: return spv::SelectionControlDontFlattenMask;
-    default:                                    return spv::SelectionControlMaskNone;
-    }
+    if (selectionNode.getFlatten())
+        return spv::SelectionControlFlattenMask;
+    if (selectionNode.getDontFlatten())
+        return spv::SelectionControlDontFlattenMask;
+    return spv::SelectionControlMaskNone;
+}
+
+spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSwitchControl(const glslang::TIntermSwitch& switchNode) const
+{
+    if (switchNode.getFlatten())
+        return spv::SelectionControlFlattenMask;
+    if (switchNode.getDontFlatten())
+        return spv::SelectionControlDontFlattenMask;
+    return spv::SelectionControlMaskNone;
 }
 
-spv::LoopControlMask TGlslangToSpvTraverser::TranslateLoopControl(glslang::TLoopControl loopControl) const
+// return a non-0 dependency if the dependency argument must be set
+spv::LoopControlMask TGlslangToSpvTraverser::TranslateLoopControl(const glslang::TIntermLoop& loopNode,
+    unsigned int& dependencyLength) const
 {
-    switch (loopControl) {
-    case glslang::ELoopControlNone:       return spv::LoopControlMaskNone;
-    case glslang::ELoopControlUnroll:     return spv::LoopControlUnrollMask;
-    case glslang::ELoopControlDontUnroll: return spv::LoopControlDontUnrollMask;
-    // TODO: DependencyInfinite
-    // TODO: DependencyLength
-    default:                              return spv::LoopControlMaskNone;
+    spv::LoopControlMask control = spv::LoopControlMaskNone;
+
+    if (loopNode.getDontUnroll())
+        control = control | spv::LoopControlDontUnrollMask;
+    if (loopNode.getUnroll())
+        control = control | spv::LoopControlUnrollMask;
+    if (loopNode.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite)
+        control = control | spv::LoopControlDependencyInfiniteMask;
+    else if (loopNode.getLoopDependency() > 0) {
+        control = control | spv::LoopControlDependencyLengthMask;
+        dependencyLength = loopNode.getLoopDependency();
     }
+
+    return control;
 }
 
 // Translate glslang type to SPIR-V storage class.
@@ -873,13 +898,13 @@ bool HasNonLayoutQualifiers(const glslang::TType& type, const glslang::TQualifie
 // Implement the TGlslangToSpvTraverser class.
 //
 
-TGlslangToSpvTraverser::TGlslangToSpvTraverser(const glslang::TIntermediate* glslangIntermediate,
+TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate* glslangIntermediate,
                                                spv::SpvBuildLogger* buildLogger, glslang::SpvOptions& options)
     : TIntermTraverser(true, false, true),
       options(options),
       shaderEntry(nullptr), currentFunction(nullptr),
       sequenceDepth(0), logger(buildLogger),
-      builder((glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger),
+      builder(spvVersion, (glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger),
       inEntryPoint(false), entryPointTerminated(false), linkageOnly(false),
       glslangIntermediate(glslangIntermediate)
 {
@@ -1757,8 +1782,9 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
     case glslang::EOpMemoryBarrierImage:
     case glslang::EOpMemoryBarrierShared:
     case glslang::EOpGroupMemoryBarrier:
+    case glslang::EOpDeviceMemoryBarrier:
     case glslang::EOpAllMemoryBarrierWithGroupSync:
-    case glslang::EOpGroupMemoryBarrierWithGroupSync:
+    case glslang::EOpDeviceMemoryBarrierWithGroupSync:
     case glslang::EOpWorkgroupMemoryBarrier:
     case glslang::EOpWorkgroupMemoryBarrierWithGroupSync:
         noReturnValue = true;
@@ -2017,7 +2043,7 @@ bool TGlslangToSpvTraverser::visitSelection(glslang::TVisit /* visit */, glslang
     node->getCondition()->traverse(this);
 
     // Selection control:
-    const spv::SelectionControlMask control = TranslateSelectionControl(node->getSelectionControl());
+    const spv::SelectionControlMask control = TranslateSelectionControl(*node);
 
     // make an "if" based on the value created by the condition
     spv::Builder::If ifBuilder(accessChainLoad(node->getCondition()->getType()), control, builder);
@@ -2059,7 +2085,7 @@ bool TGlslangToSpvTraverser::visitSwitch(glslang::TVisit /* visit */, glslang::T
     spv::Id selector = accessChainLoad(node->getCondition()->getAsTyped()->getType());
 
     // Selection control:
-    const spv::SelectionControlMask control = TranslateSelectionControl(node->getSelectionControl());
+    const spv::SelectionControlMask control = TranslateSwitchControl(*node);
 
     // browse the children to sort out code segments
     int defaultSegment = -1;
@@ -2119,9 +2145,8 @@ bool TGlslangToSpvTraverser::visitLoop(glslang::TVisit /* visit */, glslang::TIn
     builder.createBranch(&blocks.head);
 
     // Loop control:
-    const spv::LoopControlMask control = TranslateLoopControl(node->getLoopControl());
-
-    // TODO: dependency length
+    unsigned int dependencyLength = glslang::TIntermLoop::dependencyInfinite;
+    const spv::LoopControlMask control = TranslateLoopControl(*node, dependencyLength);
 
     // Spec requires back edges to target header blocks, and every header block
     // must dominate its merge block.  Make a header block first to ensure these
@@ -2131,7 +2156,7 @@ bool TGlslangToSpvTraverser::visitLoop(glslang::TVisit /* visit */, glslang::TIn
     // including merges of its own.
     builder.setLine(node->getLoc().line);
     builder.setBuildPoint(&blocks.head);
-    builder.createLoopMerge(&blocks.merge, &blocks.continue_target, control);
+    builder.createLoopMerge(&blocks.merge, &blocks.continue_target, control, dependencyLength);
     if (node->testFirst() && node->getTest()) {
         spv::Block& test = builder.makeNewBlock();
         builder.createBranch(&test);
@@ -2656,13 +2681,6 @@ void TGlslangToSpvTraverser::decorateStructType(const glslang::TType& type,
         builder.addCapability(spv::CapabilityGeometryStreams);
         builder.addDecoration(spvType, spv::DecorationStream, type.getQualifier().layoutStream);
     }
-    if (glslangIntermediate->getXfbMode()) {
-        builder.addCapability(spv::CapabilityTransformFeedback);
-        if (type.getQualifier().hasXfbStride())
-            builder.addDecoration(spvType, spv::DecorationXfbStride, type.getQualifier().layoutXfbStride);
-        if (type.getQualifier().hasXfbBuffer())
-            builder.addDecoration(spvType, spv::DecorationXfbBuffer, type.getQualifier().layoutXfbBuffer);
-    }
 }
 
 // Turn the expression forming the array size into an id.
@@ -5431,40 +5449,65 @@ spv::Id TGlslangToSpvTraverser::createNoArgOperation(glslang::TOperator op, spv:
         builder.createNoResultOp(spv::OpEndPrimitive);
         return 0;
     case glslang::EOpBarrier:
-        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, spv::MemorySemanticsMaskNone);
+        if (glslangIntermediate->getStage() == EShLangTessControl) {
+            builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeInvocation, spv::MemorySemanticsMaskNone);
+            // TODO: prefer the following, when available:
+            // builder.createControlBarrier(spv::ScopePatch, spv::ScopePatch,
+            //                                 spv::MemorySemanticsPatchMask |
+            //                                 spv::MemorySemanticsAcquireReleaseMask);
+        } else {
+            builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup,
+                                            spv::MemorySemanticsWorkgroupMemoryMask |
+                                            spv::MemorySemanticsAcquireReleaseMask);
+        }
         return 0;
     case glslang::EOpMemoryBarrier:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory);
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpMemoryBarrierAtomicCounter:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAtomicCounterMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAtomicCounterMemoryMask |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpMemoryBarrierBuffer:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpMemoryBarrierImage:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsImageMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsImageMemoryMask |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpMemoryBarrierShared:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsWorkgroupMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsWorkgroupMemoryMask |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpGroupMemoryBarrier:
-        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsCrossWorkgroupMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsAllMemory |
+                                                         spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpAllMemoryBarrierWithGroupSync:
-        // Control barrier with non-"None" semantic is also a memory barrier.
-        builder.createControlBarrier(spv::ScopeDevice, spv::ScopeDevice, spv::MemorySemanticsAllMemory);
+        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice,
+                                        spv::MemorySemanticsAllMemory |
+                                        spv::MemorySemanticsAcquireReleaseMask);
+        return 0;
+    case glslang::EOpDeviceMemoryBarrier:
+        builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask |
+                                                      spv::MemorySemanticsImageMemoryMask |
+                                                      spv::MemorySemanticsAcquireReleaseMask);
         return 0;
-    case glslang::EOpGroupMemoryBarrierWithGroupSync:
-        // Control barrier with non-"None" semantic is also a memory barrier.
-        builder.createControlBarrier(spv::ScopeDevice, spv::ScopeDevice, spv::MemorySemanticsCrossWorkgroupMemoryMask);
+    case glslang::EOpDeviceMemoryBarrierWithGroupSync:
+        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask |
+                                                                            spv::MemorySemanticsImageMemoryMask |
+                                                                            spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpWorkgroupMemoryBarrier:
-        builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask);
+        builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask |
+                                                         spv::MemorySemanticsAcquireReleaseMask);
         return 0;
     case glslang::EOpWorkgroupMemoryBarrierWithGroupSync:
-        // Control barrier with non-"None" semantic is also a memory barrier.
-        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask);
+        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup,
+                                        spv::MemorySemanticsWorkgroupMemoryMask |
+                                        spv::MemorySemanticsAcquireReleaseMask);
         return 0;
 #ifdef AMD_EXTENSIONS
     case glslang::EOpTime:
@@ -5503,15 +5546,6 @@ spv::Id TGlslangToSpvTraverser::getSymbolId(const glslang::TIntermSymbol* symbol
             builder.addDecoration(id, spv::DecorationIndex, symbol->getQualifier().layoutIndex);
         if (symbol->getQualifier().hasComponent())
             builder.addDecoration(id, spv::DecorationComponent, symbol->getQualifier().layoutComponent);
-        if (glslangIntermediate->getXfbMode()) {
-            builder.addCapability(spv::CapabilityTransformFeedback);
-            if (symbol->getQualifier().hasXfbStride())
-                builder.addDecoration(id, spv::DecorationXfbStride, symbol->getQualifier().layoutXfbStride);
-            if (symbol->getQualifier().hasXfbBuffer())
-                builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer);
-            if (symbol->getQualifier().hasXfbOffset())
-                builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutXfbOffset);
-        }
         // atomic counters use this:
         if (symbol->getQualifier().hasOffset())
             builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutOffset);
@@ -5538,8 +5572,14 @@ spv::Id TGlslangToSpvTraverser::getSymbolId(const glslang::TIntermSymbol* symbol
         builder.addCapability(spv::CapabilityTransformFeedback);
         if (symbol->getQualifier().hasXfbStride())
             builder.addDecoration(id, spv::DecorationXfbStride, symbol->getQualifier().layoutXfbStride);
-        if (symbol->getQualifier().hasXfbBuffer())
+        if (symbol->getQualifier().hasXfbBuffer()) {
             builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer);
+            unsigned stride = glslangIntermediate->getXfbStride(symbol->getQualifier().layoutXfbBuffer);
+            if (stride != glslang::TQualifier::layoutXfbStrideEnd)
+                builder.addDecoration(id, spv::DecorationXfbStride, stride);
+        }
+        if (symbol->getQualifier().hasXfbOffset())
+            builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutXfbOffset);
     }
 
     if (symbol->getType().isImage()) {
@@ -5933,6 +5973,7 @@ spv::Id TGlslangToSpvTraverser::createShortCircuit(glslang::TOperator op, glslan
     return builder.createOp(spv::OpPhi, boolTypeId, phiOperands);
 }
 
+#ifdef AMD_EXTENSIONS
 // Return type Id of the imported set of extended instructions corresponds to the name.
 // Import this set if it has not been imported yet.
 spv::Id TGlslangToSpvTraverser::getExtBuiltins(const char* name)
@@ -5946,6 +5987,7 @@ spv::Id TGlslangToSpvTraverser::getExtBuiltins(const char* name)
         return extBuiltins;
     }
 }
+#endif
 
 };  // end anonymous namespace
 
@@ -5964,7 +6006,10 @@ void GetSpirvVersion(std::string& version)
 // or a different instruction sequence to do something gets used).
 int GetSpirvGeneratorVersion()
 {
-    return 2;
+    // return 1; // start
+    // return 2; // EOpAtomicCounterDecrement gets a post decrement, to map between GLSL -> SPIR-V
+    // return 3; // change/correct barrier-instruction operands, to match memory model group decisions
+       return 4; // some deeper access chains: for dynamic vector component, and local Boolean component
 }
 
 // Write SPIR-V out to a binary file
@@ -6040,7 +6085,7 @@ void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsign
 
     glslang::GetThreadPoolAllocator().push();
 
-    TGlslangToSpvTraverser it(&intermediate, logger, *options);
+    TGlslangToSpvTraverser it(intermediate.getSpv().spv, &intermediate, logger, *options);
     root->traverse(&it);
     it.finishSpv();
     it.dumpSpv(spirv);
@@ -6063,20 +6108,26 @@ void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsign
         });
 
         optimizer.RegisterPass(CreateInlineExhaustivePass());
+        optimizer.RegisterPass(CreateEliminateDeadFunctionsPass());
+        optimizer.RegisterPass(CreateScalarReplacementPass());
         optimizer.RegisterPass(CreateLocalAccessChainConvertPass());
         optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass());
         optimizer.RegisterPass(CreateLocalSingleStoreElimPass());
         optimizer.RegisterPass(CreateInsertExtractElimPass());
+        optimizer.RegisterPass(CreateDeadInsertElimPass());
         optimizer.RegisterPass(CreateAggressiveDCEPass());
         optimizer.RegisterPass(CreateDeadBranchElimPass());
         optimizer.RegisterPass(CreateCFGCleanupPass());
         optimizer.RegisterPass(CreateBlockMergePass());
         optimizer.RegisterPass(CreateLocalMultiStoreElimPass());
         optimizer.RegisterPass(CreateInsertExtractElimPass());
+        optimizer.RegisterPass(CreateDeadInsertElimPass());
+        if (options->optimizeSize) {
+            optimizer.RegisterPass(CreateRedundancyEliminationPass());
+            // TODO(greg-lunarg): Add this when AMD driver issues are resolved
+            // optimizer.RegisterPass(CreateCommonUniformElimPass());
+        }
         optimizer.RegisterPass(CreateAggressiveDCEPass());
-        // TODO(greg-lunarg): Add this when AMD driver issues are resolved
-        // if (options->optimizeSize)
-        //     optimizer.RegisterPass(CreateCommonUniformElimPass());
 
         if (!optimizer.Run(spirv.data(), spirv.size(), &spirv))
             return;

+ 88 - 67
3rdparty/bgfx/3rdparty/glslang/SPIRV/SpvBuilder.cpp

@@ -56,7 +56,8 @@
 
 namespace spv {
 
-Builder::Builder(unsigned int magicNumber, SpvBuildLogger* buildLogger) :
+Builder::Builder(unsigned int spvVersion, unsigned int magicNumber, SpvBuildLogger* buildLogger) :
+    spvVersion(spvVersion),
     source(SourceLanguageUnknown),
     sourceVersion(0),
     sourceFileStringId(NoResult),
@@ -1387,16 +1388,13 @@ Id Builder::createLvalueSwizzle(Id typeId, Id target, Id source, const std::vect
         return createCompositeInsert(source, target, typeId, channels.front());
 
     Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle);
+
     assert(isVector(target));
     swizzle->addIdOperand(target);
-    if (accessChain.component != NoResult)
-        // For dynamic component selection, source does not involve in l-value swizzle
-        swizzle->addIdOperand(target);
-    else {
-        assert(getNumComponents(source) == (int)channels.size());
-        assert(isVector(source));
-        swizzle->addIdOperand(source);
-    }
+
+    assert(getNumComponents(source) == (int)channels.size());
+    assert(isVector(source));
+    swizzle->addIdOperand(source);
 
     // Set up an identity shuffle from the base value to the result value
     unsigned int components[4];
@@ -1405,12 +1403,8 @@ Id Builder::createLvalueSwizzle(Id typeId, Id target, Id source, const std::vect
         components[i] = i;
 
     // Punch in the l-value swizzle
-    for (int i = 0; i < (int)channels.size(); ++i) {
-        if (accessChain.component != NoResult)
-            components[i] = channels[i]; // Only shuffle the base value
-        else
-            components[channels[i]] = numTargetComponents + i;
-    }
+    for (int i = 0; i < (int)channels.size(); ++i)
+        components[channels[i]] = numTargetComponents + i;
 
     // finish the instruction with these components selectors
     for (int i = 0; i < numTargetComponents; ++i)
@@ -2202,7 +2196,7 @@ void Builder::accessChainPushSwizzle(std::vector<unsigned>& swizzle, Id preSwizz
         accessChain.preSwizzleBaseType = preSwizzleBaseType;
 
     // if needed, propagate the swizzle for the current access chain
-    if (accessChain.swizzle.size()) {
+    if (accessChain.swizzle.size() > 0) {
         std::vector<unsigned> oldSwizzle = accessChain.swizzle;
         accessChain.swizzle.resize(0);
         for (unsigned int i = 0; i < swizzle.size(); ++i) {
@@ -2223,24 +2217,18 @@ void Builder::accessChainStore(Id rvalue)
 
     transferAccessChainSwizzle(true);
     Id base = collapseAccessChain();
+    Id source = rvalue;
+
+    // dynamic component should be gone
+    assert(accessChain.component == NoResult);
 
     // If swizzle still exists, it is out-of-order or not full, we must load the target vector,
     // extract and insert elements to perform writeMask and/or swizzle.
-    Id source = NoResult;
-    if (accessChain.swizzle.size()) {
+    if (accessChain.swizzle.size() > 0) {
         Id tempBaseId = createLoad(base);
-        source = createLvalueSwizzle(getTypeId(tempBaseId), tempBaseId, rvalue, accessChain.swizzle);
+        source = createLvalueSwizzle(getTypeId(tempBaseId), tempBaseId, source, accessChain.swizzle);
     }
 
-    // dynamic component selection
-    if (accessChain.component != NoResult) {
-        Id tempBaseId = (source == NoResult) ? createLoad(base) : source;
-        source = createVectorInsertDynamic(tempBaseId, getTypeId(tempBaseId), rvalue, accessChain.component);
-    }
-
-    if (source == NoResult)
-        source = rvalue;
-
     createStore(source, base);
 }
 
@@ -2250,7 +2238,7 @@ Id Builder::accessChainLoad(Decoration precision, Id resultType)
     Id id;
 
     if (accessChain.isRValue) {
-        // transfer access chain, but keep it static, so we can stay in registers
+        // transfer access chain, but try to stay in registers
         transferAccessChainSwizzle(false);
         if (accessChain.indexChain.size() > 0) {
             Id swizzleBase = accessChain.preSwizzleBaseType != NoType ? accessChain.preSwizzleBaseType : resultType;
@@ -2298,16 +2286,16 @@ Id Builder::accessChainLoad(Decoration precision, Id resultType)
         return id;
 
     // Do remaining swizzling
-    // First, static swizzling
-    if (accessChain.swizzle.size()) {
-        // static swizzle
+
+    // Do the basic swizzle
+    if (accessChain.swizzle.size() > 0) {
         Id swizzledType = getScalarTypeId(getTypeId(id));
         if (accessChain.swizzle.size() > 1)
             swizzledType = makeVectorType(swizzledType, (int)accessChain.swizzle.size());
         id = createRvalueSwizzle(precision, swizzledType, id, accessChain.swizzle);
     }
 
-    // dynamic single-component selection
+    // Do the dynamic component
     if (accessChain.component != NoResult)
         id = setPrecision(createVectorExtractDynamic(id, resultType, accessChain.component), precision);
 
@@ -2403,7 +2391,7 @@ void Builder::dump(std::vector<unsigned int>& out) const
 {
     // Header, before first instructions:
     out.push_back(MagicNumber);
-    out.push_back(Version);
+    out.push_back(spvVersion);
     out.push_back(builderNumber);
     out.push_back(uniqueId + 1);
     out.push_back(0);
@@ -2457,26 +2445,66 @@ void Builder::dump(std::vector<unsigned int>& out) const
 // Protected methods.
 //
 
-// Turn the described access chain in 'accessChain' into an instruction
+// Turn the described access chain in 'accessChain' into one or more instructions
 // computing its address.  This *cannot* include complex swizzles, which must
-// be handled after this is called, but it does include swizzles that select
-// an individual element, as a single address of a scalar type can be
-// computed by an OpAccessChain instruction.
+// be handled after this is called.
+//
+// Can generate code.
 Id Builder::collapseAccessChain()
 {
     assert(accessChain.isRValue == false);
 
-    if (accessChain.indexChain.size() > 0) {
-        if (accessChain.instr == 0) {
-            StorageClass storageClass = (StorageClass)module.getStorageClass(getTypeId(accessChain.base));
-            accessChain.instr = createAccessChain(storageClass, accessChain.base, accessChain.indexChain);
-        }
-
+    // did we already emit an access chain for this?
+    if (accessChain.instr != NoResult)
         return accessChain.instr;
-    } else
+
+    // If we have a dynamic component, we can still transfer
+    // that into a final operand to the access chain.  We need to remap the
+    // dynamic component through the swizzle to get a new dynamic component to
+    // update.
+    //
+    // This was not done in transferAccessChainSwizzle() because it might
+    // generate code.
+    remapDynamicSwizzle();
+    if (accessChain.component != NoResult) {
+        // transfer the dynamic component to the access chain
+        accessChain.indexChain.push_back(accessChain.component);
+        accessChain.component = NoResult;
+    }
+
+    // note that non-trivial swizzling is left pending
+
+    // do we have an access chain?
+    if (accessChain.indexChain.size() == 0)
         return accessChain.base;
 
-    // note that non-trivial swizzling is left pending...
+    // emit the access chain
+    StorageClass storageClass = (StorageClass)module.getStorageClass(getTypeId(accessChain.base));
+    accessChain.instr = createAccessChain(storageClass, accessChain.base, accessChain.indexChain);
+
+    return accessChain.instr;
+}
+
+// For a dynamic component selection of a swizzle.
+//
+// Turn the swizzle and dynamic component into just a dynamic component.
+//
+// Generates code.
+void Builder::remapDynamicSwizzle()
+{
+    // do we have a swizzle to remap a dynamic component through?
+    if (accessChain.component != NoResult && accessChain.swizzle.size() > 1) {
+        // build a vector of the swizzle for the component to map into
+        std::vector<Id> components;
+        for (int c = 0; c < accessChain.swizzle.size(); ++c)
+            components.push_back(makeUintConstant(accessChain.swizzle[c]));
+        Id mapType = makeVectorType(makeUintType(32), (int)accessChain.swizzle.size());
+        Id map = makeCompositeConstant(mapType, components);
+
+        // use it
+        accessChain.component = createVectorExtractDynamic(map, makeUintType(32), accessChain.component);
+        accessChain.swizzle.clear();
+    }
 }
 
 // clear out swizzle if it is redundant, that is reselecting the same components
@@ -2502,38 +2530,30 @@ void Builder::simplifyAccessChainSwizzle()
 
 // To the extent any swizzling can become part of the chain
 // of accesses instead of a post operation, make it so.
-// If 'dynamic' is true, include transferring a non-static component index,
-// otherwise, only transfer static indexes.
+// If 'dynamic' is true, include transferring the dynamic component,
+// otherwise, leave it pending.
 //
-// Also, Boolean vectors are likely to be special.  While
-// for external storage, they should only be integer types,
-// function-local bool vectors could use sub-word indexing,
-// so keep that as a separate Insert/Extract on a loaded vector.
+// Does not generate code; it just updates the access chain.
 void Builder::transferAccessChainSwizzle(bool dynamic)
 {
-    // too complex?
-    if (accessChain.swizzle.size() > 1)
-        return;
-
     // non existent?
     if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult)
         return;
 
-    // single component...
-
-    // skip doing it for Boolean vectors
-    if (isBoolType(getContainedTypeId(accessChain.preSwizzleBaseType)))
+    // too complex?
+    // (this requires either a swizzle, or generating code for a dynamic component)
+    if (accessChain.swizzle.size() > 1)
         return;
 
+    // single component, either in the swizzle and/or dynamic component
     if (accessChain.swizzle.size() == 1) {
-        // handle static component
+        assert(accessChain.component == NoResult);
+        // handle static component selection
         accessChain.indexChain.push_back(makeUintConstant(accessChain.swizzle.front()));
         accessChain.swizzle.clear();
-        // note, the only valid remaining dynamic access would be to this one
-        // component, so don't bother even looking at accessChain.component
         accessChain.preSwizzleBaseType = NoType;
-        accessChain.component = NoResult;
     } else if (dynamic && accessChain.component != NoResult) {
+        assert(accessChain.swizzle.size() == 0);
         // handle dynamic component
         accessChain.indexChain.push_back(accessChain.component);
         accessChain.preSwizzleBaseType = NoType;
@@ -2572,12 +2592,15 @@ void Builder::createSelectionMerge(Block* mergeBlock, unsigned int control)
     buildPoint->addInstruction(std::unique_ptr<Instruction>(merge));
 }
 
-void Builder::createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control)
+void Builder::createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control,
+                              unsigned int dependencyLength)
 {
     Instruction* merge = new Instruction(OpLoopMerge);
     merge->addIdOperand(mergeBlock->getId());
     merge->addIdOperand(continueBlock->getId());
     merge->addImmediateOperand(control);
+    if ((control & LoopControlDependencyLengthMask) != 0)
+        merge->addImmediateOperand(dependencyLength);
     buildPoint->addInstruction(std::unique_ptr<Instruction>(merge));
 }
 
@@ -2644,8 +2667,6 @@ void Builder::dumpInstructions(std::vector<unsigned int>& out, const std::vector
 void Builder::dumpModuleProcesses(std::vector<unsigned int>& out) const
 {
     for (int i = 0; i < (int)moduleProcesses.size(); ++i) {
-        // TODO: switch this out for the 1.1 headers
-        const spv::Op OpModuleProcessed = (spv::Op)330;
         Instruction moduleProcessed(OpModuleProcessed);
         moduleProcessed.addStringOperand(moduleProcesses[i]);
         moduleProcessed.dump(out);

+ 11 - 6
3rdparty/bgfx/3rdparty/glslang/SPIRV/SpvBuilder.h

@@ -60,7 +60,7 @@ namespace spv {
 
 class Builder {
 public:
-    Builder(unsigned int userNumber, SpvBuildLogger* logger);
+    Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger);
     virtual ~Builder();
 
     static const int maxMatrixSize = 4;
@@ -533,12 +533,15 @@ public:
     // push new swizzle onto the end of any existing swizzle, merging into a single swizzle
     void accessChainPushSwizzle(std::vector<unsigned>& swizzle, Id preSwizzleBaseType);
 
-    // push a variable component selection onto the access chain; supporting only one, so unsided
+    // push a dynamic component selection onto the access chain, only applicable with a
+    // non-trivial swizzle or no swizzle
     void accessChainPushComponent(Id component, Id preSwizzleBaseType)
     {
-        accessChain.component = component;
-        if (accessChain.preSwizzleBaseType == NoType)
-            accessChain.preSwizzleBaseType = preSwizzleBaseType;
+        if (accessChain.swizzle.size() != 1) {
+            accessChain.component = component;
+            if (accessChain.preSwizzleBaseType == NoType)
+                accessChain.preSwizzleBaseType = preSwizzleBaseType;
+        }
     }
 
     // use accessChain and swizzle to store value
@@ -561,7 +564,7 @@ public:
 
     void createBranch(Block* block);
     void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
-    void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control);
+    void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, unsigned int dependencyLength);
 
     // Sets to generate opcode for specialization constants.
     void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; }
@@ -577,6 +580,7 @@ public:
     Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const;
     Id findCompositeConstant(Op typeClass, const std::vector<Id>& comps) const;
     Id collapseAccessChain();
+    void remapDynamicSwizzle();
     void transferAccessChainSwizzle(bool dynamic);
     void simplifyAccessChainSwizzle();
     void createAndSetNoPredecessorBlock(const char*);
@@ -585,6 +589,7 @@ public:
     void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;
     void dumpModuleProcesses(std::vector<unsigned int>&) const;
 
+    unsigned int spvVersion;     // the version of SPIR-V to emit in the header
     SourceLanguage source;
     int sourceVersion;
     spv::Id sourceFileStringId;

+ 6 - 0
3rdparty/bgfx/3rdparty/glslang/SPIRV/doc.cpp

@@ -49,6 +49,7 @@ namespace spv {
     extern "C" {
         // Include C-based headers that don't have a namespace
         #include "GLSL.ext.KHR.h"
+        #include "GLSL.ext.EXT.h"
 #ifdef AMD_EXTENSIONS
         #include "GLSL.ext.AMD.h"
 #endif
@@ -351,6 +352,8 @@ const char* BuiltInString(int builtIn)
     case 5262: return "ViewportMaskPerViewNV";
 #endif
 
+    case 5264: return "FullyCoveredEXT";
+
     case BuiltInCeiling:
     default: return "Bad";
     }
@@ -862,6 +865,8 @@ const char* CapabilityString(int info)
     case 5260: return "PerViewAttributesNV";
 #endif
 
+    case 5265: return "FragmentFullyCoveredEXT";
+
     case CapabilityCeiling:
     default: return "Bad";
     }
@@ -2553,6 +2558,7 @@ void Parameterize()
     InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Merge Block'");
     InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Continue Target'");
     InstructionDesc[OpLoopMerge].operands.push(OperandLoop, "");
+    InstructionDesc[OpLoopMerge].operands.push(OperandOptionalLiteral, "");
 
     InstructionDesc[OpSelectionMerge].operands.push(OperandId, "'Merge Block'");
     InstructionDesc[OpSelectionMerge].operands.push(OperandSelect, "");

+ 52 - 5
3rdparty/bgfx/3rdparty/glslang/SPIRV/spirv.hpp

@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2017 The Khronos Group Inc.
+// Copyright (c) 2014-2018 The Khronos Group Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and/or associated documentation files (the "Materials"),
@@ -46,12 +46,12 @@ namespace spv {
 
 typedef unsigned int Id;
 
-#define SPV_VERSION 0x10000
-#define SPV_REVISION 12
+#define SPV_VERSION 0x10200
+#define SPV_REVISION 3
 
 static const unsigned int MagicNumber = 0x07230203;
-static const unsigned int Version = 0x00010000;
-static const unsigned int Revision = 12;
+static const unsigned int Version = 0x00010200;
+static const unsigned int Revision = 3;
 static const unsigned int OpCodeMask = 0xffff;
 static const unsigned int WordCountShift = 16;
 
@@ -122,7 +122,15 @@ enum ExecutionMode {
     ExecutionModeOutputTriangleStrip = 29,
     ExecutionModeVecTypeHint = 30,
     ExecutionModeContractionOff = 31,
+    ExecutionModeInitializer = 33,
+    ExecutionModeFinalizer = 34,
+    ExecutionModeSubgroupSize = 35,
+    ExecutionModeSubgroupsPerWorkgroup = 36,
+    ExecutionModeSubgroupsPerWorkgroupId = 37,
+    ExecutionModeLocalSizeId = 38,
+    ExecutionModeLocalSizeHintId = 39,
     ExecutionModePostDepthCoverage = 4446,
+    ExecutionModeStencilRefReplacingEXT = 5027,
     ExecutionModeMax = 0x7fffffff,
 };
 
@@ -377,6 +385,9 @@ enum Decoration {
     DecorationNoContraction = 42,
     DecorationInputAttachmentIndex = 43,
     DecorationAlignment = 44,
+    DecorationMaxByteOffset = 45,
+    DecorationAlignmentId = 46,
+    DecorationMaxByteOffsetId = 47,
     DecorationExplicitInterpAMD = 4999,
     DecorationOverrideCoverageNV = 5248,
     DecorationPassthroughNV = 5250,
@@ -450,6 +461,7 @@ enum BuiltIn {
     BuiltInSecondaryViewportMaskNV = 5258,
     BuiltInPositionPerViewNV = 5261,
     BuiltInViewportMaskPerViewNV = 5262,
+    BuiltInFullyCoveredEXT = 5264,
     BuiltInMax = 0x7fffffff,
 };
 
@@ -468,6 +480,8 @@ enum SelectionControlMask {
 enum LoopControlShift {
     LoopControlUnrollShift = 0,
     LoopControlDontUnrollShift = 1,
+    LoopControlDependencyInfiniteShift = 2,
+    LoopControlDependencyLengthShift = 3,
     LoopControlMax = 0x7fffffff,
 };
 
@@ -475,6 +489,8 @@ enum LoopControlMask {
     LoopControlMaskNone = 0,
     LoopControlUnrollMask = 0x00000001,
     LoopControlDontUnrollMask = 0x00000002,
+    LoopControlDependencyInfiniteMask = 0x00000004,
+    LoopControlDependencyLengthMask = 0x00000008,
 };
 
 enum FunctionControlShift {
@@ -625,6 +641,9 @@ enum Capability {
     CapabilityStorageImageReadWithoutFormat = 55,
     CapabilityStorageImageWriteWithoutFormat = 56,
     CapabilityMultiViewport = 57,
+    CapabilitySubgroupDispatch = 58,
+    CapabilityNamedBarrier = 59,
+    CapabilityPipeStorage = 60,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
     CapabilitySubgroupVoteKHR = 4431,
@@ -641,7 +660,9 @@ enum Capability {
     CapabilityAtomicStorageOps = 4445,
     CapabilitySampleMaskPostDepthCoverage = 4447,
     CapabilityImageGatherBiasLodAMD = 5009,
+    CapabilityFragmentMaskAMD = 5010,
     CapabilityStencilExportEXT = 5013,
+    CapabilityImageReadWriteLodAMD = 5015,
     CapabilitySampleMaskOverrideCoverageNV = 5249,
     CapabilityGeometryShaderPassthroughNV = 5251,
     CapabilityShaderViewportIndexLayerEXT = 5254,
@@ -649,6 +670,10 @@ enum Capability {
     CapabilityShaderViewportMaskNV = 5255,
     CapabilityShaderStereoViewNV = 5259,
     CapabilityPerViewAttributesNV = 5260,
+    CapabilityFragmentFullyCoveredEXT = 5265,
+    CapabilitySubgroupShuffleINTEL = 5568,
+    CapabilitySubgroupBufferBlockIOINTEL = 5569,
+    CapabilitySubgroupImageBlockIOINTEL = 5570,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -947,6 +972,18 @@ enum Op {
     OpAtomicFlagTestAndSet = 318,
     OpAtomicFlagClear = 319,
     OpImageSparseRead = 320,
+    OpSizeOf = 321,
+    OpTypePipeStorage = 322,
+    OpConstantPipeStorage = 323,
+    OpCreatePipeFromPipeStorage = 324,
+    OpGetKernelLocalSizeForSubgroupCount = 325,
+    OpGetKernelMaxNumSubgroups = 326,
+    OpTypeNamedBarrier = 327,
+    OpNamedBarrierInitialize = 328,
+    OpMemoryNamedBarrier = 329,
+    OpModuleProcessed = 330,
+    OpExecutionModeId = 331,
+    OpDecorateId = 332,
     OpSubgroupBallotKHR = 4421,
     OpSubgroupFirstInvocationKHR = 4422,
     OpSubgroupAllKHR = 4428,
@@ -961,6 +998,16 @@ enum Op {
     OpGroupFMaxNonUniformAMD = 5005,
     OpGroupUMaxNonUniformAMD = 5006,
     OpGroupSMaxNonUniformAMD = 5007,
+    OpFragmentMaskFetchAMD = 5011,
+    OpFragmentFetchAMD = 5012,
+    OpSubgroupShuffleINTEL = 5571,
+    OpSubgroupShuffleDownINTEL = 5572,
+    OpSubgroupShuffleUpINTEL = 5573,
+    OpSubgroupShuffleXorINTEL = 5574,
+    OpSubgroupBlockReadINTEL = 5575,
+    OpSubgroupBlockWriteINTEL = 5576,
+    OpSubgroupImageBlockReadINTEL = 5577,
+    OpSubgroupImageBlockWriteINTEL = 5578,
     OpMax = 0x7fffffff,
 };
 

+ 1 - 4
3rdparty/bgfx/3rdparty/glslang/SPIRV/spvIR.h

@@ -74,11 +74,8 @@ const Decoration NoPrecision = DecorationMax;
 
 POTENTIALLY_UNUSED
 const MemorySemanticsMask MemorySemanticsAllMemory =
-                (MemorySemanticsMask)(MemorySemanticsSequentiallyConsistentMask |
-                                      MemorySemanticsUniformMemoryMask |
-                                      MemorySemanticsSubgroupMemoryMask |
+                (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask |
                                       MemorySemanticsWorkgroupMemoryMask |
-                                      MemorySemanticsCrossWorkgroupMemoryMask |
                                       MemorySemanticsAtomicCounterMemoryMask |
                                       MemorySemanticsImageMemoryMask);
 

+ 10 - 2
3rdparty/bgfx/3rdparty/glslang/StandAlone/StandAlone.cpp

@@ -98,6 +98,7 @@ enum TOptions {
     EOptionStdin                = (1 << 27),
     EOptionOptimizeDisable      = (1 << 28),
     EOptionOptimizeSize         = (1 << 29),
+    EOptionInvertY              = (1 << 30),
 };
 
 //
@@ -158,7 +159,7 @@ std::vector<std::string> IncludeDirectoryList;
 int ClientInputSemanticsVersion = 100;   // maps to, say, #define VULKAN 100
 int VulkanClientVersion = 100;           // would map to, say, Vulkan 1.0
 int OpenGLClientVersion = 450;           // doesn't influence anything yet, but maps to OpenGL 4.50
-unsigned int TargetVersion = 0x00001000; // maps to, say, SPIR-V 1.0
+unsigned int TargetVersion = 0x00010000; // maps to, say, SPIR-V 1.0
 std::vector<std::string> Processes;      // what should be recorded by OpModuleProcessed, or equivalent
 
 // Per descriptor-set binding base data
@@ -519,6 +520,9 @@ void ProcessArguments(std::vector<std::unique_ptr<glslang::TWorkItem>>& workItem
                         variableName = argv[1];
                         bumpArg();
                         break;
+                    } else if (lowerword == "invert-y" ||  // synonyms
+                               lowerword == "iy") {
+                        Options |= EOptionInvertY;
                     } else {
                         usage();
                     }
@@ -840,6 +844,9 @@ void CompileAndLinkShaderUnits(std::vector<ShaderCompUnit> compUnits)
         if (Options & EOptionAutoMapLocations)
             shader->setAutoMapLocations(true);
 
+        if (Options & EOptionInvertY)
+            shader->setInvertY(true);
+
         // Set up the environment, some subsettings take precedence over earlier
         // ways of setting things.
         if (Options & EOptionSpv) {
@@ -848,7 +855,7 @@ void CompileAndLinkShaderUnits(std::vector<ShaderCompUnit> compUnits)
                                                                 : glslang::EShSourceGlsl,
                                         compUnit.stage, glslang::EShClientVulkan, ClientInputSemanticsVersion);
                 shader->setEnvClient(glslang::EShClientVulkan, VulkanClientVersion);
-                shader->setEnvTarget(glslang::EshTargetSpv, TargetVersion);
+                shader->setEnvTarget(glslang::EShTargetSpv, TargetVersion);
             } else {
                 shader->setEnvInput((Options & EOptionReadHlsl) ? glslang::EShSourceHlsl
                                                                 : glslang::EShSourceGlsl,
@@ -1359,6 +1366,7 @@ void usage()
            "                                       uint32_t array named <name>\n"
            "                                       initialized with the shader binary code.\n"
            "  --vn <name>                          synonym for --variable-name <name>\n"
+           "  --invert-y | --iy                    invert position.Y output in vertex shader\n"
            );
 
     exit(EFailUsage);

+ 6 - 0
3rdparty/bgfx/3rdparty/glslang/Test/310AofA.vert

@@ -113,3 +113,9 @@ out float outArray[2][3];  // ERROR
 uniform ubaa {
     int a;
 } ubaaname[2][3];  // ERROR
+
+vec3 func(in mat3[2] x[3])
+{
+	mat3 a0 = x[2][1];
+    return a0[2];
+}

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/310implicitSizeArrayError.vert

@@ -1,5 +1,5 @@
 #version 310 es
-layout (location=0) uniform Block {
+layout (binding=0) uniform Block {
   highp int a[];
 } uni;
 layout (location=0) out highp int o;

+ 6 - 0
3rdparty/bgfx/3rdparty/glslang/Test/440.vert

@@ -174,6 +174,12 @@ out layout(xfb_buffer=7, xfb_offset=0) bblck10 {  // link ERROR, implicit stride
     float f;
 } bbinst10;
 
+layout(xfb_buffer = 3) out;
+layout(xfb_offset = 32) out gl_PerVertex {
+    layout(xfb_buffer = 2) float gl_PointSize; // ERROR, change in xfb_buffer
+    vec4 gl_Position;
+};
+
 int drawParamsBad()
 {
     return gl_BaseVertexARB + gl_BaseInstanceARB + gl_DrawIDARB; // ERROR, extension not requested

+ 8 - 0
3rdparty/bgfx/3rdparty/glslang/Test/450.vert

@@ -46,3 +46,11 @@ void foo()
     allInvocationsEqual(b1);  // ERROR, need 4.6
 }
 ; // ERROR: no extraneous semicolons
+
+layout(location = 0) uniform locBlock {        // ERROR, no location on uniform block
+    int a;
+};
+
+layout(location = 0) buffer locBuffBlock {     // ERROR, no location on buffer block
+    int b;
+};

+ 15 - 0
3rdparty/bgfx/3rdparty/glslang/Test/460.frag

@@ -15,3 +15,18 @@ void main()
     b1 = allInvocations(b1);
     b1 = allInvocationsEqual(b1);
 }
+
+void attExtBad()
+{
+    // ERRORs, not enabled
+    [[dependency_length(1+3)]] for (int i = 0; i < 8; ++i) { }
+    [[flatten]]                if (true) { } else { }
+}
+
+#extension GL_EXT_control_flow_attributes : enable
+
+void attExt()
+{
+    [[dependency_length(-3)]] do {  } while(true); // ERROR, not positive
+    [[dependency_length(0)]] do {  } while(true);  // ERROR, not positive
+}

+ 1 - 9
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.aliasOpaque.frag.out

@@ -1,7 +1,7 @@
 hlsl.aliasOpaque.frag
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 87
 
                               Capability Shader
@@ -11,15 +11,9 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 500
                               Name 4  "main"
-                              Name 9  "OS"
-                              MemberName 9(OS) 0  "ss"
-                              MemberName 9(OS) 1  "a"
-                              MemberName 9(OS) 2  "tex"
-                              Name 44  "gss2"
                               Name 47  "gss"
                               Name 51  "gtex"
                               Name 62  "@entryPointOutput"
-                              Decorate 44(gss2) DescriptorSet 0
                               Decorate 47(gss) DescriptorSet 0
                               Decorate 51(gtex) DescriptorSet 0
                               Decorate 62(@entryPointOutput) Location 0
@@ -28,7 +22,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                6:             TypeSampler
                7:             TypeFloat 32
                8:             TypeImage 7(float) 2D sampled format:Unknown
-           9(OS):             TypeStruct 6 7(float) 8
               11:             TypeVector 7(float) 4
               32:             TypeSampledImage 8
               34:             TypeVector 7(float) 2
@@ -36,7 +29,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
               36:    7(float) Constant 1050253722
               37:   34(fvec2) ConstantComposite 35 36
               43:             TypePointer UniformConstant 6
-        44(gss2):     43(ptr) Variable UniformConstant
          47(gss):     43(ptr) Variable UniformConstant
               50:             TypePointer UniformConstant 8
         51(gtex):     50(ptr) Variable UniformConstant

+ 1 - 8
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaque.frag.out

@@ -1,7 +1,7 @@
 hlsl.flattenOpaque.frag
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 185
 
                               Capability Shader
@@ -11,11 +11,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 500
                               Name 4  "main"
-                              Name 7  "os"
-                              MemberName 7(os) 0  "s2D"
-                              Name 23  "os2"
-                              MemberName 23(os2) 0  "s2D"
-                              MemberName 23(os2) 1  "tex"
                               Name 38  "tex"
                               Name 82  "s.s2D"
                               Name 97  "s2.s2D"
@@ -29,12 +24,10 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeSampler
-           7(os):             TypeStruct 6
                9:             TypeFloat 32
               10:             TypeVector 9(float) 4
               15:             TypeVector 9(float) 2
               22:             TypeImage 9(float) 2D sampled format:Unknown
-         23(os2):             TypeStruct 6 22
               37:             TypePointer UniformConstant 22
          38(tex):     37(ptr) Variable UniformConstant
               45:             TypeSampledImage 22

+ 1 - 5
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaqueInit.vert.out

@@ -1,7 +1,7 @@
 hlsl.flattenOpaqueInit.vert
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 134
 
                               Capability Shader
@@ -10,9 +10,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               EntryPoint Vertex 4  "main" 80
                               Source HLSL 500
                               Name 4  "main"
-                              Name 9  "FxaaTex"
-                              MemberName 9(FxaaTex) 0  "smpl"
-                              MemberName 9(FxaaTex) 1  "tex"
                               Name 43  "g_tInputTexture_sampler"
                               Name 47  "g_tInputTexture"
                               Name 80  "@entryPointOutput"
@@ -24,7 +21,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                6:             TypeSampler
                7:             TypeFloat 32
                8:             TypeImage 7(float) 2D sampled format:Unknown
-      9(FxaaTex):             TypeStruct 6 8
               11:             TypeVector 7(float) 4
               31:             TypeSampledImage 8
               33:             TypeVector 7(float) 2

+ 1 - 6
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenOpaqueInitMix.vert.out

@@ -1,7 +1,7 @@
 hlsl.flattenOpaqueInitMix.vert
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 80
 
                               Capability Shader
@@ -10,10 +10,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               EntryPoint Vertex 4  "main" 57
                               Source HLSL 500
                               Name 4  "main"
-                              Name 9  "FxaaTex"
-                              MemberName 9(FxaaTex) 0  "smpl"
-                              MemberName 9(FxaaTex) 1  "tex"
-                              MemberName 9(FxaaTex) 2  "f"
                               Name 44  "g_tInputTexture_sampler"
                               Name 47  "g_tInputTexture"
                               Name 57  "@entryPointOutput"
@@ -25,7 +21,6 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                6:             TypeSampler
                7:             TypeFloat 32
                8:             TypeImage 7(float) 2D sampled format:Unknown
-      9(FxaaTex):             TypeStruct 6 8 7(float)
               11:             TypeVector 7(float) 4
               28:             TypeSampledImage 8
               36:             TypeVector 7(float) 2

+ 1 - 21
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenSubset.frag.out

@@ -1,7 +1,7 @@
 hlsl.flattenSubset.frag
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 66
 
                               Capability Shader
@@ -11,23 +11,7 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 500
                               Name 4  "main"
-                              Name 15  "S0"
-                              MemberName 15(S0) 0  "x"
-                              MemberName 15(S0) 1  "y"
-                              MemberName 15(S0) 2  "ss"
-                              Name 16  "S1"
-                              MemberName 16(S1) 0  "b"
-                              MemberName 16(S1) 1  "samplerState"
-                              MemberName 16(S1) 2  "s0"
-                              MemberName 16(S1) 3  "a"
                               Name 21  "samp"
-                              Name 25  "S2"
-                              MemberName 25(S2) 0  "a1"
-                              MemberName 25(S2) 1  "a2"
-                              MemberName 25(S2) 2  "a3"
-                              MemberName 25(S2) 3  "a4"
-                              MemberName 25(S2) 4  "a5"
-                              MemberName 25(S2) 5  "resources"
                               Name 33  "tex"
                               Name 47  "vpos"
                               Name 50  "@entryPointOutput"
@@ -40,12 +24,8 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                6:             TypeFloat 32
                7:             TypeVector 6(float) 4
               13:             TypeSampler
-              14:             TypeInt 32 1
-          15(S0):             TypeStruct 14(int) 14(int) 13
-          16(S1):             TypeStruct 6(float) 13 15(S0) 14(int)
               20:             TypePointer UniformConstant 13
         21(samp):     20(ptr) Variable UniformConstant
-          25(S2):             TypeStruct 14(int) 14(int) 14(int) 14(int) 14(int) 16(S1)
               31:             TypeImage 6(float) 2D sampled format:Unknown
               32:             TypePointer UniformConstant 31
          33(tex):     32(ptr) Variable UniformConstant

+ 1 - 18
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.flattenSubset2.frag.out

@@ -1,7 +1,7 @@
 hlsl.flattenSubset2.frag
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 53
 
                               Capability Shader
@@ -11,31 +11,14 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 500
                               Name 4  "main"
-                              Name 14  "Nested"
-                              MemberName 14(Nested) 0  "y"
-                              MemberName 14(Nested) 1  "texNested"
-                              Name 15  "A"
-                              MemberName 15(A) 0  "n"
-                              MemberName 15(A) 1  "x"
-                              Name 25  "B"
-                              MemberName 25(B) 0  "n"
-                              MemberName 25(B) 1  "tex"
-                              Name 36  "someTex"
                               Name 49  "vpos"
                               Name 52  "@entryPointOutput"
-                              Decorate 36(someTex) DescriptorSet 0
                               Decorate 49(vpos) Location 0
                               Decorate 52(@entryPointOutput) Location 0
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
                7:             TypeVector 6(float) 4
-              13:             TypeImage 6(float) 2D sampled format:Unknown
-      14(Nested):             TypeStruct 6(float) 13
-           15(A):             TypeStruct 14(Nested) 6(float)
-           25(B):             TypeStruct 14(Nested) 13
-              35:             TypePointer UniformConstant 13
-     36(someTex):     35(ptr) Variable UniformConstant
               43:    6(float) Constant 0
               44:    7(fvec4) ConstantComposite 43 43 43 43
               48:             TypePointer Input 7(fvec4)

+ 26 - 53
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.partialFlattenLocal.vert.out

@@ -1,8 +1,8 @@
 hlsl.partialFlattenLocal.vert
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
-// Id's are bound by 132
+// Generated by (magic number): 80004
+// Id's are bound by 169
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
@@ -10,23 +10,14 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               EntryPoint Vertex 4  "main" 83 86
                               Source HLSL 500
                               Name 4  "main"
-                              Name 22  "Packed"
-                              MemberName 22(Packed) 0  "tex"
-                              MemberName 22(Packed) 1  "pos"
-                              MemberName 22(Packed) 2  "uv"
-                              MemberName 22(Packed) 3  "x"
-                              MemberName 22(Packed) 4  "n"
-                              Name 27  "tex"
                               Name 83  "pos"
                               Name 86  "@entryPointOutput"
-                              Decorate 27(tex) DescriptorSet 0
                               Decorate 83(pos) Location 0
                               Decorate 86(@entryPointOutput) BuiltIn Position
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
                7:             TypeVector 6(float) 4
-              13:             TypeImage 6(float) 2D sampled format:Unknown
               14:             TypeVector 6(float) 3
               15:             TypeInt 32 0
               16:     15(int) Constant 3
@@ -35,73 +26,55 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
               19:     15(int) Constant 2
               20:             TypeArray 18(fvec2) 19
               21:             TypeInt 32 1
-      22(Packed):             TypeStruct 13 17 20 6(float) 21(int)
-              23:             TypePointer Function 22(Packed)
               25:     21(int) Constant 0
-              26:             TypePointer UniformConstant 13
-         27(tex):     26(ptr) Variable UniformConstant
-              29:             TypePointer Function 13
               31:     21(int) Constant 1
               32:    6(float) Constant 0
               33:   14(fvec3) ConstantComposite 32 32 32
               34:             TypePointer Function 14(fvec3)
-              36:     21(int) Constant 2
               37:    6(float) Constant 1065353216
               38:   18(fvec2) ConstantComposite 32 37
               39:             TypePointer Function 18(fvec2)
-              41:     21(int) Constant 3
-              42:             TypePointer Function 6(float)
-              44:     21(int) Constant 4
-              45:             TypePointer Function 21(int)
               54:             TypeBool
               82:             TypePointer Input 7(fvec4)
          83(pos):     82(ptr) Variable Input
               85:             TypePointer Output 7(fvec4)
 86(@entryPointOutput):     85(ptr) Variable Output
+             130:             TypePointer Function 17
+             132:             TypePointer Function 20
          4(main):           2 Function None 3
                5:             Label
-              90:     23(ptr) Variable Function
+             133:    132(ptr) Variable Function
+             131:    130(ptr) Variable Function
               84:    7(fvec4) Load 83(pos)
-              94:          13 Load 27(tex)
-              95:     29(ptr) AccessChain 90 25
-                              Store 95 94
-              96:     34(ptr) AccessChain 90 31 25
-                              Store 96 33
-              97:     39(ptr) AccessChain 90 36 25
-                              Store 97 38
-              98:     42(ptr) AccessChain 90 41
-                              Store 98 37
-              99:     45(ptr) AccessChain 90 44
-                              Store 99 41
+             136:     34(ptr) AccessChain 131 25
+                              Store 136 33
+             137:     39(ptr) AccessChain 133 25
+                              Store 137 38
                               Branch 100
              100:             Label
-             131:     21(int) Phi 25 5 119 102
-                              LoopMerge 101 102 None
-                              Branch 103
-             103:             Label
-             105:    54(bool) SLessThan 131 31
+             168:     21(int) Phi 25 5 119 106
+             105:    54(bool) SLessThan 168 31
+                              LoopMerge 101 106 None
                               BranchConditional 105 106 101
              106:               Label
-             109:     39(ptr)   AccessChain 90 36 131
-             110:   18(fvec2)   Load 109
-             111:     34(ptr)   AccessChain 90 31 131
-             112:   14(fvec3)   Load 111
+             138:     39(ptr)   AccessChain 133 168
+             110:   18(fvec2)   Load 138
+             139:     34(ptr)   AccessChain 131 168
+             112:   14(fvec3)   Load 139
              113:   18(fvec2)   VectorShuffle 112 112 0 1
              114:   18(fvec2)   FAdd 113 110
-             115:     34(ptr)   AccessChain 90 31 131
-             116:   14(fvec3)   Load 115
+             140:     34(ptr)   AccessChain 131 168
+             116:   14(fvec3)   Load 140
              117:   14(fvec3)   VectorShuffle 116 114 3 4 2
-                                Store 115 117
-                                Branch 102
-             102:               Label
-             119:     21(int)   IAdd 131 31
+                                Store 140 117
+             119:     21(int)   IAdd 168 31
                                 Branch 100
              101:             Label
-             120:  22(Packed) Load 90
-             130:   14(fvec3) CompositeExtract 120 1 0
-             124:    6(float) CompositeExtract 130 0
-             125:    6(float) CompositeExtract 130 1
-             126:    6(float) CompositeExtract 130 2
+             142:          17 Load 131
+             161:   14(fvec3) CompositeExtract 142 0
+             124:    6(float) CompositeExtract 161 0
+             125:    6(float) CompositeExtract 161 1
+             126:    6(float) CompositeExtract 161 2
              127:    7(fvec4) CompositeConstruct 124 125 126 32
              128:    7(fvec4) FAdd 84 127
                               Store 86(@entryPointOutput) 128

+ 1 - 15
3rdparty/bgfx/3rdparty/glslang/Test/baseLegalResults/hlsl.partialFlattenMixed.vert.out

@@ -1,7 +1,7 @@
 hlsl.partialFlattenMixed.vert
 WARNING: AST will form illegal SPIR-V; need to transform to legalize
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 36
 
                               Capability Shader
@@ -10,28 +10,14 @@ WARNING: AST will form illegal SPIR-V; need to transform to legalize
                               EntryPoint Vertex 4  "main" 32 35
                               Source HLSL 500
                               Name 4  "main"
-                              Name 18  "Packed"
-                              MemberName 18(Packed) 0  "a"
-                              MemberName 18(Packed) 1  "membTex"
-                              MemberName 18(Packed) 2  "b"
-                              Name 23  "tex"
                               Name 32  "pos"
                               Name 35  "@entryPointOutput"
-                              Decorate 23(tex) DescriptorSet 0
                               Decorate 32(pos) Location 0
                               Decorate 35(@entryPointOutput) BuiltIn Position
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
                7:             TypeVector 6(float) 4
-              13:             TypeInt 32 1
-              14:             TypeImage 6(float) 2D sampled format:Unknown
-              15:             TypeInt 32 0
-              16:     15(int) Constant 2
-              17:             TypeArray 14 16
-      18(Packed):             TypeStruct 13(int) 17 13(int)
-              22:             TypePointer UniformConstant 17
-         23(tex):     22(ptr) Variable UniformConstant
               31:             TypePointer Input 7(fvec4)
          32(pos):     31(ptr) Variable Input
               34:             TypePointer Output 7(fvec4)

+ 4 - 2
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/120.frag.out

@@ -19,7 +19,7 @@ ERROR: 0:82: 'xr' : vector swizzle selectors not from the same set
 ERROR: 0:83: 'xyxyx' : vector swizzle too long 
 ERROR: 0:84: 'z' : vector swizzle selection out of range 
 ERROR: 0:85: 'assign' :  l-value required 
-ERROR: 0:91: 'int' : overloaded functions must have the same return type 
+ERROR: 0:91: 'main' : overloaded functions must have the same return type 
 ERROR: 0:91: 'main' : function already has a body 
 ERROR: 0:91: 'int' :  entry point cannot return a value
 ERROR: 0:92: 'main' : function cannot take any parameter(s) 
@@ -52,8 +52,10 @@ ERROR: 0:209: 'assign' :  cannot convert from ' const float' to ' temp 4-compone
 ERROR: 0:212: 'sampler2DRect' : Reserved word. 
 ERROR: 0:244: ':' :  wrong operand types: no operation ':' exists that takes a left-hand operand of type ' global void' and a right operand of type ' const int' (or there is no acceptable conversion)
 ERROR: 0:245: ':' :  wrong operand types: no operation ':' exists that takes a left-hand operand of type ' const int' and a right operand of type ' global void' (or there is no acceptable conversion)
+ERROR: 0:248: 'shader half float' : required extension not requested: GL_AMD_gpu_shader_half_float
+ERROR: 0:248: 'half floating-point suffix' : not supported with this profile: none
 ERROR: 0:248: '' :  syntax error, unexpected IDENTIFIER, expecting COMMA or SEMICOLON
-ERROR: 54 compilation errors.  No code generated.
+ERROR: 56 compilation errors.  No code generated.
 
 
 Shader version: 120

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/120.vert.out

@@ -34,7 +34,7 @@ ERROR: 0:51: 'arrays of arrays' : not supported with this profile: none
 ERROR: 0:52: 'arrays of arrays' : not supported with this profile: none
 ERROR: 0:53: 'arrays of arrays' : not supported with this profile: none
 ERROR: 0:56: 'out' : overloaded functions must have the same parameter storage qualifiers for argument 1
-ERROR: 0:57: 'float' : overloaded functions must have the same return type 
+ERROR: 0:57: 'overloadA' : overloaded functions must have the same return type 
 ERROR: 0:87: 'overloadC' : no matching overloaded function found 
 ERROR: 0:90: 'overloadC' : no matching overloaded function found 
 ERROR: 0:95: 'overloadD' : ambiguous function signature match: multiple signatures match under implicit type conversion 

+ 19 - 0
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/310AofA.vert.out

@@ -317,6 +317,25 @@ ERROR: node is still EOpNull!
 0:99              0 (const int)
 0:99          Constant:
 0:99            1 (const int)
+0:117  Function Definition: func(mf33[3][2]; ( global highp 3-component vector of float)
+0:117    Function Parameters: 
+0:117      'x' ( in 3-element array of 2-element array of highp 3X3 matrix of float)
+0:119    Sequence
+0:119      Sequence
+0:119        move second child to first child ( temp highp 3X3 matrix of float)
+0:119          'a0' ( temp highp 3X3 matrix of float)
+0:119          direct index ( temp highp 3X3 matrix of float)
+0:119            direct index ( temp 2-element array of highp 3X3 matrix of float)
+0:119              'x' ( in 3-element array of 2-element array of highp 3X3 matrix of float)
+0:119              Constant:
+0:119                2 (const int)
+0:119            Constant:
+0:119              1 (const int)
+0:120      Branch: Return with expression
+0:120        direct index ( temp highp 3-component vector of float)
+0:120          'a0' ( temp highp 3X3 matrix of float)
+0:120          Constant:
+0:120            2 (const int)
 0:?   Linker Objects
 0:?     'name' (layout( column_major shared) buffer 3-element array of block{layout( column_major shared) buffer implicitly-sized array of highp float u, layout( column_major shared) buffer implicitly-sized array of highp 4-component vector of float v})
 0:?     'uname' (layout( column_major shared) uniform 3-element array of block{layout( column_major shared) uniform highp float u, layout( column_major shared) uniform implicitly-sized array of highp 4-component vector of float v})

+ 4 - 4
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/310implicitSizeArrayError.vert.out

@@ -12,13 +12,13 @@ ERROR: node is still EOpNull!
 0:7        'o' (layout( location=0) smooth out highp int)
 0:7        direct index (layout( column_major shared) temp highp int)
 0:7          a: direct index for structure (layout( column_major shared) uniform implicitly-sized array of highp int)
-0:7            'uni' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform implicitly-sized array of highp int a})
+0:7            'uni' (layout( binding=0 column_major shared) uniform block{layout( column_major shared) uniform implicitly-sized array of highp int a})
 0:7            Constant:
 0:7              0 (const int)
 0:7          Constant:
 0:7            2 (const int)
 0:?   Linker Objects
-0:?     'uni' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform implicitly-sized array of highp int a})
+0:?     'uni' (layout( binding=0 column_major shared) uniform block{layout( column_major shared) uniform implicitly-sized array of highp int a})
 0:?     'o' (layout( location=0) smooth out highp int)
 0:?     'gl_VertexID' ( gl_VertexId highp int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId highp int InstanceId)
@@ -36,13 +36,13 @@ ERROR: node is still EOpNull!
 0:7        'o' (layout( location=0) smooth out highp int)
 0:7        direct index (layout( column_major shared) temp highp int)
 0:7          a: direct index for structure (layout( column_major shared) uniform 1-element array of highp int)
-0:7            'uni' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform 1-element array of highp int a})
+0:7            'uni' (layout( binding=0 column_major shared) uniform block{layout( column_major shared) uniform 1-element array of highp int a})
 0:7            Constant:
 0:7              0 (const int)
 0:7          Constant:
 0:7            2 (const int)
 0:?   Linker Objects
-0:?     'uni' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform 1-element array of highp int a})
+0:?     'uni' (layout( binding=0 column_major shared) uniform block{layout( column_major shared) uniform 1-element array of highp int a})
 0:?     'o' (layout( location=0) smooth out highp int)
 0:?     'gl_VertexID' ( gl_VertexId highp int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId highp int InstanceId)

+ 3 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/330.frag.out

@@ -16,6 +16,7 @@ ERROR: 0:57: 'location on block member' : not supported for this version or the
 ERROR: 0:62: 'location on block member' : can only use in an in/out block 
 ERROR: 0:62: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
 ERROR: 0:60: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
+ERROR: 0:60: 'location' : cannot apply to uniform or buffer block 
 ERROR: 0:68: 'layout-id value' : cannot be negative 
 ERROR: 0:69: 'layout-id value' : cannot be negative 
 ERROR: 0:76: 'f2' : cannot use layout qualifiers on structure members 
@@ -23,6 +24,7 @@ ERROR: 0:91: 'location on block member' : can only use in an in/out block
 ERROR: 0:91: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
 ERROR: 0:91: 'location' : overlapping use of location 3
 ERROR: 0:89: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
+ERROR: 0:89: 'location' : cannot apply to uniform or buffer block 
 ERROR: 0:94: 'location' : either the block needs a location, or all members need a location, or no members have a location 
 ERROR: 0:108: 'A' : cannot use layout qualifiers on structure members 
 ERROR: 0:119: 'location' : overlapping use of location 44
@@ -38,7 +40,7 @@ ERROR: 0:140: 'assign' :  cannot convert from ' const float' to ' temp 2-compone
 ERROR: 0:141: 'textureQueryLod' : no matching overloaded function found 
 ERROR: 0:141: 'assign' :  cannot convert from ' const float' to ' temp 2-component vector of float'
 ERROR: 0:152: 'index' : value must be 0 or 1 
-ERROR: 39 compilation errors.  No code generated.
+ERROR: 41 compilation errors.  No code generated.
 
 
 Shader version: 330

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/400.geom.out

@@ -1,7 +1,7 @@
 400.geom
 ERROR: 0:12: 'invocations' : can only apply to a standalone qualifier 
 ERROR: 0:20: 'patch' : not supported in this stage: geometry
-ERROR: 0:20: 'gl_PointSize' : cannot add layout to redeclared block member 
+ERROR: 0:20: 'gl_PointSize' : cannot add non-XFB layout to redeclared block member 
 ERROR: 0:20: 'gl_PointSize' : cannot add patch to redeclared block member 
 ERROR: 0:25: 'length' :  array must first be sized by a redeclaration or layout qualifier
 ERROR: 0:36: 'length' :  array must first be sized by a redeclaration or layout qualifier

+ 44 - 39
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/440.vert.out

@@ -46,51 +46,54 @@ ERROR: 0:166: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers
 ERROR: 0:169: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4
 ERROR: 0:169: 'xfb_stride' : 1/4 stride is too large: gl_MaxTransformFeedbackInterleavedComponents is 64
 ERROR: 0:171: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4
-ERROR: 0:179: 'gl_BaseVertexARB' : required extension not requested: GL_ARB_shader_draw_parameters
-ERROR: 0:179: 'gl_BaseInstanceARB' : required extension not requested: GL_ARB_shader_draw_parameters
-ERROR: 0:179: 'gl_DrawIDARB' : required extension not requested: GL_ARB_shader_draw_parameters
-ERROR: 0:187: 'assign' :  l-value required "gl_BaseVertexARB" (can't modify shader input)
-ERROR: 0:188: 'assign' :  l-value required "gl_BaseInstanceARB" (can't modify shader input)
-ERROR: 0:189: 'assign' :  l-value required "gl_DrawIDARB" (can't modify shader input)
-ERROR: 0:190: 'glBaseInstanceARB' : undeclared identifier 
-ERROR: 54 compilation errors.  No code generated.
+ERROR: 0:178: 'xfb_offset' : overlapping offsets at offset 36 in buffer 3
+ERROR: 0:179: 'xfb_buffer' : member cannot contradict block (or what block inherited from global) 
+ERROR: 0:178: 'xfb_offset' : overlapping offsets at offset 32 in buffer 3
+ERROR: 0:185: 'gl_BaseVertexARB' : required extension not requested: GL_ARB_shader_draw_parameters
+ERROR: 0:185: 'gl_BaseInstanceARB' : required extension not requested: GL_ARB_shader_draw_parameters
+ERROR: 0:185: 'gl_DrawIDARB' : required extension not requested: GL_ARB_shader_draw_parameters
+ERROR: 0:193: 'assign' :  l-value required "gl_BaseVertexARB" (can't modify shader input)
+ERROR: 0:194: 'assign' :  l-value required "gl_BaseInstanceARB" (can't modify shader input)
+ERROR: 0:195: 'assign' :  l-value required "gl_DrawIDARB" (can't modify shader input)
+ERROR: 0:196: 'glBaseInstanceARB' : undeclared identifier 
+ERROR: 57 compilation errors.  No code generated.
 
 
 Shader version: 440
 Requested GL_ARB_shader_draw_parameters
 in xfb mode
 ERROR: node is still EOpNull!
-0:177  Function Definition: drawParamsBad( ( global int)
-0:177    Function Parameters: 
-0:179    Sequence
-0:179      Branch: Return with expression
-0:179        add ( temp int)
-0:179          add ( temp int)
-0:179            'gl_BaseVertexARB' ( in int BaseVertex)
-0:179            'gl_BaseInstanceARB' ( in int BaseInstance)
-0:179          'gl_DrawIDARB' ( in int DrawId)
-0:184  Function Definition: drawParams( ( global int)
-0:184    Function Parameters: 
-0:186    Sequence
-0:186      Branch: Return with expression
-0:186        add ( temp int)
-0:186          add ( temp int)
-0:186            'gl_BaseVertexARB' ( in int BaseVertex)
-0:186            'gl_BaseInstanceARB' ( in int BaseInstance)
-0:186          'gl_DrawIDARB' ( in int DrawId)
-0:187      move second child to first child ( temp int)
-0:187        'gl_BaseVertexARB' ( in int BaseVertex)
-0:187        Constant:
-0:187          3 (const int)
-0:188      move second child to first child ( temp int)
-0:188        'gl_BaseInstanceARB' ( in int BaseInstance)
-0:188        Constant:
-0:188          3 (const int)
-0:189      move second child to first child ( temp int)
-0:189        'gl_DrawIDARB' ( in int DrawId)
-0:189        Constant:
-0:189          3 (const int)
-0:190      'glBaseInstanceARB' ( temp float)
+0:183  Function Definition: drawParamsBad( ( global int)
+0:183    Function Parameters: 
+0:185    Sequence
+0:185      Branch: Return with expression
+0:185        add ( temp int)
+0:185          add ( temp int)
+0:185            'gl_BaseVertexARB' ( in int BaseVertex)
+0:185            'gl_BaseInstanceARB' ( in int BaseInstance)
+0:185          'gl_DrawIDARB' ( in int DrawId)
+0:190  Function Definition: drawParams( ( global int)
+0:190    Function Parameters: 
+0:192    Sequence
+0:192      Branch: Return with expression
+0:192        add ( temp int)
+0:192          add ( temp int)
+0:192            'gl_BaseVertexARB' ( in int BaseVertex)
+0:192            'gl_BaseInstanceARB' ( in int BaseInstance)
+0:192          'gl_DrawIDARB' ( in int DrawId)
+0:193      move second child to first child ( temp int)
+0:193        'gl_BaseVertexARB' ( in int BaseVertex)
+0:193        Constant:
+0:193          3 (const int)
+0:194      move second child to first child ( temp int)
+0:194        'gl_BaseInstanceARB' ( in int BaseInstance)
+0:194        Constant:
+0:194          3 (const int)
+0:195      move second child to first child ( temp int)
+0:195        'gl_DrawIDARB' ( in int DrawId)
+0:195        Constant:
+0:195          3 (const int)
+0:196      'glBaseInstanceARB' ( temp float)
 0:?   Linker Objects
 0:?     'a' (layout( location=2 component=2) in 2-component vector of float)
 0:?     'b' (layout( location=2 component=1) in float)
@@ -153,6 +156,7 @@ ERROR: node is still EOpNull!
 0:?     'bbinst9' ( out block{layout( xfb_buffer=4 xfb_offset=1) out bool b, layout( xfb_buffer=4 xfb_offset=12) out structure{ global bool b,  global structure{ global int i,  global double d,  global float f} s,  global 2-component vector of float v2} t, layout( xfb_buffer=4 xfb_offset=52) out 3X3 matrix of float m3, layout( xfb_buffer=4 xfb_offset=90) out int i, layout( xfb_buffer=4 xfb_offset=98) out double d, layout( xfb_buffer=4 xfb_offset=108) out structure{ global int a} s})
 0:?     'bm' (layout( xfb_buffer=5 xfb_offset=0) smooth out float)
 0:?     'bbinst10' ( out block{layout( xfb_buffer=7 xfb_offset=0) out 4X4 matrix of double m1, layout( xfb_buffer=7 xfb_offset=128) out 4X4 matrix of double m2, layout( xfb_buffer=7 xfb_offset=256) out float f})
+0:?     'anon@0' ( out block{layout( xfb_buffer=3 xfb_offset=36) gl_Position 4-component vector of float Position gl_Position, layout( xfb_buffer=3 xfb_offset=32) gl_PointSize float PointSize gl_PointSize, })
 0:?     'gl_VertexID' ( gl_VertexId int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId int InstanceId)
 
@@ -235,6 +239,7 @@ ERROR: node is still EOpNull!
 0:?     'bbinst9' ( out block{layout( xfb_buffer=4 xfb_offset=1) out bool b, layout( xfb_buffer=4 xfb_offset=12) out structure{ global bool b,  global structure{ global int i,  global double d,  global float f} s,  global 2-component vector of float v2} t, layout( xfb_buffer=4 xfb_offset=52) out 3X3 matrix of float m3, layout( xfb_buffer=4 xfb_offset=90) out int i, layout( xfb_buffer=4 xfb_offset=98) out double d, layout( xfb_buffer=4 xfb_offset=108) out structure{ global int a} s})
 0:?     'bm' (layout( xfb_buffer=5 xfb_offset=0) smooth out float)
 0:?     'bbinst10' ( out block{layout( xfb_buffer=7 xfb_offset=0) out 4X4 matrix of double m1, layout( xfb_buffer=7 xfb_offset=128) out 4X4 matrix of double m2, layout( xfb_buffer=7 xfb_offset=256) out float f})
+0:?     'anon@0' ( out block{layout( xfb_buffer=3 xfb_offset=36) gl_Position 4-component vector of float Position gl_Position, layout( xfb_buffer=3 xfb_offset=32) gl_PointSize float PointSize gl_PointSize, })
 0:?     'gl_VertexID' ( gl_VertexId int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId int InstanceId)
 

+ 7 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/450.vert.out

@@ -19,7 +19,9 @@ ERROR: 0:44: 'anyInvocation' : no matching overloaded function found
 ERROR: 0:45: 'allInvocations' : no matching overloaded function found 
 ERROR: 0:46: 'allInvocationsEqual' : no matching overloaded function found 
 ERROR: 0:48: 'extraneous semicolon' : not supported for this version or the enabled extensions 
-ERROR: 20 compilation errors.  No code generated.
+ERROR: 0:50: 'location' : cannot apply to uniform or buffer block 
+ERROR: 0:54: 'location' : cannot apply to uniform or buffer block 
+ERROR: 22 compilation errors.  No code generated.
 
 
 Shader version: 450
@@ -77,6 +79,8 @@ ERROR: node is still EOpNull!
 0:?     'outSS' ( smooth out structure{ global float f,  global structure{ global float f} s})
 0:?     'aui' (layout( binding=0 offset=0) uniform atomic_uint)
 0:?     'ui' ( global uint)
+0:?     'anon@1' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform int a})
+0:?     'anon@2' (layout( location=0 column_major shared) buffer block{layout( column_major shared) buffer int b})
 0:?     'gl_VertexID' ( gl_VertexId int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId int InstanceId)
 
@@ -112,6 +116,8 @@ ERROR: node is still EOpNull!
 0:?     'outSS' ( smooth out structure{ global float f,  global structure{ global float f} s})
 0:?     'aui' (layout( binding=0 offset=0) uniform atomic_uint)
 0:?     'ui' ( global uint)
+0:?     'anon@1' (layout( location=0 column_major shared) uniform block{layout( column_major shared) uniform int a})
+0:?     'anon@2' (layout( location=0 column_major shared) buffer block{layout( column_major shared) buffer int b})
 0:?     'gl_VertexID' ( gl_VertexId int VertexId)
 0:?     'gl_InstanceID' ( gl_InstanceId int InstanceId)
 

+ 48 - 2
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/460.frag.out

@@ -1,6 +1,14 @@
 460.frag
+ERROR: 0:22: 'attribute' : required extension not requested: GL_EXT_control_flow_attributes
+ERROR: 0:23: 'attribute' : required extension not requested: GL_EXT_control_flow_attributes
+ERROR: 0:30: 'dependency_length' : must be positive 
+ERROR: 0:31: 'dependency_length' : must be positive 
+ERROR: 4 compilation errors.  No code generated.
+
+
 Shader version: 460
-0:? Sequence
+Requested GL_EXT_control_flow_attributes
+ERROR: node is still EOpNull!
 0:10  Function Definition: main( ( global void)
 0:10    Function Parameters: 
 0:12    Sequence
@@ -21,6 +29,43 @@ Shader version: 460
 0:16        'b1' ( temp bool)
 0:16        allInvocationsEqual ( global bool)
 0:16          'b1' ( temp bool)
+0:19  Function Definition: attExtBad( ( global void)
+0:19    Function Parameters: 
+0:22    Sequence
+0:22      Sequence
+0:22        Sequence
+0:22          move second child to first child ( temp int)
+0:22            'i' ( temp int)
+0:22            Constant:
+0:22              0 (const int)
+0:22        Loop with condition tested first: Dependency 4
+0:22          Loop Condition
+0:22          Compare Less Than ( temp bool)
+0:22            'i' ( temp int)
+0:22            Constant:
+0:22              8 (const int)
+0:22          No loop body
+0:22          Loop Terminal Expression
+0:22          Pre-Increment ( temp int)
+0:22            'i' ( temp int)
+0:23      Test condition and select ( temp void): Flatten
+0:23        Condition
+0:23        Constant:
+0:23          true (const bool)
+0:23        true case is null
+0:28  Function Definition: attExt( ( global void)
+0:28    Function Parameters: 
+0:30    Sequence
+0:30      Loop with condition not tested first: Dependency -3
+0:30        Loop Condition
+0:30        Constant:
+0:30          true (const bool)
+0:30        No loop body
+0:31      Loop with condition not tested first
+0:31        Loop Condition
+0:31        Constant:
+0:31          true (const bool)
+0:31        No loop body
 0:?   Linker Objects
 0:?     's' ( smooth in structure{ global float f,  global 4-component vector of float v})
 
@@ -29,7 +74,8 @@ Linked fragment stage:
 
 
 Shader version: 460
-0:? Sequence
+Requested GL_EXT_control_flow_attributes
+ERROR: node is still EOpNull!
 0:10  Function Definition: main( ( global void)
 0:10    Function Parameters: 
 0:12    Sequence

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glsl.entryPointRename.vert.bad.out

@@ -2,7 +2,7 @@ glsl.entryPointRename.vert
 ERROR: Source entry point must be "main"
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 20
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glsl.entryPointRename.vert.out

@@ -1,6 +1,6 @@
 glsl.entryPointRename.vert
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 20
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/glspv.version.frag.out

@@ -2,7 +2,7 @@ glspv.version.frag
 ERROR: #version: compilation for SPIR-V does not support the compatibility profile
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 6
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.PointSize.geom.out

@@ -70,7 +70,7 @@ output primitive = line_strip
 0:?     'OutputStream.ps' ( out float PointSize)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 36
 
                               Capability Geometry

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.PointSize.vert.out

@@ -38,7 +38,7 @@ Shader version: 500
 0:?     '@entryPointOutput' ( out float PointSize)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 16
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.aliasOpaque.frag.out

@@ -143,7 +143,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 64
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.amend.frag.out

@@ -160,7 +160,7 @@ gl_FragCoord origin is upper left
 0:?     'm' ( global 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 57
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.flatten.frag.out

@@ -345,7 +345,7 @@ gl_FragCoord origin is upper left
 0:?     'ps_output.color' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 143
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.frag.out

@@ -290,7 +290,7 @@ gl_FragCoord origin is upper left
 0:?     'input' (layout( location=1) in 3-element array of 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 126
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.implicit-size.frag.out

@@ -163,7 +163,7 @@ gl_FragCoord origin is upper left
 0:?     'g_mystruct' ( global 2-element array of structure{ temp int i,  temp float f})
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 72
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.array.multidim.frag.out

@@ -134,7 +134,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 57
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.assoc.frag.out

@@ -132,7 +132,7 @@ gl_FragCoord origin is upper left
 0:?     'a5' (layout( location=4) in 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 58
 
                               Capability Shader

+ 3 - 3
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attribute.expression.comp.out

@@ -10,7 +10,7 @@ local_size = (4, 6, 8)
 0:11          'x' ( temp int)
 0:11          Constant:
 0:11            0 (const int)
-0:11        Loop with condition tested first
+0:11        Loop with condition tested first: Unroll
 0:11          Loop Condition
 0:11          Compare Less Than ( temp bool)
 0:11            'x' ( temp int)
@@ -53,7 +53,7 @@ local_size = (4, 6, 8)
 0:11          'x' ( temp int)
 0:11          Constant:
 0:11            0 (const int)
-0:11        Loop with condition tested first
+0:11        Loop with condition tested first: Unroll
 0:11          Loop Condition
 0:11          Compare Less Than ( temp bool)
 0:11            'x' ( temp int)
@@ -82,7 +82,7 @@ local_size = (4, 6, 8)
 0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 39
 
                               Capability Shader

+ 3 - 3
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attribute.frag.out

@@ -6,7 +6,7 @@ gl_FragCoord origin is upper left
 0:2    Function Parameters: 
 0:2      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:11      Test condition and select ( temp void)
+0:11      Test condition and select ( temp void): DontFlatten
 0:11        Condition
 0:11        Constant:
 0:11          false (const bool)
@@ -33,7 +33,7 @@ gl_FragCoord origin is upper left
 0:2    Function Parameters: 
 0:2      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:11      Test condition and select ( temp void)
+0:11      Test condition and select ( temp void): DontFlatten
 0:11        Condition
 0:11        Constant:
 0:11          false (const bool)
@@ -50,7 +50,7 @@ gl_FragCoord origin is upper left
 0:?     'input' (layout( location=0) in 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 24
 
                               Capability Shader

+ 71 - 54
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attributeC11.frag.out

@@ -2,42 +2,45 @@ hlsl.attributeC11.frag
 Shader version: 500
 gl_FragCoord origin is upper left
 0:? Sequence
-0:16  Function Definition: @main(vf4; ( temp 4-component vector of float)
-0:16    Function Parameters: 
-0:16      'input' ( in 4-component vector of float)
+0:20  Function Definition: @main(vf4; ( temp 4-component vector of float)
+0:20    Function Parameters: 
+0:20      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:17      Branch: Return with expression
-0:17        add ( temp 4-component vector of float)
-0:17          'input' ( in 4-component vector of float)
-0:17          textureFetch ( temp 4-component vector of float)
-0:17            'attach' ( uniform texture2D)
-0:17            vector swizzle ( temp int)
-0:17              Constant:
-0:17                0 (const int)
-0:17                0 (const int)
-0:17              Sequence
-0:17                Constant:
-0:17                  0 (const int)
-0:17            direct index ( temp int)
-0:17              Constant:
-0:17                0 (const int)
-0:17                0 (const int)
-0:17              Constant:
-0:17                1 (const int)
-0:16  Function Definition: main( ( temp void)
-0:16    Function Parameters: 
+0:21      Branch: Return with expression
+0:21        add ( temp 4-component vector of float)
+0:21          'input' ( in 4-component vector of float)
+0:21          textureFetch ( temp 4-component vector of float)
+0:21            'attach' ( uniform texture2D)
+0:21            vector swizzle ( temp int)
+0:21              Constant:
+0:21                0 (const int)
+0:21                0 (const int)
+0:21              Sequence
+0:21                Constant:
+0:21                  0 (const int)
+0:21            direct index ( temp int)
+0:21              Constant:
+0:21                0 (const int)
+0:21                0 (const int)
+0:21              Constant:
+0:21                1 (const int)
+0:20  Function Definition: main( ( temp void)
+0:20    Function Parameters: 
 0:?     Sequence
-0:16      move second child to first child ( temp 4-component vector of float)
+0:20      move second child to first child ( temp 4-component vector of float)
 0:?         'input' ( temp 4-component vector of float)
 0:?         'input' (layout( location=8) in 4-component vector of float)
-0:16      move second child to first child ( temp 4-component vector of float)
+0:20      move second child to first child ( temp 4-component vector of float)
 0:?         '@entryPointOutput' (layout( location=7) out 4-component vector of float)
-0:16        Function Call: @main(vf4; ( temp 4-component vector of float)
+0:20        Function Call: @main(vf4; ( temp 4-component vector of float)
 0:?           'input' ( temp 4-component vector of float)
 0:?   Linker Objects
 0:?     'buffer1' (layout( set=0 binding=1 row_major std430) readonly buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 2-component vector of float f} @data})
 0:?     'buffer3' (layout( set=2 binding=3 row_major std430) readonly buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 2-component vector of float f} @data})
 0:?     'attach' ( uniform texture2D)
+0:?     'ci' ( specialization-constant const int)
+0:?       11 (const int)
+0:?     'anon@0' (layout( row_major std430 push_constant) uniform block{layout( row_major std430 offset=0) uniform int a})
 0:?     '@entryPointOutput' (layout( location=7) out 4-component vector of float)
 0:?     'input' (layout( location=8) in 4-component vector of float)
 
@@ -48,48 +51,51 @@ Linked fragment stage:
 Shader version: 500
 gl_FragCoord origin is upper left
 0:? Sequence
-0:16  Function Definition: @main(vf4; ( temp 4-component vector of float)
-0:16    Function Parameters: 
-0:16      'input' ( in 4-component vector of float)
+0:20  Function Definition: @main(vf4; ( temp 4-component vector of float)
+0:20    Function Parameters: 
+0:20      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:17      Branch: Return with expression
-0:17        add ( temp 4-component vector of float)
-0:17          'input' ( in 4-component vector of float)
-0:17          textureFetch ( temp 4-component vector of float)
-0:17            'attach' ( uniform texture2D)
-0:17            vector swizzle ( temp int)
-0:17              Constant:
-0:17                0 (const int)
-0:17                0 (const int)
-0:17              Sequence
-0:17                Constant:
-0:17                  0 (const int)
-0:17            direct index ( temp int)
-0:17              Constant:
-0:17                0 (const int)
-0:17                0 (const int)
-0:17              Constant:
-0:17                1 (const int)
-0:16  Function Definition: main( ( temp void)
-0:16    Function Parameters: 
+0:21      Branch: Return with expression
+0:21        add ( temp 4-component vector of float)
+0:21          'input' ( in 4-component vector of float)
+0:21          textureFetch ( temp 4-component vector of float)
+0:21            'attach' ( uniform texture2D)
+0:21            vector swizzle ( temp int)
+0:21              Constant:
+0:21                0 (const int)
+0:21                0 (const int)
+0:21              Sequence
+0:21                Constant:
+0:21                  0 (const int)
+0:21            direct index ( temp int)
+0:21              Constant:
+0:21                0 (const int)
+0:21                0 (const int)
+0:21              Constant:
+0:21                1 (const int)
+0:20  Function Definition: main( ( temp void)
+0:20    Function Parameters: 
 0:?     Sequence
-0:16      move second child to first child ( temp 4-component vector of float)
+0:20      move second child to first child ( temp 4-component vector of float)
 0:?         'input' ( temp 4-component vector of float)
 0:?         'input' (layout( location=8) in 4-component vector of float)
-0:16      move second child to first child ( temp 4-component vector of float)
+0:20      move second child to first child ( temp 4-component vector of float)
 0:?         '@entryPointOutput' (layout( location=7) out 4-component vector of float)
-0:16        Function Call: @main(vf4; ( temp 4-component vector of float)
+0:20        Function Call: @main(vf4; ( temp 4-component vector of float)
 0:?           'input' ( temp 4-component vector of float)
 0:?   Linker Objects
 0:?     'buffer1' (layout( set=0 binding=1 row_major std430) readonly buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 2-component vector of float f} @data})
 0:?     'buffer3' (layout( set=2 binding=3 row_major std430) readonly buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 2-component vector of float f} @data})
 0:?     'attach' ( uniform texture2D)
+0:?     'ci' ( specialization-constant const int)
+0:?       11 (const int)
+0:?     'anon@0' (layout( row_major std430 push_constant) uniform block{layout( row_major std430 offset=0) uniform int a})
 0:?     '@entryPointOutput' (layout( location=7) out 4-component vector of float)
 0:?     'input' (layout( location=8) in 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
-// Id's are bound by 47
+// Generated by (magic number): 80004
+// Id's are bound by 51
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
@@ -111,6 +117,10 @@ gl_FragCoord origin is upper left
                               MemberName 43(buffer1) 0  "@data"
                               Name 45  "buffer1"
                               Name 46  "buffer3"
+                              Name 47  "ci"
+                              Name 48  "pcBuf"
+                              MemberName 48(pcBuf) 0  "a"
+                              Name 50  ""
                               Decorate 16(attach) DescriptorSet 0
                               Decorate 16(attach) InputAttachmentIndex 4
                               Decorate 33(input) Location 8
@@ -124,6 +134,9 @@ gl_FragCoord origin is upper left
                               Decorate 45(buffer1) Binding 1
                               Decorate 46(buffer3) DescriptorSet 2
                               Decorate 46(buffer3) Binding 3
+                              Decorate 47(ci) SpecId 13
+                              MemberDecorate 48(pcBuf) 0 Offset 0
+                              Decorate 48(pcBuf) Block
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -151,6 +164,10 @@ gl_FragCoord origin is upper left
               44:             TypePointer Uniform 43(buffer1)
      45(buffer1):     44(ptr) Variable Uniform
      46(buffer3):     44(ptr) Variable Uniform
+          47(ci):     18(int) SpecConstant 11
+       48(pcBuf):             TypeStruct 18(int)
+              49:             TypePointer PushConstant 48(pcBuf)
+              50:     49(ptr) Variable PushConstant
          4(main):           2 Function None 3
                5:             Label
        31(input):      8(ptr) Variable Function

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.attributeGlobalBuffer.frag.out

@@ -56,7 +56,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 28
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.basic.comp.out

@@ -60,7 +60,7 @@ local_size = (1, 1, 1)
 0:?     'gti' ( in int LocalInvocationID)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 35
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.basic.geom.out

@@ -188,7 +188,7 @@ output primitive = line_strip
 0:?     'OutputStream.something' (layout( location=1) out int)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 68
 
                               Capability Geometry

+ 177 - 207
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.boolConv.vert.out

@@ -16,84 +16,74 @@ Shader version: 500
 0:4            0 (const int)
 0:6      add second child into first child ( temp int)
 0:6        'r' ( temp int)
-0:6        Convert bool to int ( temp int)
-0:6          add ( temp bool)
-0:6            Convert bool to int ( temp int)
-0:6              'a' ( global bool)
-0:6            Convert bool to int ( temp int)
-0:6              'b' ( global bool)
+0:6        add ( temp int)
+0:6          Convert bool to int ( temp int)
+0:6            'a' ( global bool)
+0:6          Convert bool to int ( temp int)
+0:6            'b' ( global bool)
 0:7      add second child into first child ( temp int)
 0:7        'r' ( temp int)
-0:7        Convert bool to int ( temp int)
-0:7          subtract ( temp bool)
-0:7            Convert bool to int ( temp int)
-0:7              'a' ( global bool)
-0:7            Convert bool to int ( temp int)
-0:7              'b' ( global bool)
+0:7        subtract ( temp int)
+0:7          Convert bool to int ( temp int)
+0:7            'a' ( global bool)
+0:7          Convert bool to int ( temp int)
+0:7            'b' ( global bool)
 0:8      add second child into first child ( temp int)
 0:8        'r' ( temp int)
-0:8        Convert bool to int ( temp int)
-0:8          component-wise multiply ( temp bool)
-0:8            Convert bool to int ( temp int)
-0:8              'a' ( global bool)
-0:8            Convert bool to int ( temp int)
-0:8              'b' ( global bool)
+0:8        component-wise multiply ( temp int)
+0:8          Convert bool to int ( temp int)
+0:8            'a' ( global bool)
+0:8          Convert bool to int ( temp int)
+0:8            'b' ( global bool)
 0:9      add second child into first child ( temp int)
 0:9        'r' ( temp int)
-0:9        Convert bool to int ( temp int)
-0:9          divide ( temp bool)
-0:9            Convert bool to int ( temp int)
-0:9              'a' ( global bool)
-0:9            Convert bool to int ( temp int)
-0:9              'b' ( global bool)
+0:9        divide ( temp int)
+0:9          Convert bool to int ( temp int)
+0:9            'a' ( global bool)
+0:9          Convert bool to int ( temp int)
+0:9            'b' ( global bool)
 0:10      add second child into first child ( temp int)
 0:10        'r' ( temp int)
-0:10        Convert bool to int ( temp int)
-0:10          mod ( temp bool)
-0:10            Convert bool to int ( temp int)
-0:10              'a' ( global bool)
-0:10            Convert bool to int ( temp int)
-0:10              'b' ( global bool)
+0:10        mod ( temp int)
+0:10          Convert bool to int ( temp int)
+0:10            'a' ( global bool)
+0:10          Convert bool to int ( temp int)
+0:10            'b' ( global bool)
 0:12      add second child into first child ( temp int)
 0:12        'r' ( temp int)
-0:12        Convert bool to int ( temp int)
-0:12          bitwise and ( temp bool)
-0:12            Convert bool to int ( temp int)
-0:12              'a' ( global bool)
-0:12            Convert bool to int ( temp int)
-0:12              'b' ( global bool)
+0:12        bitwise and ( temp int)
+0:12          Convert bool to int ( temp int)
+0:12            'a' ( global bool)
+0:12          Convert bool to int ( temp int)
+0:12            'b' ( global bool)
 0:13      add second child into first child ( temp int)
 0:13        'r' ( temp int)
-0:13        Convert bool to int ( temp int)
-0:13          inclusive-or ( temp bool)
-0:13            Convert bool to int ( temp int)
-0:13              'a' ( global bool)
-0:13            Convert bool to int ( temp int)
-0:13              'b' ( global bool)
+0:13        inclusive-or ( temp int)
+0:13          Convert bool to int ( temp int)
+0:13            'a' ( global bool)
+0:13          Convert bool to int ( temp int)
+0:13            'b' ( global bool)
 0:14      add second child into first child ( temp int)
 0:14        'r' ( temp int)
-0:14        Convert bool to int ( temp int)
-0:14          exclusive-or ( temp bool)
-0:14            Convert bool to int ( temp int)
-0:14              'a' ( global bool)
-0:14            Convert bool to int ( temp int)
-0:14              'b' ( global bool)
+0:14        exclusive-or ( temp int)
+0:14          Convert bool to int ( temp int)
+0:14            'a' ( global bool)
+0:14          Convert bool to int ( temp int)
+0:14            'b' ( global bool)
 0:16      add second child into first child ( temp int)
 0:16        'r' ( temp int)
-0:16        Convert bool to int ( temp int)
-0:16          left-shift ( temp bool)
-0:16            Convert bool to int ( temp int)
-0:16              'a' ( global bool)
-0:16            Convert bool to int ( temp int)
-0:16              'b' ( global bool)
+0:16        left-shift ( temp int)
+0:16          Convert bool to int ( temp int)
+0:16            'a' ( global bool)
+0:16          Convert bool to int ( temp int)
+0:16            'b' ( global bool)
 0:17      add second child into first child ( temp int)
 0:17        'r' ( temp int)
-0:17        Convert bool to int ( temp int)
-0:17          right-shift ( temp bool)
-0:17            Convert bool to int ( temp int)
-0:17              'a' ( global bool)
-0:17            Convert bool to int ( temp int)
-0:17              'b' ( global bool)
+0:17        right-shift ( temp int)
+0:17          Convert bool to int ( temp int)
+0:17            'a' ( global bool)
+0:17          Convert bool to int ( temp int)
+0:17            'b' ( global bool)
 0:19      Branch: Return with expression
 0:19        Construct vec4 ( temp 4-component vector of float)
 0:19          Convert int to float ( temp float)
@@ -130,84 +120,74 @@ Shader version: 500
 0:4            0 (const int)
 0:6      add second child into first child ( temp int)
 0:6        'r' ( temp int)
-0:6        Convert bool to int ( temp int)
-0:6          add ( temp bool)
-0:6            Convert bool to int ( temp int)
-0:6              'a' ( global bool)
-0:6            Convert bool to int ( temp int)
-0:6              'b' ( global bool)
+0:6        add ( temp int)
+0:6          Convert bool to int ( temp int)
+0:6            'a' ( global bool)
+0:6          Convert bool to int ( temp int)
+0:6            'b' ( global bool)
 0:7      add second child into first child ( temp int)
 0:7        'r' ( temp int)
-0:7        Convert bool to int ( temp int)
-0:7          subtract ( temp bool)
-0:7            Convert bool to int ( temp int)
-0:7              'a' ( global bool)
-0:7            Convert bool to int ( temp int)
-0:7              'b' ( global bool)
+0:7        subtract ( temp int)
+0:7          Convert bool to int ( temp int)
+0:7            'a' ( global bool)
+0:7          Convert bool to int ( temp int)
+0:7            'b' ( global bool)
 0:8      add second child into first child ( temp int)
 0:8        'r' ( temp int)
-0:8        Convert bool to int ( temp int)
-0:8          component-wise multiply ( temp bool)
-0:8            Convert bool to int ( temp int)
-0:8              'a' ( global bool)
-0:8            Convert bool to int ( temp int)
-0:8              'b' ( global bool)
+0:8        component-wise multiply ( temp int)
+0:8          Convert bool to int ( temp int)
+0:8            'a' ( global bool)
+0:8          Convert bool to int ( temp int)
+0:8            'b' ( global bool)
 0:9      add second child into first child ( temp int)
 0:9        'r' ( temp int)
-0:9        Convert bool to int ( temp int)
-0:9          divide ( temp bool)
-0:9            Convert bool to int ( temp int)
-0:9              'a' ( global bool)
-0:9            Convert bool to int ( temp int)
-0:9              'b' ( global bool)
+0:9        divide ( temp int)
+0:9          Convert bool to int ( temp int)
+0:9            'a' ( global bool)
+0:9          Convert bool to int ( temp int)
+0:9            'b' ( global bool)
 0:10      add second child into first child ( temp int)
 0:10        'r' ( temp int)
-0:10        Convert bool to int ( temp int)
-0:10          mod ( temp bool)
-0:10            Convert bool to int ( temp int)
-0:10              'a' ( global bool)
-0:10            Convert bool to int ( temp int)
-0:10              'b' ( global bool)
+0:10        mod ( temp int)
+0:10          Convert bool to int ( temp int)
+0:10            'a' ( global bool)
+0:10          Convert bool to int ( temp int)
+0:10            'b' ( global bool)
 0:12      add second child into first child ( temp int)
 0:12        'r' ( temp int)
-0:12        Convert bool to int ( temp int)
-0:12          bitwise and ( temp bool)
-0:12            Convert bool to int ( temp int)
-0:12              'a' ( global bool)
-0:12            Convert bool to int ( temp int)
-0:12              'b' ( global bool)
+0:12        bitwise and ( temp int)
+0:12          Convert bool to int ( temp int)
+0:12            'a' ( global bool)
+0:12          Convert bool to int ( temp int)
+0:12            'b' ( global bool)
 0:13      add second child into first child ( temp int)
 0:13        'r' ( temp int)
-0:13        Convert bool to int ( temp int)
-0:13          inclusive-or ( temp bool)
-0:13            Convert bool to int ( temp int)
-0:13              'a' ( global bool)
-0:13            Convert bool to int ( temp int)
-0:13              'b' ( global bool)
+0:13        inclusive-or ( temp int)
+0:13          Convert bool to int ( temp int)
+0:13            'a' ( global bool)
+0:13          Convert bool to int ( temp int)
+0:13            'b' ( global bool)
 0:14      add second child into first child ( temp int)
 0:14        'r' ( temp int)
-0:14        Convert bool to int ( temp int)
-0:14          exclusive-or ( temp bool)
-0:14            Convert bool to int ( temp int)
-0:14              'a' ( global bool)
-0:14            Convert bool to int ( temp int)
-0:14              'b' ( global bool)
+0:14        exclusive-or ( temp int)
+0:14          Convert bool to int ( temp int)
+0:14            'a' ( global bool)
+0:14          Convert bool to int ( temp int)
+0:14            'b' ( global bool)
 0:16      add second child into first child ( temp int)
 0:16        'r' ( temp int)
-0:16        Convert bool to int ( temp int)
-0:16          left-shift ( temp bool)
-0:16            Convert bool to int ( temp int)
-0:16              'a' ( global bool)
-0:16            Convert bool to int ( temp int)
-0:16              'b' ( global bool)
+0:16        left-shift ( temp int)
+0:16          Convert bool to int ( temp int)
+0:16            'a' ( global bool)
+0:16          Convert bool to int ( temp int)
+0:16            'b' ( global bool)
 0:17      add second child into first child ( temp int)
 0:17        'r' ( temp int)
-0:17        Convert bool to int ( temp int)
-0:17          right-shift ( temp bool)
-0:17            Convert bool to int ( temp int)
-0:17              'a' ( global bool)
-0:17            Convert bool to int ( temp int)
-0:17              'b' ( global bool)
+0:17        right-shift ( temp int)
+0:17          Convert bool to int ( temp int)
+0:17            'a' ( global bool)
+0:17          Convert bool to int ( temp int)
+0:17            'b' ( global bool)
 0:19      Branch: Return with expression
 0:19        Construct vec4 ( temp 4-component vector of float)
 0:19          Convert int to float ( temp float)
@@ -224,21 +204,21 @@ Shader version: 500
 0:?     '@entryPointOutput' ( out 4-component vector of float Position)
 
 // Module Version 10000
-// Generated by (magic number): 80002
-// Id's are bound by 109
+// Generated by (magic number): 80004
+// Id's are bound by 99
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "main" 107
+                              EntryPoint Vertex 4  "main" 97
                               Source HLSL 500
                               Name 4  "main"
                               Name 9  "@main("
                               Name 13  "b"
                               Name 17  "r"
                               Name 19  "a"
-                              Name 107  "@entryPointOutput"
-                              Decorate 107(@entryPointOutput) BuiltIn Position
+                              Name 97  "@entryPointOutput"
+                              Decorate 97(@entryPointOutput) BuiltIn Position
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -253,13 +233,13 @@ Shader version: 500
               18:     15(int) Constant 0
            19(a):     12(ptr) Variable Private
               21:     15(int) Constant 1
-             106:             TypePointer Output 7(fvec4)
-107(@entryPointOutput):    106(ptr) Variable Output
+              96:             TypePointer Output 7(fvec4)
+97(@entryPointOutput):     96(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
                               Store 13(b) 14
-             108:    7(fvec4) FunctionCall 9(@main()
-                              Store 107(@entryPointOutput) 108
+              98:    7(fvec4) FunctionCall 9(@main()
+                              Store 97(@entryPointOutput) 98
                               Return
                               FunctionEnd
        9(@main():    7(fvec4) Function None 8
@@ -270,62 +250,63 @@ Shader version: 500
               22:     15(int) Select 20 21 18
               23:    11(bool) Load 13(b)
               24:     15(int) Select 23 21 18
-              25:    11(bool) IAdd 22 24
-              26:     15(int) Select 25 21 18
-              27:     15(int) Load 17(r)
-              28:     15(int) IAdd 27 26
-                              Store 17(r) 28
-              29:    11(bool) Load 19(a)
-              30:     15(int) Select 29 21 18
-              31:    11(bool) Load 13(b)
-              32:     15(int) Select 31 21 18
-              33:    11(bool) ISub 30 32
-              34:     15(int) Select 33 21 18
-              35:     15(int) Load 17(r)
-              36:     15(int) IAdd 35 34
-                              Store 17(r) 36
-              37:    11(bool) Load 19(a)
+              25:     15(int) IAdd 22 24
+              26:     15(int) Load 17(r)
+              27:     15(int) IAdd 26 25
+                              Store 17(r) 27
+              28:    11(bool) Load 19(a)
+              29:     15(int) Select 28 21 18
+              30:    11(bool) Load 13(b)
+              31:     15(int) Select 30 21 18
+              32:     15(int) ISub 29 31
+              33:     15(int) Load 17(r)
+              34:     15(int) IAdd 33 32
+                              Store 17(r) 34
+              35:    11(bool) Load 19(a)
+              36:     15(int) Select 35 21 18
+              37:    11(bool) Load 13(b)
               38:     15(int) Select 37 21 18
-              39:    11(bool) Load 13(b)
-              40:     15(int) Select 39 21 18
-              41:    11(bool) IMul 38 40
-              42:     15(int) Select 41 21 18
-              43:     15(int) Load 17(r)
-              44:     15(int) IAdd 43 42
-                              Store 17(r) 44
-              45:    11(bool) Load 19(a)
-              46:     15(int) Select 45 21 18
-              47:    11(bool) Load 13(b)
-              48:     15(int) Select 47 21 18
-              49:    11(bool) SDiv 46 48
+              39:     15(int) IMul 36 38
+              40:     15(int) Load 17(r)
+              41:     15(int) IAdd 40 39
+                              Store 17(r) 41
+              42:    11(bool) Load 19(a)
+              43:     15(int) Select 42 21 18
+              44:    11(bool) Load 13(b)
+              45:     15(int) Select 44 21 18
+              46:     15(int) SDiv 43 45
+              47:     15(int) Load 17(r)
+              48:     15(int) IAdd 47 46
+                              Store 17(r) 48
+              49:    11(bool) Load 19(a)
               50:     15(int) Select 49 21 18
-              51:     15(int) Load 17(r)
-              52:     15(int) IAdd 51 50
-                              Store 17(r) 52
-              53:    11(bool) Load 19(a)
-              54:     15(int) Select 53 21 18
-              55:    11(bool) Load 13(b)
-              56:     15(int) Select 55 21 18
-              57:    11(bool) SMod 54 56
-              58:     15(int) Select 57 21 18
-              59:     15(int) Load 17(r)
-              60:     15(int) IAdd 59 58
-                              Store 17(r) 60
-              61:    11(bool) Load 19(a)
-              62:     15(int) Select 61 21 18
-              63:    11(bool) Load 13(b)
+              51:    11(bool) Load 13(b)
+              52:     15(int) Select 51 21 18
+              53:     15(int) SMod 50 52
+              54:     15(int) Load 17(r)
+              55:     15(int) IAdd 54 53
+                              Store 17(r) 55
+              56:    11(bool) Load 19(a)
+              57:     15(int) Select 56 21 18
+              58:    11(bool) Load 13(b)
+              59:     15(int) Select 58 21 18
+              60:     15(int) BitwiseAnd 57 59
+              61:     15(int) Load 17(r)
+              62:     15(int) IAdd 61 60
+                              Store 17(r) 62
+              63:    11(bool) Load 19(a)
               64:     15(int) Select 63 21 18
-              65:    11(bool) BitwiseAnd 62 64
+              65:    11(bool) Load 13(b)
               66:     15(int) Select 65 21 18
-              67:     15(int) Load 17(r)
-              68:     15(int) IAdd 67 66
-                              Store 17(r) 68
-              69:    11(bool) Load 19(a)
-              70:     15(int) Select 69 21 18
-              71:    11(bool) Load 13(b)
-              72:     15(int) Select 71 21 18
-              73:    11(bool) BitwiseOr 70 72
-              74:     15(int) Select 73 21 18
+              67:     15(int) BitwiseOr 64 66
+              68:     15(int) Load 17(r)
+              69:     15(int) IAdd 68 67
+                              Store 17(r) 69
+              70:    11(bool) Load 19(a)
+              71:     15(int) Select 70 21 18
+              72:    11(bool) Load 13(b)
+              73:     15(int) Select 72 21 18
+              74:     15(int) BitwiseXor 71 73
               75:     15(int) Load 17(r)
               76:     15(int) IAdd 75 74
                               Store 17(r) 76
@@ -333,31 +314,20 @@ Shader version: 500
               78:     15(int) Select 77 21 18
               79:    11(bool) Load 13(b)
               80:     15(int) Select 79 21 18
-              81:    11(bool) BitwiseXor 78 80
-              82:     15(int) Select 81 21 18
-              83:     15(int) Load 17(r)
-              84:     15(int) IAdd 83 82
-                              Store 17(r) 84
-              85:    11(bool) Load 19(a)
-              86:     15(int) Select 85 21 18
-              87:    11(bool) Load 13(b)
-              88:     15(int) Select 87 21 18
-              89:    11(bool) ShiftLeftLogical 86 88
-              90:     15(int) Select 89 21 18
+              81:     15(int) ShiftLeftLogical 78 80
+              82:     15(int) Load 17(r)
+              83:     15(int) IAdd 82 81
+                              Store 17(r) 83
+              84:    11(bool) Load 19(a)
+              85:     15(int) Select 84 21 18
+              86:    11(bool) Load 13(b)
+              87:     15(int) Select 86 21 18
+              88:     15(int) ShiftRightArithmetic 85 87
+              89:     15(int) Load 17(r)
+              90:     15(int) IAdd 89 88
+                              Store 17(r) 90
               91:     15(int) Load 17(r)
-              92:     15(int) IAdd 91 90
-                              Store 17(r) 92
-              93:    11(bool) Load 19(a)
-              94:     15(int) Select 93 21 18
-              95:    11(bool) Load 13(b)
-              96:     15(int) Select 95 21 18
-              97:    11(bool) ShiftRightArithmetic 94 96
-              98:     15(int) Select 97 21 18
-              99:     15(int) Load 17(r)
-             100:     15(int) IAdd 99 98
-                              Store 17(r) 100
-             101:     15(int) Load 17(r)
-             102:    6(float) ConvertSToF 101
-             103:    7(fvec4) CompositeConstruct 102 102 102 102
-                              ReturnValue 103
+              92:    6(float) ConvertSToF 91
+              93:    7(fvec4) CompositeConstruct 92 92 92 92
+                              ReturnValue 93
                               FunctionEnd

+ 255 - 201
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.buffer.frag.out

@@ -8,50 +8,66 @@ gl_FragCoord origin is upper left
 0:31      Branch: Return with expression
 0:31        Constant:
 0:31          1.000000
-0:35  Function Definition: @PixelShaderFunction(vf4; ( temp 4-component vector of float)
-0:35    Function Parameters: 
-0:35      'input' ( in 4-component vector of float)
+0:43  Function Definition: @PixelShaderFunction(vf4; ( temp structure{ temp 4-component vector of float a})
+0:43    Function Parameters: 
+0:43      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:36      Branch: Return with expression
-0:36        vector-scale ( temp 4-component vector of float)
-0:36          add ( temp 4-component vector of float)
-0:36            add ( temp 4-component vector of float)
-0:36              add ( temp 4-component vector of float)
-0:36                add ( temp 4-component vector of float)
-0:36                  'input' ( in 4-component vector of float)
-0:36                  v1: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
-0:36                    'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
-0:36                    Constant:
-0:36                      0 (const uint)
-0:36                v2: direct index for structure (layout( row_major std430) buffer 4-component vector of float)
-0:36                  'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
-0:36                  Constant:
-0:36                    0 (const uint)
-0:36              v3: direct index for structure (layout( row_major std140 offset=0) uniform 4-component vector of float)
-0:36                'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
-0:36                Constant:
-0:36                  0 (const uint)
-0:36            v4: direct index for structure (layout( row_major std430 offset=16) buffer 4-component vector of float)
-0:36              'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
-0:36              Constant:
-0:36                0 (const uint)
-0:36          Function Call: foo( ( temp float)
-0:35  Function Definition: PixelShaderFunction( ( temp void)
-0:35    Function Parameters: 
+0:45      move second child to first child ( temp 4-component vector of float)
+0:45        a: direct index for structure ( temp 4-component vector of float)
+0:45          'ret' ( temp structure{ temp 4-component vector of float a})
+0:45          Constant:
+0:45            0 (const int)
+0:45        add ( temp 4-component vector of float)
+0:45          v24: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:45            'anon@4' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v24})
+0:45            Constant:
+0:45              0 (const uint)
+0:45          vector-scale ( temp 4-component vector of float)
+0:45            add ( temp 4-component vector of float)
+0:45              add ( temp 4-component vector of float)
+0:45                add ( temp 4-component vector of float)
+0:45                  add ( temp 4-component vector of float)
+0:45                    'input' ( in 4-component vector of float)
+0:45                    v1: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:45                      'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
+0:45                      Constant:
+0:45                        0 (const uint)
+0:45                  v2: direct index for structure (layout( row_major std430) buffer 4-component vector of float)
+0:45                    'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
+0:45                    Constant:
+0:45                      0 (const uint)
+0:45                v3: direct index for structure (layout( row_major std140 offset=0) uniform 4-component vector of float)
+0:45                  'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
+0:45                  Constant:
+0:45                    0 (const uint)
+0:45              v4: direct index for structure (layout( row_major std430 offset=16) buffer 4-component vector of float)
+0:45                'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
+0:45                Constant:
+0:45                  0 (const uint)
+0:45            Function Call: foo( ( temp float)
+0:46      Branch: Return with expression
+0:46        'ret' ( temp structure{ temp 4-component vector of float a})
+0:43  Function Definition: PixelShaderFunction( ( temp void)
+0:43    Function Parameters: 
 0:?     Sequence
-0:35      move second child to first child ( temp 4-component vector of float)
+0:43      move second child to first child ( temp 4-component vector of float)
 0:?         'input' ( temp 4-component vector of float)
 0:?         'input' ( in 4-component vector of float FragCoord)
-0:35      move second child to first child ( temp 4-component vector of float)
-0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
-0:35        Function Call: @PixelShaderFunction(vf4; ( temp 4-component vector of float)
-0:?           'input' ( temp 4-component vector of float)
+0:43      Sequence
+0:43        move second child to first child ( temp 4-component vector of float)
+0:?           '@entryPointOutput.a' (layout( location=0) out 4-component vector of float)
+0:43          a: direct index for structure ( temp 4-component vector of float)
+0:43            Function Call: @PixelShaderFunction(vf4; ( temp structure{ temp 4-component vector of float a})
+0:?               'input' ( temp 4-component vector of float)
+0:43            Constant:
+0:43              0 (const int)
 0:?   Linker Objects
 0:?     'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
 0:?     'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
 0:?     'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
 0:?     'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
-0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:?     'anon@4' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v24})
+0:?     '@entryPointOutput.a' (layout( location=0) out 4-component vector of float)
 0:?     'input' ( in 4-component vector of float FragCoord)
 
 
@@ -67,203 +83,241 @@ gl_FragCoord origin is upper left
 0:31      Branch: Return with expression
 0:31        Constant:
 0:31          1.000000
-0:35  Function Definition: @PixelShaderFunction(vf4; ( temp 4-component vector of float)
-0:35    Function Parameters: 
-0:35      'input' ( in 4-component vector of float)
+0:43  Function Definition: @PixelShaderFunction(vf4; ( temp structure{ temp 4-component vector of float a})
+0:43    Function Parameters: 
+0:43      'input' ( in 4-component vector of float)
 0:?     Sequence
-0:36      Branch: Return with expression
-0:36        vector-scale ( temp 4-component vector of float)
-0:36          add ( temp 4-component vector of float)
-0:36            add ( temp 4-component vector of float)
-0:36              add ( temp 4-component vector of float)
-0:36                add ( temp 4-component vector of float)
-0:36                  'input' ( in 4-component vector of float)
-0:36                  v1: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
-0:36                    'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
-0:36                    Constant:
-0:36                      0 (const uint)
-0:36                v2: direct index for structure (layout( row_major std430) buffer 4-component vector of float)
-0:36                  'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
-0:36                  Constant:
-0:36                    0 (const uint)
-0:36              v3: direct index for structure (layout( row_major std140 offset=0) uniform 4-component vector of float)
-0:36                'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
-0:36                Constant:
-0:36                  0 (const uint)
-0:36            v4: direct index for structure (layout( row_major std430 offset=16) buffer 4-component vector of float)
-0:36              'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
-0:36              Constant:
-0:36                0 (const uint)
-0:36          Function Call: foo( ( temp float)
-0:35  Function Definition: PixelShaderFunction( ( temp void)
-0:35    Function Parameters: 
+0:45      move second child to first child ( temp 4-component vector of float)
+0:45        a: direct index for structure ( temp 4-component vector of float)
+0:45          'ret' ( temp structure{ temp 4-component vector of float a})
+0:45          Constant:
+0:45            0 (const int)
+0:45        add ( temp 4-component vector of float)
+0:45          v24: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:45            'anon@4' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v24})
+0:45            Constant:
+0:45              0 (const uint)
+0:45          vector-scale ( temp 4-component vector of float)
+0:45            add ( temp 4-component vector of float)
+0:45              add ( temp 4-component vector of float)
+0:45                add ( temp 4-component vector of float)
+0:45                  add ( temp 4-component vector of float)
+0:45                    'input' ( in 4-component vector of float)
+0:45                    v1: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:45                      'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
+0:45                      Constant:
+0:45                        0 (const uint)
+0:45                  v2: direct index for structure (layout( row_major std430) buffer 4-component vector of float)
+0:45                    'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
+0:45                    Constant:
+0:45                      0 (const uint)
+0:45                v3: direct index for structure (layout( row_major std140 offset=0) uniform 4-component vector of float)
+0:45                  'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
+0:45                  Constant:
+0:45                    0 (const uint)
+0:45              v4: direct index for structure (layout( row_major std430 offset=16) buffer 4-component vector of float)
+0:45                'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
+0:45                Constant:
+0:45                  0 (const uint)
+0:45            Function Call: foo( ( temp float)
+0:46      Branch: Return with expression
+0:46        'ret' ( temp structure{ temp 4-component vector of float a})
+0:43  Function Definition: PixelShaderFunction( ( temp void)
+0:43    Function Parameters: 
 0:?     Sequence
-0:35      move second child to first child ( temp 4-component vector of float)
+0:43      move second child to first child ( temp 4-component vector of float)
 0:?         'input' ( temp 4-component vector of float)
 0:?         'input' ( in 4-component vector of float FragCoord)
-0:35      move second child to first child ( temp 4-component vector of float)
-0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
-0:35        Function Call: @PixelShaderFunction(vf4; ( temp 4-component vector of float)
-0:?           'input' ( temp 4-component vector of float)
+0:43      Sequence
+0:43        move second child to first child ( temp 4-component vector of float)
+0:?           '@entryPointOutput.a' (layout( location=0) out 4-component vector of float)
+0:43          a: direct index for structure ( temp 4-component vector of float)
+0:43            Function Call: @PixelShaderFunction(vf4; ( temp structure{ temp 4-component vector of float a})
+0:?               'input' ( temp 4-component vector of float)
+0:43            Constant:
+0:43              0 (const int)
 0:?   Linker Objects
 0:?     'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v1})
 0:?     'anon@1' (layout( row_major std430) readonly buffer block{layout( row_major std430) buffer 4-component vector of float v2})
 0:?     'anon@2' (layout( row_major std140) uniform block{layout( row_major std140 offset=0) uniform 4-component vector of float v3, layout( row_major std140 offset=20) uniform int i3})
 0:?     'anon@3' (layout( binding=8 row_major std430) readonly buffer block{layout( row_major std430 offset=16) buffer 4-component vector of float v4, layout( row_major std430 offset=48) buffer int i4, layout( row_major std430 offset=60) buffer float f1, layout( row_major std430 offset=64) buffer float f3, layout( row_major std430 offset=68) buffer float f4, layout( row_major std430 offset=72) buffer float f5, layout( row_major std430) buffer float f6, layout( row_major std430 offset=128) buffer float f7, layout( row_major std430 offset=112) buffer 3X4 matrix of float m1, layout( column_major std430 offset=176) buffer 3X4 matrix of float m2, layout( row_major std430 offset=240) buffer 3X4 matrix of float m3, layout( row_major std430 offset=304) buffer 3X4 matrix of float m4})
-0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:?     'anon@4' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4-component vector of float v24})
+0:?     '@entryPointOutput.a' (layout( location=0) out 4-component vector of float)
 0:?     'input' ( in 4-component vector of float FragCoord)
 
 // Module Version 10000
-// Generated by (magic number): 80002
-// Id's are bound by 61
+// Generated by (magic number): 80004
+// Id's are bound by 73
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Fragment 4  "PixelShaderFunction" 54 57
+                              EntryPoint Fragment 4  "PixelShaderFunction" 65 68
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 500
                               Name 4  "PixelShaderFunction"
                               Name 8  "foo("
-                              Name 14  "@PixelShaderFunction(vf4;"
-                              Name 13  "input"
-                              Name 20  "buf1"
-                              MemberName 20(buf1) 0  "v1"
-                              Name 22  ""
-                              Name 29  "buf2"
-                              MemberName 29(buf2) 0  "v2"
-                              Name 31  ""
-                              Name 35  "cbufName"
-                              MemberName 35(cbufName) 0  "v3"
-                              MemberName 35(cbufName) 1  "i3"
-                              Name 37  ""
-                              Name 42  "tbufName"
-                              MemberName 42(tbufName) 0  "v4"
-                              MemberName 42(tbufName) 1  "i4"
-                              MemberName 42(tbufName) 2  "f1"
-                              MemberName 42(tbufName) 3  "f3"
-                              MemberName 42(tbufName) 4  "f4"
-                              MemberName 42(tbufName) 5  "f5"
-                              MemberName 42(tbufName) 6  "f6"
-                              MemberName 42(tbufName) 7  "f7"
-                              MemberName 42(tbufName) 8  "m1"
-                              MemberName 42(tbufName) 9  "m2"
-                              MemberName 42(tbufName) 10  "m3"
-                              MemberName 42(tbufName) 11  "m4"
-                              Name 44  ""
-                              Name 52  "input"
-                              Name 54  "input"
-                              Name 57  "@entryPointOutput"
-                              Name 58  "param"
-                              MemberDecorate 20(buf1) 0 Offset 0
-                              Decorate 20(buf1) Block
-                              Decorate 22 DescriptorSet 0
-                              MemberDecorate 29(buf2) 0 NonWritable
-                              MemberDecorate 29(buf2) 0 Offset 0
-                              Decorate 29(buf2) BufferBlock
-                              Decorate 31 DescriptorSet 0
-                              MemberDecorate 35(cbufName) 0 Offset 0
-                              MemberDecorate 35(cbufName) 1 Offset 20
-                              Decorate 35(cbufName) Block
-                              Decorate 37 DescriptorSet 0
-                              MemberDecorate 42(tbufName) 0 NonWritable
-                              MemberDecorate 42(tbufName) 0 Offset 16
-                              MemberDecorate 42(tbufName) 1 NonWritable
-                              MemberDecorate 42(tbufName) 1 Offset 48
-                              MemberDecorate 42(tbufName) 2 NonWritable
-                              MemberDecorate 42(tbufName) 2 Offset 60
-                              MemberDecorate 42(tbufName) 3 NonWritable
-                              MemberDecorate 42(tbufName) 3 Offset 64
-                              MemberDecorate 42(tbufName) 4 NonWritable
-                              MemberDecorate 42(tbufName) 4 Offset 68
-                              MemberDecorate 42(tbufName) 5 NonWritable
-                              MemberDecorate 42(tbufName) 5 Offset 72
-                              MemberDecorate 42(tbufName) 6 NonWritable
-                              MemberDecorate 42(tbufName) 6 Offset 76
-                              MemberDecorate 42(tbufName) 7 NonWritable
-                              MemberDecorate 42(tbufName) 7 Offset 128
-                              MemberDecorate 42(tbufName) 8 RowMajor
-                              MemberDecorate 42(tbufName) 8 NonWritable
-                              MemberDecorate 42(tbufName) 8 Offset 112
-                              MemberDecorate 42(tbufName) 8 MatrixStride 16
-                              MemberDecorate 42(tbufName) 9 ColMajor
-                              MemberDecorate 42(tbufName) 9 NonWritable
-                              MemberDecorate 42(tbufName) 9 Offset 176
-                              MemberDecorate 42(tbufName) 9 MatrixStride 16
-                              MemberDecorate 42(tbufName) 10 RowMajor
-                              MemberDecorate 42(tbufName) 10 NonWritable
-                              MemberDecorate 42(tbufName) 10 Offset 240
-                              MemberDecorate 42(tbufName) 10 MatrixStride 16
-                              MemberDecorate 42(tbufName) 11 RowMajor
-                              MemberDecorate 42(tbufName) 11 NonWritable
-                              MemberDecorate 42(tbufName) 11 Offset 304
-                              MemberDecorate 42(tbufName) 11 MatrixStride 16
-                              Decorate 42(tbufName) BufferBlock
-                              Decorate 44 DescriptorSet 0
-                              Decorate 44 Binding 8
-                              Decorate 54(input) BuiltIn FragCoord
-                              Decorate 57(@entryPointOutput) Location 0
+                              Name 12  "id"
+                              MemberName 12(id) 0  "a"
+                              Name 15  "@PixelShaderFunction(vf4;"
+                              Name 14  "input"
+                              Name 21  "ret"
+                              Name 24  "cbufName2"
+                              MemberName 24(cbufName2) 0  "v24"
+                              Name 26  ""
+                              Name 31  "buf1"
+                              MemberName 31(buf1) 0  "v1"
+                              Name 33  ""
+                              Name 37  "buf2"
+                              MemberName 37(buf2) 0  "v2"
+                              Name 39  ""
+                              Name 43  "cbufName"
+                              MemberName 43(cbufName) 0  "v3"
+                              MemberName 43(cbufName) 1  "i3"
+                              Name 45  ""
+                              Name 50  "tbufName"
+                              MemberName 50(tbufName) 0  "v4"
+                              MemberName 50(tbufName) 1  "i4"
+                              MemberName 50(tbufName) 2  "f1"
+                              MemberName 50(tbufName) 3  "f3"
+                              MemberName 50(tbufName) 4  "f4"
+                              MemberName 50(tbufName) 5  "f5"
+                              MemberName 50(tbufName) 6  "f6"
+                              MemberName 50(tbufName) 7  "f7"
+                              MemberName 50(tbufName) 8  "m1"
+                              MemberName 50(tbufName) 9  "m2"
+                              MemberName 50(tbufName) 10  "m3"
+                              MemberName 50(tbufName) 11  "m4"
+                              Name 52  ""
+                              Name 63  "input"
+                              Name 65  "input"
+                              Name 68  "@entryPointOutput.a"
+                              Name 69  "param"
+                              MemberDecorate 24(cbufName2) 0 Offset 0
+                              Decorate 24(cbufName2) Block
+                              Decorate 26 DescriptorSet 0
+                              MemberDecorate 31(buf1) 0 Offset 0
+                              Decorate 31(buf1) Block
+                              Decorate 33 DescriptorSet 0
+                              MemberDecorate 37(buf2) 0 NonWritable
+                              MemberDecorate 37(buf2) 0 Offset 0
+                              Decorate 37(buf2) BufferBlock
+                              Decorate 39 DescriptorSet 0
+                              MemberDecorate 43(cbufName) 0 Offset 0
+                              MemberDecorate 43(cbufName) 1 Offset 20
+                              Decorate 43(cbufName) Block
+                              Decorate 45 DescriptorSet 0
+                              MemberDecorate 50(tbufName) 0 NonWritable
+                              MemberDecorate 50(tbufName) 0 Offset 16
+                              MemberDecorate 50(tbufName) 1 NonWritable
+                              MemberDecorate 50(tbufName) 1 Offset 48
+                              MemberDecorate 50(tbufName) 2 NonWritable
+                              MemberDecorate 50(tbufName) 2 Offset 60
+                              MemberDecorate 50(tbufName) 3 NonWritable
+                              MemberDecorate 50(tbufName) 3 Offset 64
+                              MemberDecorate 50(tbufName) 4 NonWritable
+                              MemberDecorate 50(tbufName) 4 Offset 68
+                              MemberDecorate 50(tbufName) 5 NonWritable
+                              MemberDecorate 50(tbufName) 5 Offset 72
+                              MemberDecorate 50(tbufName) 6 NonWritable
+                              MemberDecorate 50(tbufName) 6 Offset 76
+                              MemberDecorate 50(tbufName) 7 NonWritable
+                              MemberDecorate 50(tbufName) 7 Offset 128
+                              MemberDecorate 50(tbufName) 8 RowMajor
+                              MemberDecorate 50(tbufName) 8 NonWritable
+                              MemberDecorate 50(tbufName) 8 Offset 112
+                              MemberDecorate 50(tbufName) 8 MatrixStride 16
+                              MemberDecorate 50(tbufName) 9 ColMajor
+                              MemberDecorate 50(tbufName) 9 NonWritable
+                              MemberDecorate 50(tbufName) 9 Offset 176
+                              MemberDecorate 50(tbufName) 9 MatrixStride 16
+                              MemberDecorate 50(tbufName) 10 RowMajor
+                              MemberDecorate 50(tbufName) 10 NonWritable
+                              MemberDecorate 50(tbufName) 10 Offset 240
+                              MemberDecorate 50(tbufName) 10 MatrixStride 16
+                              MemberDecorate 50(tbufName) 11 RowMajor
+                              MemberDecorate 50(tbufName) 11 NonWritable
+                              MemberDecorate 50(tbufName) 11 Offset 304
+                              MemberDecorate 50(tbufName) 11 MatrixStride 16
+                              Decorate 50(tbufName) BufferBlock
+                              Decorate 52 DescriptorSet 0
+                              Decorate 52 Binding 8
+                              Decorate 65(input) BuiltIn FragCoord
+                              Decorate 68(@entryPointOutput.a) Location 0
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
                7:             TypeFunction 6(float)
               10:             TypeVector 6(float) 4
               11:             TypePointer Function 10(fvec4)
-              12:             TypeFunction 10(fvec4) 11(ptr)
-              16:    6(float) Constant 1065353216
-        20(buf1):             TypeStruct 10(fvec4)
-              21:             TypePointer Uniform 20(buf1)
-              22:     21(ptr) Variable Uniform
-              23:             TypeInt 32 1
-              24:     23(int) Constant 0
-              25:             TypePointer Uniform 10(fvec4)
-        29(buf2):             TypeStruct 10(fvec4)
-              30:             TypePointer Uniform 29(buf2)
-              31:     30(ptr) Variable Uniform
-    35(cbufName):             TypeStruct 10(fvec4) 23(int)
-              36:             TypePointer Uniform 35(cbufName)
-              37:     36(ptr) Variable Uniform
-              41:             TypeMatrix 10(fvec4) 3
-    42(tbufName):             TypeStruct 10(fvec4) 23(int) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 41 41 41 41
-              43:             TypePointer Uniform 42(tbufName)
-              44:     43(ptr) Variable Uniform
-              53:             TypePointer Input 10(fvec4)
-       54(input):     53(ptr) Variable Input
-              56:             TypePointer Output 10(fvec4)
-57(@entryPointOutput):     56(ptr) Variable Output
+          12(id):             TypeStruct 10(fvec4)
+              13:             TypeFunction 12(id) 11(ptr)
+              17:    6(float) Constant 1065353216
+              20:             TypePointer Function 12(id)
+              22:             TypeInt 32 1
+              23:     22(int) Constant 0
+   24(cbufName2):             TypeStruct 10(fvec4)
+              25:             TypePointer Uniform 24(cbufName2)
+              26:     25(ptr) Variable Uniform
+              27:             TypePointer Uniform 10(fvec4)
+        31(buf1):             TypeStruct 10(fvec4)
+              32:             TypePointer Uniform 31(buf1)
+              33:     32(ptr) Variable Uniform
+        37(buf2):             TypeStruct 10(fvec4)
+              38:             TypePointer Uniform 37(buf2)
+              39:     38(ptr) Variable Uniform
+    43(cbufName):             TypeStruct 10(fvec4) 22(int)
+              44:             TypePointer Uniform 43(cbufName)
+              45:     44(ptr) Variable Uniform
+              49:             TypeMatrix 10(fvec4) 3
+    50(tbufName):             TypeStruct 10(fvec4) 22(int) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 49 49 49 49
+              51:             TypePointer Uniform 50(tbufName)
+              52:     51(ptr) Variable Uniform
+              64:             TypePointer Input 10(fvec4)
+       65(input):     64(ptr) Variable Input
+              67:             TypePointer Output 10(fvec4)
+68(@entryPointOutput.a):     67(ptr) Variable Output
 4(PixelShaderFunction):           2 Function None 3
                5:             Label
-       52(input):     11(ptr) Variable Function
-       58(param):     11(ptr) Variable Function
-              55:   10(fvec4) Load 54(input)
-                              Store 52(input) 55
-              59:   10(fvec4) Load 52(input)
-                              Store 58(param) 59
-              60:   10(fvec4) FunctionCall 14(@PixelShaderFunction(vf4;) 58(param)
-                              Store 57(@entryPointOutput) 60
+       63(input):     11(ptr) Variable Function
+       69(param):     11(ptr) Variable Function
+              66:   10(fvec4) Load 65(input)
+                              Store 63(input) 66
+              70:   10(fvec4) Load 63(input)
+                              Store 69(param) 70
+              71:      12(id) FunctionCall 15(@PixelShaderFunction(vf4;) 69(param)
+              72:   10(fvec4) CompositeExtract 71 0
+                              Store 68(@entryPointOutput.a) 72
                               Return
                               FunctionEnd
          8(foo():    6(float) Function None 7
                9:             Label
-                              ReturnValue 16
+                              ReturnValue 17
                               FunctionEnd
-14(@PixelShaderFunction(vf4;):   10(fvec4) Function None 12
-       13(input):     11(ptr) FunctionParameter
-              15:             Label
-              19:   10(fvec4) Load 13(input)
-              26:     25(ptr) AccessChain 22 24
-              27:   10(fvec4) Load 26
-              28:   10(fvec4) FAdd 19 27
-              32:     25(ptr) AccessChain 31 24
-              33:   10(fvec4) Load 32
-              34:   10(fvec4) FAdd 28 33
-              38:     25(ptr) AccessChain 37 24
-              39:   10(fvec4) Load 38
-              40:   10(fvec4) FAdd 34 39
-              45:     25(ptr) AccessChain 44 24
-              46:   10(fvec4) Load 45
-              47:   10(fvec4) FAdd 40 46
-              48:    6(float) FunctionCall 8(foo()
-              49:   10(fvec4) VectorTimesScalar 47 48
-                              ReturnValue 49
+15(@PixelShaderFunction(vf4;):      12(id) Function None 13
+       14(input):     11(ptr) FunctionParameter
+              16:             Label
+         21(ret):     20(ptr) Variable Function
+              28:     27(ptr) AccessChain 26 23
+              29:   10(fvec4) Load 28
+              30:   10(fvec4) Load 14(input)
+              34:     27(ptr) AccessChain 33 23
+              35:   10(fvec4) Load 34
+              36:   10(fvec4) FAdd 30 35
+              40:     27(ptr) AccessChain 39 23
+              41:   10(fvec4) Load 40
+              42:   10(fvec4) FAdd 36 41
+              46:     27(ptr) AccessChain 45 23
+              47:   10(fvec4) Load 46
+              48:   10(fvec4) FAdd 42 47
+              53:     27(ptr) AccessChain 52 23
+              54:   10(fvec4) Load 53
+              55:   10(fvec4) FAdd 48 54
+              56:    6(float) FunctionCall 8(foo()
+              57:   10(fvec4) VectorTimesScalar 55 56
+              58:   10(fvec4) FAdd 29 57
+              59:     11(ptr) AccessChain 21(ret) 23
+                              Store 59 58
+              60:      12(id) Load 21(ret)
+                              ReturnValue 60
                               FunctionEnd

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.calculatelod.dx10.frag.out

@@ -356,7 +356,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 148
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.cast.frag.out

@@ -72,7 +72,7 @@ gl_FragCoord origin is upper left
 0:?     'input' (layout( location=0) in 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 39
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.cbuffer-identifier.vert.out

@@ -250,7 +250,7 @@ Shader version: 500
 0:?     'input.Norm' (layout( location=1) in 3-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 106
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.charLit.vert.out

@@ -146,7 +146,7 @@ Shader version: 500
 0:?     '@entryPointOutput' ( out 4-component vector of float Position)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 58
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clip.frag.out

@@ -74,7 +74,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 30
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.frag.out

@@ -98,7 +98,7 @@ gl_FragCoord origin is upper left
 0:?     'cull' ( in 1-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 53
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.geom.out

@@ -550,7 +550,7 @@ output primitive = line_strip
 0:?     'OutputStream.clip' ( out 2-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 118
 
                               Capability Geometry

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-1.vert.out

@@ -108,7 +108,7 @@ Shader version: 500
 0:?     'cull' ( out 1-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 46
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.frag.out

@@ -290,7 +290,7 @@ gl_FragCoord origin is upper left
 0:?     'cull' ( in 4-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 84
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.geom.out

@@ -724,7 +724,7 @@ output primitive = line_strip
 0:?     'OutputStream.clip' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 128
 
                               Capability Geometry

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-2.vert.out

@@ -420,7 +420,7 @@ Shader version: 500
 0:?     'cull' ( out 4-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 89
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.frag.out

@@ -98,7 +98,7 @@ gl_FragCoord origin is upper left
 0:?     'cull' ( in 2-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 53
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.geom.out

@@ -630,7 +630,7 @@ output primitive = line_strip
 0:?     'OutputStream.clip1' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 127
 
                               Capability Geometry

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-3.vert.out

@@ -136,7 +136,7 @@ Shader version: 500
 0:?     'cull' ( out 2-element array of float CullDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 51
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.frag.out

@@ -174,7 +174,7 @@ gl_FragCoord origin is upper left
 0:?     'v.ClipRect' ( in 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 57
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.geom.out

@@ -612,7 +612,7 @@ output primitive = line_strip
 0:?     'OutputStream.clip1' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 130
 
                               Capability Geometry

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-4.vert.out

@@ -270,7 +270,7 @@ Shader version: 500
 0:?     '@entryPointOutput.ClipRect' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 72
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-5.frag.out

@@ -232,7 +232,7 @@ gl_FragCoord origin is upper left
 0:?     'v.ClipRect' ( in 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 62
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-5.vert.out

@@ -318,7 +318,7 @@ Shader version: 500
 0:?     '@entryPointOutput.ClipRect' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 73
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-6.frag.out

@@ -282,7 +282,7 @@ gl_FragCoord origin is upper left
 0:?     'v.clip1' ( in 8-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 79
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-6.vert.out

@@ -428,7 +428,7 @@ Shader version: 500
 0:?     '@entryPointOutput.clip1' ( out 8-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 86
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-7.frag.out

@@ -270,7 +270,7 @@ gl_FragCoord origin is upper left
 0:?     'v.clip1' ( in 8-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 78
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-7.vert.out

@@ -384,7 +384,7 @@ Shader version: 500
 0:?     '@entryPointOutput.clip1' ( out 8-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 81
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-8.frag.out

@@ -186,7 +186,7 @@ gl_FragCoord origin is upper left
 0:?     'v.clip1' ( in 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 65
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-8.vert.out

@@ -240,7 +240,7 @@ Shader version: 500
 0:?     '@entryPointOutput.clip1' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 62
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-9.frag.out

@@ -144,7 +144,7 @@ gl_FragCoord origin is upper left
 0:?     'clip0' ( in 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 68
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.clipdistance-9.vert.out

@@ -194,7 +194,7 @@ Shader version: 500
 0:?     'clip0' ( out 4-element array of float ClipDistance)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 67
 
                               Capability Shader

+ 33 - 34
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.color.hull.tesc.out

@@ -356,13 +356,13 @@ triangle order = cw
 0:?     '@patchConstantOutput.inside' ( patch out 2-element array of float TessLevelInner)
 
 // Module Version 10000
-// Generated by (magic number): 80002
-// Id's are bound by 128
+// Generated by (magic number): 80004
+// Id's are bound by 127
 
                               Capability Tessellation
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint TessellationControl 4  "main" 72 76 79 83 111 124
+                              EntryPoint TessellationControl 4  "main" 72 76 79 83 110 123
                               ExecutionMode 4 OutputVertices 3
                               ExecutionMode 4 Triangles
                               ExecutionMode 4 SpacingEqual
@@ -404,8 +404,8 @@ triangle order = cw
                               Name 102  "@patchConstantResult"
                               Name 103  "param"
                               Name 105  "param"
-                              Name 111  "@patchConstantOutput.edges"
-                              Name 124  "@patchConstantOutput.inside"
+                              Name 110  "@patchConstantOutput.edges"
+                              Name 123  "@patchConstantOutput.inside"
                               MemberDecorate 33(TessellationBuffer) 0 Offset 0
                               MemberDecorate 33(TessellationBuffer) 1 Offset 4
                               Decorate 33(TessellationBuffer) Block
@@ -415,10 +415,10 @@ triangle order = cw
                               Decorate 76(pointId) BuiltIn InvocationId
                               Decorate 79(patchId) BuiltIn PrimitiveId
                               Decorate 83(@entryPointOutput) Location 0
-                              Decorate 111(@patchConstantOutput.edges) Patch
-                              Decorate 111(@patchConstantOutput.edges) BuiltIn TessLevelOuter
-                              Decorate 124(@patchConstantOutput.inside) Patch
-                              Decorate 124(@patchConstantOutput.inside) BuiltIn TessLevelInner
+                              Decorate 110(@patchConstantOutput.edges) Patch
+                              Decorate 110(@patchConstantOutput.edges) BuiltIn TessLevelOuter
+                              Decorate 123(@patchConstantOutput.inside) Patch
+                              Decorate 123(@patchConstantOutput.inside) BuiltIn TessLevelInner
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -458,17 +458,16 @@ triangle order = cw
 83(@entryPointOutput):     82(ptr) Variable Output
               92:             TypePointer Output 22(HullOutputType)
               94:     10(int) Constant 2
-              95:     10(int) Constant 1
+              95:     10(int) Constant 4
               96:     10(int) Constant 0
               98:             TypeBool
-             108:     10(int) Constant 4
-             109:             TypeArray 6(float) 108
-             110:             TypePointer Output 109
-111(@patchConstantOutput.edges):    110(ptr) Variable Output
-             114:             TypePointer Output 6(float)
-             122:             TypeArray 6(float) 94
-             123:             TypePointer Output 122
-124(@patchConstantOutput.inside):    123(ptr) Variable Output
+             108:             TypeArray 6(float) 95
+             109:             TypePointer Output 108
+110(@patchConstantOutput.edges):    109(ptr) Variable Output
+             113:             TypePointer Output 6(float)
+             121:             TypeArray 6(float) 94
+             122:             TypePointer Output 121
+123(@patchConstantOutput.inside):    122(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
        70(patch):     13(ptr) Variable Function
@@ -508,22 +507,22 @@ triangle order = cw
                                 Store 105(param) 106
              107:16(ConstantOutputType)   FunctionCall 20(ColorPatchConstantFunction(struct-HullInputType-vf3-vf41[3];u1;) 103(param) 105(param)
                                 Store 102(@patchConstantResult) 107
-             112:     39(ptr)   AccessChain 102(@patchConstantResult) 32 32
-             113:    6(float)   Load 112
-             115:    114(ptr)   AccessChain 111(@patchConstantOutput.edges) 32
-                                Store 115 113
-             116:     39(ptr)   AccessChain 102(@patchConstantResult) 32 41
-             117:    6(float)   Load 116
-             118:    114(ptr)   AccessChain 111(@patchConstantOutput.edges) 41
-                                Store 118 117
-             119:     39(ptr)   AccessChain 102(@patchConstantResult) 32 45
-             120:    6(float)   Load 119
-             121:    114(ptr)   AccessChain 111(@patchConstantOutput.edges) 45
-                                Store 121 120
-             125:     39(ptr)   AccessChain 102(@patchConstantResult) 41
-             126:    6(float)   Load 125
-             127:    114(ptr)   AccessChain 124(@patchConstantOutput.inside) 32
-                                Store 127 126
+             111:     39(ptr)   AccessChain 102(@patchConstantResult) 32 32
+             112:    6(float)   Load 111
+             114:    113(ptr)   AccessChain 110(@patchConstantOutput.edges) 32
+                                Store 114 112
+             115:     39(ptr)   AccessChain 102(@patchConstantResult) 32 41
+             116:    6(float)   Load 115
+             117:    113(ptr)   AccessChain 110(@patchConstantOutput.edges) 41
+                                Store 117 116
+             118:     39(ptr)   AccessChain 102(@patchConstantResult) 32 45
+             119:    6(float)   Load 118
+             120:    113(ptr)   AccessChain 110(@patchConstantOutput.edges) 45
+                                Store 120 119
+             124:     39(ptr)   AccessChain 102(@patchConstantResult) 41
+             125:    6(float)   Load 124
+             126:    113(ptr)   AccessChain 123(@patchConstantOutput.inside) 32
+                                Store 126 125
                                 Branch 101
              101:             Label
                               Return

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.comparison.vec.frag.out

@@ -262,7 +262,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 96
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.conditional.frag.out

@@ -522,7 +522,7 @@ gl_FragCoord origin is upper left
 0:?     'input' (layout( location=0) in 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 220
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constantbuffer.frag.out

@@ -132,7 +132,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 66
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructArray.vert.out

@@ -268,7 +268,7 @@ Shader version: 500
 0:?     '@entryPointOutput' ( out 4-component vector of float Position)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 89
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructexpr.frag.out

@@ -104,7 +104,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput.color' (layout( location=0) out 4-component vector of float)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 40
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.constructimat.frag.out

@@ -544,7 +544,7 @@ gl_FragCoord origin is upper left
 0:?     '@entryPointOutput' (layout( location=0) out int)
 
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 98
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.dashI.vert.out

@@ -1,6 +1,6 @@
 hlsl.dashI.vert
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 40
 
                               Capability Shader

+ 1 - 1
3rdparty/bgfx/3rdparty/glslang/Test/baseResults/hlsl.deadFunctionMissingBody.vert.out

@@ -1,6 +1,6 @@
 hlsl.deadFunctionMissingBody.vert
 // Module Version 10000
-// Generated by (magic number): 80002
+// Generated by (magic number): 80004
 // Id's are bound by 18
 
                               Capability Shader

Некоторые файлы не были показаны из-за большого количества измененных файлов