ShaderOpArith.xml 127 KB


  1. <?xml version="1.0" encoding="utf-8" standalone="yes"?>
  2. <ShaderOpSet xmlns="http://schemas.microsoft.com/test/ShaderOp">
  3. <ShaderOp Name="DerivFine" PS="PS" VS="VS" TopologyType="TRIANGLE"> <!-- Draws two triangles covering clip space into the 64x64 target; the pixel shader returns fine and coarse derivatives of a value loaded from the 4x4 texture T0. -->
  4. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1))</RootSignature>
  5. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  6. { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
  7. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  8. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  9. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  10. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  11. { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
  12. </Resource>
  13. <Resource Name="T0" Dimension="Texture2D" Width="4" Height="4" InitialResourceState="COPY_DEST" Init="FromBytes" Format="R32_FLOAT">
  14. {.125f, .25f, .5f, 1.0f},
  15. {2.0f, 4.0f, 16.0f, 32.0f},
  16. {32.0f, 64.0f, 128.0f, 256.0f},
  17. {256.0f, 512.0f, 1024.0f, 2048.0f}
  18. </Resource>
  19. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  20. <RootValues>
  21. <RootValue HeapName="ResHeap" />
  22. </RootValues>
  23. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  24. <Descriptor Name='T0' Kind='SRV' ResName='T0' />
  25. </DescriptorHeap>
  26. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  27. <Descriptor Name="RTarget" Kind="RTV"/>
  28. </DescriptorHeap>
  29. <InputElements>
  30. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  31. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  32. </InputElements>
  33. <RenderTargets>
  34. <RenderTarget Name="RTarget"/>
  35. </RenderTargets>
  36. <Shader Name="VS" Target="vs_6_0">
  37. <![CDATA[
  38. struct PSInput {
  39. float4 position : SV_POSITION;
  40. float2 uv : TEXCOORD;
  41. };
  42. PSInput main(float3 position : POSITION, float2 uv : TEXCOORD) {
  43. PSInput vsOut; // pass-through vertex shader: no transform is applied
  44. vsOut.uv = uv;
  45. vsOut.position = float4(position, 1.0);
  46. return vsOut;
  47. }
  48. ]]>
  49. </Shader>
  50. <Shader Name="PS" Target="ps_6_0">
  51. <![CDATA[
  52. struct PSInput {
  53. float4 position : SV_POSITION;
  54. float2 uv : TEXCOORD;
  55. };
  56. Texture2D<float> g_tex : register(t0);
  57. float4 main(PSInput input) : SV_TARGET {
  58. float2 wrapped = (input.uv * 64.0) % 4; // scale uv by 64, then wrap into the 4x4 texture
  59. float texel = g_tex.Load(int3(wrapped, 0)); // third component selects mip 0
  60. return float4(ddx_fine(texel), ddy_fine(texel), ddx_coarse(texel), ddy_coarse(texel));
  61. }
  62. ]]>
  63. </Shader>
  64. </ShaderOp>
  65. <ShaderOp Name="WriteFloat4" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op: each thread stores its group index, splatted to a float4, into the root UAV buffer (1024 bytes = 64 float4 elements, one per thread of an 8x8 group). -->
  66. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  67. <Resource Name="Buffer" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" Init="Zero" ReadBack="true" TransitionTo="UNORDERED_ACCESS" />
  68. <RootValues>
  69. <RootValue Index="0" ResName="Buffer" />
  70. </RootValues>
  71. <Shader Name="CS" Target="cs_6_0">
  72. <![CDATA[
  73. RWStructuredBuffer<float4> g_buf : register(u0);
  74. [numthreads(8,8,1)]
  75. void main(uint GI : SV_GroupIndex) {
  76. g_buf[GI] = (float4)GI; // same value in all four components
  77. }
  78. ]]>
  79. </Shader>
  80. </ShaderOp>
  81. <ShaderOp Name="Derivatives" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE"> <!-- Runs the same derivative test (DerivTest) from pixel, compute, amplification and mesh entry points; results go to UAVs u0 (PS and CS), u1 (mesh) and u2 (amplification). -->
  82. <RootSignature>
  83. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
  84. DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
  85. StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
  86. </RootSignature>
  87. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  88. { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
  89. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  90. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  91. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  92. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  93. { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
  94. </Resource>
  95. <Resource Name="T0" Dimension="Texture2D" Width="4" Height="4" InitialResourceState="COPY_DEST" Init="FromBytes" Format="R32_FLOAT">
  96. {.125f, .25f, .5f, 1.0f},
  97. {2.0f, 4.0f, 16.0f, 32.0f},
  98. {32.0f, 64.0f, 128.0f, 256.0f},
  99. {256.0f, 512.0f, 1024.0f, 2048.0f}
  100. </Resource>
  101. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  102. <Resource Name="U0" Dimension="BUFFER" Width="16384"
  103. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  104. Init="Zero" ReadBack="true" /> <!-- 16384 bytes = 1024 elements of 16 bytes, matching the UAV descriptors below; U1 and U2 use the same size -->
  105. <Resource Name="U1" Dimension="BUFFER" Width="16384"
  106. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  107. Init="Zero" ReadBack="true" />
  108. <Resource Name="U2" Dimension="BUFFER" Width="16384"
  109. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  110. Init="Zero" ReadBack="true" />
  111. <RootValues>
  112. <RootValue HeapName="ResHeap" />
  113. </RootValues>
  114. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  115. <Descriptor Name='T0' Kind='SRV' ResName='T0' />
  116. <Descriptor Name='U0' Kind='UAV' ResName='U0'
  117. NumElements="1024" StructureByteStride="16" />
  118. <Descriptor Name='U1' Kind='UAV' ResName='U1'
  119. NumElements="1024" StructureByteStride="16" />
  120. <Descriptor Name='U2' Kind='UAV' ResName='U2'
  121. NumElements="1024" StructureByteStride="16" />
  122. </DescriptorHeap>
  123. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  124. <Descriptor Name="RTarget" Kind="RTV"/>
  125. </DescriptorHeap>
  126. <InputElements>
  127. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  128. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  129. </InputElements>
  130. <RenderTargets>
  131. <RenderTarget Name="RTarget"/>
  132. </RenderTargets>
  133. <Shader Name="CS" Target="cs_6_6" EntryPoint="CSMain" Text="@PS"/>
  134. <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
  135. <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/>
  136. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS"/> <!-- The PS element below holds the HLSL for every entry point named above (CSMain, ASMain, MSMain, VSMain) plus PSMain. NOTE(review): DISPATCHX, DISPATCHY and DISPATCHZ are not defined in this text; presumably supplied as compiler defines, confirm. -->
  137. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
  138. <![CDATA[
  139. struct PSInput {
  140. float4 position : SV_POSITION;
  141. float2 uv : TEXCOORD;
  142. };
  143. Texture2D<float> g_tex : register(t0);
  144. RWStructuredBuffer<float4> g_bufMain : register(u0); // written by PSMain and CSMain
  145. RWStructuredBuffer<float4> g_bufMesh : register(u1); // written by MSMain
  146. RWStructuredBuffer<float4> g_bufAmp : register(u2); // written by ASMain
  147. float4 DerivTest(int2 uv) { // returns (ddx_fine, ddy_fine, ddx_coarse, ddy_coarse) of the texel at uv wrapped into 4x4
  148. int3 offset = int3(uv%4, 0);
  149. float val = g_tex.Load(offset);
  150. return float4(ddx_fine(val), ddy_fine(val), ddx_coarse(val), ddy_coarse(val));
  151. }
  152. // Map group index to 4x4 UV texcoord block (x from bits 2 and 0, y from bits 3 and 1)
  153. int2 ConvertGroupIdx(uint groupIdx) {
  154. return int2(((groupIdx&0x4)>>1) + (groupIdx&0x1), ((groupIdx&0x8)>>2) + ((groupIdx&0x2)>>1));
  155. }
  156. // Convert group index into uv texcoords and return derivatives test result
  157. float4 DerivTest(uint groupIdx) {
  158. return DerivTest(ConvertGroupIdx(groupIdx));
  159. }
  160. PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) {
  161. PSInput result;
  162. result.position = float4(position, 1.0);
  163. result.uv = uv;
  164. return result;
  165. }
  166. struct Payload {
  167. uint nothing;
  168. };
  169. static float4 g_Verts[6] = {
  170. { -1.0f, 1.0f, 0.0f, 1.0f },
  171. { 1.0f, 1.0f, 0.0f, 1.0f },
  172. { -1.0f, -1.0f, 0.0f, 1.0f },
  173. { -1.0f, -1.0f, 0.0f, 1.0f },
  174. { 1.0f, 1.0f, 0.0f, 1.0f },
  175. { 1.0f, -1.0f, 0.0f, 1.0f }};
  176. static float2 g_UV[6] = {
  177. { 0.0f, 0.0f },
  178. { 1.0f, 0.0f },
  179. { 0.0f, 1.0f },
  180. { 0.0f, 1.0f },
  181. { 1.0f, 0.0f },
  182. { 1.0f, 1.0f }};
  183. uint convert2Dto1D(uint x, uint y, uint width) {
  184. // Convert 2D coords to 1D for testing
  185. // All completed rows of quads
  186. uint prevRows = (y/2)*2*width;
  187. // All previous full quads on this quad row
  188. uint prevQuads = (x/2)*4;
  189. // index into current quad
  190. uint quadIx = (y&1)*2 + (x&1);
  191. return prevRows + prevQuads + quadIx;
  192. }
  193. float4 PSMain(PSInput input) : SV_TARGET {
  194. // Convert from texcoords into a groupIndex equivalent
  195. int width = 64;
  196. int height = 64;
  197. int2 uv = int2(input.uv.x*width, input.uv.y*height);
  198. uint ix = convert2Dto1D(uv.x, uv.y, DISPATCHX);
  199. float4 res = 0.0;
  200. if (uv.x < DISPATCHX && uv.y < DISPATCHY) {
  201. res = DerivTest(uv);
  202. g_bufMain[ix] = res;
  203. }
  204. return res;
  205. }
  206. [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
  207. void CSMain(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {
  208. if (DISPATCHY == 1 && DISPATCHZ == 1)
  209. g_bufMain[ix] = DerivTest(ix);
  210. else
  211. g_bufMain[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
  212. }
  213. #if DISPATCHX * DISPATCHY * DISPATCHZ > 128
  214. #undef DISPATCHX
  215. #undef DISPATCHY
  216. #undef DISPATCHZ
  217. #define DISPATCHX 1
  218. #define DISPATCHY 1
  219. #define DISPATCHZ 1
  220. #endif
  221. [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
  222. void ASMain(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {
  223. Payload payload;
  224. if (DISPATCHY == 1 && DISPATCHZ == 1)
  225. g_bufAmp[ix] = DerivTest(ix);
  226. else
  227. g_bufAmp[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
  228. payload.nothing = 0;
  229. DispatchMesh(1, 1, 1, payload);
  230. }
  231. [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
  232. [OutputTopology("triangle")]
  233. void MSMain(
  234. uint3 id : SV_GroupThreadID,
  235. uint ix : SV_GroupIndex,
  236. in payload Payload payload,
  237. out vertices PSInput verts[6],
  238. out indices uint3 tris[2]) {
  239. SetMeshOutputCounts(6, 2);
  240. // Assign static fullscreen 2 tri quad
  241. verts[ix%6].position = g_Verts[ix%6];
  242. verts[ix%6].uv = g_UV[ix%6];
  243. tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
  244. // One store per thread, as in CSMain and ASMain: an extra unconditional g_bufMesh[ix] store would race with the 2D branch.
  245. if (DISPATCHY == 1 && DISPATCHZ == 1)
  246. g_bufMesh[ix] = DerivTest(ix);
  247. else
  248. g_bufMesh[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
  249. }
  250. ]]>
  251. </Shader>
  252. </ShaderOp>
  253. <ShaderOp Name="QuadRead" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE"> <!-- Records QuadReadLaneAt, QuadReadAcrossX, QuadReadAcrossY and QuadReadAcrossDiagonal results per thread from compute (u0), mesh (u1) and amplification (u2) entry points. NOTE(review): the attributes name VS="VS" but no Shader named VS is declared in this op; confirm the harness does not need one. -->
  254. <RootSignature>
  255. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
  256. DescriptorTable(UAV(u0), UAV(u1), UAV(u2))
  257. </RootSignature>
  258. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  259. <Resource Name="U0" Dimension="BUFFER" Width="16384"
  260. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  261. Init="Zero" ReadBack="true" /> <!-- 16384 bytes = 1024 elements of 16 bytes, matching the UAV descriptors below; U1 and U2 use the same size -->
  262. <Resource Name="U1" Dimension="BUFFER" Width="16384"
  263. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  264. Init="Zero" ReadBack="true" />
  265. <Resource Name="U2" Dimension="BUFFER" Width="16384"
  266. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  267. Init="Zero" ReadBack="true" />
  268. <RootValues>
  269. <RootValue HeapName="ResHeap" />
  270. </RootValues>
  271. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  272. <Descriptor Name='U0' Kind='UAV' ResName='U0'
  273. NumElements="1024" StructureByteStride="16" />
  274. <Descriptor Name='U1' Kind='UAV' ResName='U1'
  275. NumElements="1024" StructureByteStride="16" />
  276. <Descriptor Name='U2' Kind='UAV' ResName='U2'
  277. NumElements="1024" StructureByteStride="16" />
  278. </DescriptorHeap>
  279. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  280. <Descriptor Name="RTarget" Kind="RTV"/>
  281. </DescriptorHeap>
  282. <InputElements>
  283. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  284. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  285. </InputElements>
  286. <RenderTargets>
  287. <RenderTarget Name="RTarget"/>
  288. </RenderTargets>
  289. <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain" Text="@PS"/>
  290. <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
  291. <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/> <!-- The PS element below defines all entry points (ASMain, MSMain, PSMain, CSMain); PSMain itself does nothing. NOTE(review): DISPATCHX/Y/Z and MESHDISPATCHX/Y/Z are not defined in this text; presumably supplied as compiler defines, confirm. -->
  292. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
  293. <![CDATA[
  294. struct PSInput {
  295. float4 position : SV_POSITION;
  296. };
  297. RWStructuredBuffer<int4> g_bufMain : register(u0);
  298. RWStructuredBuffer<int4> g_bufMesh : register(u1);
  299. RWStructuredBuffer<int4> g_bufAmp : register(u2);
  300. uint4 QuadReadTest(uint ix) {
  301. return int4(QuadReadLaneAt(ix, ix & 0x3), QuadReadAcrossX(ix),
  302. QuadReadAcrossY(ix), QuadReadAcrossDiagonal(ix));
  303. }
  304. struct Payload {
  305. uint nothing;
  306. };
  307. [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
  308. void ASMain(uint ix : SV_GroupIndex) {
  309. Payload payload;
  310. g_bufAmp[ix] = QuadReadTest(ix);
  311. payload.nothing = 0;
  312. DispatchMesh(1, 1, 1, payload);
  313. }
  314. static float4 g_Verts[6] = {
  315. { -1.0f, 1.0f, 0.0f, 1.0f },
  316. { 1.0f, 1.0f, 0.0f, 1.0f },
  317. { -1.0f, -1.0f, 0.0f, 1.0f },
  318. { -1.0f, -1.0f, 0.0f, 1.0f },
  319. { 1.0f, 1.0f, 0.0f, 1.0f },
  320. { 1.0f, -1.0f, 0.0f, 1.0f }};
  321. [NumThreads(MESHDISPATCHX, MESHDISPATCHY, MESHDISPATCHZ)]
  322. [OutputTopology("triangle")]
  323. void MSMain(
  324. uint ix : SV_GroupIndex,
  325. in payload Payload payload,
  326. out vertices PSInput verts[6],
  327. out indices uint3 tris[2]) {
  328. SetMeshOutputCounts(6, 2);
  329. // Assign static fullscreen 2 tri quad
  330. verts[ix%6].position = g_Verts[ix%6];
  331. tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
  332. g_bufMesh[ix] = QuadReadTest(ix);
  333. }
  334. void PSMain(PSInput input) {
  335. return;
  336. }
  337. [NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
  338. void CSMain(uint ix : SV_GroupIndex) {
  339. g_bufMain[ix] = QuadReadTest(ix);
  340. }
  341. ]]>
  342. </Shader>
  343. </ShaderOp>
  344. <ShaderOp Name="ComputeSample" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE"> <!-- Exercises Sample and CalculateLevelOfDetail from pixel, compute, amplification and mesh entry points, in 1D and 2D thread-group layouts; results go to u0 (PS and CS), u1 (mesh) and u2 (amplification). -->
  345. <RootSignature>
  346. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
  347. DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
  348. StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
  349. </RootSignature>
  350. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  351. { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
  352. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  353. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  354. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  355. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  356. { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
  357. </Resource>
  358. <Resource Name="T0" Dimension="Texture2D" Width="336" Height="336" MipLevels="7" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_FLOAT" /> <!-- Init="ByName": no payload is given here; presumably the harness supplies the texture contents under this name, confirm. -->
  359. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="84" Height="4" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
  360. <Resource Name="U0" Dimension="BUFFER" Width="16384"
  361. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  362. Init="Zero" ReadBack="true" /> <!-- The U0 descriptor below exposes 336 elements of 16 bytes (5376 bytes) of this 16384-byte buffer. -->
  363. <Resource Name="U1" Dimension="BUFFER" Width="2048"
  364. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  365. Init="Zero" ReadBack="true" /> <!-- 2048 bytes = 128 elements of 16 bytes; U2 uses the same size -->
  366. <Resource Name="U2" Dimension="BUFFER" Width="2048"
  367. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  368. Init="Zero" ReadBack="true" />
  369. <RootValues>
  370. <RootValue HeapName="ResHeap" />
  371. </RootValues>
  372. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  373. <Descriptor Name='T0' Kind='SRV' ResName='T0' />
  374. <Descriptor Name='U0' Kind='UAV' ResName='U0'
  375. NumElements="336" StructureByteStride="16" />
  376. <Descriptor Name='U1' Kind='UAV' ResName='U1'
  377. NumElements="128" StructureByteStride="16" />
  378. <Descriptor Name='U2' Kind='UAV' ResName='U2'
  379. NumElements="128" StructureByteStride="16" />
  380. </DescriptorHeap>
  381. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  382. <Descriptor Name="RTarget" Kind="RTV"/>
  383. </DescriptorHeap>
  384. <InputElements>
  385. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  386. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  387. </InputElements>
  388. <RenderTargets>
  389. <RenderTarget Name="RTarget"/>
  390. </RenderTargets>
  391. <Shader Name="CS" Target="cs_6_6" EntryPoint="CSMain1D" Text="@PS"/>
  392. <Shader Name="AS" Target="as_6_6" EntryPoint="ASMain1D" Text="@PS"/>
  393. <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMain1D" Text="@PS"/>
  394. <Shader Name="CS2" Target="cs_6_6" EntryPoint="CSMain2D" Text="@PS"/>
  395. <Shader Name="AS2" Target="as_6_6" EntryPoint="ASMain2D" Text="@PS"/>
  396. <Shader Name="MS2" Target="ms_6_6" EntryPoint="MSMain2D" Text="@PS"/> <!-- NOTE(review): CS2, AS2 and MS2 are declared but the ShaderOp attributes name only CS, AS and MS; presumably the harness switches to these 2D variants itself, confirm. -->
  397. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS"/> <!-- The PS element below defines every entry point named above. NOTE(review): PSMain passes 84*4*3 as the thread count while CSMain2D, which declares NumThreads(84, 4, 3), passes 84*4; confirm which is intended. -->
  398. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
  399. <![CDATA[
  400. struct PSInput {
  401. float4 position : SV_POSITION;
  402. float2 uv : TEXCOORD;
  403. };
  404. Texture2D<float> g_tex : register(t0);
  405. RWStructuredBuffer<uint4> g_bufMain : register(u0);
  406. RWStructuredBuffer<uint4> g_bufMesh : register(u1);
  407. RWStructuredBuffer<uint4> g_bufAmp : register(u2);
  408. PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) {
  409. PSInput result;
  410. result.position = float4(position, 1.0);
  411. result.uv = uv;
  412. return result;
  413. }
  414. SamplerState g_samp : register(s0);
  415. uint4 DerivTest(uint ix, uint iy, float left, float right, float top, float bot) {
  416. return uint4(g_tex.CalculateLevelOfDetail(g_samp, float2(left, 0.5)) * (~ix&1) +
  417. g_tex.CalculateLevelOfDetail(g_samp, float2(right, 0.5)) * (ix&1),
  418. g_tex.Sample(g_samp, float2(left, 0.5)) * (~ix&1) +
  419. g_tex.Sample(g_samp, float2(right, 0.5)) * (ix&1),
  420. g_tex.CalculateLevelOfDetail(g_samp, float2(0.5, top)) * (~iy&1) +
  421. g_tex.CalculateLevelOfDetail(g_samp, float2(0.5, bot)) * (iy&1),
  422. g_tex.Sample(g_samp, float2(0.5, top)) * (~iy&1) +
  423. g_tex.Sample(g_samp, float2(0.5, bot)) * (iy&1));
  424. }
  425. // To avoid conditionals, two samples are performed one for left one for right
  426. // They are step functioned on or off depending
  427. uint4 DerivTest(uint ix, float threadCt) {
  428. uint iy = ix>>1;
  429. return DerivTest(ix, iy, ((ix^1)/threadCt)*(ix&1), (ix/threadCt)*(ix&1),
  430. ((ix^2)/threadCt)*(iy&1), (ix/threadCt)*(iy&1));
  431. }
  432. static float4 g_Verts[6] = {
  433. { -1.0f, 1.0f, 0.0f, 1.0f },
  434. { 1.0f, 1.0f, 0.0f, 1.0f },
  435. { -1.0f, -1.0f, 0.0f, 1.0f },
  436. { -1.0f, -1.0f, 0.0f, 1.0f },
  437. { 1.0f, 1.0f, 0.0f, 1.0f },
  438. { 1.0f, -1.0f, 0.0f, 1.0f }};
  439. static float2 g_UV[6] = {
  440. { 0.0f, 0.0f },
  441. { 1.0f, 0.0f },
  442. { 0.0f, 1.0f },
  443. { 0.0f, 1.0f },
  444. { 1.0f, 0.0f },
  445. { 1.0f, 1.0f }};
  446. struct Payload {
  447. uint nothing;
  448. };
  449. uint convert2Dto1D(uint x, uint y, uint width) {
  450. // Convert 2D coords to 1D for testing
  451. // All completed rows of quads
  452. uint prevRows = (y/2)*2*width;
  453. // All previous full quads on this quad row
  454. uint prevQuads = (x/2)*4;
  455. // index into current quad
  456. uint quadIx = (y&1)*2 + (x&1);
  457. return prevRows + prevQuads + quadIx;
  458. }
  459. [NumThreads(116, 1, 1)]
  460. void ASMain1D(uint ix : SV_GroupIndex) {
  461. Payload payload;
  462. g_bufAmp[ix] = DerivTest(ix, 116);
  463. payload.nothing = 0;
  464. DispatchMesh(1, 1, 1, payload);
  465. }
  466. [NumThreads(42, 2, 1)]
  467. void ASMain2D(uint3 id : SV_GroupThreadID) {
  468. Payload payload;
  469. uint ix = convert2Dto1D(id.x, id.y, 42);
  470. g_bufAmp[ix] = DerivTest(ix, 42*2);
  471. payload.nothing = 0;
  472. DispatchMesh(1, 1, 1, payload);
  473. }
  474. [NumThreads(116, 1, 1)]
  475. [OutputTopology("triangle")]
  476. void MSMain1D(
  477. uint ix : SV_GroupIndex,
  478. in payload Payload payload,
  479. out vertices PSInput verts[6],
  480. out indices uint3 tris[2]) {
  481. SetMeshOutputCounts(6, 2);
  482. // Assign static fullscreen 2 tri quad
  483. verts[ix%6].position = g_Verts[ix%6];
  484. verts[ix%6].uv = g_UV[ix%6];
  485. tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
  486. g_bufMesh[ix] = DerivTest(ix, 116);
  487. }
  488. [NumThreads(42, 2, 1)]
  489. [OutputTopology("triangle")]
  490. void MSMain2D(
  491. uint3 id : SV_GroupThreadID,
  492. in payload Payload payload,
  493. out vertices PSInput verts[6],
  494. out indices uint3 tris[2]) {
  495. SetMeshOutputCounts(6, 2);
  496. uint ix = convert2Dto1D(id.x, id.y, 42);
  497. // Assign static fullscreen 2 tri quad
  498. verts[ix%6].position = g_Verts[ix%6];
  499. verts[ix%6].uv = g_UV[ix%6];
  500. tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
  501. g_bufMesh[ix] = DerivTest(ix, 42*2);
  502. }
  503. float4 PSMain(PSInput input) : SV_TARGET {
  504. uint ix = convert2Dto1D(input.uv.x, input.uv.y, 84);
  505. g_bufMain[ix] = DerivTest(ix, 84*4*3);
  506. return 1;
  507. }
  508. [NumThreads(336, 1, 1)]
  509. void CSMain1D(uint ix : SV_GroupIndex) {
  510. g_bufMain[ix] = DerivTest(ix, 336);
  511. }
  512. [NumThreads(84, 4, 3)]
  513. void CSMain2D(uint3 id : SV_GroupThreadID) {
  514. uint ix = convert2Dto1D(id.x, id.y, 84);
  515. g_bufMain[ix] = DerivTest(ix, 84*4);
  516. }
  517. ]]>
  518. </Shader>
  519. </ShaderOp>
  520. <ShaderOp Name="OOB" PS="PS" VS="VS"> <!-- Out-of-bounds read test: the pixel shader loads Texture1D T0 at an in-range index and at index 100 (both taken from CB0) and writes the two values to the red and green channels. NOTE(review): the root signature declares numDescriptors=2 for the SRV table while ResHeap holds a single descriptor; confirm this is intended. -->
  521. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
  522. <Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">
  523. 1.0f, 0.0f, 100.0f
  524. </Resource>
  525. <Resource Name="T0" Dimension="TEXTURE1D" InitialResourceState="COPY_DEST" Init="FromBytes" Format="R32_FLOAT">
  526. 1.0f, 0.5f, 1.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f
  527. </Resource>
  528. <Resource Name='VBuffer' Dimension='BUFFER' Flags='ALLOW_UNORDERED_ACCESS' InitialResourceState='COPY_DEST' Init='FromBytes'>
  529. 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0,
  530. -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0,
  531. </Resource>
  532. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="320" Height="200" Format="R8G8B8A8_UNORM" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  533. <RootValues>
  534. <RootValue ResName="CB0" />
  535. <RootValue HeapName="ResHeap" />
  536. </RootValues>
  537. <DescriptorHeap Name='ResHeap' Type='CBV_SRV_UAV'>
  538. <Descriptor Name='T0' Kind='SRV' ResName='T0' />
  539. </DescriptorHeap>
  540. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  541. <Descriptor Name="RTarget" Kind="RTV"/>
  542. </DescriptorHeap>
  543. <InputElements>
  544. <InputElement SemanticName='POSITION' Format='R32G32B32_FLOAT' AlignedByteOffset='0' />
  545. </InputElements>
  546. <RenderTargets>
  547. <RenderTarget Name="RTarget" />
  548. </RenderTargets>
  549. <Shader Name='VS' Target='vs_6_0' EntryPoint='VSMain' Text="@PS" />
  550. <Shader Name='PS' Target='ps_6_0' EntryPoint='PSMain'>
  551. <![CDATA[
  552. // Resources that are not local scalars (and thus are bound):
  553. // - array in cbuffer
  554. // - array in local
  555. // - array in groupshared
  556. // - array in signature element
  557. // - resource access operations:
  558. // - sampling
  559. // - loading
  560. // - storing
  561. //
  562. // In all cases, for HLSL, out-of-bound reads yield zero, and
  563. // out-of-bound writes are no-ops.
  564. cbuffer C {
  565. float c_arr;
  566. float zero_idx;
  567. float oob_idx;
  568. };
  569. Texture1D<float> g_t1d: register(t0);
  570. struct PSInput {
  571. float4 position : SV_POSITION;
  572. };
  573. PSInput VSMain(float4 position: POSITION) {
  574. PSInput result;
  575. result.position = position;
  576. return result;
  577. }
  578. float4 PSMain(PSInput input) : SV_TARGET {
  579. // zero_idx and oob_idx come from cbuffer C; the CB0 payload above is 1.0f, 0.0f, 100.0f.
  580. float p0 = g_t1d.Load(zero_idx);
  581. float p1 = g_t1d.Load(oob_idx);
  582. // p0 is the in-range texel (red channel), p1 the out-of-range read (green channel).
  583. // every color should be pure red (saturated red channel, zero'ed green channel).
  584. return float4(p0, p1, 0, 1);
  585. }
  586. ]]>
  587. </Shader>
  588. </ShaderOp>
  589. <ShaderOp Name='Saturate' PS='PS' VS='VS'> <!-- Applies saturate() to the nine special float values in CB0 (selected by pixel column 0 to 8) and stores each result in u0 at the pixel column index; other columns store the column index itself. The render target is filled with red. -->
  590. <RootSignature>
  591. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), CBV(b0))
  592. </RootSignature>
  593. <Resource Name='VBuffer' Dimension='BUFFER' Flags='ALLOW_UNORDERED_ACCESS' InitialResourceState='COPY_DEST' Init='FromBytes'>
  594. 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0,
  595. -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0,
  596. </Resource> <!-- The CB0 payload below lists its nine values in the same order as the fields of c_floats_t in the shader. -->
  597. <Resource Name='CB0' Dimension='BUFFER' Width="256" InitialResourceState='COPY_DEST' Init='FromBytes'>
  598. -inf, -1.5f, -denorm, -0, 0, denorm, 1.5f, inf, nan
  599. </Resource>
  600. <Resource Name="U0" Dimension="BUFFER" Width="1280"
  601. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  602. Init="Zero" ReadBack="true" /> <!-- 1280 bytes = 320 elements of 4 bytes, matching the U0 descriptor below -->
  603. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R8G8B8A8_UNORM" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  604. <DescriptorHeap Name='RtvHeap' NumDescriptors='1' Type='RTV'>
  605. <Descriptor Name="RTarget" Kind="RTV"/>
  606. </DescriptorHeap>
  607. <RootValues>
  608. <RootValue HeapName="ResHeap" />
  609. </RootValues>
  610. <DescriptorHeap Name='ResHeap' Type='CBV_SRV_UAV'>
  611. <!-- Create a descriptor for a RWStructuredBuffer. The underlying resource must be of type DXGI_FORMAT_UNKNOWN. -->
  612. <Descriptor Name='U0' Kind='UAV' ResName='U0'
  613. NumElements="320" StructureByteStride="4" />
  614. <Descriptor Name='CB0' Kind='CBV' ResName='CB0' />
  615. </DescriptorHeap>
  616. <InputElements>
  617. <InputElement SemanticName='POSITION' Format='R32G32B32_FLOAT' AlignedByteOffset='0' />
  618. </InputElements>
  619. <RenderTargets>
  620. <RenderTarget Name="RTarget" />
  621. </RenderTargets>
  622. <Shader Name='VS' Target='vs_6_0' EntryPoint='VSMain' Text='@PS'/> <!-- The PS element below defines both VSMain and PSMain. -->
  623. <Shader Name='PS' Target='ps_6_0' EntryPoint='PSMain'>
  624. <![CDATA[
  625. struct c_floats_t {
  626. float c_neg_inf;
  627. float c_neg_f;
  628. float c_neg_denorm;
  629. float c_neg_zero;
  630. float c_zero;
  631. float c_denorm;
  632. float c_f;
  633. float c_inf;
  634. float c_nan;
  635. };
  636. RWStructuredBuffer<float> g_buf : register(u0);
  637. c_floats_t g_cf : register(b0);
  638. struct PSInput {
  639. float4 position : SV_POSITION;
  640. float4 color : COLOR;
  641. };
  642. PSInput VSMain(float4 position: POSITION) {
  643. PSInput result;
  644. result.position = position;
  645. result.color = 1;
  646. return result;
  647. }
  648. float4 PSMain(PSInput input) : SV_TARGET {
  649. uint x = (uint)input.position.x;
  650. float val;
  651. switch (x) {
  652. case 0: val = saturate(g_cf.c_neg_inf); break;
  653. case 1: val = saturate(g_cf.c_neg_f); break;
  654. case 2: val = saturate(g_cf.c_neg_denorm); break;
  655. case 3: val = saturate(g_cf.c_neg_zero); break;
  656. case 4: val = saturate(g_cf.c_zero); break;
  657. case 5: val = saturate(g_cf.c_denorm); break;
  658. case 6: val = saturate(g_cf.c_f); break;
  659. case 7: val = saturate(g_cf.c_inf); break;
  660. case 8: val = saturate(g_cf.c_nan); break;
  661. default: val = x; break;
  662. }
  663. g_buf[x] = val;
  664. float r = 1;
  665. return float4(r, 0, 0, 1);
  666. }]]>
  667. </Shader>
  668. </ShaderOp>
  669. <ShaderOp Name="UnaryFPOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SUnaryFPOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  670. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  671. <Resource Name="SUnaryFPOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  672. <RootValues>
  673. <RootValue Index="0" ResName="SUnaryFPOp" />
  674. </RootValues>
  675. <Shader Name="CS" Target="cs_6_0">
  676. <![CDATA[
  677. void main(uint GI : SV_GroupIndex) {};
  678. ]]>
  679. </Shader>
  680. </ShaderOp>
  681. <ShaderOp Name="BinaryFPOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SBinaryFPOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  682. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  683. <Resource Name="SBinaryFPOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  684. <RootValues>
  685. <RootValue Index="0" ResName="SBinaryFPOp" />
  686. </RootValues>
  687. <Shader Name="CS" Target="cs_6_0">
  688. <![CDATA[
  689. void main(uint GI : SV_GroupIndex) {};
  690. ]]>
  691. </Shader>
  692. </ShaderOp>
  693. <ShaderOp Name="UnaryIntOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SUnaryIntOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  694. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  695. <Resource Name="SUnaryIntOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  696. <RootValues>
  697. <RootValue Index="0" ResName="SUnaryIntOp" />
  698. </RootValues>
  699. <Shader Name="CS" Target="cs_6_0">
  700. <![CDATA[
  701. void main(uint GI : SV_GroupIndex) {};
  702. ]]>
  703. </Shader>
  704. </ShaderOp>
  705. <ShaderOp Name="UnaryUintOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SUnaryUintOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  706. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  707. <Resource Name="SUnaryUintOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  708. <RootValues>
  709. <RootValue Index="0" ResName="SUnaryUintOp" />
  710. </RootValues>
  711. <Shader Name="CS" Target="cs_6_0">
  712. <![CDATA[
  713. void main(uint GI : SV_GroupIndex) {};
  714. ]]>
  715. </Shader>
  716. </ShaderOp>
  717. <ShaderOp Name="BinaryIntOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SBinaryIntOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  718. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  719. <Resource Name="SBinaryIntOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  720. <RootValues>
  721. <RootValue Index="0" ResName="SBinaryIntOp" />
  722. </RootValues>
  723. <Shader Name="CS" Target="cs_6_0">
  724. <![CDATA[
  725. void main(uint GI : SV_GroupIndex) {};
  726. ]]>
  727. </Shader>
  728. </ShaderOp>
  729. <ShaderOp Name="BinaryUintOp" CS="CS" DispatchX="8" DispatchY="8"> <!-- Compute op that binds the 1024-byte buffer SBinaryUintOp as root UAV u0 and reads it back. NOTE(review): the shader is an empty stub without [numthreads], and the buffer uses Init="ByName"; presumably the harness supplies both the real shader text and the buffer contents, confirm. -->
  730. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  731. <Resource Name="SBinaryUintOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  732. <RootValues>
  733. <RootValue Index="0" ResName="SBinaryUintOp" />
  734. </RootValues>
  735. <Shader Name="CS" Target="cs_6_0">
  736. <![CDATA[
  737. void main(uint GI : SV_GroupIndex) {};
  738. ]]>
  739. </Shader>
  740. </ShaderOp>
  <!-- TertiaryFPOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the STertiaryFPOp buffer, which is initialized ByName and read back.
       The embedded cs_6_0 main has an empty body; presumably the test harness
       substitutes the real shader text at runtime (TODO confirm). -->
  741. <ShaderOp Name="TertiaryFPOp" CS="CS" DispatchX="8" DispatchY="8">
  742. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  743. <Resource Name="STertiaryFPOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  744. <RootValues>
  745. <RootValue Index="0" ResName="STertiaryFPOp" />
  746. </RootValues>
  747. <Shader Name="CS" Target="cs_6_0">
  748. <![CDATA[
  749. void main(uint GI : SV_GroupIndex) {};
  750. ]]>
  751. </Shader>
  752. </ShaderOp>
  <!-- TertiaryIntOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the STertiaryIntOp buffer, which is initialized ByName and read back.
       The embedded cs_6_0 main has an empty body; presumably the test harness
       substitutes the real shader text at runtime (TODO confirm). -->
  753. <ShaderOp Name="TertiaryIntOp" CS="CS" DispatchX="8" DispatchY="8">
  754. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  755. <Resource Name="STertiaryIntOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  756. <RootValues>
  757. <RootValue Index="0" ResName="STertiaryIntOp" />
  758. </RootValues>
  759. <Shader Name="CS" Target="cs_6_0">
  760. <![CDATA[
  761. void main(uint GI : SV_GroupIndex) {};
  762. ]]>
  763. </Shader>
  764. </ShaderOp>
  <!-- TertiaryUintOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the STertiaryUintOp buffer, which is initialized ByName and read back.
       The embedded cs_6_0 main has an empty body; presumably the test harness
       substitutes the real shader text at runtime (TODO confirm). -->
  765. <ShaderOp Name="TertiaryUintOp" CS="CS" DispatchX="8" DispatchY="8">
  766. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  767. <Resource Name="STertiaryUintOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  768. <RootValues>
  769. <RootValue Index="0" ResName="STertiaryUintOp" />
  770. </RootValues>
  771. <Shader Name="CS" Target="cs_6_0">
  772. <![CDATA[
  773. void main(uint GI : SV_GroupIndex) {};
  774. ]]>
  775. </Shader>
  776. </ShaderOp>
  <!-- DotOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the SDotOp buffer, which is initialized ByName and read back.
       The embedded cs_6_0 main has an empty body; presumably the test harness
       substitutes the real shader text at runtime (TODO confirm). -->
  777. <ShaderOp Name="DotOp" CS="CS" DispatchX="8" DispatchY="8">
  778. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  779. <Resource Name="SDotOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  780. <RootValues>
  781. <RootValue Index="0" ResName="SDotOp" />
  782. </RootValues>
  783. <Shader Name="CS" Target="cs_6_0">
  784. <![CDATA[
  785. void main(uint GI : SV_GroupIndex) {};
  786. ]]>
  787. </Shader>
  788. </ShaderOp>
  <!-- Dot2AddHalfOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the SDot2AddHalfOp buffer, which is initialized ByName and read back.
       Unlike the cs_6_0 ops above, the shader targets cs_6_4. The embedded main has an
       empty body; presumably the test harness substitutes the real shader text at
       runtime (TODO confirm). -->
  789. <ShaderOp Name="Dot2AddHalfOp" CS="CS" DispatchX="8" DispatchY="8">
  790. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  791. <Resource Name="SDot2AddHalfOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  792. <RootValues>
  793. <RootValue Index="0" ResName="SDot2AddHalfOp" />
  794. </RootValues>
  795. <Shader Name="CS" Target="cs_6_4">
  796. <![CDATA[
  797. void main(uint GI : SV_GroupIndex) {};
  798. ]]>
  799. </Shader>
  800. </ShaderOp>
  <!-- Dot4AddI8PackedOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV
       at u0 bound to the SDot4AddI8PackedOp buffer, which is initialized ByName and read
       back. The shader targets cs_6_4. The embedded main has an empty body; presumably
       the test harness substitutes the real shader text at runtime (TODO confirm). -->
  801. <ShaderOp Name="Dot4AddI8PackedOp" CS="CS" DispatchX="8" DispatchY="8">
  802. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  803. <Resource Name="SDot4AddI8PackedOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  804. <RootValues>
  805. <RootValue Index="0" ResName="SDot4AddI8PackedOp" />
  806. </RootValues>
  807. <Shader Name="CS" Target="cs_6_4">
  808. <![CDATA[
  809. void main(uint GI : SV_GroupIndex) {};
  810. ]]>
  811. </Shader>
  812. </ShaderOp>
  <!-- Dot4AddU8PackedOp: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV
       at u0 bound to the SDot4AddU8PackedOp buffer, which is initialized ByName and read
       back. The shader targets cs_6_4. The embedded main has an empty body; presumably
       the test harness substitutes the real shader text at runtime (TODO confirm). -->
  813. <ShaderOp Name="Dot4AddU8PackedOp" CS="CS" DispatchX="8" DispatchY="8">
  814. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  815. <Resource Name="SDot4AddU8PackedOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  816. <RootValues>
  817. <RootValue Index="0" ResName="SDot4AddU8PackedOp" />
  818. </RootValues>
  819. <Shader Name="CS" Target="cs_6_4">
  820. <![CDATA[
  821. void main(uint GI : SV_GroupIndex) {};
  822. ]]>
  823. </Shader>
  824. </ShaderOp>
  <!-- Msad4: compute ShaderOp (DispatchX/DispatchY = 8) with a single root UAV at u0
       bound to the SMsad4 buffer, which is initialized ByName and read back.
       The embedded cs_6_0 main has an empty body; presumably the test harness
       substitutes the real shader text at runtime (TODO confirm). -->
  825. <ShaderOp Name="Msad4" CS="CS" DispatchX="8" DispatchY="8">
  826. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  827. <Resource Name="SMsad4" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  828. <RootValues>
  829. <RootValue Index="0" ResName="SMsad4"/>
  830. </RootValues>
  831. <Shader Name="CS" Target="cs_6_0">
  832. <![CDATA[
  833. void main(uint GI : SV_GroupIndex) {};
  834. ]]>
  835. </Shader>
  836. </ShaderOp>
  <!-- WaveIntrinsicsOp: compute ShaderOp with DispatchX/DispatchY = 1 (the arithmetic ops
       above use 8) and a single root UAV at u0 bound to the SWaveIntrinsicsOp buffer,
       which is initialized ByName and read back. The embedded cs_6_0 main has an empty
       body; presumably the test harness substitutes the real shader text at runtime
       (TODO confirm). -->
  837. <ShaderOp Name="WaveIntrinsicsOp" CS="CS" DispatchX="1" DispatchY="1">
  838. <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
  839. <Resource Name="SWaveIntrinsicsOp" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  840. <RootValues>
  841. <RootValue Index="0" ResName="SWaveIntrinsicsOp"/>
  842. </RootValues>
  843. <Shader Name="CS" Target="cs_6_0">
  844. <![CDATA[
  845. void main(uint GI : SV_GroupIndex) {};
  846. ]]>
  847. </Shader>
  848. </ShaderOp>
  <!-- Triangle: graphics ShaderOp (VS + PS) that draws the three vertices listed in VBuffer
       into a 320x200 R8G8B8A8_UNORM render target which is read back.
       Vertex layout: float3 POSITION at byte offset 0, float4 COLOR at byte offset 12.
       The VS multiplies position.y by 320/200 and forwards the color; the PS ignores its
       input and returns 1 in every channel. -->
  849. <ShaderOp Name="Triangle" PS="PS" VS="VS">
  850. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)</RootSignature>
  851. <Resource Name="VBuffer" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" Init="FromBytes">
  852. { { 0.0f, 0.25f , 0.0f }, { 1.0f, 0.0f, 0.0f, 1.0f } },
  853. { { 0.25f, -0.25f , 0.0f }, { 0.0f, 1.0f, 0.0f, 1.0f } },
  854. { { -0.25f, -0.25f , 0.0f }, { 0.0f, 0.0f, 1.0f, 1.0f } }
  855. </Resource>
  856. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="320" Height="200" Format="R8G8B8A8_UNORM" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  857. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  858. <Descriptor Name="RTarget" Kind="RTV"/>
  859. </DescriptorHeap>
  860. <InputElements>
  861. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  862. <InputElement SemanticName="COLOR" Format="R32G32B32A32_FLOAT" AlignedByteOffset="12" />
  863. </InputElements>
  864. <RenderTargets>
  865. <RenderTarget Name="RTarget" />
  866. </RenderTargets>
  867. <Shader Name="VS" Target="vs_6_0">
  868. <![CDATA[
  869. struct PSInput {
  870. float4 position : SV_POSITION;
  871. float4 color : COLOR;
  872. };
  873. PSInput main(float4 position : POSITION, float4 color : COLOR) {
  874. PSInput result;
  875. float ratio = 320.0 / 200.0;
  876. result.position = position;
  877. result.position.y *= ratio;
  878. result.color = color;
  879. return result;
  880. }
  881. ]]>
  882. </Shader>
  883. <Shader Name="PS" Target="ps_6_0">
  884. <![CDATA[
  885. struct PSInput {
  886. float4 position : SV_POSITION;
  887. float4 color : COLOR;
  888. };
  889. float4 main(PSInput input) : SV_TARGET {
  890. return 1;
  891. }
  892. ]]>
  893. </Shader>
  894. </ShaderOp>
  <!-- TriangleHalf: graphics ShaderOp (VS + PS) using 16-bit floats. VBuffer holds three
       vertices written with the h suffix; the layout is R16G16B16A16_FLOAT POSITION at
       byte offset 0 and R16G16B16A16_FLOAT COLOR at byte offset 8. Both shaders target
       6_2 with /enable-16bit-types and use half4 for position and color.
       The VS multiplies position.y by 320/200; the PS returns the incoming color, which
       is 1.0h in every channel for all three vertices. Output goes to a 320x200
       R8G8B8A8_UNORM render target that is read back. -->
  895. <ShaderOp Name="TriangleHalf" PS="PS" VS="VS">
  896. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)</RootSignature>
  897. <Resource Name="VBuffer" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" Init="FromBytes">
  898. { { 0.0h, 0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } },
  899. { { 0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } },
  900. { { -0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } }
  901. </Resource>
  902. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="320" Height="200" Format="R8G8B8A8_UNORM" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  903. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  904. <Descriptor Name="RTarget" Kind="RTV"/>
  905. </DescriptorHeap>
  906. <InputElements>
  907. <InputElement SemanticName="POSITION" Format="R16G16B16A16_FLOAT" AlignedByteOffset="0" />
  908. <InputElement SemanticName="COLOR" Format="R16G16B16A16_FLOAT" AlignedByteOffset="8" />
  909. </InputElements>
  910. <RenderTargets>
  911. <RenderTarget Name="RTarget" />
  912. </RenderTargets>
  913. <Shader Name="VS" Target="vs_6_2" Arguments="/enable-16bit-types">
  914. <![CDATA[
  915. struct PSInput {
  916. half4 position : SV_POSITION;
  917. half4 color : COLOR;
  918. };
  919. PSInput main(half4 position : POSITION, half4 color : COLOR) {
  920. PSInput result;
  921. float ratio = 320.0 / 200.0;
  922. result.position = position;
  923. result.position.y *= ratio;
  924. result.color = color;
  925. return result;
  926. }
  927. ]]>
  928. </Shader>
  929. <Shader Name="PS" Target="ps_6_2" Arguments="/enable-16bit-types">
  930. <![CDATA[
  931. struct PSInput {
  932. half4 position : SV_POSITION;
  933. half4 color : COLOR;
  934. };
  935. half4 main(PSInput input) : SV_TARGET {
  936. return input.color;
  937. }
  938. ]]>
  939. </Shader>
  940. </ShaderOp>
  <!-- CBufferTestHalf: graphics ShaderOp (TopologyType TRIANGLE) that reads four half
       values (first, second, third, fourth) from cbuffer c_buf and writes them as the
       pixel color into a 320x200 R16G16B16A16_FLOAT render target that is read back.
       CB0 is bound through the only RootValue entry and lists 1.25h, 1.75h, 1.25h, 1.875h.
       VBuffer holds six float3 positions (two triangles) consumed as POSITION.
       The VS element declares Text="@PS", and its VSMain entry point is defined in the PS
       source below, so both stages appear to share one source text.
       NOTE(review): CB0 declares Init="ByName" yet carries inline values; confirm which
       one the harness uses.
       NOTE(review): the root signature declares DescriptorTable(SRV(t0,numDescriptors=2))
       but this op defines no CBV_SRV_UAV heap or root value for it; confirm intended. -->
  941. <ShaderOp Name="CBufferTestHalf" PS="PS" VS="VS" TopologyType="TRIANGLE">
  942. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
  943. <Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="ByName" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">
  944. 1.25h, 1.75h, 1.25h, 1.875h
  945. </Resource>
  946. <Resource Name='VBuffer' Dimension='BUFFER' Flags='ALLOW_UNORDERED_ACCESS' InitialResourceState='COPY_DEST' Init='FromBytes'>
  947. 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0,
  948. -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0,
  949. </Resource>
  950. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="320" Height="200" Format="R16G16B16A16_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  951. <RootValues>
  952. <RootValue ResName="CB0" />
  953. </RootValues>
  954. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  955. <Descriptor Name="RTarget" Kind="RTV"/>
  956. </DescriptorHeap>
  957. <InputElements>
  958. <InputElement SemanticName='POSITION' Format='R32G32B32_FLOAT' AlignedByteOffset='0' />
  959. </InputElements>
  960. <RenderTargets>
  961. <RenderTarget Name="RTarget" />
  962. </RenderTargets>
  963. <Shader Name='VS' Target='vs_6_2' EntryPoint='VSMain' Arguments='-enable-16bit-types' Text="@PS" />
  964. <Shader Name='PS' Target='ps_6_2' EntryPoint='PSMain' Arguments='-enable-16bit-types'>
  965. <![CDATA[
  966. cbuffer c_buf {
  967. half first;
  968. half second;
  969. half third;
  970. half fourth;
  971. };
  972. struct PSInput {
  973. float4 position : SV_POSITION;
  974. };
  975. PSInput VSMain(float4 position: POSITION) {
  976. PSInput result;
  977. result.position = position;
  978. return result;
  979. }
  980. half4 PSMain(PSInput input) : SV_TARGET {
  981. return half4(first, second, third, fourth);
  982. }
  983. ]]>
  984. </Shader>
  985. </ShaderOp>
  <!-- Barycentrics: graphics ShaderOp (VS + PS, shader model 6_1) that draws one triangle
       with red, green and blue vertex colors into a 1280x2400 R32G32B32A32_FLOAT render
       target. Both VBuffer and RTarget are marked ReadBack="true".
       Vertex layout: float3 POSITION at byte offset 0, float4 COLOR at byte offset 12.
       The color attribute is declared nointerpolation; the PS fetches it per vertex with
       GetAttributeAtVertex(…, 0..2) and blends the three values using the components of
       SV_Barycentrics. -->
  986. <ShaderOp Name="Barycentrics" PS="PS" VS="VS">
  987. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)</RootSignature>
  988. <Resource Name="VBuffer" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" Init="FromBytes" ReadBack="true">
  989. { { 0.0f, 1.0f , 0.0f }, { 1.0f, 0.0f, 0.0f, 1.0f } },
  990. { { 1.0f, -1.0f , 0.0f }, { 0.0f, 1.0f, 0.0f, 1.0f } },
  991. { { -1.0f, -1.0f , 0.0f }, { 0.0f, 0.0f, 1.0f, 1.0f } }
  992. </Resource>
  993. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="1280" Height="2400" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  994. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  995. <Descriptor Name="RTarget" Kind="RTV"/>
  996. </DescriptorHeap>
  997. <InputElements>
  998. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  999. <InputElement SemanticName="COLOR" Format="R32G32B32A32_FLOAT" AlignedByteOffset="12" />
  1000. </InputElements>
  1001. <RenderTargets>
  1002. <RenderTarget Name="RTarget" />
  1003. </RenderTargets>
  1004. <Shader Name="VS" Target="vs_6_1">
  1005. <![CDATA[
  1006. struct PSInput {
  1007. float4 position : SV_POSITION;
  1008. nointerpolation float4 color : COLOR;
  1009. };
  1010. PSInput main(float4 position : POSITION, float4 color : COLOR) {
  1011. PSInput result;
  1012. result.position = position;
  1013. result.color = color;
  1014. return result;
  1015. }
  1016. ]]>
  1017. </Shader>
  1018. <Shader Name="PS" Target="ps_6_1">
  1019. <![CDATA[
  1020. struct PSInput {
  1021. float4 position : SV_POSITION;
  1022. nointerpolation float4 color : COLOR;
  1023. };
  1024. float4 main(PSInput input, float3 bary : SV_Barycentrics) : SV_Target {
  1025. float4 vColor0 = GetAttributeAtVertex(input.color, 0);
  1026. float4 vColor1 = GetAttributeAtVertex(input.color, 1);
  1027. float4 vColor2 = GetAttributeAtVertex(input.color, 2);
  1028. return bary.x * vColor0 + bary.y * vColor1 + bary.z * vColor2;
  1029. }
  1030. ]]>
  1031. </Shader>
  1032. </ShaderOp>
  <!-- ComputeRawBufferLdSt32Bit: compute ShaderOp with four SRV buffers (Width 40) and four
       UAV buffers (Width 120, all read back), every one initialized ByName.
       Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1 directly
       (t0, t1, u0, u1). Root parameter 4 is a descriptor table backed by ResHeap:
       SRVBuffer2 (RAW, 10 elements), SRVBuffer3 (StructureByteStride 40),
       UAVBuffer2 (RAW, 30 elements) and UAVBuffer3 (StructureByteStride 120).
       The cs_6_2 shader text is only a placeholder comment; per that comment the real
       source is set at runtime. -->
  1033. <ShaderOp Name="ComputeRawBufferLdSt32Bit" CS="CS">
  1034. <RootSignature>RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1035. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1036. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" />
  1037. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1038. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" />
  1039. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="120" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
  1040. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="120" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" />
  1041. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="120" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
  1042. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="120" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" />
  1043. <RootValues>
  1044. <RootValue Index="0" ResName="SRVBuffer0" />
  1045. <RootValue Index="1" ResName="SRVBuffer1" />
  1046. <RootValue Index="2" ResName="UAVBuffer0" />
  1047. <RootValue Index="3" ResName="UAVBuffer1" />
  1048. <RootValue Index="4" HeapName="ResHeap" />
  1049. </RootValues>
  1050. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1051. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="10" Format="R32_TYPELESS" />
  1052. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="40" />
  1053. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="30" Format="R32_TYPELESS" />
  1054. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="120" />
  1055. </DescriptorHeap>
  1056. <Shader Name="CS" Target="cs_6_2">
  1057. <![CDATA[// Shader source code will be set at runtime]]>
  1058. </Shader>
  1059. </ShaderOp>
  <!-- ComputeRawBufferLdSt64Bit: compute ShaderOp with four SRV buffers (Width 80) and four
       UAV buffers (Width 240, all read back), every one initialized ByName.
       Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1 directly
       (t0, t1, u0, u1). Root parameter 4 is a descriptor table backed by ResHeap:
       SRVBuffer2 (RAW, 20 elements), SRVBuffer3 (StructureByteStride 80),
       UAVBuffer2 (RAW, 60 elements) and UAVBuffer3 (StructureByteStride 240).
       The cs_6_2 shader text is only a placeholder comment; per that comment the real
       source is set at runtime. -->
  1060. <ShaderOp Name="ComputeRawBufferLdSt64Bit" CS="CS">
  1061. <RootSignature>RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1062. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1063. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" />
  1064. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1065. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" />
  1066. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="240" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
  1067. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="240" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" />
  1068. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="240" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
  1069. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="240" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" />
  1070. <RootValues>
  1071. <RootValue Index="0" ResName="SRVBuffer0" />
  1072. <RootValue Index="1" ResName="SRVBuffer1" />
  1073. <RootValue Index="2" ResName="UAVBuffer0" />
  1074. <RootValue Index="3" ResName="UAVBuffer1" />
  1075. <RootValue Index="4" HeapName="ResHeap" />
  1076. </RootValues>
  1077. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1078. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="20" Format="R32_TYPELESS" />
  1079. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="80" />
  1080. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="60" Format="R32_TYPELESS" />
  1081. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="240" />
  1082. </DescriptorHeap>
  1083. <Shader Name="CS" Target="cs_6_2">
  1084. <![CDATA[// Shader source code will be set at runtime]]>
  1085. </Shader>
  1086. </ShaderOp>
  <!-- ComputeRawBufferLdSt16Bit: compute ShaderOp with four SRV buffers (Width 20) and four
       UAV buffers (Width 60, all read back), every one initialized ByName.
       Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1 directly
       (t0, t1, u0, u1). Root parameter 4 is a descriptor table backed by ResHeap:
       SRVBuffer2 (RAW, 5 elements), SRVBuffer3 (StructureByteStride 20),
       UAVBuffer2 (RAW, 15 elements) and UAVBuffer3 (StructureByteStride 60).
       The cs_6_2 shader text is only a placeholder comment; per that comment the real
       source is set at runtime. -->
  1087. <ShaderOp Name="ComputeRawBufferLdSt16Bit" CS="CS">
  1088. <RootSignature>RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1089. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1090. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" />
  1091. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1092. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" />
  1093. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1094. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1095. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1096. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1097. <RootValues>
  1098. <RootValue Index="0" ResName="SRVBuffer0" />
  1099. <RootValue Index="1" ResName="SRVBuffer1" />
  1100. <RootValue Index="2" ResName="UAVBuffer0" />
  1101. <RootValue Index="3" ResName="UAVBuffer1" />
  1102. <RootValue Index="4" HeapName="ResHeap" />
  1103. </RootValues>
  1104. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1105. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="5" Format="R32_TYPELESS" />
  1106. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="20" />
  1107. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="15" Format="R32_TYPELESS" />
  1108. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="60" />
  1109. </DescriptorHeap>
  1110. <Shader Name="CS" Target="cs_6_2">
  1111. <![CDATA[// Shader source code will be set at runtime]]>
  1112. </Shader>
  1113. </ShaderOp>
  <!-- GraphicsRawBufferLdSt32Bit: graphics ShaderOp (VS + PS) with four SRV buffers
       (Width 40) and four UAV buffers (Width 120, all read back), every one initialized
       ByName. Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1
       directly; root parameter 4 is a descriptor table backed by ResHeap holding
       SRVBuffer2 (RAW, 10 elements), SRVBuffer3 (StructureByteStride 40),
       UAVBuffer2 (RAW, 30 elements) and UAVBuffer3 (StructureByteStride 120).
       VBuffer lists six float3 positions (TRIANGLELIST) with x and y at -1 or 1, drawn
       into a 16x16 R32G32B32A32_UINT render target that is read back.
       The VS forwards the position with w = 1. The ps_6_2 shader text is only a
       placeholder comment; per that comment the real source is set at runtime. -->
  1114. <ShaderOp Name="GraphicsRawBufferLdSt32Bit" PS="PS" VS="VS">
  1115. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1116. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1117. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" />
  1118. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1119. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="40" InitialResourceState="COPY_DEST" Init="ByName" />
  1120. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1121. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1122. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1123. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1124. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  1125. { { -1.0f, 1.0f, 0.0f } },
  1126. { { 1.0f, 1.0f, 0.0f } },
  1127. { { -1.0f, -1.0f, 0.0f } },
  1128. { { -1.0f, -1.0f, 0.0f } },
  1129. { { 1.0f, 1.0f, 0.0f } },
  1130. { { 1.0f, -1.0f, 0.0f } }
  1131. </Resource>
  1132. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  1133. <RootValues>
  1134. <RootValue Index="0" ResName="SRVBuffer0" />
  1135. <RootValue Index="1" ResName="SRVBuffer1" />
  1136. <RootValue Index="2" ResName="UAVBuffer0" />
  1137. <RootValue Index="3" ResName="UAVBuffer1" />
  1138. <RootValue Index="4" HeapName="ResHeap" />
  1139. </RootValues>
  1140. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1141. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="10" Format="R32_TYPELESS" />
  1142. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="40" />
  1143. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="30" Format="R32_TYPELESS" />
  1144. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="120" />
  1145. </DescriptorHeap>
  1146. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  1147. <Descriptor Name="RTarget" Kind="RTV"/>
  1148. </DescriptorHeap>
  1149. <InputElements>
  1150. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  1151. </InputElements>
  1152. <RenderTargets>
  1153. <RenderTarget Name="RTarget"/>
  1154. </RenderTargets>
  1155. <Shader Name="VS" Target="vs_6_2">
  1156. <![CDATA[
  1157. struct PSInput {
  1158. float4 pos : SV_POSITION;
  1159. };
  1160. PSInput main(float3 pos : POSITION) {
  1161. PSInput r;
  1162. r.pos = float4(pos, 1);
  1163. return r;
  1164. }
  1165. ]]>
  1166. </Shader>
  1167. <Shader Name="PS" Target="ps_6_2">
  1168. <![CDATA[// Shader source code will be set at runtime]]>
  1169. </Shader>
  1170. </ShaderOp>
  <!-- GraphicsRawBufferLdSt64Bit: graphics ShaderOp (VS + PS) with four SRV buffers
       (Width 80) and four UAV buffers (Width 240, all read back), every one initialized
       ByName. Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1
       directly; root parameter 4 is a descriptor table backed by ResHeap holding
       SRVBuffer2 (RAW, 20 elements), SRVBuffer3 (StructureByteStride 80),
       UAVBuffer2 (RAW, 60 elements) and UAVBuffer3 (StructureByteStride 240).
       VBuffer lists six float3 positions (TRIANGLELIST) with x and y at -1 or 1, drawn
       into a 16x16 R32G32B32A32_UINT render target that is read back.
       The VS forwards the position with w = 1. The ps_6_2 shader text is only a
       placeholder comment; per that comment the real source is set at runtime. -->
  1171. <ShaderOp Name="GraphicsRawBufferLdSt64Bit" PS="PS" VS="VS">
  1172. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1173. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1174. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" />
  1175. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1176. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="80" InitialResourceState="COPY_DEST" Init="ByName" />
  1177. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="240" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1178. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="240" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1179. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="240" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1180. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="240" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1181. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  1182. { { -1.0f, 1.0f, 0.0f } },
  1183. { { 1.0f, 1.0f, 0.0f } },
  1184. { { -1.0f, -1.0f, 0.0f } },
  1185. { { -1.0f, -1.0f, 0.0f } },
  1186. { { 1.0f, 1.0f, 0.0f } },
  1187. { { 1.0f, -1.0f, 0.0f } }
  1188. </Resource>
  1189. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  1190. <RootValues>
  1191. <RootValue Index="0" ResName="SRVBuffer0" />
  1192. <RootValue Index="1" ResName="SRVBuffer1" />
  1193. <RootValue Index="2" ResName="UAVBuffer0" />
  1194. <RootValue Index="3" ResName="UAVBuffer1" />
  1195. <RootValue Index="4" HeapName="ResHeap" />
  1196. </RootValues>
  1197. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1198. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="20" Format="R32_TYPELESS" />
  1199. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="80" />
  1200. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="60" Format="R32_TYPELESS" />
  1201. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="240" />
  1202. </DescriptorHeap>
  1203. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  1204. <Descriptor Name="RTarget" Kind="RTV"/>
  1205. </DescriptorHeap>
  1206. <InputElements>
  1207. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  1208. </InputElements>
  1209. <RenderTargets>
  1210. <RenderTarget Name="RTarget"/>
  1211. </RenderTargets>
  1212. <Shader Name="VS" Target="vs_6_2">
  1213. <![CDATA[
  1214. struct PSInput {
  1215. float4 pos : SV_POSITION;
  1216. };
  1217. PSInput main(float3 pos : POSITION) {
  1218. PSInput r;
  1219. r.pos = float4(pos, 1);
  1220. return r;
  1221. }
  1222. ]]>
  1223. </Shader>
  1224. <Shader Name="PS" Target="ps_6_2">
  1225. <![CDATA[// Shader source code will be set at runtime]]>
  1226. </Shader>
  1227. </ShaderOp>
  <!-- GraphicsRawBufferLdSt16Bit: graphics ShaderOp (VS + PS) with four SRV buffers
       (Width 20) and four UAV buffers (Width 60, all read back), every one initialized
       ByName. Root parameters 0..3 bind SRVBuffer0, SRVBuffer1, UAVBuffer0 and UAVBuffer1
       directly; root parameter 4 is a descriptor table backed by ResHeap holding
       SRVBuffer2 (RAW, 5 elements), SRVBuffer3 (StructureByteStride 20),
       UAVBuffer2 (RAW, 15 elements) and UAVBuffer3 (StructureByteStride 60).
       VBuffer lists six float3 positions (TRIANGLELIST) with x and y at -1 or 1, drawn
       into a 16x16 R32G32B32A32_UINT render target that is read back.
       The VS forwards the position with w = 1. The ps_6_2 shader text is only a
       placeholder comment; per that comment the real source is set at runtime. -->
  1228. <ShaderOp Name="GraphicsRawBufferLdSt16Bit" PS="PS" VS="VS">
  1229. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2))</RootSignature>
  1230. <Resource Name="SRVBuffer0" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1231. <Resource Name="SRVBuffer1" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" />
  1232. <Resource Name="SRVBuffer2" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_TYPELESS"/>
  1233. <Resource Name="SRVBuffer3" Dimension="BUFFER" Width="20" InitialResourceState="COPY_DEST" Init="ByName" />
  1234. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1235. <Resource Name="UAVBuffer1" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1236. <Resource Name="UAVBuffer2" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  1237. <Resource Name="UAVBuffer3" Dimension="BUFFER" Width="60" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
  1238. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  1239. { { -1.0f, 1.0f, 0.0f } },
  1240. { { 1.0f, 1.0f, 0.0f } },
  1241. { { -1.0f, -1.0f, 0.0f } },
  1242. { { -1.0f, -1.0f, 0.0f } },
  1243. { { 1.0f, 1.0f, 0.0f } },
  1244. { { 1.0f, -1.0f, 0.0f } }
  1245. </Resource>
  1246. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  1247. <RootValues>
  1248. <RootValue Index="0" ResName="SRVBuffer0" />
  1249. <RootValue Index="1" ResName="SRVBuffer1" />
  1250. <RootValue Index="2" ResName="UAVBuffer0" />
  1251. <RootValue Index="3" ResName="UAVBuffer1" />
  1252. <RootValue Index="4" HeapName="ResHeap" />
  1253. </RootValues>
  1254. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1255. <Descriptor Name='SRVBuffer2' Kind='SRV' ResName='SRVBuffer2' Flags='RAW' NumElements="5" Format="R32_TYPELESS" />
  1256. <Descriptor Name='SRVBuffer3' Kind='SRV' ResName='SRVBuffer3' NumElements="1" StructureByteStride="20" />
  1257. <Descriptor Name='UAVBuffer2' Kind='UAV' ResName='UAVBuffer2' Flags='RAW' NumElements="15" Format="R32_TYPELESS" />
  1258. <Descriptor Name='UAVBuffer3' Kind='UAV' ResName='UAVBuffer3' NumElements="1" StructureByteStride="60" />
  1259. </DescriptorHeap>
  1260. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  1261. <Descriptor Name="RTarget" Kind="RTV"/>
  1262. </DescriptorHeap>
  1263. <InputElements>
  1264. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  1265. </InputElements>
  1266. <RenderTargets>
  1267. <RenderTarget Name="RTarget"/>
  1268. </RenderTargets>
  1269. <Shader Name="VS" Target="vs_6_2">
  1270. <![CDATA[
  1271. struct PSInput {
  1272. float4 pos : SV_POSITION;
  1273. };
  1274. PSInput main(float3 pos : POSITION) {
  1275. PSInput r;
  1276. r.pos = float4(pos, 1);
  1277. return r;
  1278. }
  1279. ]]>
  1280. </Shader>
  1281. <Shader Name="PS" Target="ps_6_2">
  1282. <![CDATA[// Shader source code will be set at runtime]]>
  1283. </Shader>
  1284. </ShaderOp>
  <!-- WaveSizeTest: compute-only op. A single 512-byte buffer is bound as root UAV u0
       and marked for read-back. The CS text below is only a placeholder; per its own
       comment the real shader source is supplied at runtime. -->
  <ShaderOp Name="WaveSizeTest" CS="CS">
    <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
    <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="512" InitialResourceState="COPY_DEST" Init="ByName" Flags="ALLOW_UNORDERED_ACCESS" TransitionTo="UNORDERED_ACCESS" ReadBack="true" Format="R32_TYPELESS" />
    <RootValues>
      <RootValue Index="0" ResName="UAVBuffer0" />
    </RootValues>
    <Shader Name="CS" Target="cs_6_6">
      <![CDATA[// Shader source code will be set at runtime]]>
    </Shader>
  </ShaderOp>
  <!-- PackUnpackOp: single-group compute dispatch (DispatchX=1, DispatchY=1) with three
       root UAVs: one input buffer (u0, not read back) and two output buffers (u1, u2)
       that are read back. -->
  <ShaderOp Name="PackUnpackOp" CS="CS" DispatchX="1" DispatchY="1">
    <RootSignature>RootFlags(0), UAV(u0), UAV(u1), UAV(u2)</RootSignature>
    <Resource Name="g_bufIn" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="false" />
    <Resource Name="g_bufOutPacked" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
    <Resource Name="g_bufOutPackedUnpacked" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
    <RootValues>
      <RootValue Index="0" ResName="g_bufIn" />
      <RootValue Index="1" ResName="g_bufOutPacked" />
      <RootValue Index="2" ResName="g_bufOutPackedUnpacked" />
    </RootValues>
    <Shader Name="CS" Target="cs_6_0">
      <![CDATA[
        // Empty stub entry point that touches none of the bound UAVs.
        // NOTE(review): presumably the test replaces this text at runtime, like the
        // other placeholder shaders in this file; confirm against the caller.
        // A compute entry point needs a numthreads attribute to compile, so the stub
        // declares the smallest possible group.
        [numthreads(1, 1, 1)]
        void main(uint GI : SV_GroupIndex) {}
      ]]>
    </Shader>
  </ShaderOp>
  <!-- 64-bit raw atomics tests. Used for tests that don't require atomics on heap resources -->
  <!-- For explanations of the atomics tests, see comments in and around VerifyAtomicResults in ExecutionTest.cpp -->
  <!-- All six UAVs are bound as root descriptors (u0 to u5), so no CBV_SRV_UAV heap is declared here.
       The HLSL lives in the CS shader element; every other shader element reuses that text
       through Text="@CS" with a different entry point. -->
  <ShaderOp Name="AtomicsRoot" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
    <RootSignature>
      RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
      UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5),
      StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
    </RootSignature>
    <!-- Full-screen quad as two triangles: position (3 floats) + uv (2 floats) per vertex -->
    <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
      { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
      { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
      { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
      { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
      { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
      { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
    </Resource>
    <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
    <!-- Raw buffers -->
    <!-- U0 is accessed in HLSL as RWStructuredBuffer of Atomic64Stuff: 8 rows of 18 values,
         matching Width=576 as 8 elements of 72 bytes. Row 1 and row 3 carry the non-zero
         starting values for the element slots used by the min and and tests. -->
    <Resource Name="U0" Dimension="BUFFER" Width="576"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="FromBytes" ReadBack="true" >
      {
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, -1I, -1I, 0I, 0I, -1I, -1I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
      }
    </Resource>
    <Resource Name="U1" Dimension="BUFFER" Width="9216"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="Zero" ReadBack="true" />
    <Resource Name="U2" Dimension="BUFFER" Width="256"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="FromBytes" ReadBack="true">
      { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
    </Resource>
    <Resource Name="U3" Dimension="BUFFER" Width="1024"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="Zero" ReadBack="true" />
    <!-- groupshared output buffers -->
    <Resource Name="U4" Dimension="BUFFER" Width="256"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="Zero" ReadBack="true" />
    <Resource Name="U5" Dimension="BUFFER" Width="1024"
              Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
              Init="Zero" ReadBack="true" />
    <RootValues>
      <!-- Raw buffers -->
      <RootValue Index="0" ResName="U0" />
      <RootValue Index="1" ResName="U1" />
      <RootValue Index="2" ResName="U2" />
      <RootValue Index="3" ResName="U3" />
      <!-- groupshared output buffers -->
      <RootValue Index="4" ResName="U4" />
      <RootValue Index="5" ResName="U5" />
    </RootValues>
    <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
      <Descriptor Name="RTarget" Kind="RTV"/>
    </DescriptorHeap>
    <InputElements>
      <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
      <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
    </InputElements>
    <RenderTargets>
      <RenderTarget Name="RTarget"/>
    </RenderTargets>
    <!-- Groupshared variants (xxSH64) and raw-buffer variants; all share the CS source text -->
    <Shader Name="ASSH64" Target="as_6_6" EntryPoint="ASMainShared64" Text="@CS"/>
    <Shader Name="MSSH64" Target="ms_6_6" EntryPoint="MSMainShared64" Text="@CS"/>
    <Shader Name="CSSH64" Target="cs_6_6" EntryPoint="CSMainShared64" Text="@CS"/>
    <Shader Name="PS" Target="ps_6_6" EntryPoint="PSMainRaw64" Text="@CS"/>
    <Shader Name="AS" Target="as_6_6" EntryPoint="ASMainRaw64" Text="@CS"/>
    <Shader Name="MS" Target="ms_6_6" EntryPoint="MSMainRaw64" Text="@CS"/>
    <Shader Name="VS" Target="vs_6_6" EntryPoint="VSMainRaw64" Text="@CS"/>
    <Shader Name="CS" Target="cs_6_6" EntryPoint="CSMainRaw64">
      <![CDATA[
      struct PSInput {
        float4 position : SV_POSITION;
        float2 uv : TEXCOORD;
      };

      // Element type of the structured UAVs. The padding members surround the 64-bit
      // members that the atomics actually target (uintEl[1] and sintEl.y).
      struct Atomic64Stuff {
        float2 prepad[3];
        uint64_t uintEl[2];
        int64_t2 sintEl;
        struct useless {
          uint3 unused;
        } postpad;
        float last;
      };

      RWStructuredBuffer<Atomic64Stuff> g_struct64Buf : register(u0);
      RWStructuredBuffer<Atomic64Stuff> g_strXchg64Buf : register(u1);
      RWByteAddressBuffer g_raw64Buf : register(u2);
      RWByteAddressBuffer g_rawXchg64Buf : register(u3);
      RWStructuredBuffer<uint64_t> g_share64Buf : register(u4);
      RWStructuredBuffer<uint64_t> g_shareXchg64Buf : register(u5);

      // Groupshared targets. The unsigned ops use slots 0 through 5 of g_uint64Share.
      // The signed min and max use slots 1 and 2 of g_sint64Share; slot 0 is never
      // the target of an atomic.
      groupshared uint64_t g_uint64Share[6];
      groupshared int64_t g_sint64Share[3];
      groupshared uint64_t g_xchg64Share[64];

      // The call macros below rely on locals declared by the calling function:
      // stride, value, addVal, uminMaxVal, sminMaxVal, xorVal, xchgVal, output and ix.
      #define VEC_CALL(op, uav, ix, val) op(uav[ix*stride], val);
      #define USTRUCT_CALL(op, uav, ix, val) op(uav[ix].uintEl[stride], val);
      #define SSTRUCT64_CALL(op, uav, ix, val) op(uav[ix].sintEl.y, val);
      #define URAW_CALL(op, uav, ix, val) uav.op(8*ix, val);
      #define SRAW_CALL(op, uav, ix, val) uav.op(8*(5+ix), val); // signed at end. raw buffers don't need separate buffers

      #define OP_TEST(ucall, scall, uuav, suav) \
        ucall(InterlockedAdd, uuav, 0, addVal); \
        scall(InterlockedMin, suav, 1, sminMaxVal); \
        scall(InterlockedMax, suav, 2, sminMaxVal); \
        ucall(InterlockedMin, uuav, 1, uminMaxVal); \
        ucall(InterlockedMax, uuav, 2, uminMaxVal); \
        ucall(InterlockedAnd, uuav, 3, ~value); \
        ucall(InterlockedOr, uuav, 4, value); \
        ucall(InterlockedXor, uuav, 5, xorVal);

      #define VEC_CALL3(op, uav, ix, cmp, val) op(uav[(ix)*stride], cmp, val)
      #define VEC_CALL4(op, uav, ix, cmp, val, o) op(uav[(ix)*stride], cmp, val, o)
      #define STRUCT_CALL3(op, uav, ix, cmp, val) op(uav[ix].uintEl[stride], cmp, val)
      #define STRUCT_CALL4(op, uav, ix, cmp, val, o) op(uav[ix].uintEl[stride], cmp, val, o)
      #define RAW_CALL3(op, uav, ix, cmp, val) uav.op(8*(ix), cmp, val)
      #define RAW_CALL4(op, uav, ix, cmp, val, o) uav.op(8*(ix), cmp, val, o)

      // The first of four to match gets the first and then the winner performs the last two exchanges
      #define XCHG_TEST(call3, call4, uav) \
        call3(InterlockedCompareStore, uav, (ix/3)%64, 0, xchgVal - 2); \
        call4(InterlockedCompareExchange, uav, (ix/3)%64, xchgVal - 2, xchgVal - 1, output); \
        if (output == xchgVal - 2) { call3(InterlockedExchange, uav, (ix/3)%64, xchgVal, output);}

      // Runs the arithmetic and exchange atomics against the structured (u0, u1)
      // and byte-address (u2, u3) UAVs for one invocation index.
      void AtomicRaw64Test(uint ix, uint64_t bitSize) {
        uint64_t lix = ix;
        uint stride = 1;
        // Index duplicated into the lower and upper halves of the 64-bit value
        uint64_t value = (lix) | ((lix) << (bitSize/2));
        uint64_t addVal = value;
        // Selects value when its low bit is set, ~value otherwise
        uint64_t uminMaxVal = ~value*(~value&1) + value*(value&1);
        int64_t sminMaxVal = ~value*(~value&1) + value*(value&1);
        uint64_t xorVal = 1ULL << (lix%(bitSize-1));
        // make higher bits differ while lower bits match
        uint64_t xchgVal = (lix << (bitSize/2)) | ((lix/3)%64);
        uint64_t output = 0;

        OP_TEST(USTRUCT_CALL, SSTRUCT64_CALL, g_struct64Buf, g_struct64Buf)
        XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchg64Buf)

        // ByteAddressBuffer for 64-bit values are a special case. inlined here
        URAW_CALL(InterlockedAdd64, g_raw64Buf, 0, addVal);
        SRAW_CALL(InterlockedMin64, g_raw64Buf, 1, sminMaxVal);
        SRAW_CALL(InterlockedMax64, g_raw64Buf, 2, sminMaxVal);
        URAW_CALL(InterlockedMin64, g_raw64Buf, 1, uminMaxVal);
        URAW_CALL(InterlockedMax64, g_raw64Buf, 2, uminMaxVal);
        URAW_CALL(InterlockedAnd64, g_raw64Buf, 3, ~value);
        URAW_CALL(InterlockedOr64, g_raw64Buf, 4, value);
        URAW_CALL(InterlockedXor64, g_raw64Buf, 5, xorVal);
        RAW_CALL3(InterlockedCompareStore64, g_rawXchg64Buf, (ix/3)%64, 0, xchgVal - 2);
        RAW_CALL4(InterlockedCompareExchange64, g_rawXchg64Buf, (ix/3)%64, xchgVal - 2, xchgVal - 1, output);
        if (output == xchgVal - 2) { RAW_CALL3(InterlockedExchange64, g_rawXchg64Buf, (ix/3)%64, xchgVal, output);}
      }

      void InitSharedMem64(uint ix) {
        // Zero-init shared memory, with special cases
        if (ix < 6)
          g_uint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : ix == 3 ? ~0ULL : 0;
        if (ix < 3)
          g_sint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : 0;
        if (ix < 64)
          g_xchg64Share[ix] = 0;
        GroupMemoryBarrierWithGroupSync();
      }

      // Same operations as AtomicRaw64Test, but targeting the groupshared arrays.
      // Ends with a group barrier so callers can read the results afterwards.
      void AtomicGroupShared64Test(uint ix) {
        uint64_t lix = ix;
        uint stride = 1;
        uint64_t bitSize = 64;
        uint64_t value = (lix) | ((lix) << (bitSize/2));
        uint64_t addVal = value;
        uint64_t uminMaxVal = ~value*(~value&1) + value*(value&1);
        int64_t sminMaxVal = ~value*(~value&1) + value*(value&1);
        uint64_t xorVal = 1ULL << (lix%(bitSize-1));
        uint64_t xchgVal = (lix << (bitSize/2)) | ((lix/3)%64);
        uint64_t output = 0;

        OP_TEST(VEC_CALL, VEC_CALL, g_uint64Share, g_sint64Share)
        XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchg64Share)
        GroupMemoryBarrierWithGroupSync();
      }

      // Payloads are used to transport AS test results to MS where they are finalized
      struct Payload {
        uint arith[16];
        uint xchg[64];
      };

      struct Payload64 {
        uint64_t arith[16];
        uint64_t xchg[64];
      };

      static float4 g_Verts[6] = {
        { -1.0f, 1.0f, 0.0f, 1.0f },
        { 1.0f, 1.0f, 0.0f, 1.0f },
        { -1.0f, -1.0f, 0.0f, 1.0f },
        { -1.0f, -1.0f, 0.0f, 1.0f },
        { 1.0f, 1.0f, 0.0f, 1.0f },
        { 1.0f, -1.0f, 0.0f, 1.0f }};

      static float2 g_UV[6] = {
        { 0.0f, 0.0f },
        { 1.0f, 0.0f },
        { 0.0f, 1.0f },
        { 0.0f, 1.0f },
        { 1.0f, 0.0f },
        { 1.0f, 1.0f }};

      // Index ranges used by the raw entry points: PS and CS pass their own index,
      // VS and MS offset it by 64*64, and AS offsets it by a further 8*8*2.
      [NumThreads(8, 8, 2)]
      void ASMainRaw64(uint ix : SV_GroupIndex) {
        Payload payload = (Payload)0;
        AtomicRaw64Test(64*64 + 8*8*2 + ix, 64);
        DispatchMesh(1, 1, 1, payload);
      }

      [NumThreads(8, 8, 2)]
      [OutputTopology("triangle")]
      void MSMainRaw64(
        uint ix : SV_GroupIndex,
        in payload Payload payload,
        out vertices PSInput verts[6],
        out indices uint3 tris[2]) {
        SetMeshOutputCounts(6, 2);
        // Assign static fullscreen 2 tri quad
        verts[ix%6].position = g_Verts[ix%6];
        verts[ix%6].uv = g_UV[ix%6];
        tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
        AtomicRaw64Test(64*64 + ix, 64);
      }

      PSInput VSMainRaw64(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
        PSInput result;
        result.position = float4(position, 1.0);
        result.uv = uv;
        AtomicRaw64Test(64*64 + ix, 64);
        return result;
      }

      float4 PSMainRaw64(PSInput input) : SV_TARGET {
        // Linear pixel index over the 64x64 render target, derived from uv
        uint ix = uint(input.uv.y*64)*64 + input.uv.x*64;
        AtomicRaw64Test(ix, 64);
        return 1;
      }

      [NumThreads(32, 32, 1)]
      void CSMainRaw64(uint ix : SV_GroupIndex) {
        AtomicRaw64Test(ix, 64);
      }

      groupshared Payload64 payload64;

      [NumThreads(8, 8, 2)]
      void ASMainShared64(uint ix : SV_GroupIndex) {
        InitSharedMem64(ix);
        AtomicGroupShared64Test(ix);
        // Copy AS test results to payload and ultimately to MS
        // More threads than results are possible,
        // so indices will result in duplicate copies
        payload64.arith[ix%6] = g_uint64Share[ix%6];
        // Only slots 1 (min) and 2 (max) of the 3-element signed array hold results.
        // They map to arith[6] and arith[7]; ix%2 keeps the read inside the array.
        payload64.arith[ix%2 + 6] = g_sint64Share[ix%2 + 1];
        payload64.xchg[ix%64] = g_xchg64Share[ix%64];
        DispatchMesh(1, 1, 1, payload64);
      }

      [NumThreads(8, 8, 2)]
      [OutputTopology("triangle")]
      void MSMainShared64(
        uint ix : SV_GroupIndex,
        in payload Payload64 payload,
        out vertices PSInput verts[6],
        out indices uint3 tris[2]) {
        SetMeshOutputCounts(6, 2);
        // Assign static fullscreen 2 tri quad
        verts[ix%6].position = g_Verts[ix%6];
        verts[ix%6].uv = g_UV[ix%6];
        tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);

        // Load AS test results from payload
        // More threads than results are possible,
        // so indices will result in duplicate copies
        g_uint64Share[ix%6] = payload.arith[ix%6];
        // Inverse of the AS store: arith[6] and arith[7] go back to signed slots 1 and 2,
        // so the MS min and max continue from the AS min and max.
        g_sint64Share[ix%2 + 1] = payload.arith[ix%2 + 6];
        g_xchg64Share[ix%64] = payload.xchg[ix%64];
        GroupMemoryBarrierWithGroupSync();

        AtomicGroupShared64Test(8*8*2 + ix);

        // Copy final AS + MS results to output UAVs
        g_share64Buf[ix%6] = g_uint64Share[ix%6];
        // Signed min lands in element 6 and signed max in element 7
        g_share64Buf[ix%2 + 6] = g_sint64Share[ix%2 + 1];
        g_shareXchg64Buf[ix%64] = g_xchg64Share[ix%64];
      }

      [NumThreads(32, 32, 1)]
      void CSMainShared64(uint ix : SV_GroupIndex) {
        InitSharedMem64(ix);
        AtomicGroupShared64Test(ix);

        // Copy final results to output UAVs
        g_share64Buf[ix%6] = g_uint64Share[ix%6];
        // Signed min lands in element 6 and signed max in element 7;
        // ix%2 keeps the read inside the 3-element signed array.
        g_share64Buf[ix%2 + 6] = g_sint64Share[ix%2 + 1];
        g_shareXchg64Buf[ix%64] = g_xchg64Share[ix%64];
      }
      ]]>
    </Shader>
  </ShaderOp>
  <!-- Used by 32-bit atomics and typed 64-bit atomics tests, which require heap descriptor support -->
  <!-- For explanations of the atomics tests, see comments in and around VerifyAtomicResults in ExecutionTest.cpp -->
  1599. <ShaderOp Name="AtomicsHeap" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
  1600. <RootSignature>
  1601. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
  1602. DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), UAV(u6), UAV(u7), UAV(u8), UAV(u9), UAV(u10), UAV(u11), UAV(u12), UAV(u13), UAV(u14), UAV(u15), UAV(u16), UAV(u17)),
  1603. StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
  1604. </RootSignature>
  1605. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  1606. { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
  1607. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  1608. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  1609. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  1610. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  1611. { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
  1612. </Resource>
  1613. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
  1614. <!-- Raw buffers -->
  1615. <Resource Name="U0" Dimension="BUFFER" Width="576"
  1616. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1617. Init="FromBytes" ReadBack="true" >
  1618. {
  1619. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1620. 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0,
  1621. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1622. 0, 0, 0, 0, 0, 0, 0I, 0I, -1I, -1I, 0I, 0I, -1I, -1I, 0, 0, 0, 0,
  1623. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1624. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1625. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1626. 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0,
  1627. }
  1628. </Resource>
  1629. <Resource Name="U1" Dimension="BUFFER" Width="9216"
  1630. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1631. Init="Zero" ReadBack="true" />
  1632. <Resource Name="U2" Dimension="BUFFER" Width="256"
  1633. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1634. Init="FromBytes" ReadBack="true">
  1635. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1636. </Resource>
  1637. <Resource Name="U3" Dimension="BUFFER" Width="1024"
  1638. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1639. Init="Zero" ReadBack="true" />
  1640. <!-- groupshared output buffers -->
  1641. <Resource Name="U4" Dimension="BUFFER" Width="256"
  1642. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1643. Init="Zero" ReadBack="true" />
  1644. <Resource Name="U5" Dimension="BUFFER" Width="1024"
  1645. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1646. Init="Zero" ReadBack="true" />
  1647. <!-- 32-bit typed resources -->
  1648. <Resource Name="U6" Dimension="BUFFER" Width="256" Format="R32_UINT"
  1649. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1650. Init="FromBytes" ReadBack="true" >
  1651. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1652. </Resource>
  1653. <Resource Name="U7" Dimension="BUFFER" Width="256" Format="R32_SINT"
  1654. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1655. Init="FromBytes" ReadBack="true">
  1656. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1657. </Resource>
  1658. <Resource Name="U8" Dimension="BUFFER" Width="1024" Format="R32_UINT"
  1659. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1660. Init="Zero" ReadBack="true" />
  1661. <Resource Name="U9" Dimension="TEXTURE1D" Width="16" Format="R32_UINT"
  1662. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1663. Init="FromBytes" ReadBack="true" >
  1664. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1665. </Resource>
  1666. <Resource Name="U10" Dimension="TEXTURE1D" Width="16" Format="R32_SINT"
  1667. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1668. Init="FromBytes" ReadBack="true">
  1669. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1670. </Resource>
  1671. <Resource Name="U11" Dimension="TEXTURE1D" Width="128" Format="R32_UINT"
  1672. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1673. Init="Zero" ReadBack="true" />
  1674. <!-- 64-bit typed resources -->
  1675. <Resource Name="U12" Dimension="BUFFER" Width="256" Format="R32G32_UINT"
  1676. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1677. Init="FromBytes" ReadBack="true" >
  1678. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1679. </Resource>
  1680. <Resource Name="U13" Dimension="BUFFER" Width="256" Format="R32G32_SINT"
  1681. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1682. Init="FromBytes" ReadBack="true">
  1683. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1684. </Resource>
  1685. <Resource Name="U14" Dimension="BUFFER" Width="1024" Format="R32G32_UINT"
  1686. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1687. Init="Zero" ReadBack="true" />
  1688. <Resource Name="U15" Dimension="TEXTURE1D" Width="16" Format="R32G32_UINT"
  1689. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1690. Init="FromBytes" ReadBack="true" >
  1691. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1692. </Resource>
  1693. <Resource Name="U16" Dimension="TEXTURE1D" Width="16" Format="R32G32_SINT"
  1694. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1695. Init="FromBytes" ReadBack="true">
  1696. { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I }
  1697. </Resource>
  1698. <Resource Name="U17" Dimension="TEXTURE1D" Width="128" Format="R32G32_UINT"
  1699. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  1700. Init="Zero" ReadBack="true" />
  1701. <RootValues>
  1702. <RootValue HeapName="ResHeap" />
  1703. </RootValues>
  1704. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  1705. <!-- Raw buffers -->
  1706. <Descriptor Name="U0" Kind="UAV" ResName="U0"
  1707. NumElements="8" StructureByteStride="72" />
  1708. <Descriptor Name="U1" Kind="UAV" ResName="U1"
  1709. NumElements="128" StructureByteStride="72" />
  1710. <Descriptor Name="U2" Kind="UAV" ResName="U2"
  1711. NumElements="16" StructureByteStride="8" />
  1712. <Descriptor Name="U3" Kind="UAV" ResName="U3"
  1713. NumElements="128" StructureByteStride="8" />
  1714. <!-- groupshared output buffers -->
  1715. <Descriptor Name="U4" Kind="UAV" ResName="U4"
  1716. NumElements="8" StructureByteStride="8" />
  1717. <Descriptor Name="U5" Kind="UAV" ResName="U5"
  1718. NumElements="64" StructureByteStride="8" />
  1719. <!-- 32-bit typed resources -->
  1720. <Descriptor Name="U6" Kind="UAV" ResName="U6" Dimension="BUFFER"
  1721. NumElements="16" Format="R32_UINT" />
  1722. <Descriptor Name="U7" Kind="UAV" ResName="U7" Dimension="BUFFER"
  1723. NumElements="16" Format="R32_UINT" />
  1724. <Descriptor Name="U8" Kind="UAV" ResName="U8" Dimension="BUFFER"
  1725. NumElements="128" Format="R32_UINT" />
  1726. <Descriptor Name="U9" Kind="UAV" ResName="U9" Dimension="TEXTURE1D"
  1727. NumElements="16" Format="R32_UINT" />
  1728. <Descriptor Name="U10" Kind="UAV" ResName="U10" Dimension="TEXTURE1D"
  1729. NumElements="16" Format="R32_UINT" />
  1730. <Descriptor Name="U11" Kind="UAV" ResName="U11" Dimension="TEXTURE1D"
  1731. NumElements="128" Format="R32_UINT" />
  1732. <!-- 64-bit typed resources -->
  1733. <Descriptor Name="U12" Kind="UAV" ResName="U12" Dimension="BUFFER"
  1734. NumElements="16" Format="R32G32_UINT" />
  1735. <Descriptor Name="U13" Kind="UAV" ResName="U13" Dimension="BUFFER"
  1736. NumElements="16" Format="R32G32_UINT" />
  1737. <Descriptor Name="U14" Kind="UAV" ResName="U14" Dimension="BUFFER"
  1738. NumElements="128" Format="R32G32_UINT" />
  1739. <Descriptor Name="U15" Kind="UAV" ResName="U15" Dimension="TEXTURE1D"
  1740. NumElements="16" Format="R32G32_UINT" />
  1741. <Descriptor Name="U16" Kind="UAV" ResName="U16" Dimension="TEXTURE1D"
  1742. NumElements="16" Format="R32G32_UINT" />
  1743. <Descriptor Name="U17" Kind="UAV" ResName="U17" Dimension="TEXTURE1D"
  1744. NumElements="128" Format="R32G32_UINT" />
  1745. </DescriptorHeap>
  1746. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  1747. <Descriptor Name="RTarget" Kind="RTV"/>
  1748. </DescriptorHeap>
  1749. <InputElements>
  1750. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  1751. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  1752. </InputElements>
  1753. <RenderTargets>
  1754. <RenderTarget Name="RTarget"/>
  1755. </RenderTargets>
  1756. <Shader Name="PS64" Target="ps_6_6" EntryPoint="PSMainRaw64" Text="@CS"/>
  1757. <Shader Name="AS64" Target="as_6_6" EntryPoint="ASMainRaw64" Text="@CS"/>
  1758. <Shader Name="MS64" Target="ms_6_6" EntryPoint="MSMainRaw64" Text="@CS"/>
  1759. <Shader Name="VS64" Target="vs_6_6" EntryPoint="VSMainRaw64" Text="@CS"/>
  1760. <Shader Name="CS64" Target="cs_6_6" EntryPoint="CSMainRaw64" Text="@CS"/>
  1761. <Shader Name="PSTY64" Target="ps_6_6" EntryPoint="PSMainTyped64" Text="@CS"/>
  1762. <Shader Name="ASTY64" Target="as_6_6" EntryPoint="ASMainTyped64" Text="@CS"/>
  1763. <Shader Name="MSTY64" Target="ms_6_6" EntryPoint="MSMainTyped64" Text="@CS"/>
  1764. <Shader Name="VSTY64" Target="vs_6_6" EntryPoint="VSMainTyped64" Text="@CS"/>
  1765. <Shader Name="CSTY64" Target="cs_6_6" EntryPoint="CSMainTyped64" Text="@CS"/>
  1766. <Shader Name="AS" Target="as_6_5" EntryPoint="ASMain32" Text="@CS"/>
  1767. <Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain32" Text="@CS"/>
  1768. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain32" Text="@CS"/>
  1769. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain32" Text="@CS"/>
  1770. <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain32">
  1771. <![CDATA[
  1772. struct PSInput {
  1773. float4 position : SV_POSITION;
  1774. float2 uv : TEXCOORD;
  1775. };
  1776. struct AtomicStuff {
  1777. float2 prepad[3];
  1778. uint uintEl[4];
  1779. int4 sintEl;
  1780. struct useless {
  1781. uint3 unused;
  1782. } postpad;
  1783. float last;
  1784. };
  1785. struct Atomic64Stuff {
  1786. float2 prepad[3];
  1787. uint64_t uintEl[2];
  1788. int64_t2 sintEl;
  1789. struct useless {
  1790. uint3 unused;
  1791. } postpad;
  1792. float last;
  1793. };
  1794. RWStructuredBuffer<AtomicStuff> g_structBuf : register(u0);
  1795. RWStructuredBuffer<AtomicStuff> g_strXchgBuf : register(u1);
  1796. RWByteAddressBuffer g_rawBuf : register(u2);
  1797. RWByteAddressBuffer g_rawXchgBuf : register(u3);
  1798. RWStructuredBuffer<uint2> g_shareBuf : register(u4);
  1799. RWStructuredBuffer<uint2> g_shareXchgBuf : register(u5);
  1800. RWBuffer<uint> g_uintBuf : register(u6);
  1801. RWBuffer<int> g_sintBuf : register(u7);
  1802. RWBuffer<int> g_xchgBuf : register(u8);
  1803. RWTexture1D<uint> g_utexBuf : register(u9);
  1804. RWTexture1D<int> g_stexBuf : register(u10);
  1805. RWTexture1D<int> g_xtexBuf : register(u11);
  1806. groupshared uint g_uintShare[12];
  1807. groupshared int g_sintShare[6];
  1808. groupshared uint g_xchgShare[128];
  1809. RWStructuredBuffer<Atomic64Stuff> g_struct64Buf : register(u0);
  1810. RWStructuredBuffer<Atomic64Stuff> g_strXchg64Buf : register(u1);
  1811. RWByteAddressBuffer g_raw64Buf : register(u2);
  1812. RWByteAddressBuffer g_rawXchg64Buf : register(u3);
  1813. RWBuffer<uint64_t> g_uint64Buf : register(u12);
  1814. RWBuffer<int64_t> g_sint64Buf : register(u13);
  1815. RWBuffer<int64_t> g_xchg64Buf : register(u14);
  1816. RWTexture1D<uint64_t> g_utex64Buf : register(u15);
  1817. RWTexture1D<int64_t> g_stex64Buf : register(u16);
  1818. RWTexture1D<int64_t> g_xtex64Buf : register(u17);
  1819. #define VEC_CALL(op, uav, ix, val) op(uav[ix*stride], val);
  1820. #define USTRUCT_CALL(op, uav, ix, val) op(uav[ix].uintEl[stride], val);
  1821. #define SSTRUCT_CALL(op, uav, ix, val) op(uav[ix].sintEl.z, val);
  1822. #define SSTRUCT64_CALL(op, uav, ix, val) op(uav[ix].sintEl.y, val);
  1823. #define URAW_CALL(op, uav, ix, val) uav.op(8*ix, val);
  1824. #define SRAW_CALL(op, uav, ix, val) uav.op(8*(5+ix), val); // signed at end. raw buffers don't need separate buffers
  1825. #define OP_TEST(ucall, scall, uuav, suav) \
  1826. ucall(InterlockedAdd, uuav, 0, addVal); \
  1827. scall(InterlockedMin, suav, 1, sminMaxVal); \
  1828. scall(InterlockedMax, suav, 2, sminMaxVal); \
  1829. ucall(InterlockedMin, uuav, 1, uminMaxVal); \
  1830. ucall(InterlockedMax, uuav, 2, uminMaxVal); \
  1831. ucall(InterlockedAnd, uuav, 3, ~value); \
  1832. ucall(InterlockedOr, uuav, 4, value); \
  1833. ucall(InterlockedXor, uuav, 5, xorVal);
  1834. #define VEC_CALL3(op, uav, ix, cmp, val) op(uav[(ix)*stride], cmp, val)
  1835. #define VEC_CALL4(op, uav, ix, cmp, val, o) op(uav[(ix)*stride], cmp, val, o)
  1836. #define STRUCT_CALL3(op, uav, ix, cmp, val) op(uav[ix].uintEl[stride], cmp, val)
  1837. #define STRUCT_CALL4(op, uav, ix, cmp, val, o) op(uav[ix].uintEl[stride], cmp, val, o)
  1838. #define RAW_CALL3(op, uav, ix, cmp, val) uav.op(8*(ix), cmp, val)
  1839. #define RAW_CALL4(op, uav, ix, cmp, val, o) uav.op(8*(ix), cmp, val, o)
  1840. // The first of four to match gets the first and then the winner performs the last two exchanges
  1841. #define XCHG_TEST(call3, call4, uav) \
  1842. call3(InterlockedCompareStore, uav, (ix/3)%64, 0, xchgVal - 2); \
  1843. call4(InterlockedCompareExchange, uav, (ix/3)%64, xchgVal - 2, xchgVal - 1, output); \
  1844. if (output == xchgVal - 2) { call3(InterlockedExchange, uav, (ix/3)%64, xchgVal, output);}
  1845. void AtomicTest(uint ix, uint bitSize) {
  1846. uint stride = 2;
  1847. uint value = (ix) | ((ix) << (bitSize/2));
  1848. uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
  1849. uint uminMaxVal = ~value*(~value&1) + value*(value&1);
  1850. int sminMaxVal = ~value*(~value&1) + value*(value&1);
  1851. uint xorVal = 1U << (ix%(bitSize-1));
  1852. // make higher bits differ while lower bits match
  1853. uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
  1854. uint output = 0;
  1855. // structured
  1856. OP_TEST(USTRUCT_CALL, SSTRUCT_CALL, g_structBuf, g_structBuf)
  1857. XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchgBuf)
  1858. // raw
  1859. OP_TEST(URAW_CALL, SRAW_CALL, g_rawBuf, g_rawBuf)
  1860. XCHG_TEST(RAW_CALL3, RAW_CALL4, g_rawXchgBuf)
  1861. // typed buffer
  1862. OP_TEST(VEC_CALL, VEC_CALL, g_uintBuf, g_sintBuf)
  1863. XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgBuf)
  1864. // texture
  1865. OP_TEST(VEC_CALL, VEC_CALL, g_utexBuf, g_stexBuf)
  1866. XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtexBuf)
  1867. }
  // Runs the 64-bit atomic tests on the structured and raw (byte address) buffers.
  //   ix      - index of the calling thread; every operand is derived from it
  //   bitSize - width in bits of the values under test
  // The OP_TEST, XCHG_TEST and *_CALL macros expand in this scope and refer to
  // the locals below by name, so their spelling must not change.
  void AtomicRaw64Test(uint ix, uint64_t bitSize) {
    uint64_t lix = ix;
    uint stride = 1;
    // lix OR-ed with a copy of itself shifted up by half the bit width
    uint64_t value = lix | (lix << (bitSize/2));
    uint64_t addVal = value;
    // odd values are used as they are, even values are bit-inverted
    uint64_t uminMaxVal = (value & 1) != 0 ? value : ~value;
    int64_t sminMaxVal = (int64_t)uminMaxVal;
    // a single set bit whose position depends on ix
    uint64_t xorVal = 1ULL << (lix % (bitSize - 1));
    // make higher bits differ while lower bits match
    uint64_t xchgVal = ((lix/3) % 64) | (lix << (bitSize/2));
    uint64_t output = 0;

    // structured
    OP_TEST(USTRUCT_CALL, SSTRUCT64_CALL, g_struct64Buf, g_struct64Buf)
    XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchg64Buf)

    // ByteAddressBuffer for 64-bit values are a special case. inlined here
    // (the method names carry a 64 suffix, so OP_TEST/XCHG_TEST are spelled out)
    URAW_CALL(InterlockedAdd64, g_raw64Buf, 0, addVal);
    SRAW_CALL(InterlockedMin64, g_raw64Buf, 1, sminMaxVal);
    SRAW_CALL(InterlockedMax64, g_raw64Buf, 2, sminMaxVal);
    URAW_CALL(InterlockedMin64, g_raw64Buf, 1, uminMaxVal);
    URAW_CALL(InterlockedMax64, g_raw64Buf, 2, uminMaxVal);
    URAW_CALL(InterlockedAnd64, g_raw64Buf, 3, ~value);
    URAW_CALL(InterlockedOr64, g_raw64Buf, 4, value);
    URAW_CALL(InterlockedXor64, g_raw64Buf, 5, xorVal);

    uint xchgSlot = (ix/3) % 64;
    RAW_CALL3(InterlockedCompareStore64, g_rawXchg64Buf, xchgSlot, 0, xchgVal - 2);
    RAW_CALL4(InterlockedCompareExchange64, g_rawXchg64Buf, xchgSlot, xchgVal - 2, xchgVal - 1, output);
    if (output == xchgVal - 2) {
      RAW_CALL3(InterlockedExchange64, g_rawXchg64Buf, xchgSlot, xchgVal, output);
    }
  }
  // Runs the 64-bit atomic tests on the typed buffers and the textures.
  //   ix      - index of the calling thread; every operand is derived from it
  //   bitSize - width in bits of the values under test
  // The OP_TEST, XCHG_TEST and *_CALL macros expand in this scope and refer to
  // the locals below by name, so their spelling must not change.
  void AtomicTyped64Test(uint ix, uint64_t bitSize) {
    uint64_t lix = ix;
    uint stride = 1;
    // lix OR-ed with a copy of itself shifted up by half the bit width
    uint64_t value = lix | (lix << (bitSize/2));
    uint64_t addVal = value;
    // odd values are used as they are, even values are bit-inverted
    uint64_t uminMaxVal = (value & 1) != 0 ? value : ~value;
    int64_t sminMaxVal = (int64_t)uminMaxVal;
    // a single set bit whose position depends on ix
    uint64_t xorVal = 1ULL << (lix % (bitSize - 1));
    // make higher bits differ while lower bits match
    uint64_t xchgVal = ((lix/3) % 64) | (lix << (bitSize/2));
    uint64_t output = 0;

    // typed buffer
    OP_TEST(VEC_CALL, VEC_CALL, g_uint64Buf, g_sint64Buf)
    XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchg64Buf)

    // texture
    OP_TEST(VEC_CALL, VEC_CALL, g_utex64Buf, g_stex64Buf)
    XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtex64Buf)
  }
  // Seeds the groupshared test arrays and then synchronizes the group.
  //   ix - group index of the calling thread; thread ix initializes slot ix
  void InitSharedMem(uint ix) {
    // Zero-init shared memory, with special cases
    if (ix < 6) {
      int uintSeed = 0;
      if (ix == 1)
        uintSeed = 99999999;
      else if (ix == 3)
        uintSeed = -1;
      g_uintShare[ix] = uintSeed;
    }
    if (ix < 3) {
      int sintSeed = 0;
      if (ix == 1)
        sintSeed = 99999999;
      g_sintShare[ix] = sintSeed;
    }
    if (ix < 64) {
      g_xchgShare[ix] = 0;
    }
    GroupMemoryBarrierWithGroupSync();
  }
  // Runs the 32-bit atomic tests on the groupshared arrays, then synchronizes
  // the group.
  //   ix - index of the calling thread; every operand is derived from it
  // The OP_TEST, XCHG_TEST and VEC_CALL* macros expand in this scope and refer
  // to the locals below by name, so their spelling must not change.
  void AtomicGroupSharedTest(uint ix) {
    uint stride = 1;
    uint bitSize = 32;
    // ix OR-ed with a copy of itself shifted up by half the bit width
    uint value = ix | (ix << (bitSize/2));
    // 32 bits isn't enough room to duplicate upper and lower
    uint addVal = ix;
    // odd values are used as they are, even values are bit-inverted
    uint uminMaxVal = (value & 1) != 0 ? value : ~value;
    int sminMaxVal = (int)uminMaxVal;
    // a single set bit whose position depends on ix
    uint xorVal = 1U << (ix % (bitSize - 1));
    // make higher bits differ while lower bits match
    uint xchgVal = ((ix/3) % 64) | (ix << (bitSize/2));
    uint output = 0;

    OP_TEST(VEC_CALL, VEC_CALL, g_uintShare, g_sintShare)
    XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)

    GroupMemoryBarrierWithGroupSync();
  }
  // Payloads carry the AS test results over to the MS, where they are finalized
  struct Payload {
    uint arith[16]; // copies of the groupshared uint/sint result slots
    uint xchg[64];  // copies of the groupshared exchange slots
  };

  // 64-bit counterpart of Payload
  struct Payload64 {
    uint64_t arith[16];
    uint64_t xchg[64];
  };

  // Positions of the six vertices of the two triangles emitted by the mesh shaders
  static float4 g_Verts[6] = {
    float4(-1.0f,  1.0f, 0.0f, 1.0f),
    float4( 1.0f,  1.0f, 0.0f, 1.0f),
    float4(-1.0f, -1.0f, 0.0f, 1.0f),
    float4(-1.0f, -1.0f, 0.0f, 1.0f),
    float4( 1.0f,  1.0f, 0.0f, 1.0f),
    float4( 1.0f, -1.0f, 0.0f, 1.0f)
  };

  // Texture coordinates matching g_Verts entry for entry
  static float2 g_UV[6] = {
    float2(0.0f, 0.0f),
    float2(1.0f, 0.0f),
    float2(0.0f, 1.0f),
    float2(0.0f, 1.0f),
    float2(1.0f, 0.0f),
    float2(1.0f, 1.0f)
  };

  // Payload instance handed to DispatchMesh by the amplification shaders
  groupshared Payload payload;
  // Amplification shader entry point of the 32-bit atomics test.
  [NumThreads(8, 8, 2)]
  void ASMain32(uint ix : SV_GroupIndex) {
    // UAV atomics; the index range starts above the one MSMain32 passes (64*64 + ix)
    AtomicTest(64*64 + 8*8*2 + ix, 32);

    // groupshared atomics
    InitSharedMem(ix);
    AtomicGroupSharedTest(ix);

    // Copy AS test results to payload and ultimately to MS
    // More threads than results are possible,
    // so indices will result in duplicate copies
    uint uintSlot = ix % 6;
    uint sintSlot = ix % 3;
    uint xchgSlot = ix % 64;
    payload.arith[uintSlot] = g_uintShare[uintSlot];
    payload.arith[sintSlot + 6] = g_sintShare[sintSlot + 1];
    payload.xchg[xchgSlot] = g_xchgShare[xchgSlot];

    DispatchMesh(1, 1, 1, payload);
  }
  // Mesh shader entry point of the 32-bit atomics test.
  // Emits a static two-triangle quad, runs the UAV atomics, then continues the
  // groupshared test from the state ASMain32 stored in the payload and writes the
  // combined AS + MS results to the output UAVs.
  //   ix      - group index of the calling thread
  //   payload - results packed by ASMain32
  //   verts   - output vertices (6)
  //   tris    - output triangle indices (2)
  [NumThreads(8, 8, 2)]
  [OutputTopology("triangle")]
  void MSMain32(
    uint ix : SV_GroupIndex,
    in payload Payload payload,
    out vertices PSInput verts[6],
    out indices uint3 tris[2]) {
    SetMeshOutputCounts(6, 2);
    // Assign static fullscreen 2 tri quad
    verts[ix%6].position = g_Verts[ix%6];
    verts[ix%6].uv = g_UV[ix%6];
    tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
    AtomicTest(64*64 + ix, 32);
    // Load AS test results from payload
    // More threads than results are possible,
    // so indices will result in duplicate copies
    g_uintShare[ix%6] = payload.arith[ix%6];
    // ASMain32 packed g_sintShare[k + 1] into arith[k + 6] and the export below
    // reads g_sintShare[k + 1], so unpack with the same +1 offset. Only slots 1
    // and 2 are restored: within the exported range they are the only ones that
    // InitSharedMem seeds, and writing just those never touches a slot that the
    // previous code did not already write.
    g_sintShare[ix%2 + 1] = payload.arith[ix%2 + 6];
    g_xchgShare[ix%64] = payload.xchg[ix%64];
    GroupMemoryBarrierWithGroupSync();
    AtomicGroupSharedTest(8*8*2 + ix);
    // Copy final AS + MS results to output UAVs
    g_shareBuf[ix%6].x = g_uintShare[ix%6];
    g_shareBuf[ix%3 + 6].x = g_sintShare[ix%3 + 1];
    g_shareXchgBuf[ix%64].x = g_xchgShare[ix%64];
  }
  // Vertex shader entry point of the 32-bit atomics test: runs the UAV atomics
  // and passes position and uv through.
  PSInput VSMain32(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
    // same index range that MSMain32 passes
    AtomicTest(64*64 + ix, 32);

    PSInput vsOut;
    vsOut.position = float4(position, 1.0);
    vsOut.uv = uv;
    return vsOut;
  }
  // Pixel shader entry point of the 32-bit atomics test.
  // Derives a per-pixel index from the interpolated uv and runs the UAV atomics.
  float4 PSMain32(PSInput input) : SV_TARGET {
    uint row = uint(input.uv.y*64);
    uint ix = row*64 + input.uv.x*64;
    AtomicTest(ix, 32);
    return float4(1, 1, 1, 1);
  }
  // Compute shader entry point of the 32-bit atomics test: UAV atomics first,
  // then the groupshared test, whose results are copied to the output UAVs.
  [NumThreads(32, 32, 1)]
  void CSMain32(uint ix : SV_GroupIndex) {
    AtomicTest(ix, 32);
    InitSharedMem(ix);
    AtomicGroupSharedTest(ix);

    // More threads than results, so several threads write the same slot
    uint uintSlot = ix % 6;
    uint sintSlot = ix % 3;
    uint xchgSlot = ix % 64;
    g_shareBuf[uintSlot].x = g_uintShare[uintSlot];
    g_shareBuf[sintSlot + 6].x = g_sintShare[sintSlot + 1];
    g_shareXchgBuf[xchgSlot].x = g_xchgShare[xchgSlot];
  }
  // Amplification shader entry point of the 64-bit raw/structured atomics test.
  // MSMainRaw64 does not read the payload, but DispatchMesh still takes one.
  // The groupshared `payload` is passed, as ASMain32 and ASMainTyped64 do,
  // instead of a function-local copy: the mesh shader specification declares the
  // DispatchMesh payload argument as groupshared.
  [NumThreads(8, 8, 2)]
  void ASMainRaw64(uint ix : SV_GroupIndex) {
    AtomicRaw64Test(64*64 + 8*8*2 + ix, 64);
    DispatchMesh(1, 1, 1, payload);
  }
  // Mesh shader entry point of the 64-bit raw/structured atomics test.
  // Emits a static two-triangle quad and runs the UAV atomics; the payload is
  // not read.
  [NumThreads(8, 8, 2)]
  [OutputTopology("triangle")]
  void MSMainRaw64(
    uint ix : SV_GroupIndex,
    in payload Payload payload,
    out vertices PSInput verts[6],
    out indices uint3 tris[2]) {
    SetMeshOutputCounts(6, 2);

    // Assign static fullscreen 2 tri quad
    uint vert = ix % 6;
    verts[vert].position = g_Verts[vert];
    verts[vert].uv = g_UV[vert];

    uint tri = ix & 1;
    uint firstVert = tri*3;
    tris[tri] = uint3(firstVert, firstVert + 1, firstVert + 2);

    AtomicRaw64Test(64*64 + ix, 64);
  }
  // Vertex shader entry point of the 64-bit raw/structured atomics test: runs
  // the UAV atomics and passes position and uv through.
  PSInput VSMainRaw64(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
    // same index range that MSMainRaw64 passes
    AtomicRaw64Test(64*64 + ix, 64);

    PSInput vsOut;
    vsOut.position = float4(position, 1.0);
    vsOut.uv = uv;
    return vsOut;
  }
  // Pixel shader entry point of the 64-bit raw/structured atomics test.
  // Derives a per-pixel index from the interpolated uv and runs the UAV atomics.
  float4 PSMainRaw64(PSInput input) : SV_TARGET {
    uint row = uint(input.uv.y*64);
    uint ix = row*64 + input.uv.x*64;
    AtomicRaw64Test(ix, 64);
    return float4(1, 1, 1, 1);
  }
  // Compute shader entry point of the 64-bit raw/structured atomics test.
  [NumThreads(32, 32, 1)]
  void CSMainRaw64(uint ix : SV_GroupIndex)
  {
    // the group index doubles as the test index; values are 64 bits wide
    AtomicRaw64Test(ix, 64);
  }
  // Amplification shader entry point of the 64-bit typed-resource atomics test.
  // Runs the UAV atomics and launches one mesh shader group with the
  // groupshared payload, which this shader does not fill in.
  [NumThreads(8, 8, 2)]
  void ASMainTyped64(uint ix : SV_GroupIndex)
  {
    // index range starts above the one MSMainTyped64 passes (64*64 + ix)
    AtomicTyped64Test(64*64 + 8*8*2 + ix, 64);
    DispatchMesh(1, 1, 1, payload);
  }
  // Mesh shader entry point of the 64-bit typed-resource atomics test.
  // Emits a static two-triangle quad and runs the UAV atomics; the payload is
  // not read.
  [NumThreads(8, 8, 2)]
  [OutputTopology("triangle")]
  void MSMainTyped64(
    uint ix : SV_GroupIndex,
    in payload Payload payload,
    out vertices PSInput verts[6],
    out indices uint3 tris[2]) {
    SetMeshOutputCounts(6, 2);

    // Assign static fullscreen 2 tri quad
    uint vert = ix % 6;
    verts[vert].position = g_Verts[vert];
    verts[vert].uv = g_UV[vert];

    uint tri = ix & 1;
    uint firstVert = tri*3;
    tris[tri] = uint3(firstVert, firstVert + 1, firstVert + 2);

    AtomicTyped64Test(64*64 + ix, 64);
  }
  // Vertex shader entry point of the 64-bit typed-resource atomics test: runs
  // the UAV atomics and passes position and uv through.
  PSInput VSMainTyped64(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
    // same index range that MSMainTyped64 passes
    AtomicTyped64Test(64*64 + ix, 64);

    PSInput vsOut;
    vsOut.position = float4(position, 1.0);
    vsOut.uv = uv;
    return vsOut;
  }
  // Pixel shader entry point of the 64-bit typed-resource atomics test.
  // Derives a per-pixel index from the interpolated uv and runs the UAV atomics.
  float4 PSMainTyped64(PSInput input) : SV_TARGET {
    uint row = uint(input.uv.y*64);
    uint ix = row*64 + input.uv.x*64;
    AtomicTyped64Test(ix, 64);
    return float4(1, 1, 1, 1);
  }
  // Compute shader entry point of the 64-bit typed-resource atomics test.
  [NumThreads(32, 32, 1)]
  void CSMainTyped64(uint ix : SV_GroupIndex)
  {
    // the group index doubles as the test index; values are 64 bits wide
    AtomicTyped64Test(ix, 64);
  }
  2089. ]]>
  2090. </Shader>
  2091. </ShaderOp>
  2092. <ShaderOp Name="FloatAtomics" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
  2093. <RootSignature>
  2094. RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
  2095. DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4)),
  2096. StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
  2097. </RootSignature>
  2098. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  2099. { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
  2100. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  2101. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  2102. { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
  2103. { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
  2104. { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
  2105. </Resource>
  2106. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="64" Height="64" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
  2107. <Resource Name="U0" Dimension="BUFFER" Width="2816"
  2108. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  2109. Init="Zero" ReadBack="true" />
  2110. <Resource Name="U1" Dimension="BUFFER" Width="256"
  2111. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  2112. Init="Zero" ReadBack="true" />
  2113. <Resource Name="U2" Dimension="BUFFER" Width="256"
  2114. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  2115. Init="Zero" ReadBack="true" />
  2116. <Resource Name="U3" Dimension="TEXTURE1D" Width="64" Format="R32_FLOAT"
  2117. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  2118. Init="Zero" ReadBack="true" />
  2119. <Resource Name="U4" Dimension="BUFFER" Width="256"
  2120. Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
  2121. Init="Zero" ReadBack="true" />
  2122. <RootValues>
  2123. <RootValue HeapName="ResHeap" />
  2124. </RootValues>
  2125. <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
  2126. <Descriptor Name="U0" Kind="UAV" ResName="U0"
  2127. NumElements="64" StructureByteStride="44" />
  2128. <Descriptor Name="U1" Kind="UAV" ResName="U1"
  2129. NumElements="64" StructureByteStride="4" />
  2130. <Descriptor Name="U2" Kind="UAV" ResName="U2"
  2131. NumElements="64" Format="R32_FLOAT" />
  2132. <Descriptor Name="U3" Kind="UAV" ResName="U3" Dimension="TEXTURE1D"
  2133. NumElements="64" Format="R32_FLOAT" />
  2134. <Descriptor Name="U4" Kind="UAV" ResName="U4"
  2135. NumElements="64" Format="R32_FLOAT" />
  2136. </DescriptorHeap>
  2137. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  2138. <Descriptor Name="RTarget" Kind="RTV"/>
  2139. </DescriptorHeap>
  2140. <InputElements>
  2141. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  2142. <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
  2143. </InputElements>
  2144. <RenderTargets>
  2145. <RenderTarget Name="RTarget"/>
  2146. </RenderTargets>
  2147. <Shader Name="AS" Target="as_6_5" EntryPoint="ASMain" Text="@CS"/>
  2148. <Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain" Text="@CS"/>
  2149. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@CS"/>
  2150. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain" Text="@CS"/>
  2151. <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain">
  2152. <![CDATA[
  // Vertex-to-pixel interface shared by the VS, MS and PS entry points
  2153. struct PSInput {
  2154. float4 position : SV_POSITION;
  2155. float2 uv : TEXCOORD;
  2156. };
  // Element type of the structured UAV. The atomics only target fltEl[1]
  // (see STRUCT_CALL3/STRUCT_CALL4); prepad and postpad surround it with data
  // the test never touches. 24 + 8 + 12 = 44 bytes, which is the
  // StructureByteStride of the U0 descriptor.
  2157. struct AtomicStuff {
  2158. float2 prepad[3];
  2159. float fltEl[2];
  2160. struct useless {
  2161. uint3 unused;
  2162. } postpad;
  2163. };
  // One exchange-test target per resource kind, bound to u0..u3
  2164. RWStructuredBuffer<AtomicStuff> g_strXchgBuf : register(u0);
  2165. RWByteAddressBuffer g_rawXchgBuf : register(u1);
  2166. RWBuffer<float> g_xchgBuf : register(u2);
  2167. RWTexture1D<float> g_xtexBuf : register(u3);
  // Receives the groupshared results (written by MSMain and CSMain)
  2168. RWBuffer<float> g_shareXchgBuf : register(u4);
  // Groupshared exchange-test target; the code in this shader only
  // initializes, tests and exports entries 0..63
  2169. groupshared float g_xchgShare[1024];
  // Call wrappers: apply `op` to element `ix` of a typed/groupshared array
  // (VEC), to fltEl[1] of structured element `ix` (STRUCT), or as a method at
  // byte offset 4*ix of a raw buffer (RAW). The 4-argument forms also pass the
  // variable that receives the original value.
  2170. #define VEC_CALL3(op, uav, ix, cmp, val) op(uav[(ix)], cmp, val)
  2171. #define VEC_CALL4(op, uav, ix, cmp, val, o) op(uav[(ix)], cmp, val, o)
  2172. #define STRUCT_CALL3(op, uav, ix, cmp, val) op(uav[ix].fltEl[1], cmp, val)
  2173. #define STRUCT_CALL4(op, uav, ix, cmp, val, o) op(uav[ix].fltEl[1], cmp, val, o)
  2174. #define RAW_CALL3(op, uav, ix, cmp, val) uav.op(4*(ix), cmp, val)
  2175. #define RAW_CALL4(op, uav, ix, cmp, val, o) uav.op(4*(ix), cmp, val, o)
  // XCHG_TEST(call3, call4, uav): compare/exchange sequence on slot (ix/3)%63 + 1
  // of uav, which leaves slot 0 free for the NaN checks. Expands in the caller's
  // scope and uses its ix, xchgVal and output by name.
  //  1. CompareStore writes xchgVal - 2 only when the slot matches 0.
  //  2. CompareExchange writes xchgVal - 1 only when the slot matches
  //     xchgVal - 2; output receives the value the slot held before the call.
  //  3. When output equals xchgVal - 2, Exchange writes the final xchgVal.
  2176. // The first of four to match gets the first and then the winner performs the last two exchanges
  2177. #define XCHG_TEST(call3, call4, uav) \
  2178. call3(InterlockedCompareStoreFloatBitwise, uav, (ix/3)%63 + 1, 0, xchgVal - 2); \
  2179. call4(InterlockedCompareExchangeFloatBitwise, uav, (ix/3)%63 + 1, xchgVal - 2, xchgVal - 1, output); \
  2180. if (output == xchgVal - 2) { call3(InterlockedExchange, uav, (ix/3)%63 + 1, xchgVal, output);}
  // Runs the float compare/exchange test on the typed buffer, the structured
  // buffer and the texture through XCHG_TEST, repeats it by hand on the raw
  // buffer, and finally exercises a NaN operand on slot 0 of each resource.
  //   ix - index of the calling thread; converted to float it becomes xchgVal
  // XCHG_TEST expands in this scope and uses ix, xchgVal and output by name.
  2181. void AtomicTest(uint ix) {
  2182. float xchgVal = ix;
  2183. float output = 0;
  2184. XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgBuf)
  2185. XCHG_TEST(STRUCT_CALL3, STRUCT_CALL4, g_strXchgBuf)
  2186. XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xtexBuf)
  // The raw-buffer exchange method is InterlockedExchangeFloat, not the
  // InterlockedExchange that XCHG_TEST names, so the sequence is spelled out.
  2187. // Special case for ByteAddressBuffers
  2188. RAW_CALL3(InterlockedCompareStoreFloatBitwise, g_rawXchgBuf, (ix/3)%63 + 1, 0, xchgVal - 2);
  2189. RAW_CALL4(InterlockedCompareExchangeFloatBitwise, g_rawXchgBuf, (ix/3)%63 + 1, xchgVal - 2, xchgVal - 1, output);
  2190. if (output == xchgVal - 2) { RAW_CALL3(InterlockedExchangeFloat, g_rawXchgBuf, (ix/3)%63 + 1, xchgVal, output);}
  // For each resource: swap sqrt(-1) into slot 0 if it still matches 0; a thread
  // whose output reads 0 then compare-stores 0.123 against sqrt(-1).
  // NOTE(review): presumably the second store can only land when the comparison
  // is bitwise, since a NaN does not compare equal by value - confirm against
  // the host-side expectations.
  2191. // Check NaN corner case
  2192. InterlockedCompareExchangeFloatBitwise(g_xchgBuf[0], 0, sqrt(-1), output);
  2193. if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgBuf[0], sqrt(-1), 0.123);
  2194. InterlockedCompareExchangeFloatBitwise(g_strXchgBuf[0].fltEl[1], 0, sqrt(-1), output);
  2195. if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_strXchgBuf[0].fltEl[1], sqrt(-1), 0.123);
  2196. g_rawXchgBuf.InterlockedCompareExchangeFloatBitwise(0, 0, sqrt(-1), output);
  2197. if (output == 0.0) g_rawXchgBuf.InterlockedCompareStoreFloatBitwise(0, sqrt(-1), 0.123);
  2198. InterlockedCompareExchangeFloatBitwise(g_xtexBuf[0], 0, sqrt(-1), output);
  2199. if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xtexBuf[0], sqrt(-1), 0.123);
  2200. }
  // Clears the 64 groupshared exchange slots the test uses, then synchronizes
  // the group.
  //   ix - group index of the calling thread; thread ix clears slot ix % 64
  void InitSharedMem(uint ix) {
    // Zero-init shared memory
    uint slot = ix % 64;
    g_xchgShare[slot] = 0;
    GroupMemoryBarrierWithGroupSync();
  }
  // Runs the float compare/exchange test and the NaN check on the groupshared
  // array, then synchronizes the group.
  //   ix - index of the calling thread; converted to float it becomes xchgVal
  // XCHG_TEST expands in this scope and uses ix, xchgVal and output by name.
  2206. void AtomicGroupSharedTest(uint ix) {
  2207. float xchgVal = ix;
  2208. float output = 0;
  2209. XCHG_TEST(VEC_CALL3, VEC_CALL4, g_xchgShare)
  // NaN check on slot 0, which XCHG_TEST never addresses
  2210. InterlockedCompareExchangeFloatBitwise(g_xchgShare[0], 0, sqrt(-1), output);
  2211. if (output == 0.0) InterlockedCompareStoreFloatBitwise(g_xchgShare[0], sqrt(-1), 0.123);
  2212. GroupMemoryBarrierWithGroupSync();
  2213. }
  // Payloads carry the AS test results over to the MS, where they are finalized
  struct Payload {
    float xchg[64]; // copies of the groupshared exchange slots
  };

  // Positions of the six vertices of the two triangles emitted by the mesh shader
  static float4 g_Verts[6] = {
    float4(-1.0f,  1.0f, 0.0f, 1.0f),
    float4( 1.0f,  1.0f, 0.0f, 1.0f),
    float4(-1.0f, -1.0f, 0.0f, 1.0f),
    float4(-1.0f, -1.0f, 0.0f, 1.0f),
    float4( 1.0f,  1.0f, 0.0f, 1.0f),
    float4( 1.0f, -1.0f, 0.0f, 1.0f)
  };

  // Texture coordinates matching g_Verts entry for entry
  static float2 g_UV[6] = {
    float2(0.0f, 0.0f),
    float2(1.0f, 0.0f),
    float2(0.0f, 1.0f),
    float2(0.0f, 1.0f),
    float2(1.0f, 0.0f),
    float2(1.0f, 1.0f)
  };

  // Payload instance handed to DispatchMesh by the amplification shader
  groupshared Payload payload;
  // Amplification shader entry point of the float atomics test.
  [NumThreads(8, 8, 2)]
  void ASMain(uint ix : SV_GroupIndex) {
    // UAV atomics; the index range starts above the one MSMain passes (64*64 + ix)
    AtomicTest(64*64 + 8*8*2 + ix);

    // groupshared atomics
    InitSharedMem(ix);
    AtomicGroupSharedTest(ix);

    // Copy AS test results to payload and ultimately to MS
    // More threads than results are possible,
    // so indices will result in duplicate copies
    uint slot = ix % 64;
    payload.xchg[slot] = g_xchgShare[slot];

    DispatchMesh(1, 1, 1, payload);
  }
  // Mesh shader entry point of the float atomics test.
  // Emits a static two-triangle quad, runs the UAV atomics, then continues the
  // groupshared test from the state ASMain stored in the payload and writes the
  // combined AS + MS results to the output UAV.
  [NumThreads(8, 8, 2)]
  [OutputTopology("triangle")]
  void MSMain(
    uint ix : SV_GroupIndex,
    in payload Payload payload,
    out vertices PSInput verts[6],
    out indices uint3 tris[2]) {
    SetMeshOutputCounts(6, 2);

    // Assign static fullscreen 2 tri quad
    uint vert = ix % 6;
    verts[vert].position = g_Verts[vert];
    verts[vert].uv = g_UV[vert];

    uint tri = ix & 1;
    uint firstVert = tri*3;
    tris[tri] = uint3(firstVert, firstVert + 1, firstVert + 2);

    AtomicTest(64*64 + ix);

    // Load AS test results from payload
    // More threads than results are possible,
    // so indices will result in duplicate copies
    uint slot = ix % 64;
    g_xchgShare[slot] = payload.xchg[slot];
    GroupMemoryBarrierWithGroupSync();

    AtomicGroupSharedTest(8*8*2 + ix);

    // Copy final AS + MS results to output UAVs
    g_shareXchgBuf[slot] = g_xchgShare[slot];
  }
  // Vertex shader entry point of the float atomics test: runs the UAV atomics
  // and passes position and uv through.
  PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD, uint ix : SV_VertexID) {
    // same index range that MSMain passes
    AtomicTest(64*64 + ix);

    PSInput vsOut;
    vsOut.position = float4(position, 1.0);
    vsOut.uv = uv;
    return vsOut;
  }
  // Pixel shader entry point of the float atomics test.
  // Derives a per-pixel index from the interpolated uv and runs the UAV atomics.
  float4 PSMain(PSInput input) : SV_TARGET {
    uint row = uint(input.uv.y*64);
    uint ix = row*64 + input.uv.x*64;
    AtomicTest(ix);
    return float4(1, 1, 1, 1);
  }
  // Compute shader entry point of the float atomics test: UAV atomics first,
  // then the groupshared test, whose results are copied to the output UAV.
  [NumThreads(32, 32, 1)]
  void CSMain(uint ix : SV_GroupIndex) {
    AtomicTest(ix);
    InitSharedMem(ix);
    AtomicGroupSharedTest(ix);

    // More threads than results, so several threads write the same slot
    uint slot = ix % 64;
    g_shareXchgBuf[slot] = g_xchgShare[slot];
  }
  2285. ]]>
  2286. </Shader>
  2287. </ShaderOp>
  2288. <ShaderOp Name="HelperLaneTestNoWave" PS="PS" VS="VS" TopologyType="TRIANGLE">
  2289. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0)</RootSignature>
  2290. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  2291. { { -1.0f, 1.0f, 0.0f } },
  2292. { { 1.0f, 1.0f, 0.0f } },
  2293. { { 1.0f, -1.0f, 0.0f } },
  2294. </Resource>
  2295. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  2296. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  2297. <RootValues>
  2298. <RootValue Index="0" ResName="UAVBuffer0" />
  2299. </RootValues>
  2300. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  2301. <Descriptor Name="RTarget" Kind="RTV"/>
  2302. </DescriptorHeap>
  2303. <InputElements>
  2304. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  2305. </InputElements>
  2306. <RenderTargets>
  2307. <RenderTarget Name="RTarget">
  2308. <Viewport Width="2.0" Height="2.0" MaxDepth="1.0"/>
  2309. </RenderTarget>
  2310. </RenderTargets>
  2311. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS" />
  2312. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
  2313. <![CDATA[
  #ifdef ISHELPERLANE_PLACEHOLDER
  // Stand-in for IsHelperLane() that decides from the pixel position alone.
  //   pos        - SV_POSITION of the pixel
  //   first_call - true for the call made before the discard
  // First call: true only for pos.x < 1 and pos.y > 1.
  // Later calls: true for every pixel with pos.x < 1.
  bool ph_IsHelperLane(float4 pos, bool first_call) {
    bool leftColumn = pos.x < 1.0f;
    if (!first_call)
      return leftColumn;
    return leftColumn && pos.y > 1.0f;
  }
  #endif // ISHELPERLANE_PLACEHOLDER
  // Helper-lane flags as written by PSMain from the bottom-right pixel:
  // _11 is that pixel's own flag, _01 the value read across X, _10 the value
  // read across Y and _00 the value read across the diagonal.
  2324. struct HelperLaneTestResult{
  2325. int is_helper_00;
  2326. int is_helper_10;
  2327. int is_helper_01;
  2328. int is_helper_11;
  2329. };
  // One entry per loop iteration of PSMain (index 0 and 1)
  2330. RWStructuredBuffer<HelperLaneTestResult> g_testResults : register(u0);
  // Returns value plus or minus its fine X derivative.
  //   value  - this pixel's value
  //   isLeft - selects the sign: the derivative is added when true,
  //            subtracted when false
  int ReadAcrossX_DD(int value, bool isLeft) {
    int delta = ddx_fine(value);
    if (isLeft)
      return value + delta;
    return value - delta;
  }
  // Returns value plus or minus its fine Y derivative.
  //   value - this pixel's value
  //   isTop - selects the sign: the derivative is added when true,
  //           subtracted when false
  int ReadAcrossY_DD(int value, bool isTop) {
    int delta = ddy_fine(value);
    if (isTop)
      return value + delta;
    return value - delta;
  }
  // Applies ReadAcrossX_DD and then ReadAcrossY_DD to its result.
  int ReadAcrossDiagonal_DD(int value, bool isLeft, bool isTop) {
    int acrossX = ReadAcrossX_DD(value, isLeft);
    return ReadAcrossY_DD(acrossX, isTop);
  }
  // Vertex-to-pixel interface: position only
  struct PSInput {
    float4 pos : SV_POSITION;
  };

  // Pass-through vertex shader: forwards the input position with w = 1
  PSInput VSMain(float3 pos : POSITION) {
    PSInput vsOut;
    vsOut.pos = float4(pos, 1);
    return vsOut;
  }
  // Pixel shader of the derivative-based helper lane test.
  // Runs two passes. In each pass it takes this pixel's helper-lane flag, reads
  // it across X, Y and the diagonal through the *_DD helpers, and lets the
  // bottom-right pixel record all four values in g_testResults[pass]. After the
  // first pass the top-left pixel is discarded.
  uint4 PSMain(PSInput input) : SV_TARGET {
    bool isLeft = (input.pos.x < 1.0f);
    bool isTop = (input.pos.y < 1.0f);
    bool isBottomRight = !isLeft && !isTop;
    bool isTopLeft = isLeft && isTop;

    for (int pass = 0; pass < 2; ++pass) {
  #ifdef ISHELPERLANE_PLACEHOLDER
      int helper = ph_IsHelperLane(input.pos, pass == 0);
  #else
      int helper = IsHelperLane();
  #endif
      int helperAcrossX = ReadAcrossX_DD(helper, isLeft);
      int helperAcrossY = ReadAcrossY_DD(helper, isTop);
      int helperAcrossDiag = ReadAcrossDiagonal_DD(helper, isLeft, isTop);

      // bottom right pixel writes results
      if (isBottomRight) {
        g_testResults[pass].is_helper_00 = helperAcrossDiag;
        g_testResults[pass].is_helper_10 = helperAcrossY;
        g_testResults[pass].is_helper_01 = helperAcrossX;
        g_testResults[pass].is_helper_11 = helper;
      }

      // discard top left pixel
      if (pass == 0 && isTopLeft)
        discard;
    }
    return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
  }
  2373. ]]>
  2374. </Shader>
  2375. </ShaderOp>
  2376. <ShaderOp Name="HelperLaneTestWave" CS="CS" PS="PS" VS="VS" DispatchX="3" DispatchY="1" TopologyType="TRIANGLE">
  2377. <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0)</RootSignature>
  2378. <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
  2379. { { -1.0f, 1.0f, 0.0f } },
  2380. { { 1.0f, 1.0f, 0.0f } },
  2381. { { 1.0f, -1.0f, 0.0f } },
  2382. </Resource>
  2383. <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
  2384. <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
  2385. <RootValues>
  2386. <RootValue Index="0" ResName="UAVBuffer0" />
  2387. </RootValues>
  2388. <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
  2389. <Descriptor Name="RTarget" Kind="RTV"/>
  2390. </DescriptorHeap>
  2391. <InputElements>
  2392. <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
  2393. </InputElements>
  2394. <RenderTargets>
  2395. <RenderTarget Name="RTarget">
  2396. <Viewport Width="2.0" Height="2.0" MaxDepth="1.0"/>
  2397. </RenderTarget>
  2398. </RenderTargets>
  2399. <Shader Name="PS65" Target="ps_6_5" EntryPoint="PSMain65" Text="@CS"/>
  2400. <Shader Name="VS65" Target="vs_6_5" EntryPoint="VSMain65" Text="@CS"/>
  2401. <Shader Name="CS65" Target="cs_6_5" EntryPoint="CSMain65" Text="@CS"/>
  2402. <Shader Name="PS66" Target="ps_6_6" EntryPoint="PSMain65" Text="@CS"/>
  2403. <Shader Name="VS66" Target="vs_6_6" EntryPoint="VSMain65" Text="@CS"/>
  2404. <Shader Name="CS66" Target="cs_6_6" EntryPoint="CSMain65" Text="@CS"/>
  2405. <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@CS"/>
  2406. <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain" Text="@CS"/>
  2407. <Shader Name="CS" Target="cs_6_0" EntryPoint="CSMain">
  2408. <![CDATA[
  // CALL(x) expands to x itself, or to the ph_ stand-in when the shader is built
  // with ISHELPERLANE_PLACEHOLDER defined.
  #ifdef ISHELPERLANE_PLACEHOLDER
  #define CALL(x) ph_##x

  // Stand-in for IsHelperLane() without position information: always false
  bool ph_IsHelperLane() {
    return false;
  }

  // Stand-in for IsHelperLane() that decides from the pixel position alone.
  // First call: true only for pos.x < 1 and pos.y > 1.
  // Later calls: true for every pixel with pos.x < 1.
  bool ph_IsHelperLane(float4 pos, bool first_call) {
    bool leftColumn = pos.x < 1.0f;
    if (!first_call)
      return leftColumn;
    return leftColumn && pos.y > 1.0f;
  }
  #else
  #define CALL(x) x
  #endif
  // Results of RunHelperLaneWaveTests60, one field per wave intrinsic call
  2425. // 6.0 wave ops
  2426. struct HelperLaneWaveTestResult60 {
  2427. int anyTrue;
  2428. int allTrue;
  2429. uint4 ballot;
  2430. int waterfallLoopCount;
  2431. int allEqual;
  2432. int countBits;
  2433. int sum;
  2434. int product;
  2435. int bitAnd;
  2436. int bitOr;
  2437. int bitXor;
  2438. int min;
  2439. int max;
  2440. int prefixCountBits;
  2441. int prefixProduct;
  2442. int prefixSum;
  2443. };
  // Results of the quad read tests: this lane's helper flag and the flag read
  // across X, across Y and across the diagonal
  2444. struct HelperLaneQuadTestResult {
  2445. int is_helper_this;
  2446. int is_helper_across_X;
  2447. int is_helper_across_Y;
  2448. int is_helper_across_Diag;
  2449. };
  // Results of RunHelperLaneWaveTests65, one field per wave intrinsic call
  2450. // 6.5 wave ops
  2451. struct HelperLaneWaveTestResult65 {
  2452. uint4 match;
  2453. int mpCountBits;
  2454. int mpSum;
  2455. int mpProduct;
  2456. int mpBitAnd;
  2457. int mpBitOr;
  2458. int mpBitXor;
  2459. };
  // One complete record of the output buffer
  2460. struct HelperLaneWaveTestResult {
  2461. HelperLaneWaveTestResult60 sm60_wave;
  2462. HelperLaneQuadTestResult sm60_quad;
  2463. HelperLaneWaveTestResult65 sm65_wave;
  2464. };
  2465. RWStructuredBuffer<HelperLaneWaveTestResult> g_TestResults : register(u0);
  // Record index used by each stage. The compute and vertex entry points both
  // write record 0; the pixel entry points write record 1 before the discard
  // and record 2 after it.
  2466. #define CS_INDEX 0
  2467. #define VS_INDEX 0
  2468. #define PS_INDEX 1
  2469. #define PS_INDEX_AFTER_DISCARD 2
  // Runs the wave intrinsics listed below and returns one result per call.
  // CALL(IsHelperLane()) is IsHelperLane() in a normal build and
  // ph_IsHelperLane() when ISHELPERLANE_PLACEHOLDER is defined.
  2470. HelperLaneWaveTestResult60 RunHelperLaneWaveTests60() {
  2471. HelperLaneWaveTestResult60 tr;
  2472. bool is_helper = CALL(IsHelperLane());
  2473. tr.anyTrue = WaveActiveAnyTrue(is_helper);
  2474. tr.allTrue = WaveActiveAllTrue(!is_helper);
  2475. tr.ballot = WaveActiveBallot(true);
  // Each lane iterates until it is the first lane and WaveReadLaneFirst reports
  // a non-helper lane; WaveGetLaneCount() bounds the number of iterations.
  2476. // waterfall loop
  2477. int count = 0;
  2478. int waveCount = WaveGetLaneCount();
  2479. while (count < waveCount) {
  2480. count++;
  2481. if (WaveReadLaneFirst(!CALL(IsHelperLane())) && WaveIsFirstLane()) {
  2482. break;
  2483. }
  2484. }
  2485. tr.waterfallLoopCount = count;
  // The helper flag is queried again after the loop before the remaining tests
  2486. is_helper = CALL(IsHelperLane());
  2487. tr.allEqual = WaveActiveAllEqual(is_helper);
  2488. tr.countBits = WaveActiveCountBits(true);
  2489. tr.sum = WaveActiveSum(4);
  2490. tr.product = WaveActiveProduct(4);
  2491. tr.bitAnd = WaveActiveBitAnd((uint)!is_helper);
  2492. tr.bitOr = WaveActiveBitOr((uint)is_helper);
  2493. tr.bitXor = WaveActiveBitXor((uint)is_helper);
  2494. tr.min = WaveActiveMin(is_helper ? 1 : 10);
  2495. tr.max = WaveActiveMax(is_helper ? 10 : 1);
  2496. tr.prefixCountBits = WavePrefixCountBits(1);
  2497. tr.prefixProduct = WavePrefixProduct(4);
  2498. tr.prefixSum = WavePrefixSum(2);
  2499. return tr;
  2500. }
  // Records this lane's helper flag together with the flag read from the quad
  // neighbors across X, across Y and across the diagonal.
  HelperLaneQuadTestResult RunHelperLaneQuadTests() {
    int helper = CALL(IsHelperLane());

    HelperLaneQuadTestResult results;
    results.is_helper_this = helper;
    results.is_helper_across_X = QuadReadAcrossX(helper);
    results.is_helper_across_Y = QuadReadAcrossY(helper);
    results.is_helper_across_Diag = QuadReadAcrossDiagonal(helper);
    return results;
  }
  #ifdef ISHELPERLANE_PLACEHOLDER
  // Placeholder variant of RunHelperLaneQuadTests for the pixel shader: the
  // helper flag comes from ph_IsHelperLane(pos, first_call).
  HelperLaneQuadTestResult ph_RunHelperLaneQuadTests_PS(float4 pos, bool first_call) {
    int helper = ph_IsHelperLane(pos, first_call);

    HelperLaneQuadTestResult results;
    results.is_helper_this = helper;
    results.is_helper_across_X = QuadReadAcrossX(helper);
    results.is_helper_across_Y = QuadReadAcrossY(helper);
    results.is_helper_across_Diag = QuadReadAcrossDiagonal(helper);
    return results;
  }
  #endif
  // Runs WaveMatch and the WaveMultiPrefix* intrinsics, every one with a mask
  // that has all bits set, and returns one result per call.
  HelperLaneWaveTestResult65 RunHelperLaneWaveTests65() {
    uint4 allBitsSet = uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
    bool helper = CALL(IsHelperLane());
    int helperBit = helper ? 1 : 0;
    int nonHelperBit = helper ? 0 : 1;

    HelperLaneWaveTestResult65 results;
    results.match = WaveMatch(true);
    results.mpCountBits = WaveMultiPrefixCountBits(1, allBitsSet);
    results.mpSum = WaveMultiPrefixSum(2, allBitsSet);
    results.mpProduct = WaveMultiPrefixProduct(4, allBitsSet);
    results.mpBitAnd = WaveMultiPrefixBitAnd(nonHelperBit, allBitsSet);
    results.mpBitOr = WaveMultiPrefixBitOr(helperBit, allBitsSet);
    results.mpBitXor = WaveMultiPrefixBitXor(helperBit, allBitsSet);
    return results;
  }
  // Vertex-to-pixel interface: position only
  struct PSInput {
    float4 pos : SV_POSITION;
  };

  // Vertex shader: runs the 6.0 wave tests, lets lane 2 store them in record
  // VS_INDEX, and forwards the input position with w = 1.
  PSInput VSMain(float3 pos : POSITION) {
    HelperLaneWaveTestResult60 waveResults = RunHelperLaneWaveTests60();
    // last lane writes results
    if (WaveGetLaneIndex() == 2)
      g_TestResults[VS_INDEX].sm60_wave = waveResults;

    PSInput vsOut;
    vsOut.pos = float4(pos, 1);
    return vsOut;
  }
  // Vertex shader with the 6.5 tests added: runs the 6.0 and 6.5 wave tests,
  // lets lane 2 store both in record VS_INDEX, and forwards the input position
  // with w = 1.
  PSInput VSMain65(float3 pos : POSITION) {
    HelperLaneWaveTestResult60 waveResults60 = RunHelperLaneWaveTests60();
    HelperLaneWaveTestResult65 waveResults65 = RunHelperLaneWaveTests65();
    // last lane writes results
    if (WaveGetLaneIndex() == 2) {
      g_TestResults[VS_INDEX].sm60_wave = waveResults60;
      g_TestResults[VS_INDEX].sm65_wave = waveResults65;
    }

    PSInput vsOut;
    vsOut.pos = float4(pos, 1);
    return vsOut;
  }
  // Pixel shader: runs the 6.0 wave and quad tests twice, once before and once
  // after the top-left pixel is discarded. The bottom-right pixel stores the
  // first set in record PS_INDEX and the second in PS_INDEX_AFTER_DISCARD.
  uint4 PSMain(PSInput input) : SV_TARGET {
    bool isBottomRight = input.pos.x > 1.0f && input.pos.y > 1.0f;
    bool isTopLeft = input.pos.x < 1.0f && input.pos.y < 1.0f;

    HelperLaneWaveTestResult60 waveBefore = RunHelperLaneWaveTests60();
  #ifdef ISHELPERLANE_PLACEHOLDER
    HelperLaneQuadTestResult quadBefore = ph_RunHelperLaneQuadTests_PS(input.pos, true);
  #else
    HelperLaneQuadTestResult quadBefore = RunHelperLaneQuadTests();
  #endif
    // bottom right pixel writes results
    if (isBottomRight) {
      g_TestResults[PS_INDEX].sm60_wave = waveBefore;
      g_TestResults[PS_INDEX].sm60_quad = quadBefore;
    }

    // discard top left pixel
    if (isTopLeft)
      discard;

    HelperLaneWaveTestResult60 waveAfter = RunHelperLaneWaveTests60();
  #ifdef ISHELPERLANE_PLACEHOLDER
    HelperLaneQuadTestResult quadAfter = ph_RunHelperLaneQuadTests_PS(input.pos, false);
  #else
    HelperLaneQuadTestResult quadAfter = RunHelperLaneQuadTests();
  #endif
    // bottom right pixel writes results
    if (isBottomRight) {
      g_TestResults[PS_INDEX_AFTER_DISCARD].sm60_wave = waveAfter;
      g_TestResults[PS_INDEX_AFTER_DISCARD].sm60_quad = quadAfter;
    }

    return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
  }
  // Pixel shader with the 6.5 tests added: runs the 6.0 wave, quad and 6.5 wave
  // tests twice, once before and once after the top-left pixel is discarded.
  // The bottom-right pixel stores the first set in record PS_INDEX and the
  // second in PS_INDEX_AFTER_DISCARD.
  uint4 PSMain65(PSInput input) : SV_TARGET {
    bool isBottomRight = input.pos.x > 1.0f && input.pos.y > 1.0f;
    bool isTopLeft = input.pos.x < 1.0f && input.pos.y < 1.0f;

    HelperLaneWaveTestResult before;
    before.sm60_wave = RunHelperLaneWaveTests60();
  #ifdef ISHELPERLANE_PLACEHOLDER
    before.sm60_quad = ph_RunHelperLaneQuadTests_PS(input.pos, true);
  #else
    before.sm60_quad = RunHelperLaneQuadTests();
  #endif
    before.sm65_wave = RunHelperLaneWaveTests65();
    // bottom right pixel writes results
    if (isBottomRight) {
      g_TestResults[PS_INDEX] = before;
    }

    // discard top left pixel
    if (isTopLeft)
      discard;

    HelperLaneWaveTestResult after;
    after.sm60_wave = RunHelperLaneWaveTests60();
  #ifdef ISHELPERLANE_PLACEHOLDER
    after.sm60_quad = ph_RunHelperLaneQuadTests_PS(input.pos, false);
  #else
    after.sm60_quad = RunHelperLaneQuadTests();
  #endif
    after.sm65_wave = RunHelperLaneWaveTests65();
    // bottom right pixel writes results
    if (isBottomRight) {
      g_TestResults[PS_INDEX_AFTER_DISCARD] = after;
    }

    return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
  }
  // Compute shader: runs the 6.0 wave and quad tests in a 3-thread group and
  // lets lane 2 store them in record CS_INDEX. tid is not used.
  [numthreads(3,1,1)]
  void CSMain(uint3 tid : SV_GroupThreadID) {
    HelperLaneWaveTestResult60 waveResults = RunHelperLaneWaveTests60();
    HelperLaneQuadTestResult quadResults = RunHelperLaneQuadTests();
    // last lane writes results
    if (WaveGetLaneIndex() != 2)
      return;
    g_TestResults[CS_INDEX].sm60_wave = waveResults;
    g_TestResults[CS_INDEX].sm60_quad = quadResults;
  }
  // Compute shader with the 6.5 tests added: runs the 6.0 wave, quad and 6.5
  // wave tests in a 3-thread group and lets lane 2 store the complete record at
  // CS_INDEX.
  [numthreads(3,1,1)]
  void CSMain65() {
    HelperLaneWaveTestResult results;
    results.sm60_wave = RunHelperLaneWaveTests60();
    results.sm60_quad = RunHelperLaneQuadTests();
    results.sm65_wave = RunHelperLaneWaveTests65();
    // last lane writes results
    if (WaveGetLaneIndex() != 2)
      return;
    g_TestResults[CS_INDEX] = results;
  }
  2628. ]]>
  2629. </Shader>
  2630. </ShaderOp>
  2631. </ShaderOpSet>