Ver código fonte

IsHelperLane() Execution Tests (#3367)

Helena Kotas 4 anos atrás
pai
commit
e2af267152

+ 12 - 0
tools/clang/test/HLSL/ShaderOp.xsd

@@ -264,6 +264,18 @@
                   <xs:sequence>
                     <xs:element minOccurs="0" maxOccurs="unbounded" name="RenderTarget">
                       <xs:complexType>
+                        <xs:sequence>
+                          <xs:element minOccurs="0" maxOccurs="1" name="Viewport" >
+                            <xs:complexType>
+                              <xs:attribute name="TopLeftX" type="xs:float" use="optional" />
+                              <xs:attribute name="TopLeftY" type="xs:float" use="optional" />
+                              <xs:attribute name="Width" type="xs:float" use="optional" />
+                              <xs:attribute name="Height" type="xs:float" use="optional" />
+                              <xs:attribute name="MinDepth" type="xs:float" use="optional" />
+                              <xs:attribute name="MaxDepth" type="xs:float" use="optional" />
+                            </xs:complexType>
+                          </xs:element>
+                        </xs:sequence>
                         <xs:attribute name="Name" type="xs:string" use="required" />
                       </xs:complexType>
                     </xs:element>

+ 388 - 3
tools/clang/test/HLSL/ShaderOpArith.xml

@@ -2193,7 +2193,392 @@
     </Shader>
   </ShaderOp>
 
-    <!--
-  TODO: Dynamically index into tables
-  -->
+  <ShaderOp Name="HelperLaneTestNoWave" PS="PS" VS="VS" TopologyType="TRIANGLE">
+    <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0)</RootSignature>
+    <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
+        { { -1.0f,  1.0f, 0.0f } },
+        { {  1.0f,  1.0f, 0.0f } },
+        { {  1.0f, -1.0f, 0.0f } },
+    </Resource>
+    <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="120" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
+    <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
+    <RootValues>
+        <RootValue Index="0" ResName="UAVBuffer0" />
+    </RootValues>
+    <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
+      <Descriptor Name="RTarget" Kind="RTV"/>
+    </DescriptorHeap>
+    <InputElements>
+      <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
+    </InputElements>
+    <RenderTargets>
+      <RenderTarget Name="RTarget">
+        <Viewport Width="2.0" Height="2.0" MaxDepth="1.0"/>
+      </RenderTarget>
+    </RenderTargets>
+    <Shader Name="VS" Target="vs_6_0" EntryPoint="VSMain" Text="@PS" />
+    <Shader Name="PS" Target="ps_6_0" EntryPoint="PSMain">
+        <![CDATA[
+#ifdef ISHELPERLANE_PLACEHOLDER
+        bool ph_IsHelperLane(float4 pos, bool first_call) { // Position-based stand-in for IsHelperLane(); only compiled when ISHELPERLANE_PLACEHOLDER is defined.
+            if (first_call) {
+                return pos.x < 1.0f && pos.y > 1.0f; // first call: true only for the pixel with x < 1 and y > 1
+            }
+            else {
+                return pos.x < 1.0f; // later calls: true for every pixel with x < 1
+            }
+        }
+#endif // ISHELPERLANE_PLACEHOLDER
+
+        struct HelperLaneTestResult{ // One record per loop iteration of PSMain, stored in g_testResults.
+            int is_helper_00; // value read diagonally across the quad from the writing pixel
+            int is_helper_10; // value read across Y from the writing pixel
+            int is_helper_01; // value read across X from the writing pixel
+            int is_helper_11; // the writing pixel's own value
+        };
+        
+        RWStructuredBuffer<HelperLaneTestResult> g_testResults : register(u0);
+        
+        int ReadAcrossX_DD(int value, bool isLeft) { // Reconstructs the horizontally adjacent pixel's value from a fine derivative instead of a quad intrinsic.
+          int d = ddx_fine(value); // NOTE(review): presumably right-minus-left within the quad row - confirm against the ddx_fine documentation
+          return isLeft ? value + d : value - d; // left pixel adds the delta, right pixel subtracts it
+        }
+        
+        int ReadAcrossY_DD(int value, bool isTop) { // Reconstructs the vertically adjacent pixel's value from a fine derivative instead of a quad intrinsic.
+          int d = ddy_fine(value); // NOTE(review): presumably bottom-minus-top within the quad column - confirm against the ddy_fine documentation
+          return isTop ? value + d : value - d; // top pixel adds the delta, bottom pixel subtracts it
+        }
+        
+        int ReadAcrossDiagonal_DD(int value, bool isLeft, bool isTop) { // Diagonal read: a horizontal read whose result is then read vertically.
+          return ReadAcrossY_DD(ReadAcrossX_DD(value, isLeft), isTop);
+        }
+
+        struct PSInput { // Vertex-to-pixel interface: carries the position only.
+            float4 pos : SV_POSITION;
+        };
+        
+        PSInput VSMain(float3 pos : POSITION) { // Pass-through vertex shader.
+            PSInput r;
+            r.pos = float4(pos, 1); // extend the input position with w = 1
+            return r;
+        }
+
+        uint4 PSMain(PSInput input) : SV_TARGET { // Records the helper-lane state of all four quad pixels, before and after the top-left pixel is discarded.
+          bool isLeft = (input.pos.x < 1.0f);
+          bool isTop = (input.pos.y < 1.0f);
+          
+          for (int i = 0; i < 2; i++) { // i == 0: before the discard; i == 1: after it
+#ifdef ISHELPERLANE_PLACEHOLDER
+            int is_helper = ph_IsHelperLane(input.pos, i == 0);
+#else
+            int is_helper = IsHelperLane();
+#endif
+            int is_helper_accross_X = ReadAcrossX_DD(is_helper, isLeft);
+            int is_helper_accross_Y = ReadAcrossY_DD(is_helper, isTop);
+            int is_helper_accross_Diag = ReadAcrossDiagonal_DD(is_helper, isLeft, isTop);
+          
+            if (!isLeft && !isTop) { //bottom right pixel writes results
+              g_testResults[i].is_helper_00 = is_helper_accross_Diag;
+              g_testResults[i].is_helper_10 = is_helper_accross_Y;
+              g_testResults[i].is_helper_01 = is_helper_accross_X;
+              g_testResults[i].is_helper_11 = is_helper;
+            }
+
+            if (i == 0 && isLeft && isTop) // discard top left pixel
+              discard;
+          }
+          return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
+        }
+        ]]>
+    </Shader>
+  </ShaderOp>
+
+  <ShaderOp Name="HelperLaneTestWave" CS="CS" PS="PS" VS="VS" DispatchX="3" DispatchY="1" TopologyType="TRIANGLE">
+    <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0)</RootSignature>
+      <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
+            { { -1.0f,  1.0f, 0.0f } },
+            { {  1.0f,  1.0f, 0.0f } },
+            { {  1.0f, -1.0f, 0.0f } },
+      </Resource>
+      <Resource Name="UAVBuffer0" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" Format="R32_TYPELESS" />
+      <Resource Name="RTarget" Dimension="TEXTURE2D" Width="16" Height="16" Format="R32G32B32A32_UINT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" ReadBack="true" />
+      <RootValues>
+        <RootValue Index="0" ResName="UAVBuffer0" />
+      </RootValues>
+      <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
+         <Descriptor Name="RTarget" Kind="RTV"/>
+      </DescriptorHeap>
+      <InputElements>
+         <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
+      </InputElements>
+      <RenderTargets>
+          <RenderTarget Name="RTarget">
+              <Viewport Width="2.0" Height="2.0" MaxDepth="1.0"/>
+          </RenderTarget>
+      </RenderTargets>
+      <Shader Name="PS65" Target="ps_6_5" EntryPoint="PSMain65" Text="@CS"/>
+      <Shader Name="VS65" Target="vs_6_5" EntryPoint="VSMain65" Text="@CS"/>
+      <Shader Name="CS65" Target="cs_6_5" EntryPoint="CSMain65" Text="@CS"/>
+      <Shader Name="VS"   Target="vs_6_0" EntryPoint="VSMain"   Text="@CS"/>
+      <Shader Name="PS"   Target="ps_6_0" EntryPoint="PSMain"   Text="@CS"/>
+      <Shader Name="CS"   Target="cs_6_0" EntryPoint="CSMain">
+        <![CDATA[
+#ifdef ISHELPERLANE_PLACEHOLDER
+        #define CALL(x) ph_##x
+        
+        bool ph_IsHelperLane() { // Placeholder overload without a position argument: always reports "not a helper lane".
+            return false;
+        }
+
+        bool ph_IsHelperLane(float4 pos, bool first_call) { // Position-based placeholder used by the pixel-shader quad tests.
+            if (first_call) {
+                return pos.x < 1.0f && pos.y > 1.0f; // first call: true only for the pixel with x < 1 and y > 1
+            }
+            else {
+                return pos.x < 1.0f; // later calls: true for every pixel with x < 1
+            }
+        }
+#else
+        #define CALL(x) x
+#endif
+
+        // 6.0 wave ops
+        struct HelperLaneWaveTestResult60 { // Results filled in by RunHelperLaneWaveTests60().
+            int anyTrue; // WaveActiveAnyTrue(is_helper)
+            int allTrue; // WaveActiveAllTrue(!is_helper)
+            uint4 ballot; // WaveActiveBallot(true)
+            int waterfallLoopCount; // iterations this lane spent in the waterfall loop
+            int allEqual; // WaveActiveAllEqual(is_helper)
+            int countBits; // WaveActiveCountBits(true)
+            int sum; // WaveActiveSum(4)
+            int product; // WaveActiveProduct(4)
+            int bitAnd; // WaveActiveBitAnd((uint)!is_helper)
+            int bitOr; // WaveActiveBitOr((uint)is_helper)
+            int bitXor; // WaveActiveBitXor((uint)is_helper)
+            int min; // WaveActiveMin(is_helper ? 1 : 10)
+            int max; // WaveActiveMax(is_helper ? 10 : 1)
+            int prefixCountBits; // WavePrefixCountBits(1)
+            int prefixProduct; // WavePrefixProduct(4)
+            int prefixSum; // WavePrefixSum(2)
+        };
+        
+        struct HelperLaneQuadTestResult { // Helper-lane flag of the current lane and of its three quad neighbours.
+            int is_helper_this; // the calling lane's own flag
+            int is_helper_across_X; // QuadReadAcrossX of the flag
+            int is_helper_across_Y; // QuadReadAcrossY of the flag
+            int is_helper_across_Diag; // QuadReadAcrossDiagonal of the flag
+        };
+        
+        // 6.5 wave ops
+        struct HelperLaneWaveTestResult65 { // Results filled in by RunHelperLaneWaveTests65().
+            uint4 match; // WaveMatch(true)
+            int mpCountBits; // WaveMultiPrefixCountBits(1, mask)
+            int mpSum; // WaveMultiPrefixSum(2, mask)
+            int mpProduct; // WaveMultiPrefixProduct(4, mask)
+            int mpBitAnd; // WaveMultiPrefixBitAnd(is_helper ? 0 : 1, mask)
+            int mpBitOr; // WaveMultiPrefixBitOr(is_helper ? 1 : 0, mask)
+            int mpBitXor; // WaveMultiPrefixBitXor(is_helper ? 1 : 0, mask)
+        };
+        
+        struct HelperLaneWaveTestResult { // Element type of g_TestResults: one record per shader stage slot.
+            HelperLaneWaveTestResult60 sm60_wave; // 6.0 wave intrinsic results
+            HelperLaneQuadTestResult sm60_quad; // quad read results
+            HelperLaneWaveTestResult65 sm65_wave; // 6.5 wave intrinsic results
+        };
+        
+        RWStructuredBuffer<HelperLaneWaveTestResult> g_TestResults : register(u0);
+        
+        #define CS_INDEX    0
+        #define VS_INDEX    0
+        #define PS_INDEX    1
+        #define PS_INDEX_AFTER_DISCARD 2
+        
+        HelperLaneWaveTestResult60 RunHelperLaneWaveTests60() { // Runs the 6.0 wave intrinsics and returns this lane's results.
+            HelperLaneWaveTestResult60 tr;
+            bool is_helper = CALL(IsHelperLane()); // CALL() selects the ph_ placeholder when ISHELPERLANE_PLACEHOLDER is defined
+            tr.anyTrue = WaveActiveAnyTrue(is_helper);
+            tr.allTrue = WaveActiveAllTrue(!is_helper);
+            tr.ballot = WaveActiveBallot(true);
+
+            // waterfall loop
+            int count = 0;
+            int waveCount = WaveGetLaneCount(); // upper bound on the number of loop iterations
+            while (count < waveCount) {
+                count++;
+                if (WaveReadLaneFirst(!CALL(IsHelperLane())) && WaveIsFirstLane()) { // the first lane leaves when the value read from it is "not helper"
+                    break;
+                }
+            }
+            tr.waterfallLoopCount = count;
+            
+            is_helper = CALL(IsHelperLane()); // re-queried after the loop
+            tr.allEqual = WaveActiveAllEqual(is_helper);
+            tr.countBits = WaveActiveCountBits(true);
+            tr.sum = WaveActiveSum(4);
+            tr.product = WaveActiveProduct(4);
+            tr.bitAnd = WaveActiveBitAnd((uint)!is_helper);
+            tr.bitOr = WaveActiveBitOr((uint)is_helper);
+            tr.bitXor = WaveActiveBitXor((uint)is_helper);
+            tr.min = WaveActiveMin(is_helper ? 1 : 10);
+            tr.max = WaveActiveMax(is_helper ? 10 : 1);
+            tr.prefixCountBits = WavePrefixCountBits(1);
+            tr.prefixProduct = WavePrefixProduct(4);
+            tr.prefixSum = WavePrefixSum(2);
+
+            return tr;
+        }
+        
+        HelperLaneQuadTestResult RunHelperLaneQuadTests() { // Reads the helper-lane flag of this lane and of its three quad neighbours.
+            HelperLaneQuadTestResult tr;
+            int is_helper = CALL(IsHelperLane()); // bool result stored as int so it can be passed to the quad reads
+
+            tr.is_helper_this = is_helper;
+            tr.is_helper_across_X = QuadReadAcrossX(is_helper);
+            tr.is_helper_across_Y = QuadReadAcrossY(is_helper);
+            tr.is_helper_across_Diag = QuadReadAcrossDiagonal(is_helper);
+            
+            return tr;
+        }
+        
+        #ifdef ISHELPERLANE_PLACEHOLDER
+        HelperLaneQuadTestResult ph_RunHelperLaneQuadTests_PS(float4 pos, bool first_call) { // Placeholder variant of RunHelperLaneQuadTests() that takes the flag from the position-based ph_IsHelperLane.
+            HelperLaneQuadTestResult tr;
+            
+            int is_helper = ph_IsHelperLane(pos, first_call);
+            tr.is_helper_this = is_helper;
+            tr.is_helper_across_X = QuadReadAcrossX(is_helper);
+            tr.is_helper_across_Y = QuadReadAcrossY(is_helper);
+            tr.is_helper_across_Diag = QuadReadAcrossDiagonal(is_helper);
+            
+            return tr;
+        }
+        #endif
+
+        HelperLaneWaveTestResult65 RunHelperLaneWaveTests65() { // Runs the 6.5 wave intrinsics and returns this lane's results.
+            HelperLaneWaveTestResult65 tr;
+            uint4 noMaskedBits = (uint4)0xFFFFFFFF; // all mask bits set, passed to every multi-prefix call below
+            bool is_helper = CALL(IsHelperLane());
+
+            tr.match = WaveMatch(true);
+            tr.mpCountBits = WaveMultiPrefixCountBits(1, noMaskedBits);
+            tr.mpSum = WaveMultiPrefixSum(2, noMaskedBits);
+            tr.mpProduct = WaveMultiPrefixProduct(4, noMaskedBits);
+            tr.mpBitAnd = WaveMultiPrefixBitAnd(is_helper ? 0 : 1, noMaskedBits);
+            tr.mpBitOr = WaveMultiPrefixBitOr(is_helper ? 1 : 0, noMaskedBits);
+            tr.mpBitXor = WaveMultiPrefixBitXor(is_helper ? 1 : 0, noMaskedBits);
+            return tr;
+        }
+        
+        struct PSInput { // Vertex-to-pixel interface: carries the position only.
+            float4 pos : SV_POSITION;
+        };
+        
+        PSInput VSMain(float3 pos : POSITION) { // 6.0 vertex shader: runs the 6.0 wave tests, then passes the position through.
+            HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60();
+            if (WaveGetLaneIndex() == 2) { // last lane writes results
+                g_TestResults[VS_INDEX].sm60_wave = tr60; // NOTE(review): assumes the three vertices occupy lanes 0..2 of one wave - confirm
+            }
+            PSInput r;
+            r.pos = float4(pos, 1); 
+            return r;
+        }
+        
+        PSInput VSMain65(float3 pos : POSITION) { // 6.5 vertex shader: runs the 6.0 and 6.5 wave tests, then passes the position through.
+            HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60();
+            HelperLaneWaveTestResult65 tr65 = RunHelperLaneWaveTests65();
+            if (WaveGetLaneIndex() == 2) { // last lane writes results
+                g_TestResults[VS_INDEX].sm60_wave = tr60;
+                g_TestResults[VS_INDEX].sm65_wave = tr65; // sm60_quad is left unwritten by the vertex stage
+            }
+            PSInput r;
+            r.pos = float4(pos, 1); 
+            return r;
+        }
+
+        uint4 PSMain(PSInput input) : SV_TARGET { // 6.0 pixel shader: runs the wave and quad tests before and after discarding the top-left pixel.
+            HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60();
+            
+        #ifdef ISHELPERLANE_PLACEHOLDER
+            HelperLaneQuadTestResult tr60_quad = ph_RunHelperLaneQuadTests_PS(input.pos, true);
+        #else
+            HelperLaneQuadTestResult tr60_quad = RunHelperLaneQuadTests();
+        #endif
+        
+            if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results
+                g_TestResults[PS_INDEX].sm60_wave = tr60;
+                g_TestResults[PS_INDEX].sm60_quad = tr60_quad;
+            }
+            if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel
+                discard;
+                
+            HelperLaneWaveTestResult60 tr60_disc = RunHelperLaneWaveTests60(); // same tests again, now after the discard
+
+#ifdef ISHELPERLANE_PLACEHOLDER
+            HelperLaneQuadTestResult tr60_quad_disc  = ph_RunHelperLaneQuadTests_PS(input.pos, false);
+#else
+            HelperLaneQuadTestResult tr60_quad_disc = RunHelperLaneQuadTests();
+#endif
+            if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results
+                g_TestResults[PS_INDEX_AFTER_DISCARD].sm60_wave = tr60_disc;
+                g_TestResults[PS_INDEX_AFTER_DISCARD].sm60_quad = tr60_quad_disc;
+            }
+            return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
+        }
+
+        uint4 PSMain65(PSInput input) : SV_TARGET { // 6.5 pixel shader: like PSMain, with the 6.5 wave tests added to both passes.
+            HelperLaneWaveTestResult tr;
+            tr.sm60_wave = RunHelperLaneWaveTests60();
+#ifdef ISHELPERLANE_PLACEHOLDER
+            tr.sm60_quad = ph_RunHelperLaneQuadTests_PS(input.pos, true);
+#else
+            tr.sm60_quad = RunHelperLaneQuadTests();
+#endif
+            tr.sm65_wave = RunHelperLaneWaveTests65();
+            
+            if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results
+                g_TestResults[PS_INDEX] = tr;
+            }
+
+            if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel
+                discard;
+                
+            HelperLaneWaveTestResult tr_disc; // second pass, after the discard
+            tr_disc.sm60_wave = RunHelperLaneWaveTests60();
+#ifdef ISHELPERLANE_PLACEHOLDER
+            tr_disc.sm60_quad = ph_RunHelperLaneQuadTests_PS(input.pos, false);
+#else
+            tr_disc.sm60_quad = RunHelperLaneQuadTests();
+#endif
+            tr_disc.sm65_wave = RunHelperLaneWaveTests65();
+            
+            if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results
+                g_TestResults[PS_INDEX_AFTER_DISCARD] = tr_disc;
+            }
+            return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1);
+        }
+        
+        [numthreads(3,1,1)] // three threads per group
+        void CSMain(uint3 tid : SV_GroupThreadID) { // 6.0 compute shader: runs the 6.0 wave and quad tests; tid is not read in the body.
+            HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60();
+            HelperLaneQuadTestResult tr60_quad = RunHelperLaneQuadTests();
+            if (WaveGetLaneIndex() == 2) { // last lane writes results
+                g_TestResults[CS_INDEX].sm60_wave = tr60;
+                g_TestResults[CS_INDEX].sm60_quad = tr60_quad;
+            }
+        }
+        
+        [numthreads(3,1,1)] // three threads per group
+        void CSMain65() { // 6.5 compute shader: runs the 6.0 wave, quad and 6.5 wave tests.
+            HelperLaneWaveTestResult tr;
+            tr.sm60_wave = RunHelperLaneWaveTests60();
+            tr.sm60_quad = RunHelperLaneQuadTests();
+            tr.sm65_wave = RunHelperLaneWaveTests65();
+
+            if (WaveGetLaneIndex() == 2) { // last lane writes results
+                g_TestResults[CS_INDEX] = tr; // whole record written at once
+            }
+        }
+]]>
+    </Shader>
+  </ShaderOp>
 </ShaderOpSet>

+ 344 - 0
tools/clang/unittests/HLSL/ExecutionTest.cpp

@@ -305,6 +305,8 @@ public:
   TEST_METHOD(AtomicsTyped64Test);
   TEST_METHOD(AtomicsShared64Test);
   TEST_METHOD(AtomicsFloatTest);
+  TEST_METHOD(HelperLaneTest);
+  TEST_METHOD(HelperLaneTestWave);
   TEST_METHOD(SignatureResourcesTest)
   TEST_METHOD(DynamicResourcesTest)
   TEST_METHOD(QuadReadTest)
@@ -4607,6 +4609,13 @@ static void VerifyOutputWithExpectedValueUInt(uint32_t output, uint32_t ref, uin
     VERIFY_IS_TRUE(output - ref <= tolerance && ref - output <= tolerance);
 }
 
+static void VerifyOutputWithExpectedValueUInt4(XMUINT4 output, XMUINT4 ref) { // Checks that all four components of output equal those of ref, one VERIFY_ARE_EQUAL per component.
+  VERIFY_ARE_EQUAL(output.x, ref.x);
+  VERIFY_ARE_EQUAL(output.y, ref.y);
+  VERIFY_ARE_EQUAL(output.z, ref.z);
+  VERIFY_ARE_EQUAL(output.w, ref.w);
+}
+
 static void VerifyOutputWithExpectedValueFloat(
     float output, float ref, LPCWSTR type, double tolerance,
     hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
@@ -8936,6 +8945,341 @@ TEST_F(ExecutionTest, AtomicsFloatTest) {
   VerifyAtomicsFloatTest(test, 64*64+6);
 }
 
+// The IsHelperLane test renders a 3-pixel triangle into a 16x16 render target restricted
+// to a 2x2 viewport aligned at (0,0), which guarantees it will run in a single quad.
+//
+// Pixels to be rendered*
+// (0,0)*  (0,1)*
+// (1,0)   (1,1)*
+//
+// Pixel (1,0) is not rendered and is in helper lane.
+//
+// Each thread will use ddx_fine and ddy_fine to read the IsHelperLane() values from other threads.
+// The bottom right pixel will write the results into the UAV buffer.
+// 
+// Then the top left pixel (0,0) is discarded and the process above is repeated.
+//
+// Runs with shader models 6.0 and 6.6 to test both the HLSL built-in IsHelperLane fallback 
+// function (sm <= 6.5) and the IsHelperLane intrinsics (sm >= 6.6).
+//
+TEST_F(ExecutionTest, HelperLaneTest) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  CComPtr<IStream> pStream;
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
+
+  std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>();
+  st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get());
+
+#ifdef ISHELPERLANE_PLACEHOLDER
+  string args = "-DISHELPERLANE_PLACEHOLDER";
+#else 
+  string args = "";
+#endif
+
+  D3D_SHADER_MODEL TestShaderModels[] = { D3D_SHADER_MODEL_6_0, D3D_SHADER_MODEL_6_6 };
+  for (unsigned i = 0; i < _countof(TestShaderModels); i++) {
+    D3D_SHADER_MODEL sm = TestShaderModels[i];
+    LogCommentFmt(L"Verifying IsHelperLane in shader model 6.%1u", ((UINT)sm & 0x0f)); // low nibble of the enum value is printed as the minor version
+
+    CComPtr<ID3D12Device> pDevice;
+    if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */))
+      continue; // no device for this shader model: move on to the next one
+
+    std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestNoWave", 
+      // this callback is called when the test is creating the resource to run the test
+      [&](LPCSTR Name, std::vector<BYTE>& Data, st::ShaderOp* pShaderOp) {
+        VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0"));
+        std::fill(Data.begin(), Data.end(), 0xCC); // pre-fill the UAV so untouched entries are recognizable
+        pShaderOp->Shaders.at(0).Arguments = args.c_str(); // args is captured by reference and outlives this call
+        pShaderOp->Shaders.at(1).Arguments = args.c_str(); // NOTE(review): presumably this runs before the shaders are compiled - confirm
+      }, ShaderOpSet);
+
+    struct HelperLaneTestResult { // same four fields as the HLSL HelperLaneTestResult struct
+      int32_t is_helper_00;
+      int32_t is_helper_10;
+      int32_t is_helper_01;
+      int32_t is_helper_11;
+    };
+
+    MappedData uavData;
+    test->Test->GetReadBackData("UAVBuffer0", &uavData);
+    HelperLaneTestResult* pTestResults = (HelperLaneTestResult*)uavData.data(); // [0] = before discard, [1] = after discard
+
+    MappedData renderData;
+    test->Test->GetReadBackData("RTarget", &renderData);
+    const uint32_t* pPixels = (uint32_t*)renderData.data();
+
+    // before discard
+    VERIFY_ARE_EQUAL(pTestResults[0].is_helper_00, 0);
+    VERIFY_ARE_EQUAL(pTestResults[0].is_helper_10, 0);
+    VERIFY_ARE_EQUAL(pTestResults[0].is_helper_01, 1);
+    VERIFY_ARE_EQUAL(pTestResults[0].is_helper_11, 0);
+
+    // after discard
+    VERIFY_ARE_EQUAL(pTestResults[1].is_helper_00, 1);
+    VERIFY_ARE_EQUAL(pTestResults[1].is_helper_10, 0);
+    VERIFY_ARE_EQUAL(pTestResults[1].is_helper_01, 1);
+    VERIFY_ARE_EQUAL(pTestResults[1].is_helper_11, 0);
+
+    UNREFERENCED_PARAMETER(pPixels); // render target contents are read back but not inspected
+  }
+}
+
+struct HelperLaneWaveTestResult60 { // Host-side view of the HLSL HelperLaneWaveTestResult60 record; field order follows the shader struct.
+  // 6.0 wave ops
+  int32_t anyTrue;
+  int32_t allTrue;
+  XMUINT4 ballot;
+  int32_t waterfallLoopCount;
+  int32_t allEqual;
+  int32_t countBits;
+  int32_t sum;
+  int32_t product;
+  int32_t bitAnd;
+  int32_t bitOr;
+  int32_t bitXor;
+  int32_t min;
+  int32_t max;
+  int32_t prefixCountBits;
+  int32_t prefixProduct;
+  int32_t prefixSum;
+};
+
+struct HelperLaneQuadTestResult { // Host-side view of the HLSL HelperLaneQuadTestResult record.
+  int32_t is_helper_this;
+  int32_t is_helper_across_X;
+  int32_t is_helper_across_Y;
+  int32_t is_helper_across_Diag;
+};
+
+struct HelperLaneWaveTestResult65 { // Host-side view of the HLSL HelperLaneWaveTestResult65 record.
+  // 6.5 wave ops
+  XMUINT4  match;
+  int32_t mpCountBits;
+  int32_t mpSum;
+  int32_t mpProduct;
+  int32_t mpBitAnd;
+  int32_t mpBitOr;
+  int32_t mpBitXor;
+};
+
+struct HelperLaneWaveTestResult { // One UAV record as read back by HelperLaneTestWave.
+  HelperLaneWaveTestResult60 sm60; // named sm60_wave in the shader
+  HelperLaneQuadTestResult sm60_quad;
+  HelperLaneWaveTestResult65 sm65; // named sm65_wave in the shader
+};
+
+struct foo { int32_t a; int32_t b; int32_t c; }; // NOTE(review): foo, bar and the globals f and b are not referenced in the visible code - looks like leftover scratch; confirm and remove
+struct bar { foo f; int32_t d; XMUINT4 g; };
+foo f = {1, 2, 3};
+bar b = { { 1, 2, 3 }, 0, { 1, 2, 3, 4 } };
+
+HelperLaneWaveTestResult HelperLane_CS_ExpectedResults = { // Reference values for the compute shader run; initializers follow struct field order.
+  // HelperLaneWaveTestResult60
+  { 0, 1, { 0x7, 0, 0, 0 }, 3, 1, 3, 12, 64, 1, 0, 0, 10, 1, 2, 16, 4 }, // ballot 0x7: lanes 0..2
+  // HelperLaneQuadTestResult
+  { 0, 0, 0, 0 },
+  // HelperLaneWaveTestResult65
+  { {0x7, 0, 0, 0}, 2, 4, 16, 1, 0, 0 }
+};
+
+HelperLaneWaveTestResult HelperLane_VS_ExpectedResults = HelperLane_CS_ExpectedResults; // vertex shader run expects the same values as compute
+  
+HelperLaneWaveTestResult HelperLane_PS_ExpectedResults = { // Reference values for the pixel shader before the discard.
+  // HelperLaneWaveTestResult60
+  { 0, 1, { 0xB, 0, 0, 0 }, 3, 1, 3, 12, 64, 1, 0, 0, 10, 1, 2, 16, 4 }, // ballot 0xB: bit 2 clear
+  // HelperLaneQuadTestResult
+  { 0, 1, 0, 0 },
+  // HelperLaneWaveTestResult65
+  { {0xB, 0, 0, 0}, 2, 4, 16, 1, 0, 0 }
+};
+
+HelperLaneWaveTestResult HelperLane_PSAfterDiscard_ExpectedResults = { // Reference values for the pixel shader after the top-left pixel is discarded.
+  // HelperLaneWaveTestResult60
+  { 0, 1, { 0xA, 0, 0, 0 }, 2, 1, 2, 8, 16, 1, 0, 0, 10, 1, 1, 4, 2 }, // ballot 0xA: only two bits set
+  // HelperLaneQuadTestResult
+  { 0, 1, 0, 1 },
+  // HelperLaneWaveTestResult65
+  { {0xA, 0, 0, 0}, 1, 2, 4, 1, 0, 0 }
+};
+
+// Logs one comparison line for a scalar result and reports whether the actual
+// value equals the expected one. Matching lines are prefixed with " - ",
+// mismatching lines with "FAILED: ".
+bool HelperLaneResultLogAndVerify(const wchar_t* testDesc, uint32_t expectedValue, uint32_t actualValue) {
+  const wchar_t* linePrefix = L"FAILED: ";
+  const bool isMatch = !(expectedValue != actualValue);
+  if (isMatch) {
+    linePrefix = L" - ";
+  }
+  LogCommentFmt(L"%s%s, expected = %u, actual = %u", linePrefix, testDesc, expectedValue, actualValue);
+  return isMatch;
+}
+
+// Logs one comparison line for a four-component result and reports whether
+// every component of the actual value equals the expected one. Matching lines
+// are prefixed with " - ", mismatching lines with "FAILED: ".
+bool HelperLaneResultLogAndVerify(const wchar_t* testDesc, XMUINT4 expectedValue, XMUINT4 actualValue) {
+  bool isMatch = true;
+  if (expectedValue.x != actualValue.x || expectedValue.y != actualValue.y ||
+      expectedValue.z != actualValue.z || expectedValue.w != actualValue.w) {
+    isMatch = false;
+  }
+  const wchar_t* linePrefix = isMatch ? L" - " : L"FAILED: ";
+  LogCommentFmt(L"%s%s, expected = (0x%X,0x%X,0x%X,0x%X), actual = (0x%X,0x%X,0x%X,0x%X)", linePrefix, testDesc,
+    expectedValue.x, expectedValue.y, expectedValue.z, expectedValue.w,
+    actualValue.x, actualValue.y, actualValue.z, actualValue.w);
+  return isMatch;
+}
+  
+
+// Compares one HelperLaneWaveTestResult read back from the GPU against its
+// reference values, logging one line per compared field.
+//   sm              - shader model the results were produced with; the 6.5
+//                     wave section is only compared when sm >= D3D_SHADER_MODEL_6_5.
+//   testResults     - values read back from the UAV buffer.
+//   expectedResults - reference values.
+//   verifyQuads     - when true, the QuadReadAcross* fields are compared too.
+// Returns true when every compared field matches. Both structures are only
+// read, so they are taken by const reference.
+bool VerifyHelperLaneWaveResults(ExecutionTest::D3D_SHADER_MODEL sm, const HelperLaneWaveTestResult& testResults, const HelperLaneWaveTestResult& expectedResults, bool verifyQuads) {
+  // `passed &= ...` is used on purpose instead of `&&`: every field is
+  // compared and logged even after an earlier mismatch.
+  bool passed = true;
+  {
+    const HelperLaneWaveTestResult60& tr60 = testResults.sm60;
+    const HelperLaneWaveTestResult60& tr60exp = expectedResults.sm60;
+
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveAnyTrue(IsHelperLane())", tr60exp.anyTrue, tr60.anyTrue);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveAllTrue(!IsHelperLane())", tr60exp.allTrue, tr60.allTrue);
+    // The expected number of set bits differs per stage (0xA after the
+    // discard has only two), so the description does not name a bit count.
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveBallot(true)", tr60exp.ballot, tr60.ballot);
+
+    passed &= HelperLaneResultLogAndVerify(L"!WaveReadLaneFirst(IsHelperLane()) && WaveIsFirstLane() in a waterfall loop", tr60exp.waterfallLoopCount, tr60.waterfallLoopCount);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveAllEqual(IsHelperLane())", tr60exp.allEqual, tr60.allEqual);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveCountBits(true)", tr60exp.countBits, tr60.countBits);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveSum(4)", tr60exp.sum, tr60.sum);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveProduct(4)", tr60exp.product, tr60.product);
+
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitAnd(!IsHelperLane())", tr60exp.bitAnd, tr60.bitAnd);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitOr(IsHelperLane())", tr60exp.bitOr, tr60.bitOr);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitXor(IsHelperLane())", tr60exp.bitXor, tr60.bitXor);
+
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveMin(IsHelperLane() ? 1 : 10)", tr60exp.min, tr60.min);
+    passed &= HelperLaneResultLogAndVerify(L"WaveActiveMax(IsHelperLane() ? 10 : 1)", tr60exp.max, tr60.max);
+
+    passed &= HelperLaneResultLogAndVerify(L"WavePrefixCountBits(1)", tr60exp.prefixCountBits, tr60.prefixCountBits);
+    passed &= HelperLaneResultLogAndVerify(L"WavePrefixProduct(4)", tr60exp.prefixProduct, tr60.prefixProduct);
+    passed &= HelperLaneResultLogAndVerify(L"WavePrefixSum(2)", tr60exp.prefixSum, tr60.prefixSum);
+  }
+
+  if (verifyQuads) {
+    const HelperLaneQuadTestResult& quad_tr = testResults.sm60_quad;
+    const HelperLaneQuadTestResult& quad_tr_exp = expectedResults.sm60_quad;
+    passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 3 / pixel (1,1) - IsHelperLane()", quad_tr_exp.is_helper_this, quad_tr.is_helper_this);
+    passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 2 / pixel (0,1) - IsHelperLane()", quad_tr_exp.is_helper_across_X, quad_tr.is_helper_across_X);
+    passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 1 / pixel (1,0) - IsHelperLane()", quad_tr_exp.is_helper_across_Y, quad_tr.is_helper_across_Y);
+    passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 0 / pixel (0,0) - IsHelperLane()", quad_tr_exp.is_helper_across_Diag, quad_tr.is_helper_across_Diag);
+  }
+
+  if (sm >= D3D_SHADER_MODEL_6_5) {
+    const HelperLaneWaveTestResult65& tr65 = testResults.sm65;
+    const HelperLaneWaveTestResult65& tr65exp = expectedResults.sm65;
+
+    passed &= HelperLaneResultLogAndVerify(L"WaveMatch(true)", tr65exp.match, tr65.match);
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixCountBits(1, no_masked_bits)", tr65exp.mpCountBits, tr65.mpCountBits);
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixSum(2, no_masked_bits)", tr65exp.mpSum, tr65.mpSum);
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixProduct(4, no_masked_bits)", tr65exp.mpProduct, tr65.mpProduct);
+
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixAnd(IsHelperLane() ? 0 : 1, no_masked_bits)", tr65exp.mpBitAnd, tr65.mpBitAnd);
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixOr(IsHelperLane() ? 1 : 0, no_masked_bits)", tr65exp.mpBitOr, tr65.mpBitOr);
+    passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixXor(IsHelperLane() ? 1 : 0, no_masked_bits)", tr65exp.mpBitXor, tr65.mpBitXor);
+  }
+  return passed;
+}
+
+// Resource-initialization callback used by HelperLaneTestWave: checks that the
+// resource being initialized is "UAVBuffer0" (case-insensitive compare) and
+// fills its initial contents with the byte 0xCC.
+void CleanUAVBuffer0Buffer(LPCSTR BufferName, std::vector<BYTE>& Data, st::ShaderOp* pShaderOp) {
+  UNREFERENCED_PARAMETER(pShaderOp); // part of the callback signature, not needed here
+  VERIFY_IS_TRUE(0 == _stricmp(BufferName, "UAVBuffer0"));
+  std::fill(Data.begin(), Data.end(), static_cast<BYTE>(0xCC)); // explicit BYTE avoids an implicit int narrowing
+}
+
+//
+// This IsHelperLane test uses Wave intrinsics to verify IsHelperLane() and Wave operations on active lanes.
+//
+// Runs with shader models 6.0, 6.5 and 6.6 to test both the HLSL built-in IsHelperLane fallback 
+// function (sm <= 6.5) and the IsHelperLane intrinsics (sm >= 6.6) and the shader model 6.5 wave intrinsics (sm >= 6.5).
+//
+// For compute and vertex shaders IsHelperLane() always returns false and might be optimized away in the front end.
+// However it can be exposed to the driver in CS/VS through an exported function in a library so drivers need 
+// to be prepared to handle it. For this reason the test is compiled with disabled optimizations (/Od).
+// The tests are also validating that wave intrinsics operate correctly with 3 threads in a CS or 3 vertices 
+// in a VS where the rest of the lanes in the wave are not active (dead lanes).
+//
+TEST_F(ExecutionTest, HelperLaneTestWave) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  CComPtr<IStream> pStream;
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
+
+  std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>();
+  st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get());
+  st::ShaderOp* pShaderOp = ShaderOpSet->GetShaderOp("HelperLaneTestWave"); // NOTE(review): result is dereferenced below without a null check - confirm GetShaderOp cannot return null here
+
+#ifdef ISHELPERLANE_PLACEHOLDER
+  LPCSTR args = "/Od -DISHELPERLANE_PLACEHOLDER";
+#else 
+  LPCSTR args = "/Od";
+#endif
+
+  if (args[0]) { // both variants above are non-empty, so this is always taken
+    for (st::ShaderOpShader& S : pShaderOp->Shaders)
+      S.Arguments = args;
+  }
+
+  bool testPassed = true;
+
+  D3D_SHADER_MODEL TestShaderModels[] = { D3D_SHADER_MODEL_6_0, D3D_SHADER_MODEL_6_5, D3D_SHADER_MODEL_6_6 };
+  for (unsigned i = 0; i < _countof(TestShaderModels); i++) {
+    D3D_SHADER_MODEL sm = TestShaderModels[i];
+    LogCommentFmt(L"\r\nVerifying IsHelperLane using Wave intrinsics in shader model 6.%1u", ((UINT)sm & 0x0f));
+
+    bool smPassed = true;
+
+    CComPtr<ID3D12Device> pDevice;
+    if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) {
+      continue;
+    }
+
+    if (!DoesDeviceSupportWaveOps(pDevice)) {
+      LogCommentFmt(L"Device does not support wave operations in shader model 6.%1u", ((UINT)sm & 0x0f));
+      continue;
+    }
+
+    if (sm >= D3D_SHADER_MODEL_6_5) {
+      // Reassign shader stages to 6.5 versions
+      LPCSTR CS65 = nullptr, VS65 = nullptr, PS65 = nullptr;
+      for (st::ShaderOpShader& S : pShaderOp->Shaders) {
+        if (!strcmp(S.Name, "CS65")) CS65 = S.Name;
+        if (!strcmp(S.Name, "VS65")) VS65 = S.Name;
+        if (!strcmp(S.Name, "PS65")) PS65 = S.Name;
+      }
+      pShaderOp->CS = CS65; // stays nullptr if no shader named "CS65" was found
+      pShaderOp->VS = VS65;
+      pShaderOp->PS = PS65;
+    }
+
+    const unsigned CS_INDEX = 0, VS_INDEX = 0, PS_INDEX = 1, PS_INDEX_AFTER_DISCARD = 2; // same slot numbers as the #defines in the shader source
+
+    // Test Compute shader
+    {
+      std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave",
+        CleanUAVBuffer0Buffer, ShaderOpSet);
+
+      MappedData uavData;
+      test->Test->GetReadBackData("UAVBuffer0", &uavData);
+      HelperLaneWaveTestResult* pTestResults = (HelperLaneWaveTestResult*)uavData.data();
+      LogCommentFmt(L"\r\nCompute shader");
+      smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[CS_INDEX], HelperLane_CS_ExpectedResults, true);
+    }
+    
+    // Test Vertex + Pixel shader
+    {
+      pShaderOp->CS = nullptr; // NOTE(review): this persists into the next loop iteration; CS is only restored by the sm >= 6.5 branch above - confirm that is intended
+      std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", CleanUAVBuffer0Buffer, ShaderOpSet);
+
+      MappedData uavData;
+      test->Test->GetReadBackData("UAVBuffer0", &uavData);
+      HelperLaneWaveTestResult* pTestResults = (HelperLaneWaveTestResult*)uavData.data();
+      LogCommentFmt(L"\r\nVertex shader");
+      smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[VS_INDEX], HelperLane_VS_ExpectedResults, false); // quad fields are not written by the vertex shaders
+      LogCommentFmt(L"\r\nPixel shader");
+      smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[PS_INDEX], HelperLane_PS_ExpectedResults, true);
+      LogCommentFmt(L"\r\nPixel shader with discarded pixel");
+      smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[PS_INDEX_AFTER_DISCARD], HelperLane_PSAfterDiscard_ExpectedResults, true);
+      
+      MappedData renderData;
+      test->Test->GetReadBackData("RTarget", &renderData);
+      const uint32_t* pPixels = (uint32_t*)renderData.data();
+
+      UNREFERENCED_PARAMETER(pPixels); // render target contents are read back but not inspected
+    }
+    testPassed &= smPassed;
+  }
+  VERIFY_ARE_EQUAL(testPassed, true); // single pass/fail verdict after all shader models have been logged
+}
+
 #ifndef _HLK_CONF
 static void WriteReadBackDump(st::ShaderOp *pShaderOp, st::ShaderOpTest *pTest,
                               char **pReadBackDump) {

+ 94 - 17
tools/clang/unittests/HLSL/ShaderOpTest.cpp

@@ -463,7 +463,7 @@ void ShaderOpTest::CreatePipelineState() {
     ZeroMemory(&RtArray, sizeof(RtArray));
     RtArray.NumRenderTargets = (UINT)m_pShaderOp->RenderTargets.size();
     for (size_t i = 0; i < RtArray.NumRenderTargets; ++i) {
-      ShaderOpResource *R = m_pShaderOp->GetResourceByName(m_pShaderOp->RenderTargets[i]);
+      ShaderOpResource *R = m_pShaderOp->GetResourceByName(m_pShaderOp->RenderTargets[i].Name);
       RtArray.RTFormats[i] = R->Desc.Format;
     }
     MDesc.RTVFormats = CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS(RtArray);
@@ -498,7 +498,7 @@ void ShaderOpTest::CreatePipelineState() {
     GDesc.NumRenderTargets = (UINT)m_pShaderOp->RenderTargets.size();
     GDesc.SampleMask = m_pShaderOp->SampleMask;
     for (size_t i = 0; i < m_pShaderOp->RenderTargets.size(); ++i) {
-      ShaderOpResource *R = m_pShaderOp->GetResourceByName(m_pShaderOp->RenderTargets[i]);
+      ShaderOpResource *R = m_pShaderOp->GetResourceByName(m_pShaderOp->RenderTargets[i].Name);
       GDesc.RTVFormats[i] = R->Desc.Format;
     }
     GDesc.SampleDesc.Count = 1; // TODO: read from file, set from shader operation; also apply to count
@@ -856,20 +856,26 @@ void ShaderOpTest::RunCommandList() {
     SetDescriptorHeaps(pList, m_DescriptorHeaps);
     SetRootValues(pList, m_pShaderOp->IsCompute());
 
+    D3D12_VIEWPORT viewport;
     if (!m_pShaderOp->RenderTargets.empty()) {
       // Use the first render target to set up the viewport and scissors.
-      ShaderOpResource *R = m_pShaderOp->GetResourceByName(m_pShaderOp->RenderTargets[0]);
-      D3D12_VIEWPORT viewport;
-      D3D12_RECT scissorRect;
+      ShaderOpRenderTarget& rt = m_pShaderOp->RenderTargets[0];
+      ShaderOpResource *R = m_pShaderOp->GetResourceByName(rt.Name);
+      if (rt.Viewport.Width != 0 && rt.Viewport.Height != 0 ) {
+        memcpy(&viewport, &rt.Viewport, sizeof(rt.Viewport));
+      }
+      else {
+        memset(&viewport, 0, sizeof(viewport));
+        viewport.Height = (FLOAT)R->Desc.Height;
+        viewport.Width = (FLOAT)R->Desc.Width;
+        viewport.MaxDepth = 1.0f;
+      }
+      pList->RSSetViewports(1, &viewport);
 
-      memset(&viewport, 0, sizeof(viewport));
-      viewport.Height = (FLOAT)R->Desc.Height;
-      viewport.Width = (FLOAT)R->Desc.Width;
-      viewport.MaxDepth = 1.0f;
+      D3D12_RECT scissorRect;
       memset(&scissorRect, 0, sizeof(scissorRect));
       scissorRect.right = (LONG)viewport.Width;
       scissorRect.bottom = (LONG)viewport.Height;
-      pList->RSSetViewports(1, &viewport);
       pList->RSSetScissorRects(1, &scissorRect);
     }
 
@@ -877,7 +883,7 @@ void ShaderOpTest::RunCommandList() {
     D3D12_CPU_DESCRIPTOR_HANDLE rtvHandles[8];
     UINT rtvHandleCount = (UINT)m_pShaderOp->RenderTargets.size();
     for (size_t i = 0; i < rtvHandleCount; ++i) {
-      auto &rt = m_pShaderOp->RenderTargets[i];
+      auto &rt = m_pShaderOp->RenderTargets[i].Name;
       ShaderOpDescriptorData &DData = m_DescriptorData[rt];
       rtvHandles[i] = DData.CPUHandle;
       RecordTransitionBarrier(pList, DData.ResData->Resource,
@@ -1038,7 +1044,9 @@ void ShaderOpTest::SetupRenderTarget(ShaderOp *pShaderOp, ID3D12Device *pDevice,
   m_CommandList.Queue = pCommandQueue;
   // Simplification - add the render target name if missing, set it up 'by hand' if not.
   if (pShaderOp->RenderTargets.empty()) {
-    pShaderOp->RenderTargets.push_back(pShaderOp->Strings.insert("RTarget"));
+    ShaderOpRenderTarget RT;
+    RT.Name = pShaderOp->Strings.insert("RTarget");
+    pShaderOp->RenderTargets.push_back(RT);
     ShaderOpResource R;
     ZeroMemory(&R, sizeof(R));
     R.Desc = pRenderTarget->GetDesc();
@@ -1117,12 +1125,15 @@ private:
   HRESULT ReadAttrUINT64(IXmlReader *pReader, LPCWSTR pAttrName, UINT64 *pValue, UINT64 defaultValue = 0);
   HRESULT ReadAttrUINT16(IXmlReader *pReader, LPCWSTR pAttrName, UINT16 *pValue, UINT16 defaultValue = 0);
   HRESULT ReadAttrUINT(IXmlReader *pReader, LPCWSTR pAttrName, UINT *pValue, UINT defaultValue = 0);
+  HRESULT ReadAttrFloat(IXmlReader* pReader, LPCWSTR pAttrName, float* pValue, float defaultValue = 0);
   void ReadElementContentStr(IXmlReader *pReader, LPCSTR *ppValue);
   void ParseDescriptor(IXmlReader *pReader, ShaderOpDescriptor *pDesc);
   void ParseDescriptorHeap(IXmlReader *pReader, ShaderOpDescriptorHeap *pHeap);
   void ParseInputElement(IXmlReader *pReader, D3D12_INPUT_ELEMENT_DESC *pInputElement);
   void ParseInputElements(IXmlReader *pReader, std::vector<D3D12_INPUT_ELEMENT_DESC> *pInputElements);
-  void ParseRenderTargets(IXmlReader *pReader, std::vector<LPCSTR> *pRenderTargets);
+  void ParseRenderTargets(IXmlReader *pReader, std::vector<ShaderOpRenderTarget> *pRenderTargets);
+  void ParseRenderTarget(IXmlReader* pReader, ShaderOpRenderTarget *pRenderTarget);
+  void ParseViewport(IXmlReader* pReader, D3D12_VIEWPORT *pViewport);
   void ParseRootValue(IXmlReader *pReader, ShaderOpRootValue *pRootValue);
   void ParseRootValues(IXmlReader *pReader, std::vector<ShaderOpRootValue> *pRootValues);
   void ParseResource(IXmlReader *pReader, ShaderOpResource *pResource);
@@ -1649,6 +1660,20 @@ HRESULT ShaderOpParser::ReadAttrUINT16(IXmlReader *pReader, LPCWSTR pAttrName, U
   return hrRead;
 }
 
+// Reads the attribute named pAttrName of the current element as a float.
+//
+// When the attribute is absent, *pValue receives defaultValue and S_FALSE is
+// returned. Otherwise the attribute text is converted with _wtof, stored in
+// *pValue, the reader is moved back to the owning element and S_OK is
+// returned. A conversion that reports ERANGE is passed to CHECK_HR as
+// E_INVALIDARG.
+HRESULT ShaderOpParser::ReadAttrFloat(IXmlReader* pReader, LPCWSTR pAttrName, float* pValue, float defaultValue) {
+  if (S_FALSE == CHECK_HR_RET(pReader->MoveToAttributeByName(pAttrName, nullptr))) {
+    *pValue = defaultValue;
+    return S_FALSE;
+  }
+  LPCWSTR pText;
+  CHECK_HR(pReader->GetValue(&pText, nullptr));
+  // errno is sticky: clear it first so a stale ERANGE left behind by an
+  // earlier, unrelated call is not mistaken for a failure of this conversion.
+  errno = 0;
+  float d = (float)_wtof(pText);
+  if (errno == ERANGE) CHECK_HR(E_INVALIDARG);
+  *pValue = d;
+  CHECK_HR(pReader->MoveToElement());
+  return S_OK;
+}
+
 void ShaderOpParser::ReadElementContentStr(IXmlReader *pReader, LPCSTR *ppValue) {
   *ppValue = nullptr;
   if (pReader->IsEmptyElement())
@@ -1854,7 +1879,7 @@ void ShaderOpParser::ParseInputElements(IXmlReader *pReader, std::vector<D3D12_I
   }
 }
 
-void ShaderOpParser::ParseRenderTargets(IXmlReader *pReader, std::vector<LPCSTR> *pRenderTargets) {
+void ShaderOpParser::ParseRenderTargets(IXmlReader *pReader, std::vector<ShaderOpRenderTarget> *pRenderTargets) {
   if (!ReadAtElementName(pReader, L"RenderTargets"))
     return;
   if (pReader->IsEmptyElement()) return;
@@ -1872,14 +1897,66 @@ void ShaderOpParser::ParseRenderTargets(IXmlReader *pReader, std::vector<LPCSTR>
       LPCWSTR pLocalName;
       CHECK_HR(pReader->GetLocalName(&pLocalName, nullptr));
       if (0 == wcscmp(pLocalName, L"RenderTarget")) {
-        LPCSTR pName;
-        CHECK_HR(ReadAttrStr(pReader, L"Name", &pName));
-        pRenderTargets->push_back(pName);
+        ShaderOpRenderTarget RT;
+        ParseRenderTarget(pReader, &RT);
+        pRenderTargets->push_back(RT);
       }
     }
   }
 }
 
+// Parses a <RenderTarget> element into *pRenderTarget: the Name attribute
+// and, when present, a <Viewport> child element.
+void ShaderOpParser::ParseRenderTarget(IXmlReader* pReader, ShaderOpRenderTarget *pRenderTarget) {
+  // Always hand back a well-defined viewport. Callers pass a freshly declared
+  // ShaderOpRenderTarget, and a render target without a <Viewport> child
+  // would otherwise keep indeterminate values that are later compared
+  // against 0 to decide whether the full render target should be used.
+  ZeroMemory(&pRenderTarget->Viewport, sizeof(pRenderTarget->Viewport));
+
+  if (!ReadAtElementName(pReader, L"RenderTarget"))
+    return;
+
+  CHECK_HR(ReadAttrStr(pReader, L"Name", &pRenderTarget->Name));
+
+  // <RenderTarget ... /> has no children to scan.
+  if (pReader->IsEmptyElement()) return;
+
+  UINT startDepth;
+  XmlNodeType nt;
+  CHECK_HR(pReader->GetDepth(&startDepth));
+  for (;;) {
+    UINT depth;
+    CHECK_HR(pReader->Read(&nt));
+    CHECK_HR(pReader->GetDepth(&depth));
+    // Stop at the EndElement node reported at startDepth + 1 (presumably the
+    // closing tag of this <RenderTarget>).
+    if (nt == XmlNodeType_EndElement && depth == startDepth + 1)
+      return;
+    if (nt == XmlNodeType_Element) {
+      LPCWSTR pLocalName;
+      CHECK_HR(pReader->GetLocalName(&pLocalName, nullptr));
+      // Only <Viewport> children are interpreted; other elements are ignored.
+      if (0 == wcscmp(pLocalName, L"Viewport")) {
+        ParseViewport(pReader, &pRenderTarget->Viewport);
+      }
+    }
+  }
+}
+
+// Fills *pViewport from the attributes of a <Viewport> element and then
+// consumes the remainder of the element. Attributes that are not present
+// take ReadAttrFloat's default value.
+// NOTE(review): that default is 0 for MaxDepth as well, whereas the fallback
+// viewport built in RunCommandList uses MaxDepth = 1.0f - confirm intended.
+void ShaderOpParser::ParseViewport(IXmlReader* pReader, D3D12_VIEWPORT *pViewport) {
+  if (!ReadAtElementName(pReader, L"Viewport"))
+    return;
+
+  // Attribute name -> destination field, read in declaration order.
+  const struct {
+    LPCWSTR AttrName;
+    float *pField;
+  } viewportAttrs[] = {
+    { L"TopLeftX", &pViewport->TopLeftX },
+    { L"TopLeftY", &pViewport->TopLeftY },
+    { L"Width",    &pViewport->Width },
+    { L"Height",   &pViewport->Height },
+    { L"MinDepth", &pViewport->MinDepth },
+    { L"MaxDepth", &pViewport->MaxDepth },
+  };
+  for (const auto &attr : viewportAttrs) {
+    CHECK_HR(ReadAttrFloat(pReader, attr.AttrName, attr.pField));
+  }
+
+  // <Viewport ... /> carries nothing beyond its attributes.
+  if (pReader->IsEmptyElement()) return;
+
+  // Skip every node up to and including the EndElement node reported at
+  // one level below the depth of the <Viewport> start tag.
+  UINT elementDepth;
+  CHECK_HR(pReader->GetDepth(&elementDepth));
+  const UINT closingDepth = elementDepth + 1;
+  XmlNodeType nodeType;
+  UINT nodeDepth;
+  do {
+    CHECK_HR(pReader->Read(&nodeType));
+    CHECK_HR(pReader->GetDepth(&nodeDepth));
+  } while (nodeType != XmlNodeType_EndElement || nodeDepth != closingDepth);
+}
+
 void ShaderOpParser::ParseRootValue(IXmlReader *pReader, ShaderOpRootValue *pRootValue) {
   if (!ReadAtElementName(pReader, L"RootValue"))
     return;

+ 8 - 1
tools/clang/unittests/HLSL/ShaderOpTest.h

@@ -167,6 +167,13 @@ public:
   UINT    Index;      // Explicit index in root table.
 };
 
+// Use this class to represent a render target and its viewport.
+// Members are value-initialized so that an instance which only has its Name
+// assigned still carries an all-zero viewport rather than indeterminate data.
+class ShaderOpRenderTarget {
+public:
+  LPCSTR             Name = nullptr;  // Render target name
+  D3D12_VIEWPORT     Viewport = {};   // Viewport to use; if Width or Height is 0 the full render target is used
+};
+
 // Use this class to hold all information needed for a Draw/Dispatch call.
 class ShaderOp {
 public:
@@ -176,7 +183,7 @@ public:
   std::vector<ShaderOpDescriptorHeap> DescriptorHeaps;
   std::vector<ShaderOpShader> Shaders;
   std::vector<ShaderOpRootValue> RootValues;
-  std::vector<LPCSTR> RenderTargets;
+  std::vector<ShaderOpRenderTarget> RenderTargets;
   LPCSTR Name = nullptr;
   LPCSTR RootSignature = nullptr;
   bool UseWarpDevice = true;