O3DE
/
DirectXShaderCompiler
Mirror von https://github.com/o3de/DirectXShaderCompiler


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002
							# This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details
###############################################################################
# This file contains driver test information for DXIL operations              #
###############################################################################

from hctdb import *
import xml.etree.ElementTree as ET
import argparse

parser = argparse.ArgumentParser(description="contains information about dxil op test cases.")
parser.add_argument('mode', help="'gen-xml' or 'info'")

g_db_dxil = None

def get_db_dxil():
    global g_db_dxil
    if g_db_dxil is None:
        g_db_dxil = db_dxil()
    return g_db_dxil

"""
This class represents a test case for instructions for driver testings

DXIL instructions and test cases are two disjoint sets where each instruction can have multiple test cases,
and each test case can cover different DXIL instructions. So these two sets form a bipartite graph.

test_name: Test case identifier. Must be unique for each test case.
insts: dxil instructions
validation_type: validation type for test
    epsilon: absolute difference check
    ulp: units in last place check
    relative: relative error check
validation_tolerance: tolerance value for a given test
inputs: testing inputs
outputs: expected outputs for each input
shader_target: target for testing
shader_text: hlsl file that is used for testing dxil op
"""

class test_case(object):
    def __init__(self, test_name, insts, validation_type, validation_tolerance,
                 input_lists, output_lists, shader_target, shader_text, **kwargs):
        self.test_name = test_name
        self.validation_type = validation_type
        self.validation_tolerance = validation_tolerance
        self.input_lists = input_lists
        self.output_lists = output_lists
        self.shader_target = shader_target
        self.shader_text = shader_text
        self.insts = insts # list of instructions each test case cover
        self.warp_version = -1 # known warp version that works
        self.shader_arguments = ""
        for k,v in kwargs.items():
            setattr(self, k, v)

# Wrapper for each DXIL instruction
class inst_node(object):
    def __init__(self, inst):
        self.inst = inst
        self.test_cases = []  # list of test_case

def add_test_case(test_name, inst_names, validation_type, validation_tolerance,
                  input_lists, output_lists, shader_target, shader_text, **kwargs):
    insts = []
    for inst_name in inst_names:
        assert (inst_name in g_instruction_nodes)
        insts += [g_instruction_nodes[inst_name].inst]
    case = test_case(test_name, insts, validation_type,
                    validation_tolerance, input_lists, output_lists,
                    shader_target, shader_text, **kwargs)
    g_test_cases[test_name] = case
    # update instruction nodes
    for inst_name in inst_names:
        g_instruction_nodes[inst_name].test_cases += [case]

def add_test_case_int(test_name, inst_names, validation_type, validation_tolerance,
                  input_lists, output_lists, shader_key, shader_op_name, **kwargs):
    add_test_case(test_name, inst_names, validation_type, validation_tolerance,
                  input_lists, output_lists, "cs_6_0", get_shader_text(shader_key, shader_op_name), **kwargs)
    input_lists_16, output_lists_16 = input_lists, output_lists
    if "input_16" in kwargs:
        input_lists_16 = kwargs["input_16"]
    if "output_16" in kwargs:
        output_lists_16 = kwargs["output_16"]
    add_test_case(test_name + "Bit16", inst_names, validation_type, validation_tolerance,
                  input_lists_16, output_lists_16, "cs_6_2", get_shader_text(shader_key.replace("int","int16_t"), shader_op_name),
                  shader_arguments="-enable-16bit-types", **kwargs)

def add_test_case_float_half(test_name, inst_names, validation_type, validation_tolerance,
                  float_input_lists, float_output_lists, shader_key, shader_op_name, **kwargs):
    add_test_case(test_name, inst_names, validation_type, validation_tolerance,
                  float_input_lists, float_output_lists, "cs_6_0", get_shader_text(shader_key, shader_op_name), **kwargs)
    # if half test cases are different from float input lists, use those lists instead for half testings
    half_input_lists, half_output_lists, half_validation_type, half_validation_tolerance = float_input_lists, float_output_lists, validation_type, validation_tolerance
    if "half_inputs" in kwargs:
        half_input_lists = kwargs["half_inputs"]
    if "half_outputs" in kwargs:
        half_output_lists = kwargs["half_outputs"]
    if "half_validation_type" in kwargs:
        half_validation_type = kwargs["half_validation_type"]
    if "half_validation_tolerance" in kwargs:
        half_validation_tolerance = kwargs["half_validation_tolerance"]
    # skip relative error test check for half for now
    if validation_type != "Relative":
        add_test_case(test_name + "Half", inst_names, half_validation_type, half_validation_tolerance,
                    half_input_lists, half_output_lists, "cs_6_2",
                    get_shader_text(shader_key.replace("float","half"), shader_op_name), shader_arguments="-enable-16bit-types", **kwargs)

def add_test_case_denorm(test_name, inst_names, validation_type, validation_tolerance, input_lists,
                    output_lists_ftz, output_lists_preserve, shader_target, shader_text, **kwargs):
    add_test_case(test_name + "FTZ", inst_names, validation_type, validation_tolerance, input_lists,
                  output_lists_ftz, shader_target, shader_text, shader_arguments="-denorm ftz")
    add_test_case(test_name + "Preserve", inst_names, validation_type, validation_tolerance, input_lists,
                  output_lists_preserve, shader_target, shader_text, shader_arguments="-denorm preserve")
    # we can expect the same output for "any" and "preserve" mode. We should make sure that for validation zero are accepted outputs for denormal outputs.
    add_test_case(test_name + "Any", inst_names, validation_type, validation_tolerance, input_lists,
                  output_lists_preserve + output_lists_ftz, shader_target, shader_text, shader_arguments="-denorm any")


g_shader_texts = {
    "unary int": ''' struct SUnaryIntOp {
                int input;
                int output;
            };
            RWStructuredBuffer<SUnaryIntOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryIntOp l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary int16_t": ''' struct SUnaryInt16Op {
                int16_t input;
                int16_t output;
            };
            RWStructuredBuffer<SUnaryInt16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryInt16Op l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary uint": ''' struct SUnaryUintOp {
                uint input;
                uint output;
            };
            RWStructuredBuffer<SUnaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryUintOp l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary uint16_t": ''' struct SUnaryUint16Op {
                uint16_t input;
                uint16_t output;
            };
            RWStructuredBuffer<SUnaryUint16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryUint16Op l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary float": ''' struct SUnaryFPOp {
                float input;
                float output;
            };
            RWStructuredBuffer<SUnaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryFPOp l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary float bool": ''' struct SUnaryFPOp {
                float input;
                float output;
            };
            RWStructuredBuffer<SUnaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryFPOp l = g_buf[GI];
                if (%s(l.input))
                    l.output = 1;
                else
                    l.output = 0;
                g_buf[GI] = l;
            };''',

    "unary half": ''' struct SUnaryFPOp {
                float16_t input;
                float16_t output;
            };
            RWStructuredBuffer<SUnaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryFPOp l = g_buf[GI];
                l.output = %s(l.input);
                g_buf[GI] = l;
            };''',

    "unary half bool": ''' struct SUnaryFPOp {
                float16_t input;
                float16_t output;
            };
            RWStructuredBuffer<SUnaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SUnaryFPOp l = g_buf[GI];
                if (%s(l.input))
                    l.output = 1;
                else
                    l.output = 0;
                g_buf[GI] = l;
            };''',

     "binary int": ''' struct SBinaryIntOp {
                int input1;
                int input2;
                int output1;
                int output2;
            };
            RWStructuredBuffer<SBinaryIntOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryIntOp l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

     "binary int16_t": ''' struct SBinaryInt16Op {
                int16_t input1;
                int16_t input2;
                int16_t output1;
                int16_t output2;
            };
            RWStructuredBuffer<SBinaryInt16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryInt16Op l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

    "binary int call": ''' struct SBinaryIntOp {
                int input1;
                int input2;
                int output1;
                int output2;
            };
            RWStructuredBuffer<SBinaryIntOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryIntOp l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

    "binary int16_t call": ''' struct SBinaryInt16Op {
                int16_t input1;
                int16_t input2;
                int16_t output1;
                int16_t output2;
            };
            RWStructuredBuffer<SBinaryInt16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryInt16Op l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

    "binary uint": ''' struct SBinaryUintOp {
                uint input1;
                uint input2;
                uint output1;
                uint output2;
            };
            RWStructuredBuffer<SBinaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUintOp l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

    "binary uint16_t": ''' struct SBinaryUint16Op {
                uint16_t input1;
                uint16_t input2;
                uint16_t output1;
                uint16_t output2;
            };
            RWStructuredBuffer<SBinaryUint16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUint16Op l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

    "binary uint call": ''' struct SBinaryUintOp {
                uint input1;
                uint input2;
                uint output1;
                uint output2;
            };
            RWStructuredBuffer<SBinaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUintOp l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

    "binary uint16_t call": ''' struct SBinaryUint16Op {
                uint16_t input1;
                uint16_t input2;
                uint16_t output1;
                uint16_t output2;
            };
            RWStructuredBuffer<SBinaryUint16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUint16Op l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

    "binary float": ''' struct SBinaryFPOp {
                float input1;
                float input2;
                float output1;
                float output2;
            };
            RWStructuredBuffer<SBinaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryFPOp l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

    "binary float call": ''' struct SBinaryFPOp {
                float input1;
                float input2;
                float output1;
                float output2;
            };
            RWStructuredBuffer<SBinaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryFPOp l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

    "binary half": ''' struct SBinaryFPOp {
                half input1;
                half input2;
                half output1;
                half output2;
            };
            RWStructuredBuffer<SBinaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryFPOp l = g_buf[GI];
                l.output1 = l.input1 %s l.input2;
                g_buf[GI] = l;
            };''',

    "binary half call": ''' struct SBinaryFPOp {
                half input1;
                half input2;
                half output1;
                half output2;
            };
            RWStructuredBuffer<SBinaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryFPOp l = g_buf[GI];
                l.output1 = %s(l.input1,l.input2);
                g_buf[GI] = l;
            };''',

     "tertiary int": ''' struct STertiaryIntOp {
                int input1;
                int input2;
                int input3;
                int output;
            };
            RWStructuredBuffer<STertiaryIntOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryIntOp l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',

     "tertiary int16_t": ''' struct STertiaryInt16Op {
                int16_t input1;
                int16_t input2;
                int16_t input3;
                int16_t output;
            };
            RWStructuredBuffer<STertiaryInt16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryInt16Op l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',

     "tertiary uint": ''' struct STertiaryUintOp {
                uint input1;
                uint input2;
                uint input3;
                uint output;
            };
            RWStructuredBuffer<STertiaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryUintOp l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',

     "tertiary uint16_t": ''' struct STertiaryUint16Op {
                uint16_t input1;
                uint16_t input2;
                uint16_t input3;
                uint16_t output;
            };
            RWStructuredBuffer<STertiaryUint16Op> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryUint16Op l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',

    "tertiary float": ''' struct STertiaryFloatOp {
                float input1;
                float input2;
                float input3;
                float output;
            };
            RWStructuredBuffer<STertiaryFloatOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryFloatOp l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',
    'tertiary half': ''' struct STertiaryHalfOp {
                half input1;
                half input2;
                half input3;
                half output;
            };
            RWStructuredBuffer<STertiaryHalfOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                STertiaryHalfOp l = g_buf[GI];
                l.output = %s(l.input1, l.input2, l.input3);
                g_buf[GI] = l;
            };''',
    "wave op int" :''' struct PerThreadData {
                        uint firstLaneId;
                        uint laneIndex;
                        int mask;
                        int input;
                        int output;
                    };
                    RWStructuredBuffer<PerThreadData> g_sb : register(u0);
                    [numthreads(8,12,1)]
                    void main(uint GI : SV_GroupIndex) {
                        PerThreadData pts = g_sb[GI];
                        pts.firstLaneId = WaveReadLaneFirst(GI);
                        pts.laneIndex = WaveGetLaneIndex();
                        if (pts.mask != 0) {
                            pts.output = %s(pts.input);
                        }
                        else {
                            pts.output = %s(pts.input);
                        }
                        g_sb[GI] = pts;
                    };''',
    "wave op uint" :''' struct PerThreadData {
                        uint firstLaneId;
                        uint laneIndex;
                        int mask;
                        uint input;
                        uint output;
                    };
                    RWStructuredBuffer<PerThreadData> g_sb : register(u0);
                    [numthreads(8,12,1)]
                    void main(uint GI : SV_GroupIndex) {
                        PerThreadData pts = g_sb[GI];
                        pts.firstLaneId = WaveReadLaneFirst(GI);
                        pts.laneIndex = WaveGetLaneIndex();
                        if (pts.mask != 0) {
                            pts.output = %s(pts.input);
                        }
                        else {
                            pts.output = %s(pts.input);
                        }
                        g_sb[GI] = pts;
                    };''',
    "wave op int count": ''' struct PerThreadData {
                        uint firstLaneId;
                        uint laneIndex;
                        int mask;
                        int input;
                        int output;
                    };
                    RWStructuredBuffer<PerThreadData> g_sb : register(u0);
                    [numthreads(8,12,1)]
                    void main(uint GI : SV_GroupIndex) {
                        PerThreadData pts = g_sb[GI];
                        pts.firstLaneId = WaveReadLaneFirst(GI);
                        pts.laneIndex = WaveGetLaneIndex();
                        if (pts.mask != 0) {
                            pts.output = %s(pts.input > 3);
                        }
                        else {
                            pts.output = %s(pts.input > 3);
                        }
                        g_sb[GI] = pts;
                    };''',
    "wave op multi prefix int": ''' struct ThreadData {
                    uint key;
                    uint firstLaneId;
                    uint laneId;
                    uint mask;
                    int value;
                    int result;
                };
                RWStructuredBuffer<ThreadData> g_buffer : register(u0);
                [numthreads(8, 12, 1)]
                void main(uint id : SV_GroupIndex) {
                    ThreadData data = g_buffer[id];
                    data.firstLaneId = WaveReadLaneFirst(id);
                    data.laneId = WaveGetLaneIndex();
                    if (data.mask != 0) {
                        uint4 mask = WaveMatch(data.key);
                        data.result = %s(data.value, mask);
                    } else {
                        uint4 mask = WaveMatch(data.key);
                        data.result = %s(data.value, mask);
                    }
                    g_buffer[id] = data;
                }''',
    "wave op multi prefix uint": ''' struct ThreadData {
                    uint key;
                    uint firstLaneId;
                    uint laneId;
                    uint mask;
                    uint value;
                    uint result;
                };
                RWStructuredBuffer<ThreadData> g_buffer : register(u0);
                [numthreads(8, 12, 1)]
                void main(uint id : SV_GroupIndex) {
                    ThreadData data = g_buffer[id];
                    data.firstLaneId = WaveReadLaneFirst(id);
                    data.laneId = WaveGetLaneIndex();
                    if (data.mask != 0) {
                        uint4 mask = WaveMatch(data.key);
                        data.result = %s(data.value, mask);
                    } else {
                        uint4 mask = WaveMatch(data.key);
                        data.result = %s(data.value, mask);
                    }
                    g_buffer[id] = data;
                }'''
}

def get_shader_text(op_type, op_call):
    assert(op_type in g_shader_texts)
    if op_type.startswith("wave op"):
        return g_shader_texts[op_type] % (op_call, op_call)
    return g_shader_texts[op_type] % (op_call)

g_denorm_tests = ["FAddDenormAny", "FAddDenormFTZ", "FAddDenormPreserve",
                "FSubDenormAny", "FSubDenormFTZ", "FSubDenormPreserve",
                "FMulDenormAny", "FMulDenormFTZ", "FMulDenormPreserve",
                "FDivDenormAny", "FDivDenormFTZ", "FDivDenormPreserve",
                "FMadDenormAny", "FMadDenormFTZ", "FMadDenormPreserve",
                "FAbsDenormAny", "FAbsDenormFTZ", "FAbsDenormPreserve",
                "FMinDenormAny", "FMinDenormFTZ", "FMinDenormPreserve",
                "FMaxDenormAny", "FMaxDenormFTZ", "FMaxDenormPreserve"]
# This is a collection of test case for driver tests per instruction
# Warning: For test cases, when you want to pass in signed 32-bit integer,
# make sure to pass in negative numbers with decimal values instead of hexadecimal representation.
# For some reason, TAEF is not handling them properly.
# For half values, hex is preferable since the test framework will read string as float values
# and convert them to float16, possibly losing precision. The test will read hex values as it is.
def add_test_cases():
    nan = float('nan')
    p_inf = float('inf')
    n_inf = float('-inf')
    p_denorm = float('1e-38')
    n_denorm = float('-1e-38')
    # Unary Float
    add_test_case_float_half('Sin', ['Sin'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-314.16',
        '314.16'
    ]], [[
        'NaN', 'NaN', '-0', '-0', '0', '0', 'NaN', '-0.0007346401',
        '0.0007346401'
    ]], "unary float", "sin", half_validation_tolerance=0.003, half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', 
        '0.6279297', '1.255859', '1.884766', '2.511719', '3.140625',
        '3.769531', '4.398438', '5.023438', '5.652344', '6.281250'
    ]], half_outputs=[[
        'NaN', 'NaN', '-0', '-0', '0', '0', 'NaN', 
        '0.58747065', '0.95081574', '0.95111507', '0.58904284', '0.00096773',
        '-0.58747751', '-0.95112079', '-0.95201313', '-0.58982444', '-0.00193545'
    ]])
    add_test_case_float_half('Cos', ['Cos'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-314.16',
        '314.16'
    ]], [[
        'NaN', 'NaN', '1.0', '1.0', '1.0', '1.0', 'NaN', '0.99999973015',
        '0.99999973015'
    ]], "unary float", "cos", half_validation_tolerance=0.003, half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', 
        '0.6279297', '1.255859', '1.884766', '2.511719', '3.140625',
        '3.769531', '4.398438', '5.023438', '5.652344', '6.281250'
    ]], half_outputs=[[
        'NaN', 'NaN', '1.0', '1.0', '1.0', '1.0', 'NaN', 
        '0.80924553', '0.30975693', '-0.30883664', '-0.80810183', '-0.99999952',
        '-0.80924052', '-0.30881903', '0.30605716', '0.80753154', '0.99999809'
    ]])
    add_test_case_float_half('Tan', ['Tan'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-314.16',
        '314.16'
    ]], [[
        'NaN', 'NaN', '-0.0', '-0.0', '0.0', '0.0', 'NaN', '-0.000735',
        '0.000735'
    ]], "unary float", "tan", half_validation_tolerance=0.016, half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', 
        '0.6279297', '1.255859', '1.884766', '2.511719', '3.140625',
        '3.769531', '4.398438', '5.652344', '6.281250'
    ]], half_outputs=[[
        'NaN', 'NaN', '-0', '-0', '0', '0', 'NaN', 
        '0.72594857', '3.06955433', '-3.07967043', '-0.72892153', '-0.00096773', 
        '0.72596157', '3.07986474', '-0.7304042', '-0.00193546'
    ]])
    add_test_case_float_half('Hcos', ['Hcos'], 'Epsilon', 0.0008,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1']], [[
            'NaN', 'Inf', '1.0', '1.0', '1.0', '1.0', 'Inf', '1.543081',
            '1.543081'
        ]], "unary float", "cosh", half_validation_type='ulp', half_validation_tolerance=2)
    add_test_case_float_half('Hsin', ['Hsin'], 'Epsilon', 0.0008,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1']], [[
            'NaN', '-Inf', '0.0', '0.0', '0.0', '0.0', 'Inf', '1.175201',
            '-1.175201'
        ]], "unary float", "sinh")
    add_test_case_float_half('Htan', ['Htan'], 'Epsilon', 0.0008,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1']], [[
            'NaN', '-1', '-0.0', '-0.0', '0.0', '0.0', '1', '0.761594',
            '-0.761594'
        ]], "unary float", "tanh", warp_version=16202)
    add_test_case_float_half('Acos', ['Acos'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1', '1.5',
        '-1.5'
    ]], [[
        'NaN', 'NaN', '1.570796', '1.570796', '1.570796', '1.570796', 'NaN',
        '0', '3.1415926', 'NaN', 'NaN'
    ]], "unary float", "acos")
    add_test_case_float_half('Asin', ['Asin'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1', '1.5',
        '-1.5'
    ]], [[
        'NaN', 'NaN', '0.0', '0.0', '0.0', '0.0', 'NaN', '1.570796',
        '-1.570796', 'NaN', 'NaN'
    ]], "unary float", "asin")
    add_test_case_float_half('Atan', ['Atan'], 'Epsilon', 0.0008,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1', '-1']], [[
            'NaN', '-1.570796', '0.0', '0.0', '0.0', '0.0', '1.570796',
            '0.785398163', '-0.785398163'
        ]], "unary float", "atan", warp_version=16202)
    add_test_case_float_half('Exp', ['Exp'], 'Relative', 21,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '10']],
        [['NaN', '0', '1', '1', '1', '1', 'Inf', '0.367879441', '22026.46579']
         ], "unary float", "exp")
    add_test_case_float_half('Frc', ['Frc'], 'Epsilon', 0.0008, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '2.718280',
        '1000.599976', '-7.389'
    ]], [[
        'NaN', 'NaN', '0', '0', '0', '0', 'NaN', '0', '0.718280', '0.599976',
        '0.611'
    ]], "unary float", "frac",
        half_inputs=[['NaN', '-Inf', '0x03FF', '-0', '0', 'Inf', '-1', '2.719',
        '1000.5', '0xC764']],
        half_outputs=[[
         'NaN', 'NaN', '0x03FF', '0', '0', 'NaN', '0', '0.719', '0.5',
        '0x38E1']])
    add_test_case_float_half('Log', ['Log'], 'Relative', 21, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1',
        '2.718281828', '7.389056', '100'
    ]], [[
        'NaN', 'NaN', '-Inf', '-Inf', '-Inf', '-Inf', 'Inf', 'NaN', '1.0',
        '1.99999998', '4.6051701'
    ]],"unary float", "log", half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1',
        '2.719', '7.39', '100'
    ]], half_outputs=[[
        'NaN', 'NaN', '-Inf', '-Inf', '-Inf', '-Inf', 'Inf', 'NaN', '1.0',
        '2', '4.605'
    ]])
    add_test_case_float_half('Sqrt', ['Sqrt'], 'ulp', 1, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '2',
        '16.0', '256.0'
    ]], [[
        'NaN', 'NaN', '-0', '-0', '0', '0', 'Inf', 'NaN', '1.41421356237',
        '4.0', '16.0'
    ]], "unary float", "sqrt",
    half_inputs=[['NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '2', '16.0', '256.0']],
    half_outputs=[['NaN', 'NaN', 'NaN', '-0', '0', '0x1FFF', 'Inf', 'NaN', '1.41421', '4.0', '16.0']])
    add_test_case_float_half('Rsqrt', ['Rsqrt'], 'ulp', 1, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '16.0',
        '256.0', '65536.0'
    ]], [[
        'NaN', 'NaN', '-Inf', '-Inf', 'Inf', 'Inf', '0', 'NaN', '0.25',
        '0.0625', '0.00390625'
    ]], "unary float", "rsqrt", half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '16.0',
        '256.0', '0x7bff'
    ]], half_outputs=[[
        'NaN', 'NaN', 'NaN', '-Inf', 'Inf', '0x5801', '0', 'NaN', '0.25',
        '0.0625', '0x1C00'
    ]])
    add_test_case_float_half('Round_ne', ['Round_ne'], 'Epsilon', 0, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '11.5', '-10.0', '-10.4', '-10.5', '-10.6'
    ]], [[
        'NaN', '-Inf', '-0', '-0', '0', '0', 'Inf', '10.0', '10.0', '10.0',
        '11.0', '12.0', '-10.0', '-10.0', '-10.0', '-11.0'
    ]], "unary float", "round")
    add_test_case_float_half('Round_ni', ['Round_ni'], 'Epsilon', 0, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '-10.0', '-10.4', '-10.5', '-10.6'
    ]], [[
        'NaN', '-Inf', '-0', '-0', '0', '0', 'Inf', '10.0', '10.0', '10.0',
        '10.0', '-10.0', '-11.0', '-11.0', '-11.0'
    ]], "unary float", "floor", half_inputs=[[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '-10.0', '-10.4', '-10.5', '-10.6'
    ]], half_outputs=[[
        'NaN', '-Inf', '-1', '-0', '0', '0', 'Inf', '10.0', '10.0', '10.0',
        '10.0', '-10.0', '-11.0', '-11.0', '-11.0'
    ]])
    add_test_case_float_half('Round_pi', ['Round_pi'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '-10.0', '-10.4', '-10.5', '-10.6']],
        [['NaN', '-Inf', '-0', '-0', '0', '0', 'Inf', '10.0', '11.0', '11.0',
        '11.0', '-10.0', '-10.0', '-10.0', '-10.0']], "unary float", "ceil",
        half_inputs=[['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '-10.0', '-10.4', '-10.5', '-10.6']],
        half_outputs=[['NaN', '-Inf', '-0', '-0', '0', '1', 'Inf', '10.0', '11.0', '11.0',
        '11.0', '-10.0', '-10.0', '-10.0', '-10.0']])
    add_test_case_float_half('Round_z', ['Round_z'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '10.0', '10.4',
        '10.5', '10.6', '-10.0', '-10.4', '-10.5', '-10.6']],
        [['NaN', '-Inf', '-0', '-0', '0', '0', 'Inf', '10.0', '10.0', '10.0',
        '10.0', '-10.0', '-10.0', '-10.0', '-10.0']], "unary float", "trunc")
    add_test_case_float_half('IsNaN', ['IsNaN'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0']
         ], [['1', '0', '0', '0', '0', '0', '0', '0', '0']], "unary float bool", "isnan")
    add_test_case_float_half('IsInf', ['IsInf'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0']
         ], [['0', '1', '0', '0', '0', '0', '1', '0', '0']], "unary float bool", "isinf")
    add_test_case_float_half('IsFinite', ['IsFinite'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0']
         ], [['0', '0', '1', '1', '1', '1', '0', '1', '1']], "unary float bool", "isfinite", warp_version=16202)
    add_test_case_float_half('FAbs', ['FAbs'], 'Epsilon', 0,
        [['NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0']
         ], [['NaN', 'Inf', 'denorm', '0', '0', 'denorm', 'Inf', '1', '1']], "unary float", "abs")
    # Binary Float
    add_test_case('FMin', ['FMin','FMax'], 'epsilon', 0, [[
        '-inf', '-inf', '-inf', '-inf', 'inf', 'inf', 'inf', 'inf', 'NaN',
        'NaN', 'NaN', 'NaN', '1.0', '1.0', '-1.0', '-1.0', '1.0'
    ], [
        '-inf', 'inf', '1.0', 'NaN', '-inf', 'inf', '1.0', 'NaN', '-inf',
        'inf', '1.0', 'NaN', '-inf', 'inf', '1.0', 'NaN', '-1.0'
    ]], [[
        '-inf', '-inf', '-inf', '-inf', '-inf', 'inf', '1.0', 'inf', '-inf',
        'inf', '1.0', 'NaN', '-inf', '1.0', '-1.0', '-1.0', '-1.0'
    ], [
        '-inf', 'inf', '1.0', '-inf', 'inf', 'inf', 'inf', 'inf', '-inf',
        'inf', '1.0', 'NaN', '1.0', 'inf', '1.0', '-1.0', '1.0'
    ]], 'cs_6_0', ''' struct SBinaryFPOp {
                float input1;
                float input2;
                float output1;
                float output2;
            };
            RWStructuredBuffer<SBinaryFPOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryFPOp l = g_buf[GI];
                l.output1 = min(l.input1, l.input2);
                l.output2 = max(l.input1, l.input2);
                g_buf[GI] = l;
            };''')
    add_test_case('FMinHalf', ['FMin','FMax'], 'epsilon', 0, [[
        '-inf', '-inf', '-inf', '-inf', 'inf', 'inf', 'inf', 'inf', 'NaN',
        'NaN', 'NaN', 'NaN', '1.0', '1.0', '-1.0', '-1.0', '1.0'
    ], [
        '-inf', 'inf', '1.0', 'NaN', '-inf', 'inf', '1.0', 'NaN', '-inf',
        'inf', '1.0', 'NaN', '-inf', 'inf', '1.0', 'NaN', '-1.0'
    ]], [[
        '-inf', '-inf', '-inf', '-inf', '-inf', 'inf', '1.0', 'inf', '-inf',
        'inf', '1.0', 'NaN', '-inf', '1.0', '-1.0', '-1.0', '-1.0'
    ], [
        '-inf', 'inf', '1.0', '-inf', 'inf', 'inf', 'inf', 'inf', '-inf',
        'inf', '1.0', 'NaN', '1.0', 'inf', '1.0', '-1.0', '1.0'
    ]], 'cs_6_2', ''' struct SBinaryHalfOp {
                half input1;
                half input2;
                half output1;
                half output2;
            };
            RWStructuredBuffer<SBinaryHalfOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryHalfOp l = g_buf[GI];
                l.output1 = min(l.input1, l.input2);
                l.output2 = max(l.input1, l.input2);
                g_buf[GI] = l;
            };''', shader_arguments="-enable-16bit-types")
    add_test_case_float_half('FAdd', ['FAdd'], 'ulp', 1, [['-1.0', '1.0', '32.5', '1.0000001000'],['4', '5.5', '334.7', '0.5000001000']], [['3.0', '6.5', '367.2', '1.5000002000']],
    "binary float", "+")
    add_test_case_float_half('FSub', ['FSub'], 'ulp', 1, [['-1.0', '5.5', '32.5', '1.0000001000'],['4', '1.25', '334.7', '0.5000001000']], [['-5', '4.25', '-302.2', '0.5000']],
    "binary float", "-")
    add_test_case_float_half('FMul', ['FMul'], 'ulp', 1, [['-1.0', '5.5', '1.0000001'],['4', '1.25', '2.0']], [['-4.0', '6.875', '2.0000002']],
    "binary float", "*")
    add_test_case_float_half('FDiv', ['FDiv'], 'ulp', 1, [['-1.0', '5.5', '1.0000001'],['4', '1.25', '2.0']], [['-0.25', '4.4', '0.50000006']],
    "binary float", "/")

    # Denorm Binary Float
    add_test_case_denorm('FAddDenorm', ['FAdd'], 'ulp', 1,
    [['0x007E0000', '0x00200000', '0x007E0000', '0x007E0000'],['0x007E0000','0x00200000', '0x807E0000', '0x800E0000']],
    [['0','0', '0', '0']],
    [['0x00FC0000','0x00400000', '0', '0x00700000']],
    'cs_6_2', get_shader_text("binary float", "+"))
    add_test_case_denorm('FSubDenorm', ['FSub'], 'ulp', 1,
    [['0x007E0000', '0x007F0000', '0x00FF0000', '0x007A0000'],['0x007E0000', '0x807F0000', '0x00800000', '0']],
    [['0x0', '0', '0', '0']],
    [['0x0', '0x00FE0000', '0x007F0000', '0x007A0000']],
    'cs_6_2', get_shader_text("binary float", "-"))
    add_test_case_denorm('FDivDenorm', ['FDiv'], 'ulp', 1,
    [['0x007F0000', '0x807F0000', '0x20000000', '0x00800000'],['1', '4', '0x607F0000', '0x40000000']],
    [['0', '0', '0', '0']],
    [['0x007F0000', '0x801FC000', '0x00101010', '0x00400000']],
    'cs_6_2', get_shader_text("binary float", "/"))
    add_test_case_denorm('FMulDenorm', ['FMul'], 'ulp', 1,
    [['0x00000300', '0x007F0000', '0x007F0000', '0x001E0000', '0x00000300'],['128', '1', '0x007F0000', '20', '0x78000000']],
    [['0', '0', '0', '0', '0']],
    [['0x00018000','0x007F0000', '0', '0x01960000', '0x32400000']],
    'cs_6_2', get_shader_text("binary float", "*"))
    # Tertiary Float
    add_test_case_float_half('FMad', ['FMad'], 'ulp', 1, [[
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0',
        '0', '1', '1.5'
    ], [
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0',
        '0', '1', '10'
    ], [
        'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '1.0', '-1.0',
        '1', '0', '-5.5'
    ]], [['NaN', 'NaN', '0', '0', '0', '0', 'Inf', '2', '0', '1', '1', '9.5']],
                 "tertiary float", "mad",
    half_inputs=[[
        'NaN', '-Inf', '0x03FF', '-0', '0', 'Inf', '1.0', '-1.0',
        '0', '1', '1.5'
    ], [
        'NaN', '-Inf', '1', '-0', '0', 'Inf', '1.0', '-1.0',
        '0', '1', '10'
    ], [
        'NaN', '-Inf', '0x03FF', '-0', '0', 'Inf', '1.0', '-1.0',
        '1', '0', '-5.5'
    ]],
    half_outputs=[['NaN', 'NaN', '0x07FE', '0', '0', 'Inf', '2', '0', '1', '1', '9.5']])
    # Denorm Tertiary Float
    add_test_case_denorm('FMadDenorm', ['FMad'], 'ulp', 1,
    [['0x80780000', '0x80780000', '0x00780000'],
     ['1', '2', '2'],
     ['0x80780000', '0x00800000', '0x00800000']],
    [['0', '0x00800000', '0x00800000']],
     [['0x80F00000', '0x80700000', '0x01380000']],
                  'cs_6_2', get_shader_text("tertiary float", "mad"))

    # Unary Int
    int8_min, int8_max = '-128', '127'
    int16_min, int16_max = '-32768', '32767'
    int32_min, int32_max = '-2147483648', '2147483647'
    uint16_max = '65535'
    uint32_max = '4294967295'
    add_test_case_int('Bfrev', ['Bfrev'], 'Epsilon', 0, [[
        int32_min, '-65536', '-8', '-1', '0', '1', '8', '65536',
        int32_max
    ]], [[
        '1', '65535', '536870911', '-1', '0', int32_min, '268435456',
        '32768', '-2'
    ]], "unary int", "reversebits",
        input_16=[[int16_min, '-256', '-8', '-1', '0', '1', '8', '256', int16_max]],
        output_16=[['1', '255', '8191', '-1', '0', int16_min, '4096', '128', '-2']])
    # firstbit_shi (s for signed) returns the
    # first 0 from the MSB if the number is negative,
    # else the first 1 from the MSB.
    # all the variants of the instruction return ~0 if no match was found
    add_test_case_int('FirstbitSHi', ['FirstbitSHi'], 'Epsilon', 0, [[
        int32_min, '-65536', '-8', '-1', '0', '1', '8', '65536',
        int32_max
    ]], [['30', '15', '2', '-1', '-1', '0', '3', '16', '30']],
        "unary int", "firstbithigh",
        input_16=[[int16_min, '-256', '-8', '-1', '0', '1', '8', '256', int16_max]],
        output_16=[['14', '7', '2', '-1', '-1', '0', '3', '8', '14']])
    add_test_case_int('FirstbitLo', ['FirstbitLo'], 'Epsilon', 0, [[
        int32_min, '-65536', '-8', '-1', '0', '1', '8', '65536',
        int32_max
    ]], [['31', '16', '3', '0', '-1', '0', '3', '16', '0']],
        "unary int", "firstbitlow",
        input_16=[[int16_min, '-256', '-8', '-1', '0', '1', '8', '256', int16_max]],
        output_16=[['15', '8', '3', '0', '-1', '0', '3', '8', '0']])
    # TODO: there is a known bug in countbits when passing in immediate values.
    # Fix this later
    add_test_case('Countbits', ['Countbits'], 'Epsilon', 0, [[
        int32_min, '-65536', '-8', '-1', '0', '1', '8', '65536',
        int32_max
    ]], [['1', '16', '29', '32', '0', '1', '1', '1', '31']],
         "cs_6_0", get_shader_text("unary int", "countbits"))
    # Unary uint
    add_test_case_int('FirstbitHi', ['FirstbitHi'], 'Epsilon', 0,
                  [['0', '1', '8', '65536', int32_max, uint32_max]],
                  [['-1', '0', '3', '16', '30', '31']],
         "unary uint", "firstbithigh",
        input_16=[['0', '1', '8', uint16_max]],
        output_16=[['-1', '0', '3', '15']])
    # Binary Int
    add_test_case_int('IAdd', ['Add'], 'Epsilon', 0,
                  [[int32_min, '-10', '0', '0', '10', int32_max, '486'],
                   ['0', '10', '-10', '10', '10', '0', '54238']],
                  [[int32_min, '0', '-10', '10', '20', int32_max, '54724']],
            "binary int", "+",
            input_16=[[int16_min, '-10', '0', '0', '10', int16_max],
                      ['0', '10', '-3114', '272', '15', '0']],
            output_16=[[int16_min, '0', '-3114', '272', '25', int16_max]])
    add_test_case_int('ISub', ['Sub'], 'Epsilon', 0,
                  [[int32_min, '-10', '0', '0', '10', int32_max, '486'],
                   ['0', '10', '-10', '10', '10', '0', '54238']],
                  [[int32_min, '-20', '10', '-10', '0', int32_max, '-53752']],
            "binary int", "-",
            input_16=[[int16_min, '-10', '0', '0', '10', int16_max],
                      ['0', '10', '-3114', '272', '15', '0']],
            output_16=[[int16_min, '-20', '3114', '-272', '-5', int16_max]])
    add_test_case_int('IMax', ['IMax'], 'Epsilon', 0,
                  [[int32_min, '-10', '0', '0', '10', int32_max],
                   ['0', '10', '-10', '10', '10', '0']],
                  [['0', '10', '0', '10', '10', int32_max]],
            "binary int call", "max",
            input_16=[[int16_min, '-10', '0', '0', '10', int16_max],
                      ['0', '10', '-3114', '272', '15', '0']],
            output_16=[['0', '10', '0', '272', '15', int16_max]])
    add_test_case_int('IMin', ['IMin'], 'Epsilon', 0,
                  [[int32_min, '-10', '0', '0', '10', int32_max],
                   ['0', '10', '-10', '10', '10', '0']],
                  [[int32_min, '-10', '-10', '0', '10', '0']],
            "binary int call", "min",
            input_16=[[int16_min, '-10', '0', '0', '10', int16_max],
                      ['0', '10', '-3114', '272', '15', '0']],
            output_16=[[int16_min, '-10', '-3114', '0', '10', '0']])
    add_test_case_int('IMul', ['Mul'], 'Epsilon', 0, [
         [ int32_min, '-10', '-1', '0', '1', '10', '10000', int32_max, int32_max ],
         ['-10', '-10', '10', '0', '256', '4', '10001', '0', int32_max]],
         [['0', '100', '-10', '0', '256', '40', '100010000', '0', '1']],
            "binary int", "*",
         input_16=[[ int16_min, '-10', '-1', '0', '1', '10', int16_max],
         ['-10', '-10', '10', '0', '256', '4', '0']],
         output_16=[['0', '100', '-10', '0', '256', '40', '0']])
    add_test_case('IDiv', ['SDiv', 'SRem'], 'Epsilon', 0,
        [['1', '1', '10', '10000', int32_max, int32_max, '-1'],
         ['1', '256', '4', '10001', '2', int32_max, '1']],
        [['1', '0', '2', '0', '1073741823', '1', '-1'],
         ['0', '1', '2', '10000', '1', '0', '0']], "cs_6_0",
        ''' struct SBinaryIntOp {
                int input1;
                int input2;
                int output1;
                int output2;
            };
            RWStructuredBuffer<SBinaryIntOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryIntOp l = g_buf[GI];
                l.output1 = l.input1 / l.input2;
                l.output2 = l.input1 % l.input2;
                g_buf[GI] = l;
            };''')
    add_test_case_int('Shl', ['Shl'], 'Epsilon', 0,
        [['1', '1', '0x1010', '0xa', '-1', '0x12341234', '-1'],
         ['0', '259', '4', '2', '0', '15', '3']],
        [['0x1', '0x8', '0x10100', '0x28', '-1','0x091a0000', '-8']],
        "binary int", "<<",
        input_16=[['1', '1', '0x0101', '0xa', '-1', '0x1234', '-1'],
         ['0', '259', '4', '2', '0', '13', '3']],
        output_16=[['0x1', '0x8', '0x1010', '0x28', '-1','0x8000', '-8']])
    add_test_case_int("LShr", ['LShr'], 'Epsilon', 0,
        [['1', '1', '0xffff', '0x7fffffff', '0x70001234', '0x12340ab3', '0x7fffffff'],
        ['0', '1', '4', '30', '15', '16', '1']],
        [['1', '0', '0xfff', '1', '0xe000', '0x1234', '0x3fffffff']],
        "binary int", ">>",
        input_16=[['1', '1', '0x7fff', '0x7fff'],
        ['0', '1', '4', '14']],
        output_16=[['1', '0', '0x07ff', '1']]
    )
    add_test_case_int("And", ['And'], 'Epsilon', 0,
        [['0x1', '0x01', '0x7fff0000', '0x33333333', '0x137f', '0x12345678', '0xa341', '-1'],
         ['0x1', '0xf0', '0x0000ffff', '0x22222222', '0xec80', '-1', '0x3471', '-1']],
        [['0x1', '0x00', '0x0', '0x22222222', '0x0', '0x12345678', '0x2041', '-1']],
        "binary int", "&",
        input_16=[['0x1', '0x01', '0x7fff', '0x3333', '0x137f', '0x1234', '0xa341', '-1'],
         ['0x1', '0xf0', '0x0000', '0x2222', '0xec80', '-1', '0x3471', '-1']],
        output_16=[['0x1', '0x00', '0x0', '0x2222', '0x0', '0x1234', '0x2041', '-1']],
     )
    add_test_case_int("Or", ['Or'], 'Epsilon', 0,
        [['0x1', '0x01', '0x7fff0000', '0x11111111', '0x137f', '0x0', '0x12345678', '0xa341', '-1'],
         ['0x1', '0xf0', '0x0000ffff', '0x22222222', '0xec80', '0x0', '0x00000000', '0x3471', '-1']],
        [['0x1', '0xf1', '0x7fffffff', '0x33333333', '0xffff', '0x0', '0x12345678', '0xb771', '-1']],
        "binary int", "|",
        input_16=[['0x1', '0x01', '0x7fff', '0x3333', '0x137f', '0x1234', '0xa341', '-1'],
         ['0x1', '0xf0', '0x0000', '0x2222', '0xec80', '0xffff', '0x3471', '-1']],
        output_16=[['0x1', '0xf1', '0x7fff', '0x3333', '0xffff', '0xffff', '0xb771', '-1']],
     )
    add_test_case_int("Xor", ['Xor'], 'Epsilon', 0,
        [['0x1', '0x01', '0x7fff0000', '0x11111111', '0x137f', '0x0', '0x12345678', '0xa341', '-1'],
         ['0x1', '0xf0', '0x0000ffff', '0x22222222', '0xec80', '0x0', '0x00000000', '0x3471', '-1']],
        [['0x0', '0xf1', '0x7fffffff', '0x33333333', '0xffff', '0x0', '0x12345678', '0x9730', '0x00000000']],
        "binary int", "^",
        input_16=[['0x1', '0x01', '0x7fff', '0x1111', '0x137f', '0x0', '0x1234', '0xa341', '-1'],
                  ['0x1', '0xf0', '0x0000', '0x2222', '0xec80', '0x0', '0x0000', '0x3471', '-1']],
        output_16=[['0x0', '0xf1', '0x7fff', '0x3333', '0xffff', '0x0', '0x1234', '0x9730', '0x0000']],
    )

    # Binary Uint
    add_test_case_int('UAdd', ['Add'], 'Epsilon', 0,
                  [['2147483648', '4294967285', '0', '0', '10', int32_max, '486'],
                   ['0', '10', '0', '10', '10', '0', '54238']],
                  [['2147483648', uint32_max, '0', '10', '20', int32_max, '54724']],
                  "binary uint", "+",
                  input_16=[['323', '0xfff5', '0', '0', '10', uint16_max, '486'],
                            ['0', '10', '0', '10', '10', '0', '334']],
                  output_16=[['323', uint16_max, '0', '10', '20', uint16_max, '820']])
    add_test_case_int('USub', ['Sub'], 'Epsilon', 0,
                  [['2147483648', uint32_max, '0', '0', '30', int32_max, '54724'],
                   ['0', '10', '0', '10', '10', '0', '54238']],
                  [['2147483648', '4294967285', '0', '4294967286', '20', int32_max, '486']],
                  "binary uint", "-",
                  input_16=[['323', uint16_max, '0', '0', '10', uint16_max, '486'],
                            ['0', '10', '0', '10', '10', '0', '334']],
                  output_16=[['323', '0xfff5', '0', '-10', '0', uint16_max, '152']])
    add_test_case_int('UMax', ['UMax'], 'Epsilon', 0,
                  [['0', '0', '10', '10000', int32_max, uint32_max],
                   ['0', '256', '4', '10001', '0', uint32_max]],
                  [['0', '256', '10', '10001', int32_max, uint32_max]],
                  "binary uint call", "max",
                  input_16=[['0', '0', '10', '10000', int16_max, uint16_max],
                            ['0', '256', '4', '10001', '0', uint16_max]],
                  output_16=[['0', '256', '10', '10001', int16_max, uint16_max]])
    add_test_case_int('UMin', ['UMin'], 'Epsilon', 0,
                  [['0', '0', '10', '10000', int32_max, uint32_max],
                   ['0', '256', '4', '10001', '0', uint32_max]],
                  [['0', '0', '4', '10000', '0', uint32_max]],
                  "binary uint call", "min",
                  input_16=[['0', '0', '10', '10000', int16_max, uint16_max],
                            ['0', '256', '4', '10001', '0', uint16_max]],
                  output_16=[['0', '0', '4', '10000', '0', uint16_max]])
    add_test_case_int('UMul', ['Mul'], 'Epsilon', 0,
                  [['0', '1', '10', '10000', int32_max],
                   ['0', '256', '4', '10001', '0']],
                  [['0', '256', '40', '100010000', '0']],
                  "binary uint", "*",
                  input_16=[['0', '0', '10', '100', int16_max],
                            ['0', '256', '4', '101', '0']],
                  output_16=[['0', '0', '40', '10100', '0']])
    add_test_case('UDiv', ['UDiv', 'URem'], 'Epsilon', 0,
        [['1', '1', '10', '10000', int32_max, int32_max, '0xffffffff'],
         ['0', '256', '4', '10001', '0', int32_max, '1']],
        [['0xffffffff', '0', '2', '0', '0xffffffff', '1', '0xffffffff'],
         ['0xffffffff', '1', '2', '10000', '0xffffffff', '0', '0']], 'cs_6_0',
        ''' struct SBinaryUintOp {
                uint input1;
                uint input2;
                uint output1;
                uint output2;
            };
            RWStructuredBuffer<SBinaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUintOp l = g_buf[GI];
                l.output1 = l.input1 / l.input2;
                l.output2 = l.input1 % l.input2;
                g_buf[GI] = l;
            };''')
    add_test_case('UAddc', ['UAddc'], 'Epsilon', 0,
        [['1', '1', '10000', '0x80000000', '0x7fffffff', '0xffffffff'],
         ['0', '256', '10001', '1', '0x7fffffff', '0x7fffffff']],
        [['2', '2', '20000', '0', '0xfffffffe', '0xfffffffe'],
         ['0', '512', '20002', '3', '0xfffffffe', '0xffffffff']], 'cs_6_0',
        ''' struct SBinaryUintOp {
                uint input1;
                uint input2;
                uint output1;
                uint output2;
            };
            RWStructuredBuffer<SBinaryUintOp> g_buf : register(u0);
            [numthreads(8,8,1)]
            void main(uint GI : SV_GroupIndex) {
                SBinaryUintOp l = g_buf[GI];
                uint2 x = uint2(l.input1, l.input2);
                uint2 y = AddUint64(x, x);
                l.output1 = y.x;
                l.output2 = y.y;
                g_buf[GI] = l;
            };''')

    # Tertiary Int
    add_test_case_int('IMad', ['IMad'], 'epsilon', 0, [[
        '-2147483647', '-256', '-1', '0', '1', '2', '16', int32_max, '1',
        '-1', '1', '10'
    ], ['1', '-256', '-1', '0', '1', '3', '16', '0', '1', '-1', '10', '100'], [
        '0', '0', '0', '0', '1', '3', '1', '255', '2147483646', '-2147483647',
        '-10', '-2000'
    ]], [[
        '-2147483647', '65536', '1', '0', '2', '9', '257', '255', int32_max,
        '-2147483646', '0', '-1000'
    ]], "tertiary int", "mad",
    input_16=[[int16_min, '-256', '-1', '0', '1', '2', '16', int16_max],
                  ['1','8','-1', '0', '1', '3', '16','1'],
                  ['0', '0', '1', '3', '250', '-30', int16_min, '-50']],
    output_16=[[int16_min, '-2048', '2', '3', '251', '-24', '-32512', '32717']]
    )

    add_test_case_int('UMad', ['UMad'], 'epsilon', 0,
                  [['0', '1', '2', '16', int32_max, '0', '10'], [
                      '0', '1', '2', '16', '1', '0', '10'
                  ], ['0', '0', '1', '15', '0', '10', '10']],
                  [['0', '1', '5', '271', int32_max, '10', '110']],
                  "tertiary uint", "mad",
                  input_16=[['0', '1', '2', '16', int16_max, '0', '10'], [
                      '0', '1', '2', '16', '1', '0', '10'
                  ], ['0', '0', '1', '15', '0', '10', '10']],
                  output_16=[['0', '1', '5', '271', int16_max, '10', '110']],
    )

    # Dot
    add_test_case('Dot', ['Dot2', 'Dot3', 'Dot4'], 'epsilon', 0.008, [[
        'NaN,NaN,NaN,NaN', '-Inf,-Inf,-Inf,-Inf',
        '-denorm,-denorm,-denorm,-denorm', '-0,-0,-0,-0', '0,0,0,0',
        'denorm,denorm,denorm,denorm', 'Inf,Inf,Inf,Inf', '1,1,1,1',
        '-10,0,0,10', 'Inf,Inf,Inf,-Inf'
    ], [
        'NaN,NaN,NaN,NaN', '-Inf,-Inf,-Inf,-Inf',
        '-denorm,-denorm,-denorm,-denorm', '-0,-0,-0,-0', '0,0,0,0',
        'denorm,denorm,denorm,denorm', 'Inf,Inf,Inf,Inf', '1,1,1,1',
        '10,0,0,10', 'Inf,Inf,Inf,Inf'
    ]], [
        [nan, p_inf, 0, 0, 0, 0, p_inf, 2, -100, p_inf],
        [nan, p_inf, 0, 0, 0, 0, p_inf, 3, -100, p_inf],
        [nan, p_inf, 0, 0, 0, 0, p_inf, 4, 0, nan],
    ], 'cs_6_0', ''' struct SDotOp {
                   float4 input1;
                   float4 input2;
                   float o_dot2;
                   float o_dot3;
                   float o_dot4;
                };
                RWStructuredBuffer<SDotOp> g_buf : register(u0);
                [numthreads(8,8,1)]
                void main(uint GI : SV_GroupIndex) {
                    SDotOp l = g_buf[GI];
                    l.o_dot2 = dot(l.input1.xy, l.input2.xy);
                    l.o_dot3 = dot(l.input1.xyz, l.input2.xyz);
                    l.o_dot4 = dot(l.input1.xyzw, l.input2.xyzw);
                    g_buf[GI] = l;
                };''')

    # Dot2AddHalf
    add_test_case('Dot2AddHalf', ['Dot2AddHalf'], 'epsilon', 0.008, [[
        '1,2', '1,-2', '1,2', '-1,2', '1,2', '-1,2', '1,2', '-1,-2',
        '65504,1', '-65504,1', '1,65504', '1,-65504', 'inf,inf',
        'denorm,denorm', '-denorm,-denorm', 'nan,nan'
    ], [
        '3,4', '-3,4', '3,4', '3,-4', '3,4', '-3,4', '3,4', '-3,-4',
        '1,65504', '1,-65504', '65504,1', '-65504,1', 'inf,inf',
        'denorm,denorm', '-denorm,-denorm', 'nan,nan'
    ], [
        '0', '0', '10', '10', '-5', '-5', '-30', '-30', '0', '0',
        '10000000', '-10000000', 'inf', 'denorm', '-denorm',
        'nan'
    ]], [
        [11, -11, 21, -1, 6, 6, -19, -19, 131008, -131008, 10131008,
         -10131008, p_inf, 0, 0, nan],
    ], 'cs_6_4', ''' struct SDot2AddHalfOp {
                   half2 input1;
                   half2 input2;
                   float acc;
                   float result;
                };
                RWStructuredBuffer<SDot2AddHalfOp> g_buf : register(u0);
                [numthreads(8,8,1)]
                void main(uint GI : SV_GroupIndex) {
                    SDot2AddHalfOp l = g_buf[GI];
                    l.result = dot2add(l.input1, l.input2, l.acc);
                    g_buf[GI] = l;
                };''', shader_arguments='-enable-16bit-types')

    # Dot4AddI8Packed
    add_test_case('Dot4AddI8Packed', ['Dot4AddI8Packed'], 'epsilon', 0, [[
        '0x00000102', '0x00000102', '0x00000102', '0x00000102',
        '0XFFFFFFFF', '0x80808080', '0x80808080', '0x807F807F',
        '0x7F7F7F7F', '0x80808080'
    ], [
        '0x00000304', '0x00000304', '0x00000304', '0x00000304',
        '0xFFFFFFFF', '0x01010101', '0x7F7F7F7F', '0x807F807F',
        '0x7F7F7F7F', '0x80808080'
    ], [
        '0', '10', '-5', '-30', '0', '0', '0', '0', '0', '0'
    ]], [
        [11, 21, 6, -19, 4, -512, -65024, 65026, 64516, 65536],
    ], 'cs_6_4', ''' struct SDot4AddI8PackedOp {
                   dword input1;
                   dword input2;
                   int acc;
                   int result;
                };
                RWStructuredBuffer<SDot4AddI8PackedOp> g_buf : register(u0);
                [numthreads(8,8,1)]
                void main(uint GI : SV_GroupIndex) {
                    SDot4AddI8PackedOp l = g_buf[GI];
                    l.result = dot4add_i8packed(l.input1, l.input2, l.acc);
                    g_buf[GI] = l;
                };''')

    # Dot4AddU8Packed
    add_test_case('Dot4AddU8Packed', ['Dot4AddU8Packed'], 'epsilon', 0, [[
        '0x00000102', '0x00000102', '0x01234567', '0xFFFFFFFF',
        '0xFFFFFFFF'
    ], [
        '0x00000304', '0x00000304', '0x23456789', '0xFFFFFFFF',
        '0xFFFFFFFF'
    ], [
        '0', '10', '10000', '0', '3000000000'
    ]], [
        [11, 21, 33668, 260100, 3000260100],
    ], 'cs_6_4', ''' struct SDot4AddU8PackedOp {
                   dword input1;
                   dword input2;
                   dword acc;
                   dword result;
                };
                RWStructuredBuffer<SDot4AddU8PackedOp> g_buf : register(u0);
                [numthreads(8,8,1)]
                void main(uint GI : SV_GroupIndex) {
                    SDot4AddU8PackedOp l = g_buf[GI];
                    l.result = dot4add_u8packed(l.input1, l.input2, l.acc);
                    g_buf[GI] = l;
                };''')

    # Quaternary
    # Msad4 intrinsic calls both Bfi and Msad. Currently this is the only way to call bfi instruction from HLSL
    add_test_case('Bfi', ['Bfi', 'Msad'], 'epsilon', 0,
        [["0xA100B2C3", "0x00000000", "0xFFFF01C1", "0xFFFFFFFF"], [
            "0xD7B0C372, 0x4F57C2A3", "0xFFFFFFFF, 0x00000000",
            "0x38A03AEF, 0x38194DA3", "0xFFFFFFFF, 0x00000000"
        ], ["1,2,3,4", "1,2,3,4", "0,0,0,0", "10,10,10,10"]],
        [['153,6,92,113', '1,2,3,4', '397,585,358,707', '10,265,520,775']],
        'cs_6_0', ''' struct SMsad4 {
                        uint ref;
                        uint2 source;
                        uint4 accum;
                        uint4 result;
                    };
                    RWStructuredBuffer<SMsad4> g_buf : register(u0);
                    [numthreads(8,8,1)]
                    void main(uint GI : SV_GroupIndex) {
                        SMsad4 l = g_buf[GI];
                        l.result = msad4(l.ref, l.source, l.accum);
                        g_buf[GI] = l;
                    };''')

    # Wave Active Tests
    add_test_case('WaveActiveSum', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['2', '4', '8', '-64']], [],
                  'cs_6_0', get_shader_text("wave op int", "WaveActiveSum"))
    add_test_case('WaveActiveProduct', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['1', '2', '4', '-64']], [],
                  'cs_6_0', get_shader_text("wave op int", "WaveActiveProduct"))

    add_test_case('WaveActiveCountBits', ['WaveAllBitCount', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['1', '10', '-4', '-64'],
                   ['-100', '-1000', '300']], [], 'cs_6_0',
                  get_shader_text("wave op int count", "WaveActiveCountBits"))
    add_test_case('WaveActiveMax', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['1', '10', '-4', '-64'],
                   ['-100', '-1000', '300']], [], 'cs_6_0',
                  get_shader_text("wave op int", "WaveActiveMax"))
    add_test_case('WaveActiveMin', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['0'],
                   ['1', '10', '-4', '-64'], ['-100', '-1000', '300']], [],
                  'cs_6_0', get_shader_text("wave op int", "WaveActiveMin"))
    add_test_case('WaveActiveAllEqual', ['WaveActiveAllEqual'], 'Epsilon', 0,
                  [['1', '2', '3', '4', '1', '1', '1', '1'], ['3'], ['-10']],
                  [], 'cs_6_0', get_shader_text("wave op int", "WaveActiveAllEqual"))
    add_test_case('WaveActiveAnyTrue', ['WaveAnyTrue', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '0', '1', '0', '1'], ['1'], ['0']], [], 'cs_6_0',
                  get_shader_text("wave op int", "WaveActiveAnyTrue"))
    add_test_case('WaveActiveAllTrue', ['WaveAllTrue', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '0', '1', '0', '1'], ['1'], ['1']], [], 'cs_6_0',
                  get_shader_text("wave op int", "WaveActiveAllTrue"))

    add_test_case('WaveActiveUSum', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['2', '4', '8', '64']], [],
                  'cs_6_0', get_shader_text("wave op uint", "WaveActiveSum"))
    add_test_case('WaveActiveUProduct', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['1', '2', '4', '64']], [],
                  'cs_6_0', get_shader_text("wave op uint", "WaveActiveProduct"))
    add_test_case('WaveActiveUMax', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4'], ['0'], ['1', '10', '4', '64']], [],
                  'cs_6_0', get_shader_text("wave op uint", "WaveActiveMax"))
    add_test_case('WaveActiveUMin', ['WaveActiveOp', 'WaveReadLaneFirst', 'WaveReadLaneAt'], 'Epsilon', 0,
                  [['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['0'],
                   ['1', '10', '4', '64']], [], 'cs_6_0',
                  get_shader_text("wave op uint", "WaveActiveMin"))
    add_test_case('WaveActiveBitOr', ['WaveActiveBit'], 'Epsilon', 0, [[
        '0xe0000000', '0x0d000000', '0x00b00000', '0x00070000', '0x0000e000',
        '0x00000d00', '0x000000b0', '0x00000007'
    ], ['0xedb7edb7', '0xdb7edb7e', '0xb7edb7ed', '0x7edb7edb'], [
        '0x12481248', '0x24812481', '0x48124812', '0x81248124'
    ], ['0x00000000', '0xffffffff']], [], 'cs_6_0', get_shader_text("wave op uint", "WaveActiveBitOr"))
    add_test_case('WaveActiveBitAnd', ['WaveActiveBit'], 'Epsilon', 0, [[
        '0xefffffff', '0xfdffffff', '0xffbfffff', '0xfff7ffff', '0xffffefff',
        '0xfffffdff', '0xffffffbf', '0xfffffff7'
    ], ['0xedb7edb7', '0xdb7edb7e', '0xb7edb7ed', '0x7edb7edb'], [
        '0x12481248', '0x24812481', '0x48124812', '0x81248124'
    ], ['0x00000000', '0xffffffff']], [], 'cs_6_0', get_shader_text("wave op uint", "WaveActiveBitAnd"))
    add_test_case('WaveActiveBitXor', ['WaveActiveBit'], 'Epsilon', 0, [[
        '0xe0000000', '0x0d000000', '0x00b00000', '0x00070000', '0x0000e000',
        '0x00000d00', '0x000000b0', '0x00000007'
    ], ['0xedb7edb7', '0xdb7edb7e', '0xb7edb7ed', '0x7edb7edb'], [
        '0x12481248', '0x24812481', '0x48124812', '0x81248124'
    ], ['0x00000000', '0xffffffff']], [], 'cs_6_0', get_shader_text("wave op uint", "WaveActiveBitXor"))
    add_test_case('WavePrefixCountBits', ['WavePrefixBitCount'], 'Epsilon', 0,
                  [['1', '2', '3', '4', '5'], ['0'], ['1', '10', '-4', '-64'],
                   ['-100', '-1000', '300']], [], 'cs_6_0',
                  get_shader_text("wave op int count", "WavePrefixCountBits"))
    add_test_case('WavePrefixSum', ['WavePrefixOp'], 'Epsilon', 0,
        [['1', '2', '3', '4', '5'], ['0', '1'], ['1', '2', '4', '-64', '128']],
        [], 'cs_6_0', get_shader_text("wave op int", "WavePrefixSum"))
    add_test_case('WavePrefixProduct', ['WavePrefixOp'], 'Epsilon', 0,
        [['1', '2', '3', '4', '5'], ['0', '1'], ['1', '2', '4', '-64', '128']],
        [], 'cs_6_0', get_shader_text("wave op int", "WavePrefixProduct"))
    add_test_case('WavePrefixUSum', ['WavePrefixOp'], 'Epsilon', 0,
        [['1', '2', '3', '4', '5'], ['0', '1'], ['1', '2', '4', '128']], [],
        'cs_6_0', get_shader_text("wave op uint", "WavePrefixSum"))
    add_test_case('WavePrefixUProduct', ['WavePrefixOp'], 'Epsilon', 0,
        [['1', '2', '3', '4', '5'], ['0', '1'], ['1', '2', '4', '128']], [],
        'cs_6_0', get_shader_text("wave op uint", "WavePrefixProduct"))

    #Wave Multi Prefix Tests
    add_test_case('WaveMultiPrefixBitAnd', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixBitAnd"))
    add_test_case('WaveMultiPrefixBitOr', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixBitOr"))
    add_test_case('WaveMultiPrefixBitXor', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixBitXor"))
    add_test_case('WaveMultiPrefixSum', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixSum"))
    add_test_case('WaveMultiPrefixProduct', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixProduct"))
    add_test_case('WaveMultiPrefixCountBits', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['0', '42', '0', '64', '11', '76', '90', '111', '0', '0', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix int", "WaveMultiPrefixCountBits"))

    add_test_case('WaveMultiPrefixUBitAnd', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixBitAnd"))
    add_test_case('WaveMultiPrefixUBitOr', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixBitOr"))
    add_test_case('WaveMultiPrefixUBitXor', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixBitXor"))
    add_test_case('WaveMultiPrefixUSum', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixSum"))
    add_test_case('WaveMultiPrefixUProduct', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['10', '42', '1', '64', '11', '76', '90', '111', '9', '6', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixProduct"))
    add_test_case('WaveMultiPrefixUCountBits', ['WaveMultiPrefixOp'], 'Epsilon', 0,
         [['0', '3', '1', '5', '4'], ['0', '42', '0', '64', '11', '76', '90', '111', '0', '0', '79', '34']], [],
        'cs_6_5', get_shader_text("wave op multi prefix uint", "WaveMultiPrefixCountBits"))


# generating xml file for execution test using data driven method
# TODO: ElementTree is not generating formatted XML. Currently xml file is checked in after VS Code formatter.
# Implement xml formatter or import formatter library and use that instead.

def generate_parameter_types(table, num_inputs, num_outputs, has_known_warp_issue=False):
    param_types = ET.SubElement(table, "ParameterTypes")
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Target"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Arguments"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Text"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "Validation.Type"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "Validation.Tolerance"
        }).text = "double"
    for i in range(0, num_inputs):
        ET.SubElement(
            param_types,
            "ParameterType",
            attrib={
                "Name": 'Validation.Input{}'.format(i + 1),
                'Array': 'true'
            }).text = "String"
    for i in range(0, num_outputs):
        ET.SubElement(
            param_types,
            "ParameterType",
            attrib={
                "Name": 'Validation.Expected{}'.format(i + 1),
                'Array': 'true'
            }).text = "String"
    if has_known_warp_issue:
        ET.SubElement(param_types, "ParameterType", attrib={"Name":"Warp.Version"}).text = "unsigned int"

def generate_parameter_types_wave(table):
    param_types = ET.SubElement(table, "ParameterTypes")
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Target"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Text"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.NumInputSet"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.InputSet1",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.InputSet2",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.InputSet3",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.InputSet4",
            "Array": "true"
        }).text = "String"

def generate_parameter_types_wave_multi_prefix(table):
    param_types = ET.SubElement(table, "ParameterTypes")

    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Target"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Text"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Keys",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Values",
            "Array": "true"
        }).text = "String"

def generate_parameter_types_msad(table):
    param_types = ET.SubElement(table, "ParameterTypes")
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "ShaderOp.Text"
        }).text = "String"
    ET.SubElement(
        param_types, "ParameterType", attrib={
            "Name": "Validation.Tolerance"
        }).text = "int"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Input1",
            "Array": "true"
        }).text = "unsigned int"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Input2",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Input3",
            "Array": "true"
        }).text = "String"
    ET.SubElement(
        param_types,
        "ParameterType",
        attrib={
            "Name": "Validation.Expected1",
            "Array": "true"
        }).text = "String"

def generate_row(table, case):
    row = ET.SubElement(table, "Row", {"Name": case.test_name})
    ET.SubElement(row, "Parameter", {
        "Name": "Validation.Type"
    }).text = case.validation_type
    ET.SubElement(row, "Parameter", {
        "Name": "Validation.Tolerance"
    }).text = str(case.validation_tolerance)
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Text"
    }).text = case.shader_text
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Target"
    }).text = case.shader_target
    for i in range(len(case.input_lists)):
        inputs = ET.SubElement(row, "Parameter", {
            "Name": "Validation.Input{}".format(i + 1)
        })
        for val in case.input_lists[i]:
            ET.SubElement(inputs, "Value").text = str(val)
    for i in range(len(case.output_lists)):
        outputs = ET.SubElement(row, "Parameter", {
            "Name": "Validation.Expected{}".format(i + 1)
        })
        for val in case.output_lists[i]:
            ET.SubElement(outputs, "Value").text = str(val)
    # Optional parameters
    if case.warp_version > 0:
        ET.SubElement(row, "Parameter", {"Name":"Warp.Version"}).text = str(case.warp_version)
    if case.shader_arguments != "":
        ET.SubElement(row, "Parameter", {"Name":"ShaderOp.Arguments"}).text = case.shader_arguments

def generate_row_wave(table, case):
    row = ET.SubElement(table, "Row", {"Name": case.test_name})
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Name"
    }).text = case.test_name
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Text"
    }).text = case.shader_text
    ET.SubElement(row, "Parameter", {
        "Name": "Validation.NumInputSet"
    }).text = str(len(case.input_lists))
    for i in range(len(case.input_lists)):
        inputs = ET.SubElement(row, "Parameter", {
            "Name": "Validation.InputSet{}".format(i + 1)
        })
        for val in case.input_lists[i]:
            ET.SubElement(inputs, "Value").text = str(val)

def generate_row_wave_multi(table, case):
    row = ET.SubElement(table, "Row", {"Name": case.test_name})
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Name"
    }).text = case.test_name
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Target"
    }).text = case.shader_target
    ET.SubElement(row, "Parameter", {
        "Name": "ShaderOp.Text"
    }).text = case.shader_text
    inputs = ET.SubElement(row, "Parameter", {
        "Name": "Validation.Keys"
    })
    for val in case.input_lists[0]:
        ET.SubElement(inputs, "Value").text = str(val)
    inputs = ET.SubElement(row, "Parameter", {
        "Name": "Validation.Values"
    })
    for val in case.input_lists[1]:
        ET.SubElement(inputs, "Value").text = str(val)

def generate_table_for_taef():
    with open("..\\..\\tools\\clang\\unittests\\HLSL\\ShaderOpArithTable.xml",
              'w') as f:
        tree = ET.ElementTree()
        root = ET.Element('Data')
        # Create tables
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryFloatOpTable"
            }), 1, 1, True)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryFloatOpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryFloatOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryHalfOpTable"
            }), 1, 1, True)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryHalfOpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryHalfOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryIntOpTable"
            }), 1, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryIntOpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryIntOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryInt16OpTable"
            }), 1, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryInt16OpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryInt16OpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryUintOpTable"
            }), 1, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryUintOpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryUintOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "UnaryUint16OpTable"
            }), 1, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "BinaryUint16OpTable"
            }), 2, 2)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "TertiaryUint16OpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "DotOpTable"
            }), 2, 3)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "Dot2AddHalfOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "Dot4AddI8PackedOpTable"
            }), 3, 1)
        generate_parameter_types(
            ET.SubElement(root, "Table", attrib={
                "Id": "Dot4AddU8PackedOpTable"
            }), 3, 1)
        generate_parameter_types_msad(
            ET.SubElement(root, "Table", attrib={
                "Id": "Msad4Table"
            }))
        generate_parameter_types_wave(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsActiveIntTable"
                }))
        generate_parameter_types_wave(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsActiveUintTable"
                }))
        generate_parameter_types_wave(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsPrefixIntTable"
                }))
        generate_parameter_types_wave(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsPrefixUintTable"
                }))
        generate_parameter_types_wave_multi_prefix(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsMultiPrefixIntTable"
                }))
        generate_parameter_types_wave_multi_prefix(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "WaveIntrinsicsMultiPrefixUintTable"
                }))
        generate_parameter_types(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "DenormBinaryFloatOpTable"
                }), 2, 2) # 2 sets of expected values for any mode
        generate_parameter_types(
            ET.SubElement(
                root, "Table", attrib={
                    "Id": "DenormTertiaryFloatOpTable"
                }), 3, 2)

        for case in g_test_cases.values():
            cur_inst = case.insts[0]
            if cur_inst.is_cast or cur_inst.category.startswith("Unary"):
                if "f" in cur_inst.oload_types and not "Half" in case.test_name:
                    generate_row(
                        root.find("./Table[@Id='UnaryFloatOpTable']"),
                        case)
                if "h" in cur_inst.oload_types and "Half" in case.test_name:
                    generate_row(root.find("./Table[@Id='UnaryHalfOpTable']"),case)
                if "i" in cur_inst.oload_types and "Bit16" not in case.test_name:
                    if cur_inst.category.startswith("Unary int"):
                        generate_row(
                            root.find("./Table[@Id='UnaryIntOpTable']"),
                            case)
                    elif cur_inst.category.startswith("Unary uint"):
                        generate_row(
                            root.find("./Table[@Id='UnaryUintOpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)
                if "w" in cur_inst.oload_types and "Bit16" in case.test_name:
                    if cur_inst.category.startswith("Unary int"):
                        generate_row(
                            root.find("./Table[@Id='UnaryInt16OpTable']"),
                            case)
                    elif cur_inst.category.startswith("Unary uint"):
                        generate_row(
                            root.find("./Table[@Id='UnaryUint16OpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)

            elif cur_inst.is_binary or cur_inst.category.startswith(
                    "Binary"):
                if "f" in cur_inst.oload_types and not "Half" in case.test_name:
                    if case.test_name in g_denorm_tests: # for denorm tests
                        generate_row(
                            root.find("./Table[@Id='DenormBinaryFloatOpTable']"),
                            case)
                    else:
                        generate_row(
                            root.find("./Table[@Id='BinaryFloatOpTable']"),
                            case)
                if "h" in cur_inst.oload_types and "Half" in case.test_name:
                    generate_row(root.find("./Table[@Id='BinaryHalfOpTable']"),case)
                if "i" in cur_inst.oload_types and "Bit16" not in case.test_name:
                    if cur_inst.category.startswith("Binary int"):
                        if case.test_name in ['UAdd', 'USub', 'UMul']: # Add, Sub, Mul use same operations for int and uint.
                            generate_row(
                                root.find("./Table[@Id='BinaryUintOpTable']"),
                                case)
                        else:
                            generate_row(
                                root.find("./Table[@Id='BinaryIntOpTable']"),
                                case)
                    elif cur_inst.category.startswith("Binary uint"):
                        generate_row(
                            root.find("./Table[@Id='BinaryUintOpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)
                if "w" in cur_inst.oload_types and "Bit16" in case.test_name:
                    if cur_inst.category.startswith("Binary int"):
                        if case.test_name in ['UAdd', 'USub', 'UMul']: # Add, Sub, Mul use same operations for int and uint.
                            generate_row(
                                root.find("./Table[@Id='BinaryUint16OpTable']"),
                                case)
                        else:
                            generate_row(
                                root.find("./Table[@Id='BinaryInt16OpTable']"),
                                case)
                    elif cur_inst.category.startswith("Binary uint"):
                        generate_row(
                            root.find("./Table[@Id='BinaryUint16OpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)

            elif cur_inst.category.startswith("Tertiary"):
                if "f" in cur_inst.oload_types and not "Half" in case.test_name:
                    if case.test_name in g_denorm_tests: # for denorm tests
                        generate_row(
                            root.find("./Table[@Id='DenormTertiaryFloatOpTable']"),case)
                    else:
                        generate_row(
                            root.find("./Table[@Id='TertiaryFloatOpTable']"),case)
                if "h" in cur_inst.oload_types and "Half" in case.test_name:
                    generate_row(root.find("./Table[@Id='TertiaryHalfOpTable']"),case)
                if "i" in cur_inst.oload_types and "Bit16" not in case.test_name:
                    if cur_inst.category.startswith("Tertiary int"):
                        generate_row(
                            root.find("./Table[@Id='TertiaryIntOpTable']"),
                            case)
                    elif cur_inst.category.startswith("Tertiary uint"):
                        generate_row(
                            root.find(
                                "./Table[@Id='TertiaryUintOpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)
                if "w" in cur_inst.oload_types and "Bit16" in case.test_name:
                    if cur_inst.category.startswith("Tertiary int"):
                        generate_row(
                            root.find("./Table[@Id='TertiaryInt16OpTable']"),
                            case)
                    elif cur_inst.category.startswith("Tertiary uint"):
                        generate_row(
                            root.find(
                                "./Table[@Id='TertiaryUint16OpTable']"),
                            case)
                    else:
                        print("unknown op: " + cur_inst.name)
                        print(cur_inst.dxil_class)
            elif cur_inst.category.startswith("Quaternary"):
                if cur_inst.name == "Bfi":
                    generate_row(
                        root.find("./Table[@Id='Msad4Table']"), case)
                else:
                    print("unknown op: " + cur_inst.name)
                    print(cur_inst.dxil_class)
            elif cur_inst.category == "Dot":
                generate_row(root.find("./Table[@Id='DotOpTable']"), case)
            elif cur_inst.category == "Dot product with accumulate":
                if cur_inst.name == "Dot2AddHalf":
                    generate_row(root.find("./Table[@Id='Dot2AddHalfOpTable']"), case)
                elif cur_inst.name == "Dot4AddI8Packed":
                    generate_row(root.find("./Table[@Id='Dot4AddI8PackedOpTable']"), case)
                elif cur_inst.name == "Dot4AddU8Packed":
                    generate_row(root.find("./Table[@Id='Dot4AddU8PackedOpTable']"), case)
                else:
                    print("unknown op: " + cur_inst.name)
                    print(cur_inst.dxil_class)
            elif cur_inst.dxil_class in ["WaveActiveOp", "WaveAllOp","WaveActiveAllEqual","WaveAnyTrue","WaveAllTrue"]:
                if case.test_name.startswith("WaveActiveU"):
                    generate_row_wave(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsActiveUintTable']"
                        ), case)
                else:
                    generate_row_wave(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsActiveIntTable']"),
                        case)
            elif cur_inst.dxil_class == "WaveActiveBit":
                generate_row_wave(
                    root.find(
                        "./Table[@Id='WaveIntrinsicsActiveUintTable']"),
                    case)
            elif cur_inst.dxil_class == "WavePrefixOp":
                if case.test_name.startswith("WavePrefixU"):
                    generate_row_wave(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsPrefixUintTable']"
                        ), case)
                else:
                    generate_row_wave(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsPrefixIntTable']"),
                        case)
            elif cur_inst.dxil_class == "WaveMultiPrefixOp":
                if case.test_name.startswith("WaveMultiPrefixU"):
                    generate_row_wave_multi(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsMultiPrefixUintTable']"
                        ), case)
                else:
                    generate_row_wave_multi(
                        root.find(
                            "./Table[@Id='WaveIntrinsicsMultiPrefixIntTable']"),
                        case)
            else:
                print("unknown op: " + cur_inst.name)
                print(cur_inst.dxil_class)
        tree._setroot(root)
        from xml.dom.minidom import parseString
        pretty_xml = parseString(ET.tostring(root)).toprettyxml(indent="    ")
        f.write(pretty_xml)

        print("Saved file at: " + f.name)
        f.close()

def print_untested_inst():
    lst = []
    for name in [node.inst.name for node in g_instruction_nodes.values() if len(node.test_cases) == 0]:
        lst += [name]
    lst.sort()
    print("Untested dxil ops: ")
    for name in lst:
        print(name)
    print("Total uncovered dxil ops: " + str(len(lst)))
    print("Total covered dxil ops: " + str(len(g_instruction_nodes)-len(lst)))

# inst name to instruction dict
g_instruction_nodes = {}
# test name to test case dict
g_test_cases = {}

if __name__ == "__main__":
    db = get_db_dxil()
    for inst in db.instr:
        g_instruction_nodes[inst.name] = inst_node(inst)
    add_test_cases()

    args = vars(parser.parse_args())
    mode = args['mode']
    if mode == "info":
        print_untested_inst()
    elif mode == "gen-xml":
        generate_table_for_taef()
    else:
        print("unknown mode: " + mode)
        exit(1)
    exit(0)