BC7Decode.hlsl 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // CHECK: flattenedThreadIdInGroup
  3. // CHECK: groupId
  4. // CHECK: bufferLoad
  5. // CHECK: bufferStore
  6. //--------------------------------------------------------------------------------------
// File: BC7Decode.hlsl
  8. //
  9. // The Compute Shader for BC7 Decoder
  10. //
  11. // Copyright (c) Microsoft Corporation. All rights reserved.
  12. //--------------------------------------------------------------------------------------
  13. //#define REF_DEVICE
  14. #define UINTLENGTH 32
  15. #define NCHANNELS 4
  16. #define BC7_UNORM 98
  17. static const uint candidateSectionCompressed[128] =
  18. {
  19. 0x5050505, 0x1010101, 0x15151515, 0x1050515,
  20. 0x10105, 0x5151555, 0x1051555, 0x10515,
  21. 0x105, 0x5155555, 0x11555, 0x115,
  22. 0x1155555, 0x5555, 0x555555, 0x55,
  23. 0x405455, 0x15010000, 0x4054, 0x15050100,
  24. 0x5010000, 0x405054, 0x4050, 0x15050501,
  25. 0x5010100, 0x404050, 0x14141414, 0x5141450,
  26. 0x1155440, 0x555500, 0x15014054, 0x5414150,
  27. 0x11111111, 0x550055, 0x11441144, 0x5055050,
  28. 0x5500550, 0x11114444, 0x14411441, 0x11444411,
  29. 0x15055054, 0x1055040, 0x5041050, 0x5455150,
  30. 0x14414114, 0x5505005, 0x14144141, 0x141400,
  31. 0x10541000, 0x4150400, 0x41504, 0x105410,
  32. 0x14504105, 0x5145041, 0x14054150, 0x5415014,
  33. 0x14505041, 0x14050541, 0x15544001, 0x1405415,
  34. 0x550505, 0x5055500, 0x4045454, 0x10101515,
  35. 0x50529aa, 0x105a5a9, 0x81a5a5, 0x2a0a0515,
  36. 0x5a5a, 0x5050a0a, 0xa0a5555, 0x505a5a5,
  37. 0x55aa, 0x5555aa, 0x55aaaa, 0x6060606,
  38. 0x16161616, 0x1a1a1a1a, 0x5165a6a, 0x581a0a8,
  39. 0x105165a, 0x150581a0, 0x5a5a5a, 0xa0a0a55,
  40. 0x15152a2a, 0x101a9a9, 0x51a1a, 0x50a4a4,
  41. 0x1a1a0500, 0x6065aaa, 0x14696914, 0x146969,
  42. 0xa52520a, 0x141482aa, 0x51a1a05, 0x80a5a9,
  43. 0x25a6a, 0x2a0a0605, 0x5060a2a, 0x18181818,
  44. 0x55aa00, 0x18618618, 0x18866118, 0x5a05a05,
  45. 0x55aa005, 0x1111aaaa, 0x9999, 0xa5a0a5a,
  46. 0xa050a05, 0x28692869, 0x11aaaa11, 0x999999,
  47. 0x111111aa, 0x2a152a15, 0x2560256, 0x969696,
  48. 0x2a15152a, 0x2565602, 0x141414aa, 0x9696,
  49. 0x1414aaaa, 0xa05050a, 0xa5a5a0a, 0x96,
  50. 0x2010201, 0x2a6a2a6a, 0x11aaaaaa, 0x1585a1a8
  51. };
  52. /*static const uint4x4 candidateSection[128] =
  53. {
  54. {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1},
  55. {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1},
  56. {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1},
  57. {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1},
  58. {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0},
  59. {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1},
  60. {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0},
  61. {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0},
  62. {0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1}, {0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1}, {0,1,0,1, 1,0,1,0, 0,1,0,1, 1,0,1,0}, {0,0,1,1, 0,0,1,1, 1,1,0,0, 1,1,0,0},
  63. {0,0,1,1, 1,1,0,0, 0,0,1,1, 1,1,0,0}, {0,1,0,1, 0,1,0,1, 1,0,1,0, 1,0,1,0}, {0,1,1,0, 1,0,0,1, 0,1,1,0, 1,0,0,1}, {0,1,0,1, 1,0,1,0, 1,0,1,0, 0,1,0,1},
  64. {0,1,1,1, 0,0,1,1, 1,1,0,0, 1,1,1,0}, {0,0,0,1, 0,0,1,1, 1,1,0,0, 1,0,0,0}, {0,0,1,1, 0,0,1,0, 0,1,0,0, 1,1,0,0}, {0,0,1,1, 1,0,1,1, 1,1,0,1, 1,1,0,0},
  65. {0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0}, {0,0,1,1, 1,1,0,0, 1,1,0,0, 0,0,1,1}, {0,1,1,0, 0,1,1,0, 1,0,0,1, 1,0,0,1}, {0,0,0,0, 0,1,1,0, 0,1,1,0, 0,0,0,0},
  66. {0,1,0,0, 1,1,1,0, 0,1,0,0, 0,0,0,0}, {0,0,1,0, 0,1,1,1, 0,0,1,0, 0,0,0,0}, {0,0,0,0, 0,0,1,0, 0,1,1,1, 0,0,1,0}, {0,0,0,0, 0,1,0,0, 1,1,1,0, 0,1,0,0},
  67. {0,1,1,0, 1,1,0,0, 1,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,0, 1,1,0,0, 1,0,0,1}, {0,1,1,0, 0,0,1,1, 1,0,0,1, 1,1,0,0}, {0,0,1,1, 1,0,0,1, 1,1,0,0, 0,1,1,0},
  68. {0,1,1,0, 1,1,0,0, 1,1,0,0, 1,0,0,1}, {0,1,1,0, 0,0,1,1, 0,0,1,1, 1,0,0,1}, {0,1,1,1, 1,1,1,0, 1,0,0,0, 0,0,0,1}, {0,0,0,1, 1,0,0,0, 1,1,1,0, 0,1,1,1},
  69. {0,0,0,0, 1,1,1,1, 0,0,1,1, 0,0,1,1}, {0,0,1,1, 0,0,1,1, 1,1,1,1, 0,0,0,0}, {0,0,1,0, 0,0,1,0, 1,1,1,0, 1,1,1,0}, {0,1,0,0, 0,1,0,0, 0,1,1,1, 0,1,1,1},
  70. {0,0,1,1, 0,0,1,1, 0,2,2,1, 2,2,2,2}, {0,0,0,1, 0,0,1,1, 2,2,1,1, 2,2,2,1}, {0,0,0,0, 2,0,0,1, 2,2,1,1, 2,2,1,1}, {0,2,2,2, 0,0,2,2, 0,0,1,1, 0,1,1,1},
  71. {0,0,0,0, 0,0,0,0, 1,1,2,2, 1,1,2,2}, {0,0,1,1, 0,0,1,1, 0,0,2,2, 0,0,2,2}, {0,0,2,2, 0,0,2,2, 1,1,1,1, 1,1,1,1}, {0,0,1,1, 0,0,1,1, 2,2,1,1, 2,2,1,1},
  72. {0,0,0,0, 0,0,0,0, 1,1,1,1, 2,2,2,2}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 2,2,2,2}, {0,0,0,0, 1,1,1,1, 2,2,2,2, 2,2,2,2}, {0,0,1,2, 0,0,1,2, 0,0,1,2, 0,0,1,2},
  73. {0,1,1,2, 0,1,1,2, 0,1,1,2, 0,1,1,2}, {0,1,2,2, 0,1,2,2, 0,1,2,2, 0,1,2,2}, {0,0,1,1, 0,1,1,2, 1,1,2,2, 1,2,2,2}, {0,0,1,1, 2,0,0,1, 2,2,0,0, 2,2,2,0},
  74. {0,0,0,1, 0,0,1,1, 0,1,1,2, 1,1,2,2}, {0,1,1,1, 0,0,1,1, 2,0,0,1, 2,2,0,0}, {0,0,0,0, 1,1,2,2, 1,1,2,2, 1,1,2,2}, {0,0,2,2, 0,0,2,2, 0,0,2,2, 1,1,1,1},
  75. {0,1,1,1, 0,1,1,1, 0,2,2,2, 0,2,2,2}, {0,0,0,1, 0,0,0,1, 2,2,2,1, 2,2,2,1}, {0,0,0,0, 0,0,1,1, 0,1,2,2, 0,1,2,2}, {0,0,0,0, 1,1,0,0, 2,2,1,0, 2,2,1,0},
  76. {0,1,2,2, 0,1,2,2, 0,0,1,1, 0,0,0,0}, {0,0,1,2, 0,0,1,2, 1,1,2,2, 2,2,2,2}, {0,1,1,0, 1,2,2,1, 1,2,2,1, 0,1,1,0}, {0,0,0,0, 0,1,1,0, 1,2,2,1, 1,2,2,1},
  77. {0,0,2,2, 1,1,0,2, 1,1,0,2, 0,0,2,2}, {0,1,1,0, 0,1,1,0, 2,0,0,2, 2,2,2,2}, {0,0,1,1, 0,1,2,2, 0,1,2,2, 0,0,1,1}, {0,0,0,0, 2,0,0,0, 2,2,1,1, 2,2,2,1},
  78. {0,0,0,0, 0,0,0,2, 1,1,2,2, 1,2,2,2}, {0,2,2,2, 0,0,2,2, 0,0,1,2, 0,0,1,1}, {0,0,1,1, 0,0,1,2, 0,0,2,2, 0,2,2,2}, {0,1,2,0, 0,1,2,0, 0,1,2,0, 0,1,2,0},
  79. {0,0,0,0, 1,1,1,1, 2,2,2,2, 0,0,0,0}, {0,1,2,0, 1,2,0,1, 2,0,1,2, 0,1,2,0}, {0,1,2,0, 2,0,1,2, 1,2,0,1, 0,1,2,0}, {0,0,1,1, 2,2,0,0, 1,1,2,2, 0,0,1,1},
  80. {0,0,1,1, 1,1,2,2, 2,2,0,0, 0,0,1,1}, {0,1,0,1, 0,1,0,1, 2,2,2,2, 2,2,2,2}, {0,0,0,0, 0,0,0,0, 2,1,2,1, 2,1,2,1}, {0,0,2,2, 1,1,2,2, 0,0,2,2, 1,1,2,2},
  81. {0,0,2,2, 0,0,1,1, 0,0,2,2, 0,0,1,1}, {0,2,2,0, 1,2,2,1, 0,2,2,0, 1,2,2,1}, {0,1,0,1, 2,2,2,2, 2,2,2,2, 0,1,0,1}, {0,0,0,0, 2,1,2,1, 2,1,2,1, 2,1,2,1},
  82. {0,1,0,1, 0,1,0,1, 0,1,0,1, 2,2,2,2}, {0,2,2,2, 0,1,1,1, 0,2,2,2, 0,1,1,1}, {0,0,0,2, 1,1,1,2, 0,0,0,2, 1,1,1,2}, {0,0,0,0, 2,1,1,2, 2,1,1,2, 2,1,1,2},
  83. {0,2,2,2, 0,1,1,1, 0,1,1,1, 0,2,2,2}, {0,0,0,2, 1,1,1,2, 1,1,1,2, 0,0,0,2}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 2,2,2,2}, {0,0,0,0, 0,0,0,0, 2,1,1,2, 2,1,1,2},
  84. {0,1,1,0, 0,1,1,0, 2,2,2,2, 2,2,2,2}, {0,0,2,2, 0,0,1,1, 0,0,1,1, 0,0,2,2}, {0,0,2,2, 1,1,2,2, 1,1,2,2, 0,0,2,2}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 2,1,1,2},
  85. {0,0,0,2, 0,0,0,1, 0,0,0,2, 0,0,0,1}, {0,2,2,2, 1,2,2,2, 0,2,2,2, 1,2,2,2}, {0,1,0,1, 2,2,2,2, 2,2,2,2, 2,2,2,2}, {0,1,1,1, 2,0,1,1, 2,2,0,1, 2,2,2,0}
  86. };*/
  87. /*static const uint2 candidateFixUpIndex1D[128] =
  88. {
  89. {15, 0},{15, 0},{15, 0},{15, 0},
  90. {15, 0},{15, 0},{15, 0},{15, 0},
  91. {15, 0},{15, 0},{15, 0},{15, 0},
  92. {15, 0},{15, 0},{15, 0},{15, 0},
  93. {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
  94. { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
  95. { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  96. { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  97. {15, 0},{15, 0},{ 6, 0},{ 8, 0},
  98. { 2, 0},{ 8, 0},{15, 0},{15, 0},
  99. { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  100. { 2, 0},{15, 0},{15, 0},{ 6, 0},
  101. { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
  102. {15, 0},{15, 0},{ 2, 0},{ 2, 0},
  103. {15, 0},{15, 0},{15, 0},{15, 0},
  104. {15, 0},{ 2, 0},{ 2, 0},{15, 0},
  105. //candidateFixUpIndex1D[i][1], i < 64 should not be used
  106. { 3,15},{ 3, 8},{15, 8},{15, 3},
  107. { 8,15},{ 3,15},{15, 3},{15, 8},
  108. { 8,15},{ 8,15},{ 6,15},{ 6,15},
  109. { 6,15},{ 5,15},{ 3,15},{ 3, 8},
  110. { 3,15},{ 3, 8},{ 8,15},{15, 3},
  111. { 3,15},{ 3, 8},{ 6,15},{10, 8},
  112. { 5, 3},{ 8,15},{ 8, 6},{ 6,10},
  113. { 8,15},{ 5,15},{15,10},{15, 8},
  114. { 8,15},{15, 3},{ 3,15},{ 5,10},
  115. { 6,10},{10, 8},{ 8, 9},{15,10},
  116. {15, 6},{ 3,15},{15, 8},{ 5,15},
  117. {15, 3},{15, 6},{15, 6},{15, 8}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct
  118. { 3,15},{15, 3},{ 5,15},{ 5,15},
  119. { 5,15},{ 8,15},{ 5,15},{10,15},
  120. { 5,15},{10,15},{ 8,15},{13,15},
  121. {15, 3},{12,15},{ 3,15},{ 3, 8},
  122. };*/
  123. static const uint2 candidateFixUpIndex1DOrdered[128] = //Same with candidateFixUpIndex1D but order the result when i >= 64
  124. {
  125. {15, 0},{15, 0},{15, 0},{15, 0},
  126. {15, 0},{15, 0},{15, 0},{15, 0},
  127. {15, 0},{15, 0},{15, 0},{15, 0},
  128. {15, 0},{15, 0},{15, 0},{15, 0},
  129. {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
  130. { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
  131. { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  132. { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  133. {15, 0},{15, 0},{ 6, 0},{ 8, 0},
  134. { 2, 0},{ 8, 0},{15, 0},{15, 0},
  135. { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
  136. { 2, 0},{15, 0},{15, 0},{ 6, 0},
  137. { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
  138. {15, 0},{15, 0},{ 2, 0},{ 2, 0},
  139. {15, 0},{15, 0},{15, 0},{15, 0},
  140. {15, 0},{ 2, 0},{ 2, 0},{15, 0},
  141. //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used
  142. { 3,15},{ 3, 8},{ 8,15},{ 3,15},
  143. { 8,15},{ 3,15},{ 3,15},{ 8,15},
  144. { 8,15},{ 8,15},{ 6,15},{ 6,15},
  145. { 6,15},{ 5,15},{ 3,15},{ 3, 8},
  146. { 3,15},{ 3, 8},{ 8,15},{ 3,15},
  147. { 3,15},{ 3, 8},{ 6,15},{ 8,10},
  148. { 3, 5},{ 8,15},{ 6, 8},{ 6,10},
  149. { 8,15},{ 5,15},{10,15},{ 8,15},
  150. { 8,15},{ 3,15},{ 3,15},{ 5,10},
  151. { 6,10},{ 8,10},{ 8, 9},{10,15},
  152. { 6,15},{ 3,15},{ 8,15},{ 5,15},
  153. { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them
  154. { 3,15},{ 3,15},{ 5,15},{ 5,15},
  155. { 5,15},{ 8,15},{ 5,15},{10,15},
  156. { 5,15},{10,15},{ 8,15},{13,15},
  157. { 3,15},{12,15},{ 3,15},{ 3, 8},
  158. };
  159. static const uint4x4 candidateRotation[4] =
  160. {
  161. {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1},
  162. {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0},
  163. {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0},
  164. {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0}
  165. };
  166. static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0},
  167. {2,3}, //color index and alpha index can exchange
  168. {2,2},{4,4},{2,2}};
  169. /*cbuffer cbCS : register( b0 )
  170. {
  171. uint4 g_param; //(g_param.x, g_param.y) is the x and y dimensions of the Dispatch call
  172. //g_param.z defines the format, should be only BC7_UNORM, but is not used in the shader
  173. };*/
  174. cbuffer cbCS : register( b0 )
  175. {
  176. uint g_tex_width;
  177. uint g_num_block_x;
  178. uint g_format;
  179. uint g_tex_size;
  180. uint g_start_block_id;
  181. };
  182. void extract_mode_and_partition( out uint mode, out uint partition, out uint rotation, out uint2 indexPrec, uint4 block );
  183. void extract_and_decode_endpoints_00( out uint4 endPoint, uint mode, uint4 block );
  184. void extract_and_decode_endpoints_01( out uint4 endPoint, uint mode, uint4 block );
  185. void extract_and_decode_endpoints_10( out uint4 endPoint, uint mode, uint4 block );
  186. void extract_and_decode_endpoints_11( out uint4 endPoint, uint mode, uint4 block );
  187. void extract_and_decode_endpoints_20( out uint4 endPoint, uint mode, uint4 block );
  188. void extract_and_decode_endpoints_21( out uint4 endPoint, uint mode, uint4 block );
  189. void get_index( out uint alpha_index, out uint color_index, out uint subset_index, uint x, uint y, uint mode, uint partition, uint4 block );
  190. uint3 interpolate_color( uint color_index, uint index_prec, uint2x4 endPoint );
  191. uint interpolate_alpha( uint alpha_index, uint index_prec, uint2x4 endPoint );
  192. StructuredBuffer<uint4> g_InBuff : register( t0 );
  193. RWStructuredBuffer<uint> g_OutBuff : register( u0 );
  194. #define THREAD_GROUP_SIZE 64
  195. #define BLOCK_SIZE_Y 4
  196. #define BLOCK_SIZE_X 4
  197. #define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X)
  198. #define BLOCK_IN_GROUP (THREAD_GROUP_SIZE / BLOCK_SIZE)
  199. groupshared uint4 shared_temp[THREAD_GROUP_SIZE];
  200. [numthreads( THREAD_GROUP_SIZE, 1, 1 )]
  201. void main(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  202. {
  203. uint blockInGroup = GI / BLOCK_SIZE;
  204. uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
  205. uint threadInBlock = GI - blockInGroup * BLOCK_SIZE;
  206. if (0 == threadInBlock)
  207. {
  208. shared_temp[GI] = g_InBuff[blockID];
  209. }
  210. #ifdef REF_DEVICE
  211. GroupMemoryBarrierWithGroupSync();
  212. #endif
  213. uint4 bc_data = shared_temp[blockInGroup * BLOCK_SIZE + 0];
  214. uint mode;
  215. uint partition;
  216. uint rotation;
  217. uint2 indexPrec;
  218. extract_mode_and_partition( mode, partition, rotation, indexPrec, bc_data );
  219. if (1 == threadInBlock)
  220. {
  221. uint4 endPoint;
  222. extract_and_decode_endpoints_00( endPoint, mode, bc_data );
  223. shared_temp[GI] = endPoint;
  224. }
  225. else if (2 == threadInBlock)
  226. {
  227. uint4 endPoint;
  228. extract_and_decode_endpoints_01( endPoint, mode, bc_data );
  229. shared_temp[GI] = endPoint;
  230. }
  231. else if (3 == threadInBlock)
  232. {
  233. uint4 endPoint;
  234. extract_and_decode_endpoints_10( endPoint, mode, bc_data );
  235. shared_temp[GI] = endPoint;
  236. }
  237. else if (4 == threadInBlock)
  238. {
  239. uint4 endPoint;
  240. extract_and_decode_endpoints_11( endPoint, mode, bc_data );
  241. shared_temp[GI] = endPoint;
  242. }
  243. else if (5 == threadInBlock)
  244. {
  245. uint4 endPoint;
  246. extract_and_decode_endpoints_20( endPoint, mode, bc_data );
  247. shared_temp[GI] = endPoint;
  248. }
  249. else if (6 == threadInBlock)
  250. {
  251. uint4 endPoint;
  252. extract_and_decode_endpoints_21( endPoint, mode, bc_data );
  253. shared_temp[GI] = endPoint;
  254. }
  255. #ifdef REF_DEVICE
  256. GroupMemoryBarrierWithGroupSync();
  257. #endif
  258. uint y = threadInBlock / BLOCK_SIZE_X;
  259. uint x = threadInBlock - y * BLOCK_SIZE_X;
  260. uint block_y = blockID / g_num_block_x;
  261. uint block_x = blockID - block_y * g_num_block_x;
  262. uint addr = (block_y * BLOCK_SIZE_Y + y) * g_tex_width + block_x * BLOCK_SIZE_X + x;
  263. if (addr < g_tex_size)
  264. {
  265. uint alpha_index = 0;
  266. uint color_index = 0;
  267. uint subset_index = 0;
  268. get_index( alpha_index, color_index, subset_index, x, y, mode, partition, bc_data );
  269. uint2x4 endPoint; //At most has 3 pairs of endpoints
  270. endPoint[0] = shared_temp[blockInGroup * BLOCK_SIZE + subset_index * 2 + 1];
  271. endPoint[1] = shared_temp[blockInGroup * BLOCK_SIZE + subset_index * 2 + 2];
  272. uint4 pixel;
  273. pixel.rgb = interpolate_color( color_index, indexPrec.x, endPoint);
  274. if ( mode >= 4 )
  275. {
  276. pixel.a = interpolate_alpha( alpha_index, indexPrec.y, endPoint);
  277. }
  278. else
  279. {
  280. pixel.a = 255;
  281. }
  282. uint4 pixelFinal = pixel;
  283. if (1 == rotation)
  284. {
  285. pixelFinal.ra = pixel.ar;
  286. }
  287. else if (2 == rotation)
  288. {
  289. pixelFinal.ga = pixel.ag;
  290. }
  291. else if (3 == rotation)
  292. {
  293. pixelFinal.ba = pixel.ab;
  294. }
  295. g_OutBuff[addr] = pixelFinal.r | ( pixelFinal.g << 8 ) | ( pixelFinal.b << 16 ) | ( pixelFinal.a << 24 );
  296. }
  297. }
  298. void extract_mode_and_partition( out uint mode, out uint partition, out uint rotation, out uint2 indexPrec, uint4 block )
  299. {
  300. if ( block.x & 0x01 )
  301. {
  302. mode = 0;
  303. partition = ( ( block.x >> 1 ) & 0x0F ) + 64;
  304. rotation = 0;
  305. indexPrec = candidateIndexPrec[0];
  306. }
  307. else if ( block.x & 0x02 )
  308. {
  309. mode = 1;
  310. partition = ( block.x >> 2 ) & 0x3F;
  311. rotation = 0;
  312. indexPrec = candidateIndexPrec[1];
  313. }
  314. else if ( block.x & 0x04 )
  315. {
  316. mode = 2;
  317. partition = ( ( block.x >> 3 ) & 0x3F ) + 64;
  318. rotation = 0;
  319. indexPrec = candidateIndexPrec[2];
  320. }
  321. else if ( block.x & 0x08 )
  322. {
  323. mode = 3;
  324. partition = ( block.x >> 4 ) & 0x3F;
  325. rotation = 0;
  326. indexPrec = candidateIndexPrec[3];
  327. }
  328. else if ( block.x & 0x10 )
  329. {
  330. mode = 4;
  331. partition = 0;
  332. rotation = ( block.x >> 5 ) & 0x03;
  333. if ( block.x & 0x80 )
  334. indexPrec = uint2( 3, 2 );
  335. else
  336. indexPrec = candidateIndexPrec[4];
  337. }
  338. else if ( block.x & 0x20 )
  339. {
  340. mode = 5;
  341. partition = 0;
  342. rotation = ( block.x >> 6 ) & 0x03;
  343. indexPrec = candidateIndexPrec[5];
  344. }
  345. else if ( block.x & 0x40 )
  346. {
  347. mode = 6;
  348. partition = 0;
  349. rotation = 0;
  350. indexPrec = candidateIndexPrec[6];
  351. }
  352. else //block.x & 0x80
  353. {
  354. mode = 7;
  355. partition = ( block.x >> 8 ) & 0x3F;
  356. rotation = 0;
  357. indexPrec = candidateIndexPrec[7];
  358. }
  359. }
  360. void extract_and_decode_endpoints_00( out uint4 endPoint, uint mode, uint4 block )
  361. {
  362. if ( mode == 0 )
  363. {
  364. endPoint.r = ( block.x >> 1 ) & 0xF0;
  365. endPoint.g = ( ( block.x >> 25 ) & 0xF0 ) | ( ( block.y << 7 ) & 0xF0 );
  366. endPoint.b = ( block.y >> 17 ) & 0xF0;
  367. endPoint.a = 255;
  368. endPoint.rgb |= ( ( block.z >> 10 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  369. }
  370. else if ( mode == 1 )
  371. {
  372. endPoint.r = ( block.x >> 6 ) & 0xFC;
  373. endPoint.g = ( block.y << 2 ) & 0xFC;
  374. endPoint.b = ( block.y >> 22 ) & 0xFC;
  375. endPoint.a = 255;
  376. endPoint.rgb |= ( ( block.z >> 15 ) & 0x02 ) | ( endPoint.rgb >> 7 );
  377. }
  378. else if ( mode == 2 )
  379. {
  380. endPoint.r = ( block.x >> 6 ) & 0xF8;
  381. endPoint.g = ( block.y >> 4 ) & 0xF8;
  382. endPoint.b = ( block.z >> 2 ) & 0xF8;
  383. endPoint.a = 255;
  384. endPoint |= endPoint >> 5;
  385. }
  386. else if ( mode == 3 )
  387. {
  388. endPoint.r = ( block.x >> 9 ) & 0xFE;
  389. endPoint.g = ( block.y >> 5 ) & 0xFE;
  390. endPoint.b = ( block.z >> 1 ) & 0xFE;
  391. endPoint.a = 255;
  392. endPoint.rgb |= ( block.z >> 30 ) & 0x01;
  393. }
  394. else if ( mode == 4 )
  395. {
  396. endPoint.r = ( block.x >> 5 ) & 0xF8;
  397. endPoint.g = ( block.x >> 15 ) & 0xF8;
  398. endPoint.b = ( ( block.x >> 25 ) & 0xF8 ) | ( ( block.y << 7 ) & 0xF8 );
  399. endPoint.a = ( block.y >> 4 ) & 0xFC;
  400. endPoint.rgb |= endPoint.rgb >> 5;
  401. endPoint.a |= endPoint.a >> 6;
  402. }
  403. else if ( mode == 5 )
  404. {
  405. endPoint.r = ( block.x >> 7 ) & 0xFE;
  406. endPoint.g = ( block.x >> 21 ) & 0xFE;
  407. endPoint.b = ( block.y >> 3 ) & 0xFE;
  408. endPoint.a = ( block.y >> 18 ) & 0xFF;
  409. endPoint.rgb |= endPoint.rgb >> 7;
  410. }
  411. else if ( mode == 6 )
  412. {
  413. endPoint.r = ( block.x >> 6 ) & 0xFE;
  414. endPoint.g = ( block.x >> 20 ) & 0xFE;
  415. endPoint.b = ( block.y >> 2 ) & 0xFE;
  416. endPoint.a = ( block.y >> 16 ) & 0xFE;
  417. endPoint |= ( block.y >> 31 ) & 0x01;
  418. }
  419. else if ( mode == 7 )
  420. {
  421. endPoint.r = ( block.x >> 11 ) & 0xF8;
  422. endPoint.g = ( block.y << 1 ) & 0xF8;
  423. endPoint.b = ( block.y >> 19 ) & 0xF8;
  424. endPoint.a = ( block.z >> 7 ) & 0xF8;
  425. endPoint |= ( ( block.z >> 28 ) & 0x04 ) | ( endPoint >> 6 );
  426. }
  427. }
  428. void extract_and_decode_endpoints_01( out uint4 endPoint, uint mode, uint4 block )
  429. {
  430. if ( mode == 0 )
  431. {
  432. endPoint.r = ( block.x >> 5 ) & 0xF0;
  433. endPoint.g = ( block.y << 3 ) & 0xF0;
  434. endPoint.b = ( block.y >> 21 ) & 0xF0;
  435. endPoint.a = 255;
  436. endPoint.rgb |= ( ( block.z >> 11 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  437. }
  438. else if ( mode == 1 )
  439. {
  440. endPoint.r = ( block.x >> 12 ) & 0xFC;
  441. endPoint.g = ( block.y >> 4 ) & 0xFC;
  442. endPoint.b = ( ( block.y >> 28 ) & 0xFC ) | ( ( block.z << 4 ) & 0xFC );
  443. endPoint.a = 255;
  444. endPoint.rgb |= ( ( block.z >> 15 ) & 0x02 ) | ( endPoint.rgb >> 7 );
  445. }
  446. else if ( mode == 2 )
  447. {
  448. endPoint.r = ( block.x >> 11 ) & 0xF8;
  449. endPoint.g = ( block.y >> 9 ) & 0xF8;
  450. endPoint.b = ( block.z >> 7 ) & 0xF8;
  451. endPoint.a = 255;
  452. endPoint |= endPoint >> 5;
  453. }
  454. else if ( mode == 3 )
  455. {
  456. endPoint.r = ( block.x >> 16 ) & 0xFE;
  457. endPoint.g = ( block.y >> 12 ) & 0xFE;
  458. endPoint.b = ( block.z >> 8 ) & 0xFE;
  459. endPoint.a = 255;
  460. endPoint.rgb |= ( block.z >> 31 ) & 0x01;
  461. }
  462. else if ( mode == 4 )
  463. {
  464. endPoint.r = ( block.x >> 10 ) & 0xF8;
  465. endPoint.g = ( block.x >> 20 ) & 0xF8;
  466. endPoint.b = ( block.y << 2 ) & 0xF8;
  467. endPoint.a = ( block.y >> 10 ) & 0xFC;
  468. endPoint.rgb |= endPoint.rgb >> 5;
  469. endPoint.a |= endPoint.a >> 6;
  470. }
  471. else if ( mode == 5 )
  472. {
  473. endPoint.r = ( block.x >> 14 ) & 0xFE;
  474. endPoint.g = ( ( block.x >> 28 ) & 0xFE ) | ( ( block.y << 4 ) & 0xFE );
  475. endPoint.b = ( block.y >> 10 ) & 0xFE;
  476. endPoint.a = ( ( block.y >> 26 ) & 0xFF ) | ( ( block.z << 6 ) & 0xFF );
  477. endPoint.rgb |= endPoint.rgb >> 7;
  478. }
  479. else if ( mode == 6 )
  480. {
  481. endPoint.r = ( block.x >> 13 ) & 0xFE;
  482. endPoint.g = ( ( block.x >> 27 ) & 0xFE ) | ( ( block.y << 5 ) & 0xFE );
  483. endPoint.b = ( block.y >> 9 ) & 0xFE;
  484. endPoint.a = ( block.y >> 23 ) & 0xFE;
  485. endPoint |= ( block.z >> 0 ) & 0x01;
  486. }
  487. else if ( mode == 7 )
  488. {
  489. endPoint.r = ( block.x >> 16 ) & 0xF8;
  490. endPoint.g = ( block.y >> 4 ) & 0xF8;
  491. endPoint.b = ( block.y >> 24 ) & 0xF8;
  492. endPoint.a = ( block.z >> 12 ) & 0xF8;
  493. endPoint |= ( ( block.z >> 29 ) & 0x04 ) | ( endPoint >> 6 );
  494. }
  495. }
  496. void extract_and_decode_endpoints_10( out uint4 endPoint, uint mode, uint4 block )
  497. {
  498. if ( mode == 0 )
  499. {
  500. endPoint.r = ( block.x >> 9 ) & 0xF0;
  501. endPoint.g = ( block.y >> 1 ) & 0xF0;
  502. endPoint.b = ( ( block.y >> 25 ) & 0xF0 ) | ( ( block.z << 7 ) & 0xF0 );
  503. endPoint.a = 255;
  504. endPoint.rgb |= ( ( block.z >> 12 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  505. }
  506. else if ( mode == 1 )
  507. {
  508. endPoint.r = ( block.x >> 18 ) & 0xFC;
  509. endPoint.g = ( block.y >> 10 ) & 0xFC;
  510. endPoint.b = ( block.z >> 2 ) & 0xFC;
  511. endPoint.a = 255;
  512. endPoint.rgb |= ( ( block.z >> 16 ) & 0x02 ) | ( endPoint.rgb >> 7 );
  513. }
  514. else if ( mode == 2 )
  515. {
  516. endPoint.r = ( block.x >> 16 ) & 0xF8;
  517. endPoint.g = ( block.y >> 14 ) & 0xF8;
  518. endPoint.b = ( block.z >> 12 ) & 0xF8;
  519. endPoint.a = 255;
  520. endPoint |= endPoint >> 5;
  521. }
  522. else if ( mode == 3 )
  523. {
  524. endPoint.r = ( block.x >> 23 ) & 0xFE;
  525. endPoint.g = ( block.y >> 19 ) & 0xFE;
  526. endPoint.b = ( block.z >> 15 ) & 0xFE;
  527. endPoint.a = 255;
  528. endPoint.rgb |= ( block.w >> 0 ) & 0x01;
  529. }
  530. else if ( mode == 4 )
  531. {
  532. endPoint = 0;
  533. }
  534. else if ( mode == 5 )
  535. {
  536. endPoint = 0;
  537. }
  538. else if ( mode == 6 )
  539. {
  540. endPoint = 0;
  541. }
  542. else if ( mode == 7 )
  543. {
  544. endPoint.r = ( block.x >> 21 ) & 0xF8;
  545. endPoint.g = ( block.y >> 9 ) & 0xF8;
  546. endPoint.b = ( block.z << 3 ) & 0xF8;
  547. endPoint.a = ( block.z >> 17 ) & 0xF8;
  548. endPoint |= ( ( block.w << 2 ) & 0x04 ) | ( endPoint >> 6 );
  549. }
  550. }
  551. void extract_and_decode_endpoints_11( out uint4 endPoint, uint mode, uint4 block )
  552. {
  553. if ( mode == 0 )
  554. {
  555. endPoint.r = ( block.x >> 13 ) & 0xF0;
  556. endPoint.g = ( block.y >> 5 ) & 0xF0;
  557. endPoint.b = ( block.z << 3 ) & 0xF0;
  558. endPoint.a = 255;
  559. endPoint.rgb |= ( ( block.z >> 13 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  560. }
  561. else if ( mode == 1 )
  562. {
  563. endPoint.r = ( block.x >> 24 ) & 0xFC;
  564. endPoint.g = ( block.y >> 16 ) & 0xFC;
  565. endPoint.b = ( block.z >> 8 ) & 0xFC;
  566. endPoint.a = 255;
  567. endPoint.rgb |= ( ( block.z >> 16 ) & 0x02 ) | ( endPoint.rgb >> 7 );
  568. }
  569. else if ( mode == 2 )
  570. {
  571. endPoint.r = ( block.x >> 21 ) & 0xF8;
  572. endPoint.g = ( block.y >> 19 ) & 0xF8;
  573. endPoint.b = ( block.z >> 17 ) & 0xF8;
  574. endPoint.a = 255;
  575. endPoint |= endPoint >> 5;
  576. }
  577. else if ( mode == 3 )
  578. {
  579. endPoint.r = ( ( block.x >> 30 ) & 0xFE ) | ( ( block.y << 2 ) & 0xFE );
  580. endPoint.g = ( ( block.y >> 26 ) & 0xFE ) | ( ( block.z << 6 ) & 0xFE );
  581. endPoint.b = ( block.z >> 22 ) & 0xFE;
  582. endPoint.a = 255;
  583. endPoint.rgb |= ( block.w >> 1 ) & 0x01;
  584. }
  585. else if ( mode == 4 )
  586. {
  587. endPoint = 0;
  588. }
  589. else if ( mode == 5 )
  590. {
  591. endPoint = 0;
  592. }
  593. else if ( mode == 6 )
  594. {
  595. endPoint = 0;
  596. }
  597. else if ( mode == 7 )
  598. {
  599. endPoint.r = ( ( block.x >> 26 ) & 0xF8 ) | ( ( block.y << 6 ) & 0xF8 );
  600. endPoint.g = ( block.y >> 14 ) & 0xF8;
  601. endPoint.b = ( block.z >> 2 ) & 0xF8;
  602. endPoint.a = ( block.z >> 22 ) & 0xF8;
  603. endPoint |= ( ( block.w << 1 ) & 0x04 ) | ( endPoint >> 6 );
  604. }
  605. }
  606. void extract_and_decode_endpoints_20( out uint4 endPoint, uint mode, uint4 block )
  607. {
  608. if ( mode == 0 )
  609. {
  610. endPoint.r = ( block.x >> 17 ) & 0xF0;
  611. endPoint.g = ( block.y >> 9 ) & 0xF0;
  612. endPoint.b = ( block.z >> 1 ) & 0xF0;
  613. endPoint.a = 255;
  614. endPoint.rgb |= ( ( block.z >> 14 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  615. }
  616. else if ( mode == 1 )
  617. {
  618. endPoint = 0;
  619. }
  620. else if ( mode == 2 )
  621. {
  622. endPoint.r = ( ( block.x >> 26 ) & 0xF8 ) | ( ( block.y << 6 ) & 0xF8 );
  623. endPoint.g = ( block.y >> 24 ) & 0xF8;
  624. endPoint.b = ( block.z >> 22 ) & 0xF8;
  625. endPoint.a = 255;
  626. endPoint |= endPoint >> 5;
  627. }
  628. else if ( mode == 3 )
  629. {
  630. endPoint = 0;
  631. }
  632. else if ( mode == 4 )
  633. {
  634. endPoint = 0;
  635. }
  636. else if ( mode == 5 )
  637. {
  638. endPoint = 0;
  639. }
  640. else if ( mode == 6 )
  641. {
  642. endPoint = 0;
  643. }
  644. else if ( mode == 7 )
  645. {
  646. endPoint = 0;
  647. }
  648. }
  649. void extract_and_decode_endpoints_21( out uint4 endPoint, uint mode, uint4 block )
  650. {
  651. if ( mode == 0 )
  652. {
  653. endPoint.r = ( block.x >> 21 ) & 0xF0;
  654. endPoint.g = ( block.y >> 13 ) & 0xF0;
  655. endPoint.b = ( block.z >> 5 ) & 0xF0;
  656. endPoint.a = 255;
  657. endPoint.rgb |= ( ( block.z >> 15 ) & 0x08 ) | ( endPoint.rgb >> 5 );
  658. }
  659. else if ( mode == 1 )
  660. {
  661. endPoint = 0;
  662. }
  663. else if ( mode == 2 )
  664. {
  665. endPoint.r = ( block.y << 1 ) & 0xF8;
  666. endPoint.g = ( block.z << 3 ) & 0xF8;
  667. endPoint.b = ( ( block.z >> 27 ) & 0xF8 ) | ( ( block.w << 5 ) & 0xF8 );
  668. endPoint.a = 255;
  669. endPoint |= endPoint >> 5;
  670. }
  671. else if ( mode == 3 )
  672. {
  673. endPoint = 0;
  674. }
  675. else if ( mode == 4 )
  676. {
  677. endPoint = 0;
  678. }
  679. else if ( mode == 5 )
  680. {
  681. endPoint = 0;
  682. }
  683. else if ( mode == 6 )
  684. {
  685. endPoint = 0;
  686. }
  687. else if ( mode == 7 )
  688. {
  689. endPoint = 0;
  690. }
  691. }
  // Unpacks every endpoint stored in a 128-bit BC7 block and expands each channel to 8 bits.
  //
  //   endPoint : receives up to three subsets. endPoint[s][0] and endPoint[s][1] are the two
  //              RGBA endpoints of subset s; subsets the mode does not have are set to 0.
  //              Modes 0-3 carry no alpha and report alpha as 255.
  //   mode     : BC7 mode number. Values 0-7 select the matching field layout. Any other
  //              value zero-fills the whole output, so the out parameter is always fully
  //              written (the BC7 specification defines reserved-mode blocks as decoding
  //              to zero).
  //   block    : the raw block; fields are read as one bit string running from block.x
  //              (lowest bits) through block.w (highest bits).
  //
  // Every mode follows the same recipe: shift each stored channel so that its most
  // significant bit lands on bit 7, place the P-bit (when the mode has one) directly below
  // the stored bits, then copy the top bits into the low bits that are still empty.
  void extract_and_decode_endpoints( out uint2x4 endPoint[3], uint mode, uint4 block )
  {
      if ( mode == 0 )
      {
          // 4 bits per channel, RGB only, three subsets, one P-bit per endpoint.
          endPoint[0][0] = uint4( ( block.x >> 1 ) & 0xF0, ( ( block.x >> 25 ) & 0xF0 ) | ( ( block.y << 7 ) & 0xF0 ), ( block.y >> 17 ) & 0xF0, 255 );
          endPoint[0][1] = uint4( ( block.x >> 5 ) & 0xF0, ( block.y << 3 ) & 0xF0, ( block.y >> 21 ) & 0xF0, 255 );
          endPoint[1][0] = uint4( ( block.x >> 9 ) & 0xF0, ( block.y >> 1 ) & 0xF0, ( ( block.y >> 25 ) & 0xF0 ) | ( ( block.z << 7 ) & 0xF0 ), 255 );
          endPoint[1][1] = uint4( ( block.x >> 13 ) & 0xF0, ( block.y >> 5 ) & 0xF0, ( block.z << 3 ) & 0xF0, 255 );
          endPoint[2][0] = uint4( ( block.x >> 17 ) & 0xF0, ( block.y >> 9 ) & 0xF0, ( block.z >> 1 ) & 0xF0, 255 );
          endPoint[2][1] = uint4( ( block.x >> 21 ) & 0xF0, ( block.y >> 13 ) & 0xF0, ( block.z >> 5 ) & 0xF0, 255 );

          // P-bit goes to bit 3; the top three stored bits fill bits 2..0.
          endPoint[0][0].rgb |= ( ( block.z >> 10 ) & 0x08 ) | ( endPoint[0][0].rgb >> 5 );
          endPoint[0][1].rgb |= ( ( block.z >> 11 ) & 0x08 ) | ( endPoint[0][1].rgb >> 5 );
          endPoint[1][0].rgb |= ( ( block.z >> 12 ) & 0x08 ) | ( endPoint[1][0].rgb >> 5 );
          endPoint[1][1].rgb |= ( ( block.z >> 13 ) & 0x08 ) | ( endPoint[1][1].rgb >> 5 );
          endPoint[2][0].rgb |= ( ( block.z >> 14 ) & 0x08 ) | ( endPoint[2][0].rgb >> 5 );
          endPoint[2][1].rgb |= ( ( block.z >> 15 ) & 0x08 ) | ( endPoint[2][1].rgb >> 5 );
      }
      else if ( mode == 1 )
      {
          // 6 bits per channel, RGB only, two subsets, one P-bit shared by each subset.
          endPoint[0][0] = uint4( ( block.x >> 6 ) & 0xFC, ( block.y << 2 ) & 0xFC, ( block.y >> 22 ) & 0xFC, 255 );
          endPoint[0][1] = uint4( ( block.x >> 12 ) & 0xFC, ( block.y >> 4 ) & 0xFC, ( ( block.y >> 28 ) & 0xFC ) | ( ( block.z << 4 ) & 0xFC ), 255 );
          endPoint[1][0] = uint4( ( block.x >> 18 ) & 0xFC, ( block.y >> 10 ) & 0xFC, ( block.z >> 2 ) & 0xFC, 255 );
          endPoint[1][1] = uint4( ( block.x >> 24 ) & 0xFC, ( block.y >> 16 ) & 0xFC, ( block.z >> 8 ) & 0xFC, 255 );

          // P-bit goes to bit 1; the top stored bit fills bit 0.
          endPoint[0][0].rgb |= ( ( block.z >> 15 ) & 0x02 ) | ( endPoint[0][0].rgb >> 7 );
          endPoint[0][1].rgb |= ( ( block.z >> 15 ) & 0x02 ) | ( endPoint[0][1].rgb >> 7 );
          endPoint[1][0].rgb |= ( ( block.z >> 16 ) & 0x02 ) | ( endPoint[1][0].rgb >> 7 );
          endPoint[1][1].rgb |= ( ( block.z >> 16 ) & 0x02 ) | ( endPoint[1][1].rgb >> 7 );

          endPoint[2] = 0;
      }
      else if ( mode == 2 )
      {
          // 5 bits per channel, RGB only, three subsets, no P-bits.
          endPoint[0][0] = uint4( ( block.x >> 6 ) & 0xF8, ( block.y >> 4 ) & 0xF8, ( block.z >> 2 ) & 0xF8, 255 );
          endPoint[0][1] = uint4( ( block.x >> 11 ) & 0xF8, ( block.y >> 9 ) & 0xF8, ( block.z >> 7 ) & 0xF8, 255 );
          endPoint[1][0] = uint4( ( block.x >> 16 ) & 0xF8, ( block.y >> 14 ) & 0xF8, ( block.z >> 12 ) & 0xF8, 255 );
          endPoint[1][1] = uint4( ( block.x >> 21 ) & 0xF8, ( block.y >> 19 ) & 0xF8, ( block.z >> 17 ) & 0xF8, 255 );
          endPoint[2][0] = uint4( ( ( block.x >> 26 ) & 0xF8 ) | ( ( block.y << 6 ) & 0xF8 ), ( block.y >> 24 ) & 0xF8, ( block.z >> 22 ) & 0xF8, 255 );
          endPoint[2][1] = uint4( ( block.y << 1 ) & 0xF8, ( block.z << 3 ) & 0xF8, ( ( block.z >> 27 ) & 0xF8 ) | ( ( block.w << 5 ) & 0xF8 ), 255 );

          // Applied to whole subsets: alpha is already 255, so OR-ing 255 >> 5 leaves it alone.
          endPoint[0] |= endPoint[0] >> 5;
          endPoint[1] |= endPoint[1] >> 5;
          endPoint[2] |= endPoint[2] >> 5;
      }
      else if ( mode == 3 )
      {
          // 7 bits per channel, RGB only, two subsets, one P-bit per endpoint.
          endPoint[0][0] = uint4( ( block.x >> 9 ) & 0xFE, ( block.y >> 5 ) & 0xFE, ( block.z >> 1 ) & 0xFE, 255 );
          endPoint[0][1] = uint4( ( block.x >> 16 ) & 0xFE, ( block.y >> 12 ) & 0xFE, ( block.z >> 8 ) & 0xFE, 255 );
          endPoint[1][0] = uint4( ( block.x >> 23 ) & 0xFE, ( block.y >> 19 ) & 0xFE, ( block.z >> 15 ) & 0xFE, 255 );
          endPoint[1][1] = uint4( ( ( block.x >> 30 ) & 0xFE ) | ( ( block.y << 2 ) & 0xFE ), ( ( block.y >> 26 ) & 0xFE ) | ( ( block.z << 6 ) & 0xFE ), ( block.z >> 22 ) & 0xFE, 255 );

          // The P-bit completes the 8-bit value, nothing is left to replicate.
          endPoint[0][0].rgb |= ( block.z >> 30 ) & 0x01;
          endPoint[0][1].rgb |= ( block.z >> 31 ) & 0x01;
          endPoint[1][0].rgb |= ( block.w >> 0 ) & 0x01;
          endPoint[1][1].rgb |= ( block.w >> 1 ) & 0x01;

          endPoint[2] = 0;
      }
      else if ( mode == 4 )
      {
          // 5-bit RGB and 6-bit alpha, one subset, no P-bits.
          endPoint[0][0] = uint4( ( block.x >> 5 ) & 0xF8, ( block.x >> 15 ) & 0xF8, ( ( block.x >> 25 ) & 0xF8 ) | ( ( block.y << 7 ) & 0xF8 ), ( block.y >> 4 ) & 0xFC );
          endPoint[0][1] = uint4( ( block.x >> 10 ) & 0xF8, ( block.x >> 20 ) & 0xF8, ( block.y << 2 ) & 0xF8, ( block.y >> 10 ) & 0xFC );

          endPoint[0][0].rgb |= endPoint[0][0].rgb >> 5;
          endPoint[0][0].a |= endPoint[0][0].a >> 6;
          endPoint[0][1].rgb |= endPoint[0][1].rgb >> 5;
          endPoint[0][1].a |= endPoint[0][1].a >> 6;

          endPoint[1] = 0;
          endPoint[2] = 0;
      }
      else if ( mode == 5 )
      {
          // 7-bit RGB and full 8-bit alpha, one subset, no P-bits.
          endPoint[0][0] = uint4( ( block.x >> 7 ) & 0xFE, ( block.x >> 21 ) & 0xFE, ( block.y >> 3 ) & 0xFE, ( block.y >> 18 ) & 0xFF );
          endPoint[0][1] = uint4( ( block.x >> 14 ) & 0xFE, ( ( block.x >> 28 ) & 0xFE ) | ( ( block.y << 4 ) & 0xFE ), ( block.y >> 10 ) & 0xFE, ( ( block.y >> 26 ) & 0xFF ) | ( ( block.z << 6 ) & 0xFF ) );

          // Only the colour channels need their low bit filled.
          endPoint[0][0].rgb |= endPoint[0][0].rgb >> 7;
          endPoint[0][1].rgb |= endPoint[0][1].rgb >> 7;

          endPoint[1] = 0;
          endPoint[2] = 0;
      }
      else if ( mode == 6 )
      {
          // 7-bit RGBA, one subset, one P-bit per endpoint applied to all four channels.
          endPoint[0][0] = uint4( ( block.x >> 6 ) & 0xFE, ( block.x >> 20 ) & 0xFE, ( block.y >> 2 ) & 0xFE, ( block.y >> 16 ) & 0xFE );
          endPoint[0][1] = uint4( ( block.x >> 13 ) & 0xFE, ( ( block.x >> 27 ) & 0xFE ) | ( ( block.y << 5 ) & 0xFE ), ( block.y >> 9 ) & 0xFE, ( block.y >> 23 ) & 0xFE );

          endPoint[0][0] |= ( block.y >> 31 ) & 0x01;
          endPoint[0][1] |= ( block.z >> 0 ) & 0x01;

          endPoint[1] = 0;
          endPoint[2] = 0;
      }
      else if ( mode == 7 )
      {
          // 5-bit RGBA, two subsets, one P-bit per endpoint applied to all four channels.
          endPoint[0][0] = uint4( ( block.x >> 11 ) & 0xF8, ( block.y << 1 ) & 0xF8, ( block.y >> 19 ) & 0xF8, ( block.z >> 7 ) & 0xF8 );
          endPoint[0][1] = uint4( ( block.x >> 16 ) & 0xF8, ( block.y >> 4 ) & 0xF8, ( block.y >> 24 ) & 0xF8, ( block.z >> 12 ) & 0xF8 );
          endPoint[1][0] = uint4( ( block.x >> 21 ) & 0xF8, ( block.y >> 9 ) & 0xF8, ( block.z << 3 ) & 0xF8, ( block.z >> 17 ) & 0xF8 );
          endPoint[1][1] = uint4( ( ( block.x >> 26 ) & 0xF8 ) | ( ( block.y << 6 ) & 0xF8 ), ( block.y >> 14 ) & 0xF8, ( block.z >> 2 ) & 0xF8, ( block.z >> 22 ) & 0xF8 );

          // P-bit goes to bit 2; the top two stored bits fill bits 1..0.
          endPoint[0][0] |= ( ( block.z >> 28 ) & 0x04 ) | ( endPoint[0][0] >> 6 );
          endPoint[0][1] |= ( ( block.z >> 29 ) & 0x04 ) | ( endPoint[0][1] >> 6 );
          endPoint[1][0] |= ( ( block.w << 2 ) & 0x04 ) | ( endPoint[1][0] >> 6 );
          endPoint[1][1] |= ( ( block.w << 1 ) & 0x04 ) | ( endPoint[1][1] >> 6 );

          endPoint[2] = 0;
      }
      else
      {
          // Not a BC7 mode: hand back zeroed endpoints rather than an unwritten out array.
          endPoint[0] = 0;
          endPoint[1] = 0;
          endPoint[2] = 0;
      }
  }
  // Returns the bit_count-bit field whose lowest bit is bit first_bit of the block, where
  // the block is read as one bit string running from block.x (bits 0-31) to block.w
  // (bits 96-127). The field may straddle two neighbouring 32-bit words.
  uint bc7_read_block_bits( uint4 block, uint first_bit, uint bit_count )
  {
      uint word = first_bit >> 5;
      uint shift = first_bit & 31;

      // Word holding the start of the field, and the word that follows it.
      uint low_word = ( word == 0 ) ? block.x : ( ( word == 1 ) ? block.y : ( ( word == 2 ) ? block.z : block.w ) );
      uint high_word = ( word == 0 ) ? block.y : ( ( word == 1 ) ? block.z : block.w );

      uint bits = low_word >> shift;
      if ( shift + bit_count > 32 )
          bits |= high_word << ( 32 - shift );

      return bits & ( ( 1u << bit_count ) - 1u );
  }

  // Fetches the index of texel i from an index stream that begins at bit stream_start and
  // spends bit_count bits per texel, except that texel 0 and the texels numbered anchor0 /
  // anchor1 are stored with one bit less (so their top bit always reads as 0). Pass
  // 0xFFFFFFFF for an anchor the layout does not have.
  uint bc7_fetch_index( uint4 block, uint i, uint stream_start, uint bit_count, uint anchor0, uint anchor1 )
  {
      uint bits_saved;    // one bit for every shortened texel stored ahead of texel i
      bool shortened;     // whether texel i itself is one of the shortened texels

      if ( i == 0 )
      {
          bits_saved = 0;
          shortened = true;
      }
      else if ( i < anchor0 )
      {
          bits_saved = 1;
          shortened = false;
      }
      else if ( i == anchor0 )
      {
          bits_saved = 1;
          shortened = true;
      }
      else if ( i < anchor1 )
      {
          bits_saved = 2;
          shortened = false;
      }
      else if ( i == anchor1 )
      {
          bits_saved = 2;
          shortened = true;
      }
      else
      {
          bits_saved = 3;
          shortened = false;
      }

      uint width = shortened ? ( bit_count - 1 ) : bit_count;
      return bc7_read_block_bits( block, stream_start + i * bit_count - bits_saved, width );
  }

  // Looks up the interpolation indices and the subset number of one texel of a BC7 block.
  //
  //   alpha_index  : index used for alpha. 0 in modes 0-3 (not used there); equal to
  //                  color_index in modes 6 and 7; read from its own stream in modes 4 and 5.
  //   color_index  : index used for colour.
  //   subset_index : two-bit entry for this texel taken from
  //                  candidateSectionCompressed[partition]; 0 in modes 4-6 (not used there).
  //   x, y         : texel position inside the block; the texel number is y * 4 + x.
  //   mode         : BC7 mode. Any value other than 0-6 is handled with the mode 7 layout.
  //   partition    : row of candidateFixUpIndex1DOrdered / candidateSectionCompressed to use
  //                  (for mode 0 the original code notes 64 <= partition < 64 + 16).
  //   block        : the raw 128-bit block.
  void get_index( out uint alpha_index, out uint color_index, out uint subset_index, uint x, uint y, uint mode, uint partition, uint4 block )
  {
      const uint no_anchor = 0xFFFFFFFF;    // compares greater than every texel number
      uint i = y * 4 + x;

      if ( mode == 4 )
      {
          // Two streams: 2-bit indices from bit 50 and 3-bit indices from bit 81.
          uint narrow_index = bc7_fetch_index( block, i, 50, 2, no_anchor, no_anchor );
          uint wide_index = bc7_fetch_index( block, i, 81, 3, no_anchor, no_anchor );

          // Bit 7 of the block decides which stream drives colour and which drives alpha.
          bool swapped = ( block.x & 0x80 ) != 0;
          color_index = swapped ? wide_index : narrow_index;
          alpha_index = swapped ? narrow_index : wide_index;
          subset_index = 0; //Not used
      }
      else if ( mode == 5 )
      {
          // Two 2-bit streams: colour from bit 66, alpha from bit 97.
          color_index = bc7_fetch_index( block, i, 66, 2, no_anchor, no_anchor );
          alpha_index = bc7_fetch_index( block, i, 97, 2, no_anchor, no_anchor );
          subset_index = 0; //Not used
      }
      else if ( mode == 6 )
      {
          // One 4-bit stream from bit 65 shared by colour and alpha.
          color_index = bc7_fetch_index( block, i, 65, 4, no_anchor, no_anchor );
          alpha_index = color_index;
          subset_index = 0; //Not used
      }
      else
      {
          // Partitioned layouts: a single index stream with one extra shortened texel per
          // additional subset, listed in candidateFixUpIndex1DOrdered[partition].
          uint stream_start;
          uint bit_count;
          uint anchor0 = candidateFixUpIndex1DOrdered[partition][0];
          uint anchor1 = no_anchor;

          if ( mode == 0 )
          {
              stream_start = 83;
              bit_count = 3;
              anchor1 = candidateFixUpIndex1DOrdered[partition][1];
          }
          else if ( mode == 1 )
          {
              stream_start = 82;
              bit_count = 3;
          }
          else if ( mode == 2 )
          {
              stream_start = 99;
              bit_count = 2;
              anchor1 = candidateFixUpIndex1DOrdered[partition][1];
          }
          else // mode == 3 or mode == 7, which place their index stream identically
          {
              stream_start = 98;
              bit_count = 2;
          }

          color_index = bc7_fetch_index( block, i, stream_start, bit_count, anchor0, anchor1 );

          // Modes 0-3 do not use the alpha index; mode 7 reuses the colour index.
          if ( mode <= 3 )
              alpha_index = 0;
          else
              alpha_index = color_index;

          subset_index = ( candidateSectionCompressed[partition] >> ( 30 - i * 2 ) ) & 0x03;
      }
  }
  // Interpolation weights on a 0..64 scale, looked up by texel index.
  // One table per index width: aWeight2 for 2-bit, aWeight3 for 3-bit and
  // aWeight4 for 4-bit indices.
  static const uint aWeight2[4] =
  {
      0, 21, 43, 64
  };
  static const uint aWeight3[8] =
  {
      0, 9, 18, 27, 37, 46, 55, 64
  };
  static const uint aWeight4[16] =
  {
      0, 4, 9, 13, 17, 21, 26, 30,
      34, 38, 43, 47, 51, 55, 60, 64
  };
  // Blends the RGB channels of the two endpoints of a subset.
  //
  //   color_index : texel index selecting the weight.
  //   index_prec  : width of the index in bits; 2 and 3 pick aWeight2 / aWeight3, any other
  //                 value picks aWeight4.
  //   endPoint    : endPoint[0] and endPoint[1] are the two endpoints to blend.
  //
  // Returns ( ( 64 - w ) * e0 + w * e1 + 32 ) >> 6 per channel; the + 32 rounds to nearest.
  uint3 interpolate_color( uint color_index, uint index_prec, uint2x4 endPoint )
  {
      uint weight;
      if ( index_prec == 2 )
          weight = aWeight2[color_index];
      else if ( index_prec == 3 )
          weight = aWeight3[color_index];
      else
          weight = aWeight4[color_index];

      uint3 blended = endPoint[0].rgb * ( 64 - weight ) + endPoint[1].rgb * weight + 32;
      return blended >> 6;
  }
  // Blends the alpha channel of the two endpoints of a subset.
  //
  //   alpha_index : texel index selecting the weight.
  //   index_prec  : width of the index in bits; 2 and 3 pick aWeight2 / aWeight3, any other
  //                 value picks aWeight4.
  //   endPoint    : endPoint[0] and endPoint[1] are the two endpoints to blend.
  //
  // Returns ( ( 64 - w ) * a0 + w * a1 + 32 ) >> 6; the + 32 rounds to nearest.
  uint interpolate_alpha( uint alpha_index, uint index_prec, uint2x4 endPoint )
  {
      uint weight;
      if ( index_prec == 2 )
          weight = aWeight2[alpha_index];
      else if ( index_prec == 3 )
          weight = aWeight3[alpha_index];
      else
          weight = aWeight4[alpha_index];

      uint blended = endPoint[0].a * ( 64 - weight ) + endPoint[1].a * weight + 32;
      return blended >> 6;
  }