// bc7_encode_kernel.hlsl

  1. // 607dfc9f-----------------------------------------------------------------------------
  2. //==============================================================================
  3. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy
  6. // of this software and associated documentation files(the "Software"), to deal
  7. // in the Software without restriction, including without limitation the rights
  8. // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
  9. // copies of the Software, and to permit persons to whom the Software is
  10. // furnished to do so, subject to the following conditions :
  11. //
  12. // The above copyright notice and this permission notice shall be included in
  13. // all copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. // THE SOFTWARE.
  22. //
  23. //===============================================================================
  24. //
  25. // Copyright (c) Microsoft Corporation. All rights reserved.
  26. // Licensed under the MIT License.
  27. //-------------------------------------------------------------------------------
  // ---------------------------------------------------------------------------
  // Build target selection.
  // Both symbols are marked as required by the original author. They are only
  // defined here when the build has not already provided them.
  // ---------------------------------------------------------------------------
  #ifndef ASPM_GPU
  #define ASPM_GPU
  #endif
  #ifndef ASPM_HLSL
  #define ASPM_HLSL
  #endif

  // ---------------------------------------------------------------------------
  // Codec / experiment switches. USE_CMPMSC is the only one enabled; the others
  // are kept, commented out, together with the author's notes on their status.
  // ---------------------------------------------------------------------------
  //#define USE_CMP                 // Use New Dev Compressonator Codec for v4.2
  #define USE_CMPMSC                // Use New Dev Compressonator Codec for v4.2
  //#define USE_MSC                 // Microsoft DirectXTex HLSL all modes
  //#define USE_ENCODEBLOCKS_ONLY   // Stub out the TryMode* entry points and encode in EncodeBlocks only
  //#define USE_INT                 // Compiles on CPU. Compiles on HLSL in 38 seconds with reg and loop warnings: images are good
  //#define USE_RGBCX_RDO           // Compiles on CPU, images are good. Does not compile on HLSL
  //#define USE_VOLT                // Compiles on CPU + HLSL but the image is corrupt
  //#define USE_ICBC                // Compiles on CPU. Does not compile on HLSL
  //#define USE_ARRIS               // Arris port of the rgbcx_rdo code for GPU
  // ---------------------------------------------------------------------------
  // Numeric constants.
  // ---------------------------------------------------------------------------
  #define CHAR_LENGTH         8            // presumably bits per char - confirm against the users in bc7_common_encoder.h
  #define NCHANNELS           4            // presumably the R, G, B, A channel count - confirm
  #define BC7_UNORM           98           // same numeric value as DXGI_FORMAT_BC7_UNORM
  #define MAX_UINT            0xFFFFFFFF
  #define MIN_UINT            0

  // A BC block is 4x4 texels.
  #define BLOCK_SIZE_Y        4
  #define BLOCK_SIZE_X        4
  #define BLOCK_SIZE          (BLOCK_SIZE_Y * BLOCK_SIZE_X)

  // Threads per group; also the element count of the groupshared scratch array.
  #define THREAD_GROUP_SIZE   64

  // Source texture to process.
  Texture2D g_Input : register(t0);
  // Per-thread scratch record kept in groupshared memory.
  //
  // In the part of the file shown here only `pixel` is accessed (written and
  // read back by EncodeBlocks). The remaining fields are presumably working
  // state for the TryMode* search passes - confirm against
  // bc7_common_encoder.h before relying on any of the notes below.
  struct BufferShared
  {
      uint4 pixel;                    // texel handled by this thread

      uint  error;                    // presumably the error of the current candidate
      uint  mode;                     // presumably the BC7 mode of the candidate
      uint  partition;                // presumably the partition id
      uint  index_selector;           // presumably the index-selector bit
      uint  rotation;                 // presumably the channel-rotation field
      uint  pbit;                     // presumably the p-bit choice

      uint4 endPoint_low;             // presumably the unquantized low endpoint
      uint4 endPoint_high;            // presumably the unquantized high endpoint
      uint4 endPoint_low_quantized;   // presumably the quantized low endpoint
      uint4 endPoint_high_quantized;  // presumably the quantized high endpoint

      uint  colorindex;               // presumably the selected colour index
      uint  alphaindex;               // presumably the selected alpha index
  };

  // One scratch record per thread in the group.
  groupshared BufferShared shared_temp[THREAD_GROUP_SIZE];
  // Per-dispatch constants bound at b0.
  // The member order is the constant-buffer layout, so it has to stay in step
  // with whatever the host code uploads.
  cbuffer cbCS : register(b0)
  {
      uint  g_tex_width;         // presumably the source texture width in texels - confirm
      uint  g_num_block_x;       // blocks per row; EncodeBlocks uses it to split a block id into (x, y)
      uint  g_format;            // presumably the target format id (see BC7_UNORM) - confirm
      uint  g_mode_id;           // presumably the BC7 mode tried by a TryMode* pass - confirm
      uint  g_start_block_id;    // offset added to the block id computed from the group/thread ids
      uint  g_num_total_blocks;  // presumably the total number of blocks to encode - confirm
      float g_alpha_weight;      // presumably the weight of alpha in the error metric - confirm
      float g_quality;           // forwarded to CompressBlockBC7_UNORM by EncodeBlocks
  };
  // Element type of the structured buffers g_InBuff and g_OutBuff1.
  // The member order is the buffer element layout; keep it in step with the
  // host side. The field meanings are inferred from their names - confirm
  // against the TryMode* implementations.
  struct SharedIOData
  {
      uint  error;           // presumably the best error found so far for the block
      uint  mode;            // presumably the BC7 mode that produced it
      uint  index_selector;  // presumably the index-selector bit
      uint  rotation;        // presumably the channel-rotation field
      uint  partition;       // presumably the partition id
      uint4 data2;           // NOTE(review): payload not identifiable from this file section
  };
  // Read-only input records at t1.
  StructuredBuffer<SharedIOData>   g_InBuff   : register(t1);

  // Output buffers. Both are declared on UAV slot u0, so any one entry point is
  // expected to reference only one of them:
  //   g_OutBuff1 - SharedIOData records (used by TryMode...)
  //   g_OutBuff  - encoded 128-bit blocks (used by EncodeBlocks & TryMode...)
  RWStructuredBuffer<SharedIOData> g_OutBuff1 : register(u0);
  RWStructuredBuffer<uint4>        g_OutBuff  : register(u0);
  94. #include "bc7_common_encoder.h"
  95. #ifdef USE_ENCODEBLOCKS_ONLY
  // Empty stand-in compiled when USE_ENCODEBLOCKS_ONLY is defined.
  // The entry point is presumably kept so that host code which looks it up by
  // name still finds it; the name suggests the BC7 mode 4/5/6 pass - confirm.
  //   GI      - flattened thread index inside the group (unused)
  //   groupID - thread-group id (unused)
  [numthreads(THREAD_GROUP_SIZE, 1, 1)]
  void TryMode456CS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  {
      // Intentionally empty.
  }
  // Empty stand-in compiled when USE_ENCODEBLOCKS_ONLY is defined.
  // The entry point is presumably kept so that host code which looks it up by
  // name still finds it; the name suggests the BC7 mode 1/3/7 pass - confirm.
  //   GI      - flattened thread index inside the group (unused)
  //   groupID - thread-group id (unused)
  [numthreads(THREAD_GROUP_SIZE, 1, 1)]
  void TryMode137CS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  {
      // Intentionally empty.
  }
  // Empty stand-in compiled when USE_ENCODEBLOCKS_ONLY is defined.
  // The entry point is presumably kept so that host code which looks it up by
  // name still finds it; the name suggests the BC7 mode 0/2 pass - confirm.
  //   GI      - flattened thread index inside the group (unused)
  //   groupID - thread-group id (unused)
  [numthreads(THREAD_GROUP_SIZE, 1, 1)]
  void TryMode02CS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  {
      // Intentionally empty.
  }
  // Encodes BC7 blocks straight from the source texture (the only real entry
  // point when USE_ENCODEBLOCKS_ONLY is defined).
  //
  // Every block is served by MAX_USED_THREAD consecutive threads of the group:
  // each thread fetches one texel into groupshared memory, then the first
  // thread of the block gathers the 16 texels, scales them to 0..255 and writes
  // the result of CompressBlockBC7_UNORM to g_OutBuff[blockID].
  //
  //   GI      - flattened thread index inside the group (SV_GroupIndex)
  //   groupID - thread-group id (SV_GroupID); only .x is used
  //
  // MAX_USED_THREAD and BLOCK_IN_GROUP are not defined in this part of the
  // file; they are expected to come from bc7_common_encoder.h or another
  // section (presumably 16 and 4, matching the "4 BC blocks per thread group"
  // note and THREAD_GROUP_SIZE == 64) - confirm.
  //
  // Fix: g_Input.Load returns a float4 in 0..1, but BufferShared::pixel is a
  // uint4. The old direct assignment converted each channel to an integer
  // (0 or 1) before the multiply by 255, so every channel reached the encoder
  // as either 0 or 255. The float bits are now stored with asuint and recovered
  // with asfloat, so the scaling operates on the real texel value.
  [numthreads(THREAD_GROUP_SIZE, 1, 1)]
  void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  {
      // Which BC block of this group the thread works on, and its global id.
      uint blockInGroup = GI / MAX_USED_THREAD;
      uint blockID      = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;

      // First thread index of this block inside the group, and the thread's
      // texel index inside the block.
      uint pixelBase    = blockInGroup * MAX_USED_THREAD;
      uint pixelInBlock = GI - pixelBase;

      // Block coordinates, then the texel coordinates of the block's corner.
      uint block_y = blockID / g_num_block_x;
      uint block_x = blockID - block_y * g_num_block_x;
      uint base_x  = block_x * BLOCK_SIZE_X;
      uint base_y  = block_y * BLOCK_SIZE_Y;

      // Load the texel (0..1) and park its raw float bits in the uint4 slot.
      if (pixelInBlock < 16)
      {
          float4 texel = g_Input.Load(uint3(base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0));
          shared_temp[GI].pixel = asuint(texel);
      }

      // Every thread reaches this barrier, so all 16 texels of a block are
      // visible before its first thread reads them.
      GroupMemoryBarrierWithGroupSync();

      // Process and save the block: one thread per block does the encoding.
      if (pixelInBlock == 0)
      {
          float4 image_src[16];
          for (uint i = 0; i < 16; i++)
          {
              // Reinterpret the stored bits as floats and scale 0..1 -> 0..255.
              image_src[i] = asfloat(shared_temp[pixelBase + i].pixel) * 255.0f;
          }
          g_OutBuff[blockID] = CompressBlockBC7_UNORM(image_src, g_quality);
      }
  }
  143. #endif