bc6_encode_kernel.hlsl 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // 60354e86-----------------------------------------------------------------------------
  2. //==============================================================================
  3. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy
  6. // of this software and associated documentation files(the "Software"), to deal
  7. // in the Software without restriction, including without limitation the rights
  8. // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
  9. // copies of the Software, and to permit persons to whom the Software is
  10. // furnished to do so, subject to the following conditions :
  11. //
  12. // The above copyright notice and this permission notice shall be included in
  13. // all copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. // THE SOFTWARE.
  22. //
  23. //===============================================================================
  24. #define ASPM_HLSL // This is required
  25. #define ASPM_GPU // This is required
  26. #define USE_MSC // Use MSC Codec
  27. //#define USE_BETSY // Use Betsy Codec
  28. //#define USE_CMP // Use Compressonator Codec
  29. #define CHAR_LENGTH 8
  30. #define NCHANNELS 3
  31. #define MAX_UINT 0xFFFFFFFF
  32. #define MIN_UINT 0
  33. #define BLOCK_SIZE_Y 4
  34. #define BLOCK_SIZE_X 4
  35. #define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X)
  36. #define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
  37. cbuffer cbCS : register(b0)
  38. {
  39. uint g_tex_width;
  40. uint g_num_block_x;
  41. uint g_format;
  42. uint g_mode_id;
  43. uint g_start_block_id;
  44. uint g_num_total_blocks;
  45. float g_alpha_weight;
  46. float g_quality;
  47. };
  48. Texture2D<float4> g_Input : register(t0);
  49. StructuredBuffer<uint4> g_InBuff : register(t1);
  50. RWStructuredBuffer<uint4> g_OutBuff : register(u0);
  51. struct SharedData
  52. {
  53. float3 pixel;
  54. int3 pixel_ph;
  55. float3 pixel_hr;
  56. float pixel_lum;
  57. float error;
  58. uint best_mode;
  59. uint best_partition;
  60. int3 endPoint_low;
  61. int3 endPoint_high;
  62. float endPoint_lum_low;
  63. float endPoint_lum_high;
  64. };
  65. #ifdef USE_MSC
  66. groupshared SharedData shared_temp[THREAD_GROUP_SIZE];
  67. #else
  68. groupshared float3 shared_temp[THREAD_GROUP_SIZE];
  69. #endif
  70. #include "bc6_common_encoder.h"
  71. #ifndef USE_MSC
  72. [numthreads(THREAD_GROUP_SIZE, 1, 1)] void EncodeBlocks(CGU_UINT32 GI
  73. : SV_GroupIndex, CGU_Vec3ui groupID
  74. : SV_GroupID) {
  75. // we process 4 BC blocks per thread group
  76. const CGU_UINT32 MAX_USED_THREAD = 32;
  77. CGU_UINT32 BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
  78. CGU_UINT32 blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group
  79. CGU_UINT32 blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
  80. CGU_UINT32 pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group
  81. CGU_UINT32 pixelInBlock = GI - pixelBase; // id of the pixel in this BC block
  82. CGU_UINT32 block_y = blockID / g_num_block_x;
  83. CGU_UINT32 block_x = blockID - block_y * g_num_block_x;
  84. CGU_UINT32 base_x = block_x * BLOCK_SIZE_X;
  85. CGU_UINT32 base_y = block_y * BLOCK_SIZE_Y;
  86. // Load up the pixels
  87. if (pixelInBlock < 16)
  88. {
  89. shared_temp[GI] = g_Input.Load(CGU_Vec3ui(base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0)).rgb;
  90. }
  91. GroupMemoryBarrierWithGroupSync();
  92. // Process and save s
  93. if (pixelInBlock == 0)
  94. {
  95. CGU_Vec3f image_src[16];
  96. for (CGU_INT i = 0; i < 16; i++)
  97. {
  98. image_src[i].x = shared_temp[pixelBase + i].x;
  99. image_src[i].y = shared_temp[pixelBase + i].y;
  100. image_src[i].z = shared_temp[pixelBase + i].z;
  101. }
  102. g_OutBuff[blockID] = CompressBlockBC6H_UNORM(image_src, g_quality);
  103. }
  104. }
  105. #endif