// bc1_encode_kernel.hlsl

  // 603e63ed=====================================================================
  // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
  //
  // Permission is hereby granted, free of charge, to any person obtaining a copy
  // of this software and associated documentation files(the "Software"), to deal
  // in the Software without restriction, including without limitation the rights
  // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
  // copies of the Software, and to permit persons to whom the Software is
  // furnished to do so, subject to the following conditions :
  //
  // The above copyright notice and this permission notice shall be included in
  // all copies or substantial portions of the Software.
  //
  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  // THE SOFTWARE.
  //
  //===============================================================================
  // Build-configuration switches. They are defined ahead of the
  // "bc1_common_kernel.h" include that appears later in this file, so the
  // header sees them when it is preprocessed.
  #define ASPM_HLSL // This is required
  #define ASPM_GPU  // This is required
  // NOTE(review): USE_CMP / USE_BETSY presumably select between alternative
  // encoder implementations inside the included header - confirm there.
  #define USE_CMP
  //#define USE_BETSY
  // Shader constants bound to constant-buffer slot b0.
  // The member order fixes the buffer layout, so it must not be rearranged
  // without updating whatever fills this buffer.
  cbuffer cbCS : register(b0)
  {
      uint  g_tex_width;        // not referenced by the code visible in this file
      uint  g_num_block_x;      // divisor used to split a linear block id into (block_x, block_y)
      uint  g_format;           // not referenced by the code visible in this file
      uint  g_mode_id;          // not referenced by the code visible in this file
      uint  g_start_block_id;   // offset added to the block id computed from the thread-group id
      uint  g_num_total_blocks; // not referenced by the code visible in this file
      float g_alpha_weight;     // not referenced by the code visible in this file
      float g_quality;          // copied into CMP_BC15Options.m_fquality by EncodeBlocks
  };
  // Source Data
  Texture2D               g_Input  : register( t0 ); // source texture; EncodeBlocks reads mip 0 texels via Load()
  StructuredBuffer<uint4> g_InBuff : register( t1 ); // Currently unused for BC1 processing

  // Compressed Output Data
  // One uint2 element is written per encoded block, indexed by the global block id.
  RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
  // Processing multiple blocks at a time
  #define MAX_USED_THREAD   16 // pixels in a BC (block compressed) block
  #define BLOCK_IN_GROUP    4  // the number of BC blocks a thread group processes = 64 / 16 = 4
  #define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
  #define BLOCK_SIZE_Y      4
  #define BLOCK_SIZE_X      4

  // Group-shared staging area with one float4 slot per thread in the group.
  // EncodeBlocks stores one source texel per thread here before the barrier.
  groupshared float4 shared_temp[THREAD_GROUP_SIZE];

  // Included after the macros and declarations above, which are therefore
  // visible to the header.
  #include "bc1_common_kernel.h"
  // Compute-shader entry point: each thread group encodes BLOCK_IN_GROUP
  // blocks of BLOCK_SIZE_X x BLOCK_SIZE_Y texels and writes one uint2 result
  // per block into g_OutBuff.
  //
  //   GI      - flattened thread index inside the group (SV_GroupIndex).
  //   groupID - thread-group id (SV_GroupID); only the x component is used.
  //
  // Every run of MAX_USED_THREAD consecutive threads owns one block. Each
  // thread loads a single texel into shared_temp; after the group barrier the
  // first thread of each run gathers its block's texels and calls
  // CompressBlockBC1_UNORM2.
  //
  // NOTE(review): g_num_total_blocks is never consulted here, so a dispatch
  // whose block count is not a multiple of BLOCK_IN_GROUP will compute block
  // ids past the intended range. Confirm that the host sizes the dispatch /
  // output buffer accordingly, or guard the final store with that constant.
  [numthreads( THREAD_GROUP_SIZE, 1, 1 )]
  void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
  {
      // Which block inside this thread group the thread belongs to.
      uint blockInGroup = GI / MAX_USED_THREAD;
      // Global block id handled by this thread.
      uint blockID      = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
      // Index in shared_temp of the first texel of this thread's block.
      uint pixelBase    = blockInGroup * MAX_USED_THREAD;
      // Texel index inside the block; always in [0, MAX_USED_THREAD).
      uint pixelInBlock = GI - pixelBase;

      // Block coordinates, then the texel coordinates of the block's top-left corner.
      uint block_y = blockID / g_num_block_x;
      uint block_x = blockID - block_y * g_num_block_x;
      uint base_x  = block_x * BLOCK_SIZE_X;
      uint base_y  = block_y * BLOCK_SIZE_Y;

      // Load one texel per thread from mip level 0. No range test is needed:
      // pixelInBlock is GI modulo MAX_USED_THREAD, so every thread has a texel.
      uint texel_x = base_x + pixelInBlock % BLOCK_SIZE_X;
      uint texel_y = base_y + pixelInBlock / BLOCK_SIZE_X;
      shared_temp[GI] = g_Input.Load( uint3( texel_x, texel_y, 0 ) );

      // All texels of the group must be in shared memory before any block is gathered.
      GroupMemoryBarrierWithGroupSync();

      // Encode and store: only the first thread of each block does the work.
      if (pixelInBlock == 0)
      {
          float4 block[MAX_USED_THREAD];
          for (uint i = 0; i < MAX_USED_THREAD; i++)
          {
              block[i] = shared_temp[pixelBase + i];
          }

          CMP_BC15Options BC15Options;

          // set defaults; only the quality comes from the constant buffer
          BC15Options.m_fquality              = g_quality;
          BC15Options.m_fChannelWeights[0]    = 1.0f;
          BC15Options.m_fChannelWeights[1]    = 1.0f;
          BC15Options.m_fChannelWeights[2]    = 1.0f;
          BC15Options.m_bUseChannelWeighting  = false;
          BC15Options.m_bUseAdaptiveWeighting = false;
          BC15Options.m_bUseFloat             = false;
          BC15Options.m_b3DRefinement         = false;
          BC15Options.m_bUseAlpha             = false;
          BC15Options.m_bIsSRGB               = false;
          BC15Options.m_bIsSNORM              = false;
          // NOTE(review): presumably 0 means the source is not signed data -
          // confirm against the CMP_BC15Options definition.
          BC15Options.m_sintsrc               = 0;
          BC15Options.m_nRefinementSteps      = 1;
          BC15Options.m_nAlphaThreshold       = 128;
          BC15Options.m_mapDecodeRGBA         = false;
          BC15Options.m_src_width             = 4;
          BC15Options.m_src_height            = 4;

          g_OutBuff[blockID] = CompressBlockBC1_UNORM2(block, BC15Options);
      }
  }