bc1_encode_kernel.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. //==============================================================================
  2. // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files(the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions :
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. //===============================================================================
  23. #include "common_def.h"
  24. //============================================== BC1 INTERFACES =======================================================
  25. #ifndef ASPM_OPENCL
  26. #define USE_NEW_SINGLE_HEADER_INTERFACES // Else use HPC v4.1 codec if using OpenCL
  27. #ifdef USE_NEW_SINGLE_HEADER_INTERFACES
  28. #define USE_CMP
  29. //#define USE_RGBCX
  30. //#define USE_ICBC
  31. //#define USE_BETSY
  32. //#define USE_INT
  33. //#define USE_STB
  34. //#define USE_SQUISH
  35. //#define USE_HUMUS
  36. #endif
  37. #endif
  38. #include "bc1_encode_kernel.h" // new header for testing common encoders
  // Heat Mapping
  // This code compares the quality of two similar or identical codecs run with varying quality settings.
  // Each resulting compressed block is colored according to one of three conditions:
  // the base codec (lowest quality) is colored green and the varying-quality codec is colored red;
  // if the quality of the base codec matches that of the varying codec, the block is colored blue.
  // Base codecs can be local to CMP_Core or imported using an external set of files.
  45. #ifndef TEST_HEATMAP
  46. //#define TEST_HEATMAP // Enable this to run heat map tests on BC1 codec
  47. #endif
  48. #ifdef TEST_HEATMAP
  49. #include "externcodec.h" // Use external codec for testing
  50. #endif
  51. #ifndef ASPM_HLSL
  52. void CompressBlockBC1_Internal(const CMP_Vec4uc srcBlockTemp[16],
  53. CMP_GLOBAL CGU_UINT32 compressedBlock[2],
  54. CMP_GLOBAL CMP_BC15Options *BC15options)
  55. {
  56. #ifdef USE_NEW_SINGLE_HEADER_INTERFACES
  57. CGU_Vec2ui cmpBlock2 = {0,0};
  58. CGU_Vec4f image_src[16];
  59. //int px = 0;
  60. for (int i = 0; i < 16; i++)
  61. {
  62. image_src[i].x = srcBlockTemp[i].x / 255.0f;
  63. image_src[i].y = srcBlockTemp[i].y / 255.0f;
  64. image_src[i].z = srcBlockTemp[i].z / 255.0f;
  65. image_src[i].w = srcBlockTemp[i].w / 255.0f;
  66. }
  67. cmpBlock2 = CompressBlockBC1_UNORM2(image_src, *BC15options);
  68. compressedBlock[0] = cmpBlock2.x;
  69. compressedBlock[1] = cmpBlock2.y;
  70. #else
  71. CGU_UINT8 srcindex = 0;
  72. CGU_FLOAT BlockA[16];
  73. CGU_Vec3f rgbBlockUV[16];
  74. for ( CGU_INT32 j = 0; j < 4; j++) {
  75. for ( CGU_INT32 i = 0; i < 4; i++) {
  76. rgbBlockUV[srcindex].x = (CGU_FLOAT)(srcBlockTemp[srcindex].x & 0xFF)/ 255.0f; // R
  77. rgbBlockUV[srcindex].y = (CGU_FLOAT)(srcBlockTemp[srcindex].y & 0xFF)/ 255.0f; // G
  78. rgbBlockUV[srcindex].z = (CGU_FLOAT)(srcBlockTemp[srcindex].z & 0xFF)/ 255.0f; // B
  79. srcindex++;
  80. }
  81. }
  82. CMP_BC15Options internalOptions = *BC15options;
  83. internalOptions = CalculateColourWeightings3f(rgbBlockUV,internalOptions);
  84. CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
  85. CGU_BOOL isSRGB = internalOptions.m_bIsSRGB; // feature not supported in this section of code until v4.1
  86. CGU_Vec2ui cmpBlock = 0;
  87. //#define CMP_PRINTRESULTS
  88. #ifdef TEST_HEATMAP
  89. #ifdef CMP_PRINTRESULTS
  90. static int q1= 0,q2= 0,same = 0;
  91. static int testnum = 0;
  92. printf("%4d ",testnum);
  93. #endif
  94. {
  95. // Heatmap test: See BCn_Common_Kernel for details
  96. CGU_Vec2ui red = {0xf800f800,0};
  97. CGU_Vec2ui green = {0x07e007e0,0};
  98. CGU_Vec2ui blue = {0x001f001f,0};
  99. CGU_Vec2ui comp1;
  100. CGU_Vec2ui comp2;
  101. float err ;
  102. comp1 = (BC15options->m_fquality < 0.3)?CompressBC1Block_SRGB(rgbBlockUV):CompressBC1Block(rgbBlockUV);
  103. comp2 = CompressBlockBC1_UNORM(rgbBlockUV, BC15options->m_fquality,BC15options->m_fquality < 0.3?true:false);
  104. if ((comp1.x == comp2.x)&&(comp1.y == comp2.y)) err = 0.0f;
  105. else {
  106. float err1 = CMP_RGBBlockError(rgbBlockUV,comp1,(BC15options->m_fquality < 0.3)?true:false);
  107. float err2 = CMP_RGBBlockError(rgbBlockUV,comp2,(BC15options->m_fquality < 0.3)?true:false);
  108. err = err1-err2;
  109. }
  110. if (err > 0.0f) {
  111. cmpBlock = red;
  112. } else if (err < 0.0f) {
  113. cmpBlock = green;
  114. } else {
  115. cmpBlock = blue;
  116. }
  117. }
  118. #ifdef CMP_PRINTRESULTS
  119. printf("Q1 [%4X:%4X] %.3f, ",cmpBlockQ1.x,cmpBlockQ1.y,err1);
  120. printf("Q2 [%4X:%4X] %.3f, ",cmpBlock.x,cmpBlock.y,err2);
  121. testnum++;
  122. #endif
  123. #else
  124. // printf("q = %f\n",internalOptions.m_fquality);
  125. cmpBlock = CompressBlockBC1_RGBA_Internal(
  126. rgbBlockUV,
  127. BlockA,
  128. channelWeights,
  129. 0, //internalOptions.m_nAlphaThreshold, bug to investigate in debug is ok release has issue!
  130. 1,
  131. internalOptions.m_fquality,
  132. isSRGB
  133. );
  134. #endif
  135. compressedBlock[0] = cmpBlock.x;
  136. compressedBlock[1] = cmpBlock.y;
  137. union {
  138. unsigned char buf[8];
  139. uint32 blocks[2];
  140. } cmp;
  141. cmp.blocks[0] = compressedBlock[0];
  142. cmp.blocks[1] = compressedBlock[1];
  143. // printf("[%3d,%3d,%3d,%3d:%3d,%3d,%3d,%3d]\n",
  144. // cmp.buf[0], cmp.buf[1], cmp.buf[2], cmp.buf[3],
  145. // cmp.buf[4], cmp.buf[5], cmp.buf[6], cmp.buf[7]);
  146. #endif
  147. }
  148. #endif // ASPM_HLSL
  149. //============================================== CPU INTERFACES ========================================================
  150. #ifndef ASPM_GPU
  151. int CMP_CDECL CreateOptionsBC1(void **options) {
  152. CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
  153. if (BC15optionsDefault) {
  154. SetDefaultBC15Options(BC15optionsDefault);
  155. (*options) = BC15optionsDefault;
  156. } else {
  157. (*options) = NULL;
  158. return CGU_CORE_ERR_NEWMEM;
  159. }
  160. return CGU_CORE_OK;
  161. }
  162. int CMP_CDECL DestroyOptionsBC1(void *options) {
  163. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  164. CMP_BC15Options *BCOptions = reinterpret_cast <CMP_BC15Options *>(options);
  165. delete BCOptions;
  166. return CGU_CORE_OK;
  167. }
  168. int CMP_CDECL SetQualityBC1(void *options, CGU_FLOAT fquality) {
  169. if (!options) return CGU_CORE_ERR_NEWMEM;
  170. CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
  171. if (fquality < 0.0f) fquality = 0.0f;
  172. else if (fquality > 1.0f) fquality = 1.0f;
  173. BC15optionsDefault->m_fquality = fquality;
  174. return CGU_CORE_OK;
  175. }
  176. int CMP_CDECL SetRefineStepsBC1(void *options, CGU_UINT32 steps) {
  177. if (!options) return CGU_CORE_ERR_NEWMEM;
  178. CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
  179. if (steps < 0) steps = 1;
  180. else if (steps > 1) steps = 1;
  181. BC15optionsDefault->m_nRefinementSteps = steps;
  182. return CGU_CORE_OK;
  183. }
  184. int CMP_CDECL SetAlphaThresholdBC1(void *options, CGU_UINT8 alphaThreshold) {
  185. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  186. CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
  187. BC15optionsDefault->m_nAlphaThreshold = alphaThreshold;
  188. return CGU_CORE_OK;
  189. }
  190. int CMP_CDECL SetDecodeChannelMapping(void *options, CGU_BOOL mapRGBA) {
  191. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  192. CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
  193. BC15optionsDefault->m_mapDecodeRGBA = mapRGBA;
  194. return CGU_CORE_OK;
  195. }
  196. int CMP_CDECL SetChannelWeightsBC1(void *options,
  197. CGU_FLOAT WeightRed,
  198. CGU_FLOAT WeightGreen,
  199. CGU_FLOAT WeightBlue) {
  200. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  201. CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;
  202. if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED;
  203. if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN;
  204. if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE;
  205. BC15optionsDefault->m_bUseChannelWeighting = true;
  206. BC15optionsDefault->m_fChannelWeights[0] = WeightRed;
  207. BC15optionsDefault->m_fChannelWeights[1] = WeightGreen;
  208. BC15optionsDefault->m_fChannelWeights[2] = WeightBlue;
  209. return CGU_CORE_OK;
  210. }
  211. int CMP_CDECL SetGammaBC1(void *options, CGU_BOOL sRGB) {
  212. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  213. CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;
  214. BC15optionsDefault->m_bIsSRGB = sRGB;
  215. return CGU_CORE_OK;
  216. }
  217. int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock,
  218. unsigned int srcStrideInBytes,
  219. CMP_GLOBAL unsigned char cmpBlock[8],
  220. const void *options = NULL) {
  221. CMP_Vec4uc inBlock[16];
  222. //----------------------------------
  223. // Fill the inBlock with source data
  224. //----------------------------------
  225. CGU_INT srcpos = 0;
  226. CGU_INT dstptr = 0;
  227. for (CGU_UINT8 row=0; row < 4; row++) {
  228. srcpos = row * srcStrideInBytes;
  229. for (CGU_UINT8 col = 0; col < 4; col++) {
  230. inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]);
  231. inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]);
  232. inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]);
  233. inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]);
  234. dstptr++;
  235. }
  236. }
  237. CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
  238. CMP_BC15Options BC15optionsDefault;
  239. if (BC15options == NULL) {
  240. BC15options = &BC15optionsDefault;
  241. SetDefaultBC15Options(BC15options);
  242. }
  243. CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
  244. return CGU_CORE_OK;
  245. }
  246. int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8],
  247. CMP_GLOBAL unsigned char srcBlock[64],
  248. const void *options = NULL) {
  249. CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
  250. CMP_BC15Options BC15optionsDefault;
  251. if (BC15options == NULL) {
  252. BC15options = &BC15optionsDefault;
  253. SetDefaultBC15Options(BC15options);
  254. }
  255. CGU_Vec2ui compBlock;
  256. compBlock.x = (CGU_UINT32)cmpBlock[3] << 24 |
  257. (CGU_UINT32)cmpBlock[2] << 16 |
  258. (CGU_UINT32)cmpBlock[1] << 8 |
  259. (CGU_UINT32)cmpBlock[0];
  260. compBlock.y = (CGU_UINT32)cmpBlock[7] << 24 |
  261. (CGU_UINT32)cmpBlock[6] << 16 |
  262. (CGU_UINT32)cmpBlock[5] << 8 |
  263. (CGU_UINT32)cmpBlock[4];
  264. cmp_decompressDXTRGBA_Internal(srcBlock, compBlock, BC15options->m_mapDecodeRGBA);
  265. return CGU_CORE_OK;
  266. }
  267. #endif
  268. //============================================== OpenCL USER INTERFACE ========================================================
  269. #ifdef ASPM_OPENCL
  270. CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
  271. CMP_GLOBAL const CMP_Vec4uc* ImageSource,
  272. CMP_GLOBAL CGU_UINT8* ImageDestination,
  273. CMP_GLOBAL Source_Info* SourceInfo,
  274. CMP_GLOBAL CMP_BC15Options* BC15options
  275. ) {
  276. CGU_UINT32 xID;
  277. CGU_UINT32 yID;
  278. //printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality);
  279. xID = get_global_id(0);
  280. yID = get_global_id(1);
  281. if (xID >= (SourceInfo->m_src_width / BlockX)) return;
  282. if (yID >= (SourceInfo->m_src_height / BlockX)) return;
  283. int srcWidth = SourceInfo->m_src_width;
  284. CGU_UINT32 destI = (xID*BC1CompBlockSize) + (yID*(srcWidth / BlockX)*BC1CompBlockSize);
  285. int srcindex = 4 * (yID * srcWidth + xID);
  286. int blkindex = 0;
  287. CMP_Vec4uc srcData[16];
  288. srcWidth = srcWidth - 4;
  289. for ( CGU_INT32 j = 0; j < 4; j++) {
  290. for ( CGU_INT32 i = 0; i < 4; i++) {
  291. srcData[blkindex++] = ImageSource[srcindex++];
  292. }
  293. srcindex += srcWidth;
  294. }
  295. CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
  296. }
  297. #endif