bc5_encode_kernel.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. //=====================================================================
  2. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files(the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions :
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. //=====================================================================
  23. #include "bc5_encode_kernel.h"
  24. //============================================== BC5 INTERFACES =======================================================
  25. // Channel data aBlockU and aBlockV is either 0..1 or -1..1
  26. CGU_Vec4ui CompressBC5Block_Internal(CMP_IN CGU_FLOAT aBlockU[16],
  27. CMP_IN CGU_FLOAT aBlockV[16],
  28. CMP_IN CGU_FLOAT fquality,
  29. CMP_IN CGU_BOOL isSNorm)
  30. {
  31. CGU_Vec4ui compBlock;
  32. CGU_Vec2ui cmpBlock;
  33. cmpBlock = cmp_compressAlphaBlock(aBlockU, fquality, isSNorm);
  34. compBlock.x = cmpBlock.x;
  35. compBlock.y = cmpBlock.y;
  36. cmpBlock = cmp_compressAlphaBlock(aBlockV, fquality, isSNorm);
  37. compBlock.z = cmpBlock.x;
  38. compBlock.w = cmpBlock.y;
  39. return compBlock;
  40. }
  41. #ifndef ASPM_HLSL
  42. // rcBlockTemp[] is range 0 to 255
  43. void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],
  44. CMP_GLOBAL CGU_UINT32 compressedBlock[4],
  45. CMP_GLOBAL CMP_BC15Options *BC15options) {
  46. CGU_Vec4ui cmpBlock;
  47. CGU_FLOAT alphaBlockU[16];
  48. CGU_FLOAT alphaBlockV[16];
  49. if (BC15options->m_bIsSNORM)
  50. {
  51. if (BC15options->m_sintsrc)
  52. {
  53. // Convert UINT (carrier of signed ) -> SINT -> SNORM
  54. for (int i = 0; i < BLOCK_SIZE_4X4; i++)
  55. {
  56. char x = (char)(srcBlockTemp[i].x);
  57. char y = (char)(srcBlockTemp[i].y);
  58. alphaBlockU[i] = x / 127.0f;
  59. alphaBlockV[i] = y / 127.0f;
  60. }
  61. }
  62. else
  63. {
  64. // Convert UINT -> SNORM
  65. for (int i = 0; i < BLOCK_SIZE_4X4; i++)
  66. {
  67. alphaBlockU[i] = ((srcBlockTemp[i].x / 255.0f) * 2.0f - 1.0f);
  68. alphaBlockV[i] = ((srcBlockTemp[i].y / 255.0f) * 2.0f - 1.0f);
  69. }
  70. }
  71. }
  72. else
  73. {
  74. // Convert SINT -> UNORM
  75. if (BC15options->m_sintsrc)
  76. {
  77. for (int i = 0; i < BLOCK_SIZE_4X4; i++)
  78. {
  79. char x = (char)(srcBlockTemp[i].x);
  80. char y = (char)(srcBlockTemp[i].y);
  81. alphaBlockU[i] = (((x /127.0f) * 0.5f) + 0.5f);
  82. alphaBlockV[i] = (((y /127.0f) * 0.5f) + 0.5f);
  83. }
  84. }
  85. else {
  86. // Convert UINT -> UNORM
  87. for (int i = 0; i < BLOCK_SIZE_4X4; i++)
  88. {
  89. alphaBlockU[i] = srcBlockTemp[i].x / 255.0f;
  90. alphaBlockV[i] = srcBlockTemp[i].y / 255.0f;
  91. }
  92. }
  93. }
  94. cmpBlock = CompressBC5Block_Internal(alphaBlockU, alphaBlockV, BC15options->m_fquality, BC15options->m_bIsSNORM);
  95. compressedBlock[0] = cmpBlock.x;
  96. compressedBlock[1] = cmpBlock.y;
  97. compressedBlock[2] = cmpBlock.z;
  98. compressedBlock[3] = cmpBlock.w;
  99. }
  100. #endif
  101. #ifndef ASPM_GPU
  102. void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
  103. CGU_UINT32 compressedBlock[4],
  104. CMP_BC15Options *BC15options) {
  105. CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4];
  106. CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4];
  107. cmp_decompressAlphaBlock(alphaBlockR, &compressedBlock[0]);
  108. cmp_decompressAlphaBlock(alphaBlockG, &compressedBlock[2]);
  109. CGU_UINT8 blkindex = 0;
  110. CGU_UINT8 srcindex = 0;
  111. if (BC15options->m_mapDecodeRGBA) {
  112. for (CGU_INT32 j = 0; j < 4; j++) {
  113. for (CGU_INT32 i = 0; i < 4; i++) {
  114. rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockR[srcindex];
  115. rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockG[srcindex];
  116. rgbaBlock[blkindex++] = 0;
  117. rgbaBlock[blkindex++] = 255;
  118. srcindex++;
  119. }
  120. }
  121. } else {
  122. for (CGU_INT32 j = 0; j < 4; j++) {
  123. for (CGU_INT32 i = 0; i < 4; i++) {
  124. rgbaBlock[blkindex++] = 0;
  125. rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockG[srcindex];
  126. rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockR[srcindex];
  127. rgbaBlock[blkindex++] = 255;
  128. srcindex++;
  129. }
  130. }
  131. }
  132. }
  133. void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[BLOCK_SIZE_4X4],
  134. const CGU_UINT8 srcBlockG[BLOCK_SIZE_4X4],
  135. CMP_GLOBAL CGU_UINT32 compressedBlock[4],
  136. CMP_GLOBAL const CMP_BC15Options *BC15options) {
  137. if (BC15options) {}
  138. CGU_Vec2ui cmpBlock;
  139. CGU_FLOAT srcAlphaRF[BLOCK_SIZE_4X4];
  140. CGU_FLOAT srcAlphaGF[BLOCK_SIZE_4X4];
  141. for (CGU_INT i = 0; i < BLOCK_SIZE_4X4; i++) {
  142. srcAlphaRF[i] = (CGU_FLOAT)( srcBlockR[i] ) / 255.0f;
  143. srcAlphaGF[i] = (CGU_FLOAT)( srcBlockG[i] ) / 255.0f;
  144. }
  145. cmpBlock = cmp_compressAlphaBlock(srcAlphaRF, BC15options->m_fquality, FALSE);
  146. compressedBlock[0] = cmpBlock.x;
  147. compressedBlock[1] = cmpBlock.y;
  148. cmpBlock = cmp_compressAlphaBlock(srcAlphaGF, BC15options->m_fquality, FALSE);
  149. compressedBlock[2] = cmpBlock.x;
  150. compressedBlock[3] = cmpBlock.y;
  151. }
  152. void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
  153. CMP_GLOBAL CGU_UINT8 srcBlockG[16],
  154. const CGU_UINT32 compressedBlock[4],
  155. const CMP_BC15Options *BC15options) {
  156. if (BC15options) {}
  157. cmp_decompressAlphaBlock(srcBlockR, &compressedBlock[0]);
  158. cmp_decompressAlphaBlock(srcBlockG, &compressedBlock[2]);
  159. }
  160. void CompressBlockBC5S_DualChannel_Internal(const CGU_INT8 srcBlockR[BLOCK_SIZE_4X4],
  161. const CGU_INT8 srcBlockG[16],
  162. CMP_GLOBAL CGU_UINT32 compressedBlock[4],
  163. CMP_GLOBAL const CMP_BC15Options* BC15options) {
  164. if (BC15options) {
  165. }
  166. CGU_Vec2ui cmpBlock;
  167. CGU_FLOAT srcAlphaRF[BLOCK_SIZE_4X4];
  168. CGU_FLOAT srcAlphaGF[BLOCK_SIZE_4X4];
  169. for (CGU_INT i = 0; i < BLOCK_SIZE_4X4; i++) {
  170. srcAlphaRF[i] = (CGU_FLOAT)(srcBlockR[i])/127.0f;
  171. srcAlphaGF[i] = (CGU_FLOAT)(srcBlockG[i])/127.0f;
  172. }
  173. cmpBlock = cmp_compressAlphaBlock(srcAlphaRF, BC15options->m_fquality, TRUE);
  174. compressedBlock[0] = cmpBlock.x;
  175. compressedBlock[1] = cmpBlock.y;
  176. cmpBlock = cmp_compressAlphaBlock(srcAlphaGF, BC15options->m_fquality, TRUE);
  177. compressedBlock[2] = cmpBlock.x;
  178. compressedBlock[3] = cmpBlock.y;
  179. }
  180. void DecompressBC5S_DualChannel_Internal(CMP_GLOBAL CGU_INT8 srcBlockR[16],
  181. CMP_GLOBAL CGU_INT8 srcBlockG[16],
  182. const CGU_UINT32 compressedBlock[4],
  183. const CMP_BC15Options* BC15options) {
  184. if (BC15options) {
  185. }
  186. cmp_decompressAlphaBlockS(srcBlockR, &compressedBlock[0]);
  187. cmp_decompressAlphaBlockS(srcBlockG, &compressedBlock[2]);
  188. }
  189. #endif
  190. //============================================== USER INTERFACES ========================================================
  191. #ifndef ASPM_GPU
  192. int CMP_CDECL CreateOptionsBC5(void **options) {
  193. CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
  194. if (BC15optionsDefault) {
  195. SetDefaultBC15Options(BC15optionsDefault);
  196. (*options) = BC15optionsDefault;
  197. } else {
  198. (*options) = NULL;
  199. return CGU_CORE_ERR_NEWMEM;
  200. }
  201. return CGU_CORE_OK;
  202. }
  203. int CMP_CDECL DestroyOptionsBC5(void *options) {
  204. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  205. CMP_BC15Options *BCOptions = reinterpret_cast <CMP_BC15Options *>(options);
  206. delete BCOptions;
  207. return CGU_CORE_OK;
  208. }
  209. int CMP_CDECL SetQualityBC5(void *options,
  210. CGU_FLOAT fquality) {
  211. if (!options) return CGU_CORE_ERR_INVALIDPTR;
  212. CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
  213. if (fquality < 0.0f) fquality = 0.0f;
  214. else if (fquality > 1.0f) fquality = 1.0f;
  215. BC15optionsDefault->m_fquality = fquality;
  216. return CGU_CORE_OK;
  217. }
  218. int CMP_CDECL CompressBlockBC5(const CGU_UINT8 *srcBlockR,
  219. unsigned int srcStrideInBytes1,
  220. const CGU_UINT8 *srcBlockG,
  221. unsigned int srcStrideInBytes2,
  222. CMP_GLOBAL CGU_UINT8 cmpBlock[16],
  223. const void *options = NULL) {
  224. CGU_UINT8 inBlockR[16];
  225. //----------------------------------
  226. // Fill the inBlock with source data
  227. //----------------------------------
  228. CGU_INT srcpos = 0;
  229. CGU_INT dstptr = 0;
  230. for (CGU_UINT8 row = 0; row < 4; row++) {
  231. srcpos = row * srcStrideInBytes1;
  232. for (CGU_UINT8 col = 0; col < 4; col++) {
  233. inBlockR[dstptr++] = CGU_UINT8(srcBlockR[srcpos++]);
  234. }
  235. }
  236. CGU_UINT8 inBlockG[16];
  237. //----------------------------------
  238. // Fill the inBlock with source data
  239. //----------------------------------
  240. srcpos = 0;
  241. dstptr = 0;
  242. for (CGU_UINT8 row = 0; row < 4; row++) {
  243. srcpos = row * srcStrideInBytes2;
  244. for (CGU_UINT8 col = 0; col < 4; col++) {
  245. inBlockG[dstptr++] = CGU_UINT8(srcBlockG[srcpos++]);
  246. }
  247. }
  248. CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
  249. CMP_BC15Options BC15optionsDefault;
  250. if (BC15options == NULL) {
  251. BC15options = &BC15optionsDefault;
  252. SetDefaultBC15Options(BC15options);
  253. }
  254. CompressBlockBC5_DualChannel_Internal(inBlockR,inBlockG, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
  255. return CGU_CORE_OK;
  256. }
  257. int CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
  258. CMP_GLOBAL CGU_UINT8 srcBlockR[16],
  259. CMP_GLOBAL CGU_UINT8 srcBlockG[16],
  260. const void *options = NULL) {
  261. CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
  262. CMP_BC15Options BC15optionsDefault;
  263. if (BC15options == NULL) {
  264. BC15options = &BC15optionsDefault;
  265. SetDefaultBC15Options(BC15options);
  266. }
  267. DecompressBC5_DualChannel_Internal(srcBlockR,srcBlockG,(CGU_UINT32 *)cmpBlock,BC15options);
  268. return CGU_CORE_OK;
  269. }
  270. // prototype code
  271. int CMP_CDECL CompressBlockBC5S(const CGU_INT8* srcBlockR,
  272. unsigned int srcStrideInBytes1,
  273. const CGU_INT8* srcBlockG,
  274. unsigned int srcStrideInBytes2,
  275. CMP_GLOBAL CGU_UINT8 cmpBlock[16],
  276. const void* options = NULL) {
  277. CGU_INT8 inBlockR[16];
  278. //----------------------------------
  279. // Fill the inBlock with source data
  280. //----------------------------------
  281. CGU_INT srcpos = 0;
  282. CGU_INT dstptr = 0;
  283. for (CGU_UINT8 row = 0; row < 4; row++) {
  284. srcpos = row * srcStrideInBytes1;
  285. for (CGU_UINT8 col = 0; col < 4; col++) {
  286. inBlockR[dstptr++] = CGU_INT8(srcBlockR[srcpos++]);
  287. }
  288. }
  289. CGU_INT8 inBlockG[16];
  290. //----------------------------------
  291. // Fill the inBlock with source data
  292. //----------------------------------
  293. srcpos = 0;
  294. dstptr = 0;
  295. for (CGU_UINT8 row = 0; row < 4; row++) {
  296. srcpos = row * srcStrideInBytes2;
  297. for (CGU_UINT8 col = 0; col < 4; col++) {
  298. inBlockG[dstptr++] = CGU_INT8(srcBlockG[srcpos++]);
  299. }
  300. }
  301. CMP_BC15Options* BC15options = (CMP_BC15Options*)options;
  302. CMP_BC15Options BC15optionsDefault;
  303. if (BC15options == NULL) {
  304. BC15options = &BC15optionsDefault;
  305. SetDefaultBC15Options(BC15options);
  306. }
  307. CompressBlockBC5S_DualChannel_Internal(inBlockR, inBlockG, (CMP_GLOBAL CGU_UINT32*)cmpBlock, BC15options);
  308. return CGU_CORE_OK;
  309. }
  310. // prototype code
  311. int CMP_CDECL DecompressBlockBC5S(const CGU_UINT8 cmpBlock[16],
  312. CMP_GLOBAL CGU_INT8 srcBlockR[16],
  313. CMP_GLOBAL CGU_INT8 srcBlockG[16],
  314. const void* options = NULL) {
  315. CMP_BC15Options* BC15options = (CMP_BC15Options*)options;
  316. CMP_BC15Options BC15optionsDefault;
  317. if (BC15options == NULL) {
  318. BC15options = &BC15optionsDefault;
  319. SetDefaultBC15Options(BC15options);
  320. }
  321. DecompressBC5S_DualChannel_Internal(srcBlockR, srcBlockG, (CGU_UINT32*)cmpBlock, BC15options);
  322. return CGU_CORE_OK;
  323. }
  324. #endif
  325. //============================================== OpenCL USER INTERFACE ====================================================
  326. #ifdef ASPM_OPENCL
  327. CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource,
  328. CMP_GLOBAL CGU_UINT8* ImageDestination,
  329. CMP_GLOBAL Source_Info* SourceInfo,
  330. CMP_GLOBAL CMP_BC15Options* BC15options
  331. ) {
  332. CGU_UINT32 xID;
  333. CGU_UINT32 yID;
  334. #ifdef ASPM_GPU
  335. xID = get_global_id(0);
  336. yID = get_global_id(1);
  337. #else
  338. xID = 0;
  339. yID = 0;
  340. #endif
  341. if (xID >= (SourceInfo->m_src_width / BlockX)) return;
  342. if (yID >= (SourceInfo->m_src_height / BlockX)) return;
  343. int srcWidth = SourceInfo->m_src_width;
  344. CGU_UINT32 destI = (xID*BC5CompBlockSize) + (yID*(srcWidth / BlockX)*BC5CompBlockSize);
  345. int srcindex = 4 * (yID * srcWidth + xID);
  346. int blkindex = 0;
  347. CMP_Vec4uc srcData[16];
  348. srcWidth = srcWidth - 4;
  349. for ( CGU_INT32 j = 0; j < 4; j++) {
  350. for ( CGU_INT32 i = 0; i < 4; i++) {
  351. srcData[blkindex++] = ImageSource[srcindex++];
  352. }
  353. srcindex += srcWidth;
  354. }
  355. CompressBlockBC5_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
  356. }
  357. #endif