BC.cpp 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147
  1. //-------------------------------------------------------------------------------------
  2. // BC.cpp
  3. //
  4. // Block-compression (BC) functionality for BC1, BC2, BC3 (original DXTn formats)
  5. //
  6. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  7. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  8. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  9. // PARTICULAR PURPOSE.
  10. //
  11. // Copyright (c) Microsoft Corporation. All rights reserved.
  12. //
  13. // http://go.microsoft.com/fwlink/?LinkId=248926
  14. //-------------------------------------------------------------------------------------
  15. //#include "directxtexp.h" ESENTHEL CHANGED
  16. // Experimental encoding variants, not enabled by default
  17. //#define COLOR_WEIGHTS
  18. //#define COLOR_AVG_0WEIGHTS
  19. //#include "BC.h" ESENTHEL CHANGED
  20. using namespace DirectX;
  21. using namespace DirectX::PackedVector;
  22. namespace
  23. {
  24. //-------------------------------------------------------------------------------------
  25. // Constants
  26. //-------------------------------------------------------------------------------------
  27. // Perceptual weightings for the importance of each channel.
  28. const HDRColorA g_Luminance(0.2125f / 0.7154f, 1.0f, 0.0721f / 0.7154f, 1.0f);
  29. const HDRColorA g_LuminanceInv(0.7154f / 0.2125f, 1.0f, 0.7154f / 0.0721f, 1.0f);
  30. //-------------------------------------------------------------------------------------
  31. // Decode/Encode RGB 5/6/5 colors
  32. //-------------------------------------------------------------------------------------
  33. inline void Decode565(_Out_ HDRColorA *pColor, _In_ const uint16_t w565)
  34. {
  35. pColor->r = (float)((w565 >> 11) & 31) * (1.0f / 31.0f);
  36. pColor->g = (float)((w565 >> 5) & 63) * (1.0f / 63.0f);
  37. pColor->b = (float)((w565 >> 0) & 31) * (1.0f / 31.0f);
  38. pColor->a = 1.0f;
  39. }
  40. inline uint16_t Encode565(_In_ const HDRColorA *pColor)
  41. {
  42. HDRColorA Color;
  43. Color.r = (pColor->r < 0.0f) ? 0.0f : (pColor->r > 1.0f) ? 1.0f : pColor->r;
  44. Color.g = (pColor->g < 0.0f) ? 0.0f : (pColor->g > 1.0f) ? 1.0f : pColor->g;
  45. Color.b = (pColor->b < 0.0f) ? 0.0f : (pColor->b > 1.0f) ? 1.0f : pColor->b;
  46. uint16_t w;
  47. w = (uint16_t) ((static_cast<int32_t>(Color.r * 31.0f + 0.5f) << 11) |
  48. (static_cast<int32_t>(Color.g * 63.0f + 0.5f) << 5) |
  49. (static_cast<int32_t>(Color.b * 31.0f + 0.5f) << 0));
  50. return w;
  51. }
  52. //-------------------------------------------------------------------------------------
  53. static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);
  54. static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f };
  55. static const float pD3[] = { 0.0f/2.0f, 1.0f/2.0f, 2.0f/2.0f };
  56. static const float pC4[] = { 3.0f/3.0f, 2.0f/3.0f, 1.0f/3.0f, 0.0f/3.0f };
  57. static const float pD4[] = { 0.0f/3.0f, 1.0f/3.0f, 2.0f/3.0f, 3.0f/3.0f };
  58. void OptimizeRGB(
  59. _Out_ HDRColorA *pX,
  60. _Out_ HDRColorA *pY,
  61. _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pPoints,
  62. _In_ size_t cSteps,
  63. const HDRColorA *pWeights)
  64. {
  65. const float *pC = (3 == cSteps) ? pC3 : pC4;
  66. const float *pD = (3 == cSteps) ? pD3 : pD4;
  67. // Find Min and Max points, as starting point
  68. HDRColorA X = pWeights ? *pWeights : HDRColorA(1.0f, 1.0f, 1.0f, 1.0f);
  69. HDRColorA Y = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
  70. for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  71. {
  72. #ifdef COLOR_WEIGHTS
  73. if(pPoints[iPoint].a > 0.0f)
  74. #endif // COLOR_WEIGHTS
  75. {
  76. if(pPoints[iPoint].r < X.r)
  77. X.r = pPoints[iPoint].r;
  78. if(pPoints[iPoint].g < X.g)
  79. X.g = pPoints[iPoint].g;
  80. if(pPoints[iPoint].b < X.b)
  81. X.b = pPoints[iPoint].b;
  82. if(pPoints[iPoint].r > Y.r)
  83. Y.r = pPoints[iPoint].r;
  84. if(pPoints[iPoint].g > Y.g)
  85. Y.g = pPoints[iPoint].g;
  86. if(pPoints[iPoint].b > Y.b)
  87. Y.b = pPoints[iPoint].b;
  88. }
  89. }
  90. // Diagonal axis
  91. HDRColorA AB;
  92. AB.r = Y.r - X.r;
  93. AB.g = Y.g - X.g;
  94. AB.b = Y.b - X.b;
  95. float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
  96. // Single color block.. no need to root-find
  97. if(fAB < FLT_MIN)
  98. {
  99. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  100. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  101. return;
  102. }
  103. // Try all four axis directions, to determine which diagonal best fits data
  104. float fABInv = 1.0f / fAB;
  105. HDRColorA Dir;
  106. Dir.r = AB.r * fABInv;
  107. Dir.g = AB.g * fABInv;
  108. Dir.b = AB.b * fABInv;
  109. HDRColorA Mid;
  110. Mid.r = (X.r + Y.r) * 0.5f;
  111. Mid.g = (X.g + Y.g) * 0.5f;
  112. Mid.b = (X.b + Y.b) * 0.5f;
  113. float fDir[4];
  114. fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
  115. for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  116. {
  117. HDRColorA Pt;
  118. Pt.r = (pPoints[iPoint].r - Mid.r) * Dir.r;
  119. Pt.g = (pPoints[iPoint].g - Mid.g) * Dir.g;
  120. Pt.b = (pPoints[iPoint].b - Mid.b) * Dir.b;
  121. float f;
  122. #ifdef COLOR_WEIGHTS
  123. f = Pt.r + Pt.g + Pt.b;
  124. fDir[0] += pPoints[iPoint].a * f * f;
  125. f = Pt.r + Pt.g - Pt.b;
  126. fDir[1] += pPoints[iPoint].a * f * f;
  127. f = Pt.r - Pt.g + Pt.b;
  128. fDir[2] += pPoints[iPoint].a * f * f;
  129. f = Pt.r - Pt.g - Pt.b;
  130. fDir[3] += pPoints[iPoint].a * f * f;
  131. #else
  132. f = Pt.r + Pt.g + Pt.b;
  133. fDir[0] += f * f;
  134. f = Pt.r + Pt.g - Pt.b;
  135. fDir[1] += f * f;
  136. f = Pt.r - Pt.g + Pt.b;
  137. fDir[2] += f * f;
  138. f = Pt.r - Pt.g - Pt.b;
  139. fDir[3] += f * f;
  140. #endif // COLOR_WEIGHTS
  141. }
  142. float fDirMax = fDir[0];
  143. size_t iDirMax = 0;
  144. for(size_t iDir = 1; iDir < 4; iDir++)
  145. {
  146. if(fDir[iDir] > fDirMax)
  147. {
  148. fDirMax = fDir[iDir];
  149. iDirMax = iDir;
  150. }
  151. }
  152. if(iDirMax & 2)
  153. {
  154. float f = X.g; X.g = Y.g; Y.g = f;
  155. }
  156. if(iDirMax & 1)
  157. {
  158. float f = X.b; X.b = Y.b; Y.b = f;
  159. }
  160. // Two color block.. no need to root-find
  161. if(fAB < 1.0f / 4096.0f)
  162. {
  163. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  164. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  165. return;
  166. }
  167. // Use Newton's Method to find local minima of sum-of-squares error.
  168. float fSteps = (float) (cSteps - 1);
  169. for(size_t iIteration = 0; iIteration < 8; iIteration++)
  170. {
  171. // Calculate new steps
  172. HDRColorA pSteps[4];
  173. for(size_t iStep = 0; iStep < cSteps; iStep++)
  174. {
  175. pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
  176. pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
  177. pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
  178. }
  179. // Calculate color direction
  180. Dir.r = Y.r - X.r;
  181. Dir.g = Y.g - X.g;
  182. Dir.b = Y.b - X.b;
  183. float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
  184. if(fLen < (1.0f / 4096.0f))
  185. break;
  186. float fScale = fSteps / fLen;
  187. Dir.r *= fScale;
  188. Dir.g *= fScale;
  189. Dir.b *= fScale;
  190. // Evaluate function, and derivatives
  191. float d2X, d2Y;
  192. HDRColorA dX, dY;
  193. d2X = d2Y = dX.r = dX.g = dX.b = dY.r = dY.g = dY.b = 0.0f;
  194. for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  195. {
  196. float fDot = (pPoints[iPoint].r - X.r) * Dir.r +
  197. (pPoints[iPoint].g - X.g) * Dir.g +
  198. (pPoints[iPoint].b - X.b) * Dir.b;
  199. size_t iStep;
  200. if(fDot <= 0.0f)
  201. iStep = 0;
  202. else if(fDot >= fSteps)
  203. iStep = cSteps - 1;
  204. else
  205. iStep = static_cast<size_t>(fDot + 0.5f);
  206. HDRColorA Diff;
  207. Diff.r = pSteps[iStep].r - pPoints[iPoint].r;
  208. Diff.g = pSteps[iStep].g - pPoints[iPoint].g;
  209. Diff.b = pSteps[iStep].b - pPoints[iPoint].b;
  210. #ifdef COLOR_WEIGHTS
  211. float fC = pC[iStep] * pPoints[iPoint].a * (1.0f / 8.0f);
  212. float fD = pD[iStep] * pPoints[iPoint].a * (1.0f / 8.0f);
  213. #else
  214. float fC = pC[iStep] * (1.0f / 8.0f);
  215. float fD = pD[iStep] * (1.0f / 8.0f);
  216. #endif // COLOR_WEIGHTS
  217. d2X += fC * pC[iStep];
  218. dX.r += fC * Diff.r;
  219. dX.g += fC * Diff.g;
  220. dX.b += fC * Diff.b;
  221. d2Y += fD * pD[iStep];
  222. dY.r += fD * Diff.r;
  223. dY.g += fD * Diff.g;
  224. dY.b += fD * Diff.b;
  225. }
  226. // Move endpoints
  227. if(d2X > 0.0f)
  228. {
  229. float f = -1.0f / d2X;
  230. X.r += dX.r * f;
  231. X.g += dX.g * f;
  232. X.b += dX.b * f;
  233. }
  234. if(d2Y > 0.0f)
  235. {
  236. float f = -1.0f / d2Y;
  237. Y.r += dY.r * f;
  238. Y.g += dY.g * f;
  239. Y.b += dY.b * f;
  240. }
  241. if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
  242. (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
  243. {
  244. break;
  245. }
  246. }
  247. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  248. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  249. }
  250. //-------------------------------------------------------------------------------------
  251. static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f };
  252. inline void DecodeBC1(
  253. _Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor,
  254. _In_ const D3DX_BC1 *pBC,
  255. bool isbc1)
  256. {
  257. assert( pColor && pBC );
  258. static_assert( sizeof(D3DX_BC1) == 8, "D3DX_BC1 should be 8 bytes" );
  259. XMVECTOR clr0 = XMLoadU565( reinterpret_cast<const XMU565*>(&pBC->rgb[0]) );
  260. XMVECTOR clr1 = XMLoadU565( reinterpret_cast<const XMU565*>(&pBC->rgb[1]) );
  261. clr0 = XMVectorMultiply( clr0, s_Scale );
  262. clr1 = XMVectorMultiply( clr1, s_Scale );
  263. clr0 = XMVectorSwizzle<2, 1, 0, 3>( clr0 );
  264. clr1 = XMVectorSwizzle<2, 1, 0, 3>( clr1 );
  265. clr0 = XMVectorSelect( g_XMIdentityR3, clr0, g_XMSelect1110 );
  266. clr1 = XMVectorSelect( g_XMIdentityR3, clr1, g_XMSelect1110 );
  267. XMVECTOR clr2, clr3;
  268. if ( isbc1 && (pBC->rgb[0] <= pBC->rgb[1]) )
  269. {
  270. clr2 = XMVectorLerp( clr0, clr1, 0.5f );
  271. clr3 = XMVectorZero(); // Alpha of 0
  272. }
  273. else
  274. {
  275. clr2 = XMVectorLerp( clr0, clr1, 1.f/3.f );
  276. clr3 = XMVectorLerp( clr0, clr1, 2.f/3.f );
  277. }
  278. uint32_t dw = pBC->bitmap;
  279. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i, dw >>= 2)
  280. {
  281. switch(dw & 3)
  282. {
  283. case 0: pColor[i] = clr0; break;
  284. case 1: pColor[i] = clr1; break;
  285. case 2: pColor[i] = clr2; break;
  286. case 3:
  287. default: pColor[i] = clr3; break;
  288. }
  289. }
  290. }
  291. //-------------------------------------------------------------------------------------
  292. static const size_t pSteps3[] = { 0, 2, 1 }; // ESENTHEL
  293. static const size_t pSteps4[] = { 0, 2, 3, 1 }; // ESENTHEL
  294. void EncodeBC1(
  295. _Out_ D3DX_BC1 *pBC,
  296. _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor,
  297. const HDRColorA *pWeights, // ESENTHEL
  298. bool bColorKey,
  299. float threshold,
  300. DWORD flags)
  301. {
  302. assert( pBC && pColor );
  303. static_assert( sizeof(D3DX_BC1) == 8, "D3DX_BC1 should be 8 bytes" );
  304. // Determine if we need to colorkey this block
  305. size_t uSteps;
  306. if (bColorKey)
  307. {
  308. size_t uColorKey = 0;
  309. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  310. {
  311. if (pColor[i].a < threshold)
  312. uColorKey++;
  313. }
  314. if(NUM_PIXELS_PER_BLOCK == uColorKey)
  315. {
  316. pBC->rgb[0] = 0x0000;
  317. pBC->rgb[1] = 0xffff;
  318. pBC->bitmap = 0xffffffff;
  319. return;
  320. }
  321. uSteps = (uColorKey > 0) ? 3 : 4;
  322. }
  323. else
  324. {
  325. uSteps = 4;
  326. }
  327. // Quantize block to R56B5, using Floyd Stienberg error diffusion. This
  328. // increases the chance that colors will map directly to the quantized
  329. // axis endpoints.
  330. HDRColorA Color[NUM_PIXELS_PER_BLOCK];
  331. HDRColorA Error[NUM_PIXELS_PER_BLOCK];
  332. if (flags & BC_FLAGS_DITHER_RGB)
  333. memset(Error, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(HDRColorA));
  334. size_t i;
  335. for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  336. {
  337. HDRColorA Clr;
  338. Clr.r = pColor[i].r;
  339. Clr.g = pColor[i].g;
  340. Clr.b = pColor[i].b;
  341. if (flags & BC_FLAGS_DITHER_RGB)
  342. {
  343. Clr.r += Error[i].r;
  344. Clr.g += Error[i].g;
  345. Clr.b += Error[i].b;
  346. }
  347. Color[i].r = (float) static_cast<int32_t>(Clr.r * 31.0f + 0.5f) * (1.0f / 31.0f);
  348. Color[i].g = (float) static_cast<int32_t>(Clr.g * 63.0f + 0.5f) * (1.0f / 63.0f);
  349. Color[i].b = (float) static_cast<int32_t>(Clr.b * 31.0f + 0.5f) * (1.0f / 31.0f);
  350. #ifdef COLOR_WEIGHTS
  351. Color[i].a = pColor[i].a;
  352. #else
  353. Color[i].a = 1.0f;
  354. #endif // COLOR_WEIGHTS
  355. if (flags & BC_FLAGS_DITHER_RGB)
  356. {
  357. HDRColorA Diff;
  358. Diff.r = Color[i].a * (Clr.r - Color[i].r);
  359. Diff.g = Color[i].a * (Clr.g - Color[i].g);
  360. Diff.b = Color[i].a * (Clr.b - Color[i].b);
  361. if(3 != (i & 3))
  362. {
  363. assert( i < 15 );
  364. _Analysis_assume_( i < 15 );
  365. Error[i + 1].r += Diff.r * (7.0f / 16.0f);
  366. Error[i + 1].g += Diff.g * (7.0f / 16.0f);
  367. Error[i + 1].b += Diff.b * (7.0f / 16.0f);
  368. }
  369. if(i < 12)
  370. {
  371. if(i & 3)
  372. {
  373. Error[i + 3].r += Diff.r * (3.0f / 16.0f);
  374. Error[i + 3].g += Diff.g * (3.0f / 16.0f);
  375. Error[i + 3].b += Diff.b * (3.0f / 16.0f);
  376. }
  377. Error[i + 4].r += Diff.r * (5.0f / 16.0f);
  378. Error[i + 4].g += Diff.g * (5.0f / 16.0f);
  379. Error[i + 4].b += Diff.b * (5.0f / 16.0f);
  380. if(3 != (i & 3))
  381. {
  382. assert( i < 11 );
  383. _Analysis_assume_( i < 11 );
  384. Error[i + 5].r += Diff.r * (1.0f / 16.0f);
  385. Error[i + 5].g += Diff.g * (1.0f / 16.0f);
  386. Error[i + 5].b += Diff.b * (1.0f / 16.0f);
  387. }
  388. }
  389. }
  390. if (pWeights) // ESENTHEL
  391. {
  392. Color[i].r *= pWeights->r;
  393. Color[i].g *= pWeights->g;
  394. Color[i].b *= pWeights->b;
  395. }
  396. }
  397. // Perform 6D root finding function to find two endpoints of color axis.
  398. // Then quantize and sort the endpoints depending on mode.
  399. HDRColorA ColorA, ColorB, ColorC, ColorD;
  400. OptimizeRGB(&ColorA, &ColorB, Color, uSteps, pWeights); // ESENTHEL
  401. if ( pWeights ) // ESENTHEL
  402. {
  403. ColorC.r = ColorA.r / pWeights->r;
  404. ColorC.g = ColorA.g / pWeights->g;
  405. ColorC.b = ColorA.b / pWeights->b;
  406. ColorD.r = ColorB.r / pWeights->r;
  407. ColorD.g = ColorB.g / pWeights->g;
  408. ColorD.b = ColorB.b / pWeights->b;
  409. }
  410. else
  411. {
  412. ColorC = ColorA;
  413. ColorD = ColorB;
  414. }
  415. uint16_t wColorA = Encode565(&ColorC);
  416. uint16_t wColorB = Encode565(&ColorD);
  417. if((uSteps == 4) && (wColorA == wColorB))
  418. {
  419. pBC->rgb[0] = wColorA;
  420. pBC->rgb[1] = wColorB;
  421. pBC->bitmap = 0x00000000;
  422. return;
  423. }
  424. Decode565(&ColorC, wColorA);
  425. Decode565(&ColorD, wColorB);
  426. if ( pWeights ) // ESENTHEL
  427. {
  428. ColorA.r = ColorC.r * pWeights->r;
  429. ColorA.g = ColorC.g * pWeights->g;
  430. ColorA.b = ColorC.b * pWeights->b;
  431. ColorB.r = ColorD.r * pWeights->r;
  432. ColorB.g = ColorD.g * pWeights->g;
  433. ColorB.b = ColorD.b * pWeights->b;
  434. }
  435. else
  436. {
  437. ColorA = ColorC;
  438. ColorB = ColorD;
  439. }
  440. // Calculate color steps
  441. HDRColorA Step[4];
  442. if((3 == uSteps) == (wColorA <= wColorB))
  443. {
  444. pBC->rgb[0] = wColorA;
  445. pBC->rgb[1] = wColorB;
  446. Step[0] = ColorA;
  447. Step[1] = ColorB;
  448. }
  449. else
  450. {
  451. pBC->rgb[0] = wColorB;
  452. pBC->rgb[1] = wColorA;
  453. Step[0] = ColorB;
  454. Step[1] = ColorA;
  455. }
  456. const size_t *pSteps;
  457. if(3 == uSteps)
  458. {
  459. pSteps = pSteps3;
  460. HDRColorALerp(&Step[2], &Step[0], &Step[1], 0.5f);
  461. }
  462. else
  463. {
  464. pSteps = pSteps4;
  465. HDRColorALerp(&Step[2], &Step[0], &Step[1], 1.0f / 3.0f);
  466. HDRColorALerp(&Step[3], &Step[0], &Step[1], 2.0f / 3.0f);
  467. }
  468. // Calculate color direction
  469. HDRColorA Dir;
  470. Dir.r = Step[1].r - Step[0].r;
  471. Dir.g = Step[1].g - Step[0].g;
  472. Dir.b = Step[1].b - Step[0].b;
  473. float fSteps = (float) (uSteps - 1);
  474. float fScale = (wColorA != wColorB) ? (fSteps / (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b)) : 0.0f;
  475. Dir.r *= fScale;
  476. Dir.g *= fScale;
  477. Dir.b *= fScale;
  478. // Encode colors
  479. uint32_t dw = 0;
  480. if (flags & BC_FLAGS_DITHER_RGB)
  481. memset(Error, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(HDRColorA));
  482. for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  483. {
  484. if ((3 == uSteps) && (pColor[i].a < threshold))
  485. {
  486. dw = (3 << 30) | (dw >> 2);
  487. }
  488. else
  489. {
  490. HDRColorA Clr;
  491. if ( pWeights ) // ESENTHEL
  492. {
  493. Clr.r = pColor[i].r * pWeights->r;
  494. Clr.g = pColor[i].g * pWeights->g;
  495. Clr.b = pColor[i].b * pWeights->b;
  496. }
  497. else
  498. {
  499. Clr.r = pColor[i].r;
  500. Clr.g = pColor[i].g;
  501. Clr.b = pColor[i].b;
  502. }
  503. if (flags & BC_FLAGS_DITHER_RGB)
  504. {
  505. Clr.r += Error[i].r;
  506. Clr.g += Error[i].g;
  507. Clr.b += Error[i].b;
  508. }
  509. float fDot = (Clr.r - Step[0].r) * Dir.r + (Clr.g - Step[0].g) * Dir.g + (Clr.b - Step[0].b) * Dir.b;
  510. uint32_t iStep;
  511. if(fDot <= 0.0f)
  512. iStep = 0;
  513. else if(fDot >= fSteps)
  514. iStep = 1;
  515. else
  516. iStep = static_cast<uint32_t>( pSteps[static_cast<size_t>(fDot + 0.5f)] );
  517. dw = (iStep << 30) | (dw >> 2);
  518. if (flags & BC_FLAGS_DITHER_RGB)
  519. {
  520. HDRColorA Diff;
  521. Diff.r = Color[i].a * (Clr.r - Step[iStep].r);
  522. Diff.g = Color[i].a * (Clr.g - Step[iStep].g);
  523. Diff.b = Color[i].a * (Clr.b - Step[iStep].b);
  524. if(3 != (i & 3))
  525. {
  526. Error[i + 1].r += Diff.r * (7.0f / 16.0f);
  527. Error[i + 1].g += Diff.g * (7.0f / 16.0f);
  528. Error[i + 1].b += Diff.b * (7.0f / 16.0f);
  529. }
  530. if(i < 12)
  531. {
  532. if(i & 3)
  533. {
  534. Error[i + 3].r += Diff.r * (3.0f / 16.0f);
  535. Error[i + 3].g += Diff.g * (3.0f / 16.0f);
  536. Error[i + 3].b += Diff.b * (3.0f / 16.0f);
  537. }
  538. Error[i + 4].r += Diff.r * (5.0f / 16.0f);
  539. Error[i + 4].g += Diff.g * (5.0f / 16.0f);
  540. Error[i + 4].b += Diff.b * (5.0f / 16.0f);
  541. if(3 != (i & 3))
  542. {
  543. Error[i + 5].r += Diff.r * (1.0f / 16.0f);
  544. Error[i + 5].g += Diff.g * (1.0f / 16.0f);
  545. Error[i + 5].b += Diff.b * (1.0f / 16.0f);
  546. }
  547. }
  548. }
  549. }
  550. }
  551. pBC->bitmap = dw;
  552. }
  553. //-------------------------------------------------------------------------------------
  554. #ifdef COLOR_WEIGHTS
  555. void EncodeSolidBC1(_Out_ D3DX_BC1 *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor)
  556. {
  557. #ifdef COLOR_AVG_0WEIGHTS
  558. // Compute avg color
  559. HDRColorA Color;
  560. Color.r = pColor[0].r;
  561. Color.g = pColor[0].g;
  562. Color.b = pColor[0].b;
  563. for(size_t i = 1; i < NUM_PIXELS_PER_BLOCK; ++i)
  564. {
  565. Color.r += pColor[i].r;
  566. Color.g += pColor[i].g;
  567. Color.b += pColor[i].b;
  568. }
  569. Color.r *= 1.0f / 16.0f;
  570. Color.g *= 1.0f / 16.0f;
  571. Color.b *= 1.0f / 16.0f;
  572. uint16_t wColor = Encode565(&Color);
  573. #else
  574. uint16_t wColor = 0x0000;
  575. #endif // COLOR_AVG_0WEIGHTS
  576. // Encode solid block
  577. pBC->rgb[0] = wColor;
  578. pBC->rgb[1] = wColor;
  579. pBC->bitmap = 0x00000000;
  580. }
  581. #endif // COLOR_WEIGHTS
  582. }
  583. //=====================================================================================
  584. // Entry points
  585. //=====================================================================================
  586. //-------------------------------------------------------------------------------------
  587. // BC1 Compression
  588. //-------------------------------------------------------------------------------------
  589. _Use_decl_annotations_
  590. void DirectX::D3DXDecodeBC1(XMVECTOR *pColor, const uint8_t *pBC)
  591. {
  592. auto pBC1 = reinterpret_cast<const D3DX_BC1 *>(pBC);
  593. DecodeBC1(pColor, pBC1, true);
  594. }
  595. _Use_decl_annotations_
  596. void DirectX::D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, const HDRColorA *pWeights, float threshold, DWORD flags) // ESENTHEL CHANGED
  597. {
  598. assert(pBC && pColor);
  599. HDRColorA Color[NUM_PIXELS_PER_BLOCK];
  600. if (flags & BC_FLAGS_DITHER_A)
  601. {
  602. float fError[NUM_PIXELS_PER_BLOCK];
  603. memset(fError, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(float));
  604. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  605. {
  606. HDRColorA clr;
  607. XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &clr ), pColor[i] );
  608. float fAlph = clr.a + fError[i];
  609. Color[i].r = clr.r;
  610. Color[i].g = clr.g;
  611. Color[i].b = clr.b;
  612. Color[i].a = (float) static_cast<int32_t>(clr.a + fError[i] + 0.5f);
  613. float fDiff = fAlph - Color[i].a;
  614. if(3 != (i & 3))
  615. {
  616. assert( i < 15 );
  617. _Analysis_assume_( i < 15 );
  618. fError[i + 1] += fDiff * (7.0f / 16.0f);
  619. }
  620. if(i < 12)
  621. {
  622. if(i & 3)
  623. fError[i + 3] += fDiff * (3.0f / 16.0f);
  624. fError[i + 4] += fDiff * (5.0f / 16.0f);
  625. if(3 != (i & 3))
  626. {
  627. assert( i < 11 );
  628. _Analysis_assume_( i < 11 );
  629. fError[i + 5] += fDiff * (1.0f / 16.0f);
  630. }
  631. }
  632. }
  633. }
  634. else
  635. {
  636. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  637. {
  638. XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &Color[i] ), pColor[i] );
  639. }
  640. }
  641. auto pBC1 = reinterpret_cast<D3DX_BC1 *>(pBC);
  642. EncodeBC1(pBC1, Color, pWeights, true, threshold, flags);
  643. }
  644. //-------------------------------------------------------------------------------------
  645. // BC2 Compression
  646. //-------------------------------------------------------------------------------------
  647. _Use_decl_annotations_
  648. void DirectX::D3DXDecodeBC2(XMVECTOR *pColor, const uint8_t *pBC)
  649. {
  650. assert(pColor && pBC);
  651. static_assert(sizeof(D3DX_BC2) == 16, "D3DX_BC2 should be 16 bytes");
  652. auto pBC2 = reinterpret_cast<const D3DX_BC2 *>(pBC);
  653. // RGB part
  654. DecodeBC1(pColor, &pBC2->bc1, false);
  655. // 4-bit alpha part
  656. DWORD dw = pBC2->bitmap[0];
  657. for(size_t i = 0; i < 8; ++i, dw >>= 4)
  658. {
  659. #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
  660. pColor[i] = XMVectorSetW( pColor[i], (float) (dw & 0xf) * (1.0f / 15.0f) );
  661. }
  662. dw = pBC2->bitmap[1];
  663. for (size_t i = 8; i < NUM_PIXELS_PER_BLOCK; ++i, dw >>= 4)
  664. pColor[i] = XMVectorSetW(pColor[i], (float)(dw & 0xf) * (1.0f / 15.0f));
  665. }
  666. _Use_decl_annotations_
  667. void DirectX::D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, const HDRColorA *pWeights, DWORD flags)
  668. {
  669. assert( pBC && pColor );
  670. static_assert( sizeof(D3DX_BC2) == 16, "D3DX_BC2 should be 16 bytes" );
  671. HDRColorA Color[NUM_PIXELS_PER_BLOCK];
  672. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  673. {
  674. XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &Color[i] ), pColor[i] );
  675. }
  676. auto pBC2 = reinterpret_cast<D3DX_BC2 *>(pBC);
  677. // 4-bit alpha part. Dithered using Floyd Stienberg error diffusion.
  678. pBC2->bitmap[0] = 0;
  679. pBC2->bitmap[1] = 0;
  680. float fError[NUM_PIXELS_PER_BLOCK];
  681. if (flags & BC_FLAGS_DITHER_A)
  682. memset(fError, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(float));
  683. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  684. {
  685. float fAlph = Color[i].a;
  686. if (flags & BC_FLAGS_DITHER_A)
  687. fAlph += fError[i];
  688. uint32_t u = (uint32_t) static_cast<int32_t>(fAlph * 15.0f + 0.5f);
  689. pBC2->bitmap[i >> 3] >>= 4;
  690. pBC2->bitmap[i >> 3] |= (u << 28);
  691. if (flags & BC_FLAGS_DITHER_A)
  692. {
  693. float fDiff = fAlph - (float) u * (1.0f / 15.0f);
  694. if(3 != (i & 3))
  695. {
  696. assert( i < 15 );
  697. _Analysis_assume_( i < 15 );
  698. fError[i + 1] += fDiff * (7.0f / 16.0f);
  699. }
  700. if(i < 12)
  701. {
  702. if(i & 3)
  703. fError[i + 3] += fDiff * (3.0f / 16.0f);
  704. fError[i + 4] += fDiff * (5.0f / 16.0f);
  705. if(3 != (i & 3))
  706. {
  707. assert( i < 11 );
  708. _Analysis_assume_( i < 11 );
  709. fError[i + 5] += fDiff * (1.0f / 16.0f);
  710. }
  711. }
  712. }
  713. }
  714. // RGB part
  715. #ifdef COLOR_WEIGHTS
  716. if(!pBC2->bitmap[0] && !pBC2->bitmap[1])
  717. {
  718. EncodeSolidBC1(pBC2->dxt1, Color);
  719. return;
  720. }
  721. #endif // COLOR_WEIGHTS
  722. EncodeBC1(&pBC2->bc1, Color, pWeights, false, 0.f, flags);
  723. }
  724. //-------------------------------------------------------------------------------------
  725. // BC3 Compression
  726. //-------------------------------------------------------------------------------------
  727. _Use_decl_annotations_
  728. void DirectX::D3DXDecodeBC3(XMVECTOR *pColor, const uint8_t *pBC)
  729. {
  730. assert(pColor && pBC);
  731. static_assert( sizeof(D3DX_BC3) == 16, "D3DX_BC3 should be 16 bytes" );
  732. auto pBC3 = reinterpret_cast<const D3DX_BC3 *>(pBC);
  733. // RGB part
  734. DecodeBC1(pColor, &pBC3->bc1, false);
  735. // Adaptive 3-bit alpha part
  736. float fAlpha[8];
  737. fAlpha[0] = ((float) pBC3->alpha[0]) * (1.0f / 255.0f);
  738. fAlpha[1] = ((float) pBC3->alpha[1]) * (1.0f / 255.0f);
  739. if(pBC3->alpha[0] > pBC3->alpha[1])
  740. {
  741. for(size_t i = 1; i < 7; ++i)
  742. fAlpha[i + 1] = (fAlpha[0] * (7 - i) + fAlpha[1] * i) * (1.0f / 7.0f);
  743. }
  744. else
  745. {
  746. for(size_t i = 1; i < 5; ++i)
  747. fAlpha[i + 1] = (fAlpha[0] * (5 - i) + fAlpha[1] * i) * (1.0f / 5.0f);
  748. fAlpha[6] = 0.0f;
  749. fAlpha[7] = 1.0f;
  750. }
  751. DWORD dw = pBC3->bitmap[0] | (pBC3->bitmap[1] << 8) | (pBC3->bitmap[2] << 16);
  752. for(size_t i = 0; i < 8; ++i, dw >>= 3)
  753. pColor[i] = XMVectorSetW( pColor[i], fAlpha[dw & 0x7] );
  754. dw = pBC3->bitmap[3] | (pBC3->bitmap[4] << 8) | (pBC3->bitmap[5] << 16);
  755. for(size_t i = 8; i < NUM_PIXELS_PER_BLOCK; ++i, dw >>= 3)
  756. pColor[i] = XMVectorSetW(pColor[i], fAlpha[dw & 0x7]);
  757. }
  758. static const size_t pSteps6[] = { 0, 2, 3, 4, 5, 1 };
  759. static const size_t pSteps8[] = { 0, 2, 3, 4, 5, 6, 7, 1 };
  760. _Use_decl_annotations_
  761. void DirectX::D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, const HDRColorA *pWeights, DWORD flags)
  762. {
  763. assert( pBC && pColor );
  764. static_assert( sizeof(D3DX_BC3) == 16, "D3DX_BC3 should be 16 bytes" );
  765. HDRColorA Color[NUM_PIXELS_PER_BLOCK];
  766. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  767. {
  768. XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &Color[i] ), pColor[i] );
  769. }
  770. auto pBC3 = reinterpret_cast<D3DX_BC3 *>(pBC);
  771. // Quantize block to A8, using Floyd Stienberg error diffusion. This
  772. // increases the chance that colors will map directly to the quantized
  773. // axis endpoints.
  774. float fAlpha[NUM_PIXELS_PER_BLOCK];
  775. float fError[NUM_PIXELS_PER_BLOCK];
  776. float fMinAlpha = Color[0].a;
  777. float fMaxAlpha = Color[0].a;
  778. if (flags & BC_FLAGS_DITHER_A)
  779. memset(fError, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(float));
  780. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  781. {
  782. float fAlph = Color[i].a;
  783. if (flags & BC_FLAGS_DITHER_A)
  784. fAlph += fError[i];
  785. fAlpha[i] = static_cast<int32_t>(fAlph * 255.0f + 0.5f) * (1.0f / 255.0f);
  786. if(fAlpha[i] < fMinAlpha)
  787. fMinAlpha = fAlpha[i];
  788. else if(fAlpha[i] > fMaxAlpha)
  789. fMaxAlpha = fAlpha[i];
  790. if (flags & BC_FLAGS_DITHER_A)
  791. {
  792. float fDiff = fAlph - fAlpha[i];
  793. if(3 != (i & 3))
  794. {
  795. assert( i < 15 );
  796. _Analysis_assume_( i < 15 );
  797. fError[i + 1] += fDiff * (7.0f / 16.0f);
  798. }
  799. if(i < 12)
  800. {
  801. if(i & 3)
  802. fError[i + 3] += fDiff * (3.0f / 16.0f);
  803. fError[i + 4] += fDiff * (5.0f / 16.0f);
  804. if(3 != (i & 3))
  805. {
  806. assert( i < 11 );
  807. _Analysis_assume_( i < 11 );
  808. fError[i + 5] += fDiff * (1.0f / 16.0f);
  809. }
  810. }
  811. }
  812. }
  813. #ifdef COLOR_WEIGHTS
  814. if(0.0f == fMaxAlpha)
  815. {
  816. EncodeSolidBC1(&pBC3->dxt1, Color);
  817. pBC3->alpha[0] = 0x00;
  818. pBC3->alpha[1] = 0x00;
  819. memset(pBC3->bitmap, 0x00, 6);
  820. }
  821. #endif
  822. // RGB part
  823. EncodeBC1(&pBC3->bc1, Color, pWeights, false, 0.f, flags); // ESENTHEL
  824. // Alpha part
  825. if(1.0f == fMinAlpha)
  826. {
  827. pBC3->alpha[0] = 0xff;
  828. pBC3->alpha[1] = 0xff;
  829. memset(pBC3->bitmap, 0x00, 6);
  830. return;
  831. }
  832. // Optimize and Quantize Min and Max values
  833. size_t uSteps = ((0.0f == fMinAlpha) || (1.0f == fMaxAlpha)) ? 6 : 8;
  834. float fAlphaA, fAlphaB;
  835. OptimizeAlpha<false>(&fAlphaA, &fAlphaB, fAlpha, uSteps);
  836. uint8_t bAlphaA = (uint8_t) static_cast<int32_t>(fAlphaA * 255.0f + 0.5f);
  837. uint8_t bAlphaB = (uint8_t) static_cast<int32_t>(fAlphaB * 255.0f + 0.5f);
  838. fAlphaA = (float) bAlphaA * (1.0f / 255.0f);
  839. fAlphaB = (float) bAlphaB * (1.0f / 255.0f);
  840. // Setup block
  841. if((8 == uSteps) && (bAlphaA == bAlphaB))
  842. {
  843. pBC3->alpha[0] = bAlphaA;
  844. pBC3->alpha[1] = bAlphaB;
  845. memset(pBC3->bitmap, 0x00, 6);
  846. return;
  847. }
  848. const size_t *pSteps;
  849. float fStep[8];
  850. if(6 == uSteps)
  851. {
  852. pBC3->alpha[0] = bAlphaA;
  853. pBC3->alpha[1] = bAlphaB;
  854. fStep[0] = fAlphaA;
  855. fStep[1] = fAlphaB;
  856. for(size_t i = 1; i < 5; ++i)
  857. fStep[i + 1] = (fStep[0] * (5 - i) + fStep[1] * i) * (1.0f / 5.0f);
  858. fStep[6] = 0.0f;
  859. fStep[7] = 1.0f;
  860. pSteps = pSteps6;
  861. }
  862. else
  863. {
  864. pBC3->alpha[0] = bAlphaB;
  865. pBC3->alpha[1] = bAlphaA;
  866. fStep[0] = fAlphaB;
  867. fStep[1] = fAlphaA;
  868. for(size_t i = 1; i < 7; ++i)
  869. fStep[i + 1] = (fStep[0] * (7 - i) + fStep[1] * i) * (1.0f / 7.0f);
  870. pSteps = pSteps8;
  871. }
  872. // Encode alpha bitmap
  873. float fSteps = (float) (uSteps - 1);
  874. float fScale = (fStep[0] != fStep[1]) ? (fSteps / (fStep[1] - fStep[0])) : 0.0f;
  875. if (flags & BC_FLAGS_DITHER_A)
  876. memset(fError, 0x00, NUM_PIXELS_PER_BLOCK * sizeof(float));
  877. for(size_t iSet = 0; iSet < 2; iSet++)
  878. {
  879. uint32_t dw = 0;
  880. size_t iMin = iSet * 8;
  881. size_t iLim = iMin + 8;
  882. for(size_t i = iMin; i < iLim; ++i)
  883. {
  884. float fAlph = Color[i].a;
  885. if (flags & BC_FLAGS_DITHER_A)
  886. fAlph += fError[i];
  887. float fDot = (fAlph - fStep[0]) * fScale;
  888. uint32_t iStep;
  889. if(fDot <= 0.0f)
  890. iStep = ((6 == uSteps) && (fAlph <= fStep[0] * 0.5f)) ? 6 : 0;
  891. else if(fDot >= fSteps)
  892. iStep = ((6 == uSteps) && (fAlph >= (fStep[1] + 1.0f) * 0.5f)) ? 7 : 1;
  893. else
  894. iStep = static_cast<uint32_t>( pSteps[static_cast<size_t>(fDot + 0.5f)] );
  895. dw = (iStep << 21) | (dw >> 3);
  896. if (flags & BC_FLAGS_DITHER_A)
  897. {
  898. float fDiff = (fAlph - fStep[iStep]);
  899. if(3 != (i & 3))
  900. fError[i + 1] += fDiff * (7.0f / 16.0f);
  901. if(i < 12)
  902. {
  903. if(i & 3)
  904. fError[i + 3] += fDiff * (3.0f / 16.0f);
  905. fError[i + 4] += fDiff * (5.0f / 16.0f);
  906. if(3 != (i & 3))
  907. fError[i + 5] += fDiff * (1.0f / 16.0f);
  908. }
  909. }
  910. }
  911. pBC3->bitmap[0 + iSet * 3] = ((uint8_t *) &dw)[0];
  912. pBC3->bitmap[1 + iSet * 3] = ((uint8_t *) &dw)[1];
  913. pBC3->bitmap[2 + iSet * 3] = ((uint8_t *) &dw)[2];
  914. }
  915. }