BC.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. //-------------------------------------------------------------------------------------
  2. // BC.h
  3. //
  4. // Block-compression (BC) functionality
  5. //
  6. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  7. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  8. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  9. // PARTICULAR PURPOSE.
  10. //
  11. // Copyright (c) Microsoft Corporation. All rights reserved.
  12. //
  13. // http://go.microsoft.com/fwlink/?LinkId=248926
  14. //-------------------------------------------------------------------------------------
  15. #pragma once
  16. #include <assert.h>
  17. //#include <directxmath.h> ESENTHEL CHANGED
  18. //#include <directxpackedvector.h> //ESENTHEL CHANGED
  19. namespace DirectX
  20. {
  21. //-------------------------------------------------------------------------------------
  22. // Macros
  23. //-------------------------------------------------------------------------------------
  24. // Because these are used in SAL annotations, they need to remain macros rather than const values
  25. #define NUM_PIXELS_PER_BLOCK 16
  26. //-------------------------------------------------------------------------------------
  27. // Constants
  28. //-------------------------------------------------------------------------------------
  // Option bits understood by the block-compression encoders.
  // Every option other than BC_FLAGS_NONE occupies its own bit (bits 16..20).
  enum BC_FLAGS
  {
      // No options selected
      BC_FLAGS_NONE = 0,

      // 0x10000: enables dithering for RGB colors for BC1-3
      BC_FLAGS_DITHER_RGB = 1 << 16,

      // 0x20000: enables dithering for the alpha channel for BC1-3
      BC_FLAGS_DITHER_A = 1 << 17,

      // 0x40000: BC1-3 use perceptual weighting by default; this flag selects uniform weighting
      BC_FLAGS_UNIFORM = 1 << 18,

      // 0x80000: BC7 skips modes 0 & 2 by default; this flag adds those modes back
      BC_FLAGS_USE_3SUBSETS = 1 << 19,

      // 0x100000: BC7 should only use mode 6; all other modes are skipped
      BC_FLAGS_FORCE_BC7_MODE6 = 1 << 20,
  };
  38. //-------------------------------------------------------------------------------------
  39. // Structures
  40. //-------------------------------------------------------------------------------------
  41. class LDRColorA;
  42. class HDRColorA
  43. {
  44. public:
  45. float r, g, b, a;
  46. public:
  47. HDRColorA() = default;
  48. HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {}
  49. HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {}
  50. // binary operators
  51. HDRColorA operator + ( const HDRColorA& c ) const
  52. {
  53. return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
  54. }
  55. HDRColorA operator - ( const HDRColorA& c ) const
  56. {
  57. return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
  58. }
  59. HDRColorA operator * ( float f ) const
  60. {
  61. return HDRColorA(r * f, g * f, b * f, a * f);
  62. }
  63. HDRColorA operator / ( float f ) const
  64. {
  65. float fInv = 1.0f / f;
  66. return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
  67. }
  68. float operator * ( const HDRColorA& c ) const
  69. {
  70. return r * c.r + g * c.g + b * c.b + a * c.a;
  71. }
  72. // assignment operators
  73. HDRColorA& operator += ( const HDRColorA& c )
  74. {
  75. r += c.r;
  76. g += c.g;
  77. b += c.b;
  78. a += c.a;
  79. return *this;
  80. }
  81. HDRColorA& operator -= ( const HDRColorA& c )
  82. {
  83. r -= c.r;
  84. g -= c.g;
  85. b -= c.b;
  86. a -= c.a;
  87. return *this;
  88. }
  89. HDRColorA& operator *= ( float f )
  90. {
  91. r *= f;
  92. g *= f;
  93. b *= f;
  94. a *= f;
  95. return *this;
  96. }
  97. HDRColorA& operator /= ( float f )
  98. {
  99. float fInv = 1.0f / f;
  100. r *= fInv;
  101. g *= fInv;
  102. b *= fInv;
  103. a *= fInv;
  104. return *this;
  105. }
  106. HDRColorA& Clamp(_In_ float fMin, _In_ float fMax)
  107. {
  108. r = std::min<float>(fMax, std::max<float>(fMin, r));
  109. g = std::min<float>(fMax, std::max<float>(fMin, g));
  110. b = std::min<float>(fMax, std::max<float>(fMin, b));
  111. a = std::min<float>(fMax, std::max<float>(fMin, a));
  112. return *this;
  113. }
  114. HDRColorA(const LDRColorA& c);
  115. HDRColorA& operator = (const LDRColorA& c);
  116. LDRColorA ToLDRColorA() const;
  117. };
  118. inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s)
  119. {
  120. pOut->r = pC1->r + s * (pC2->r - pC1->r);
  121. pOut->g = pC1->g + s * (pC2->g - pC1->g);
  122. pOut->b = pC1->b + s * (pC2->b - pC1->b);
  123. pOut->a = pC1->a + s * (pC2->a - pC1->a);
  124. return pOut;
  125. }
  // Byte-exact layouts of the BC1/BC2/BC3 compressed blocks. Packing is forced
  // to 1 so that no padding can be inserted between or after the members.
  #pragma pack(push,1)

  // BC1/DXT1 compression (4 bits per texel)
  struct D3DX_BC1
  {
      uint16_t rgb[2]; // 565 colors
      uint32_t bitmap; // 2bpp rgb bitmap
  };

  // BC2/DXT2/3 compression (8 bits per texel)
  struct D3DX_BC2
  {
      uint32_t bitmap[2]; // 4bpp alpha bitmap
      D3DX_BC1 bc1;       // BC1 rgb data
  };

  // BC3/DXT4/5 compression (8 bits per texel)
  struct D3DX_BC3
  {
      uint8_t alpha[2];   // alpha values
      uint8_t bitmap[6];  // 3bpp alpha bitmap
      D3DX_BC1 bc1;       // BC1 rgb data
  };

  #pragma pack(pop)

  // A 16-texel block at 4 bits per texel is 8 bytes, at 8 bits per texel 16 bytes.
  // Fail the build if a compiler ever lays these structures out differently.
  static_assert(sizeof(D3DX_BC1) == 8, "D3DX_BC1 must be exactly 8 bytes");
  static_assert(sizeof(D3DX_BC2) == 16, "D3DX_BC2 must be exactly 16 bytes");
  static_assert(sizeof(D3DX_BC3) == 16, "D3DX_BC3 must be exactly 16 bytes");
  147. //-------------------------------------------------------------------------------------
  148. // Templates
  149. //-------------------------------------------------------------------------------------
  150. #pragma warning(push)
  151. #pragma warning(disable : 4127)
  152. template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps)
  153. {
  154. static const float pC6[] = { 5.0f / 5.0f, 4.0f / 5.0f, 3.0f / 5.0f, 2.0f / 5.0f, 1.0f / 5.0f, 0.0f / 5.0f };
  155. static const float pD6[] = { 0.0f / 5.0f, 1.0f / 5.0f, 2.0f / 5.0f, 3.0f / 5.0f, 4.0f / 5.0f, 5.0f / 5.0f };
  156. static const float pC8[] = { 7.0f / 7.0f, 6.0f / 7.0f, 5.0f / 7.0f, 4.0f / 7.0f, 3.0f / 7.0f, 2.0f / 7.0f, 1.0f / 7.0f, 0.0f / 7.0f };
  157. static const float pD8[] = { 0.0f / 7.0f, 1.0f / 7.0f, 2.0f / 7.0f, 3.0f / 7.0f, 4.0f / 7.0f, 5.0f / 7.0f, 6.0f / 7.0f, 7.0f / 7.0f };
  158. const float *pC = (6 == cSteps) ? pC6 : pC8;
  159. const float *pD = (6 == cSteps) ? pD6 : pD8;
  160. const float MAX_VALUE = 1.0f;
  161. const float MIN_VALUE = (bRange) ? -1.0f : 0.0f;
  162. // Find Min and Max points, as starting point
  163. float fX = MAX_VALUE;
  164. float fY = MIN_VALUE;
  165. if (8 == cSteps)
  166. {
  167. for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  168. {
  169. if (pPoints[iPoint] < fX)
  170. fX = pPoints[iPoint];
  171. if (pPoints[iPoint] > fY)
  172. fY = pPoints[iPoint];
  173. }
  174. }
  175. else
  176. {
  177. for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  178. {
  179. if (pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
  180. fX = pPoints[iPoint];
  181. if (pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
  182. fY = pPoints[iPoint];
  183. }
  184. if (fX == fY)
  185. {
  186. fY = MAX_VALUE;
  187. }
  188. }
  189. // Use Newton's Method to find local minima of sum-of-squares error.
  190. float fSteps = (float)(cSteps - 1);
  191. for (size_t iIteration = 0; iIteration < 8; iIteration++)
  192. {
  193. float fScale;
  194. if ((fY - fX) < (1.0f / 256.0f))
  195. break;
  196. fScale = fSteps / (fY - fX);
  197. // Calculate new steps
  198. float pSteps[8];
  199. for (size_t iStep = 0; iStep < cSteps; iStep++)
  200. pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;
  201. if (6 == cSteps)
  202. {
  203. pSteps[6] = MIN_VALUE;
  204. pSteps[7] = MAX_VALUE;
  205. }
  206. // Evaluate function, and derivatives
  207. float dX = 0.0f;
  208. float dY = 0.0f;
  209. float d2X = 0.0f;
  210. float d2Y = 0.0f;
  211. for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
  212. {
  213. float fDot = (pPoints[iPoint] - fX) * fScale;
  214. size_t iStep;
  215. if (fDot <= 0.0f)
  216. iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0;
  217. else if (fDot >= fSteps)
  218. iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1);
  219. else
  220. iStep = static_cast<int32_t>(fDot + 0.5f);
  221. if (iStep < cSteps)
  222. {
  223. // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
  224. // this fix improves RMS of the alpha component
  225. float fDiff = pSteps[iStep] - pPoints[iPoint];
  226. dX += pC[iStep] * fDiff;
  227. d2X += pC[iStep] * pC[iStep];
  228. dY += pD[iStep] * fDiff;
  229. d2Y += pD[iStep] * pD[iStep];
  230. }
  231. }
  232. // Move endpoints
  233. if (d2X > 0.0f)
  234. fX -= dX / d2X;
  235. if (d2Y > 0.0f)
  236. fY -= dY / d2Y;
  237. if (fX > fY)
  238. {
  239. float f = fX; fX = fY; fY = f;
  240. }
  241. if ((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
  242. break;
  243. }
  244. *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
  245. *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
  246. }
  247. #pragma warning(pop)
  248. //-------------------------------------------------------------------------------------
  249. // Functions
  250. //-------------------------------------------------------------------------------------
  251. typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
  252. typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags);
  253. void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
  254. void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  255. void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  256. void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
  257. void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
  258. void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  259. void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  260. void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  261. void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
  262. void D3DXDecodeBC7(Color (&color)[4][4], const uint8_t *pBC); // ESENTHEL CHANGED
  263. void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, const HDRColorA *pWeights, _In_ float threshold, _In_ DWORD flags); // ESENTHEL CHANGED
  264. // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above
  265. void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, const HDRColorA *pWeights, _In_ DWORD flags); // ESENTHEL CHANGED
  266. void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, const HDRColorA *pWeights, _In_ DWORD flags); // ESENTHEL CHANGED
  267. void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  268. void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  269. void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  270. void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  271. void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  272. void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  273. void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
  274. }; // namespace