astcenc_entry.cpp 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2022 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions for the library entrypoint.
  19. */
  20. #include <array>
  21. #include <cstring>
  22. #include <new>
  23. #include "astcenc.h"
  24. #include "astcenc_internal_entry.h"
  25. #include "astcenc_diagnostic_trace.h"
  26. /**
  27. * @brief Record of the quality tuning parameter values.
  28. *
  29. * See the @c astcenc_config structure for detailed parameter documentation.
  30. *
  31. * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
  32. * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
  33. * for the more through search presets because the underlying db_limit is so much higher.
  34. */
  35. struct astcenc_preset_config
  36. {
  37. float quality;
  38. unsigned int tune_partition_count_limit;
  39. unsigned int tune_partition_index_limit;
  40. unsigned int tune_block_mode_limit;
  41. unsigned int tune_refinement_limit;
  42. unsigned int tune_candidate_limit;
  43. float tune_db_limit_a_base;
  44. float tune_db_limit_b_base;
  45. float tune_mode0_mse_overshoot;
  46. float tune_refinement_mse_overshoot;
  47. float tune_2_partition_early_out_limit_factor;
  48. float tune_3_partition_early_out_limit_factor;
  49. float tune_2_plane_early_out_limit_correlation;
  50. unsigned int tune_low_weight_count_limit;
  51. };
  52. /**
  53. * @brief The static quality presets that are built-in for high bandwidth
  54. * presets (x < 25 texels per block).
  55. */
  56. static const std::array<astcenc_preset_config, 5> preset_configs_high {{
  57. {
  58. ASTCENC_PRE_FASTEST,
  59. 2, 10, 43, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 25
  60. }, {
  61. ASTCENC_PRE_FAST,
  62. 3, 14, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.65f, 20
  63. }, {
  64. ASTCENC_PRE_MEDIUM,
  65. 4, 28, 76, 3, 3, 95.0f, 70.0f, 2.5f, 2.5f, 1.2f, 1.25f, 0.85f, 16
  66. }, {
  67. ASTCENC_PRE_THOROUGH,
  68. 4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 12
  69. }, {
  70. ASTCENC_PRE_EXHAUSTIVE,
  71. 4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
  72. }
  73. }};
  74. /**
  75. * @brief The static quality presets that are built-in for medium bandwidth
  76. * presets (25 <= x < 64 texels per block).
  77. */
  78. static const std::array<astcenc_preset_config, 5> preset_configs_mid {{
  79. {
  80. ASTCENC_PRE_FASTEST,
  81. 2, 10, 43, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
  82. }, {
  83. ASTCENC_PRE_FAST,
  84. 3, 15, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
  85. }, {
  86. ASTCENC_PRE_MEDIUM,
  87. 4, 30, 76, 3, 3, 95.0f, 70.0f, 3.0f, 3.0f, 1.2f, 1.25f, 0.75f, 14
  88. }, {
  89. ASTCENC_PRE_THOROUGH,
  90. 4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 10
  91. }, {
  92. ASTCENC_PRE_EXHAUSTIVE,
  93. 4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
  94. }
  95. }};
  96. /**
  97. * @brief The static quality presets that are built-in for low bandwidth
  98. * presets (64 <= x texels per block).
  99. */
  100. static const std::array<astcenc_preset_config, 5> preset_configs_low {{
  101. {
  102. ASTCENC_PRE_FASTEST,
  103. 2, 10, 40, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
  104. }, {
  105. ASTCENC_PRE_FAST,
  106. 2, 15, 55, 3, 3, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
  107. }, {
  108. ASTCENC_PRE_MEDIUM,
  109. 3, 30, 76, 3, 3, 95.0f, 70.0f, 3.5f, 3.5f, 1.2f, 1.25f, 0.65f, 12
  110. }, {
  111. ASTCENC_PRE_THOROUGH,
  112. 4, 75, 92, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.85f, 10
  113. }, {
  114. ASTCENC_PRE_EXHAUSTIVE,
  115. 4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
  116. }
  117. }};
  118. /**
  119. * @brief Validate CPU floating point meets assumptions made in the codec.
  120. *
  121. * The codec is written with the assumption that a float threaded through the @c if32 union will be
  122. * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the
  123. * case in an IEEE-754 compliant system, however not every system or compilation mode is actually
  124. * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled.
  125. *
  126. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  127. */
  128. static astcenc_error validate_cpu_float()
  129. {
  130. if32 p;
  131. volatile float xprec_testval = 2.51f;
  132. p.f = xprec_testval + 12582912.0f;
  133. float q = p.f - 12582912.0f;
  134. if (q != 3.0f)
  135. {
  136. return ASTCENC_ERR_BAD_CPU_FLOAT;
  137. }
  138. return ASTCENC_SUCCESS;
  139. }
  140. /**
  141. * @brief Validate CPU ISA support meets the requirements of this build of the library.
  142. *
  143. * Each library build is statically compiled for a particular set of CPU ISA features, such as the
  144. * SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
  145. * actually supports everything this build needs.
  146. *
  147. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  148. */
  149. static astcenc_error validate_cpu_isa()
  150. {
  151. #if ASTCENC_SSE >= 41
  152. if (!cpu_supports_sse41())
  153. {
  154. return ASTCENC_ERR_BAD_CPU_ISA;
  155. }
  156. #endif
  157. #if ASTCENC_POPCNT >= 1
  158. if (!cpu_supports_popcnt())
  159. {
  160. return ASTCENC_ERR_BAD_CPU_ISA;
  161. }
  162. #endif
  163. #if ASTCENC_F16C >= 1
  164. if (!cpu_supports_f16c())
  165. {
  166. return ASTCENC_ERR_BAD_CPU_ISA;
  167. }
  168. #endif
  169. #if ASTCENC_AVX >= 2
  170. if (!cpu_supports_avx2())
  171. {
  172. return ASTCENC_ERR_BAD_CPU_ISA;
  173. }
  174. #endif
  175. return ASTCENC_SUCCESS;
  176. }
  177. /**
  178. * @brief Validate config profile.
  179. *
  180. * @param profile The profile to check.
  181. *
  182. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  183. */
  184. static astcenc_error validate_profile(
  185. astcenc_profile profile
  186. ) {
  187. // Values in this enum are from an external user, so not guaranteed to be
  188. // bounded to the enum values
  189. switch (static_cast<int>(profile))
  190. {
  191. case ASTCENC_PRF_LDR_SRGB:
  192. case ASTCENC_PRF_LDR:
  193. case ASTCENC_PRF_HDR_RGB_LDR_A:
  194. case ASTCENC_PRF_HDR:
  195. return ASTCENC_SUCCESS;
  196. default:
  197. return ASTCENC_ERR_BAD_PROFILE;
  198. }
  199. }
  200. /**
  201. * @brief Validate block size.
  202. *
  203. * @param block_x The block x dimensions.
  204. * @param block_y The block y dimensions.
  205. * @param block_z The block z dimensions.
  206. *
  207. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  208. */
  209. static astcenc_error validate_block_size(
  210. unsigned int block_x,
  211. unsigned int block_y,
  212. unsigned int block_z
  213. ) {
  214. // Test if this is a legal block size at all
  215. bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) ||
  216. ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z)));
  217. if (!is_legal)
  218. {
  219. return ASTCENC_ERR_BAD_BLOCK_SIZE;
  220. }
  221. // Test if this build has sufficient capacity for this block size
  222. bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
  223. if (!have_capacity)
  224. {
  225. return ASTCENC_ERR_NOT_IMPLEMENTED;
  226. }
  227. return ASTCENC_SUCCESS;
  228. }
  229. /**
  230. * @brief Validate flags.
  231. *
  232. * @param flags The flags to check.
  233. *
  234. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  235. */
  236. static astcenc_error validate_flags(
  237. unsigned int flags
  238. ) {
  239. // Flags field must not contain any unknown flag bits
  240. unsigned int exMask = ~ASTCENC_ALL_FLAGS;
  241. if (popcount(flags & exMask) != 0)
  242. {
  243. return ASTCENC_ERR_BAD_FLAGS;
  244. }
  245. // Flags field must only contain at most a single map type
  246. exMask = ASTCENC_FLG_MAP_MASK
  247. | ASTCENC_FLG_MAP_NORMAL
  248. | ASTCENC_FLG_MAP_RGBM;
  249. if (popcount(flags & exMask) > 1)
  250. {
  251. return ASTCENC_ERR_BAD_FLAGS;
  252. }
  253. return ASTCENC_SUCCESS;
  254. }
  255. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  256. /**
  257. * @brief Validate single channel compression swizzle.
  258. *
  259. * @param swizzle The swizzle to check.
  260. *
  261. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  262. */
  263. static astcenc_error validate_compression_swz(
  264. astcenc_swz swizzle
  265. ) {
  266. // Not all enum values are handled; SWZ_Z is invalid for compression
  267. switch (static_cast<int>(swizzle))
  268. {
  269. case ASTCENC_SWZ_R:
  270. case ASTCENC_SWZ_G:
  271. case ASTCENC_SWZ_B:
  272. case ASTCENC_SWZ_A:
  273. case ASTCENC_SWZ_0:
  274. case ASTCENC_SWZ_1:
  275. return ASTCENC_SUCCESS;
  276. default:
  277. return ASTCENC_ERR_BAD_SWIZZLE;
  278. }
  279. }
  280. /**
  281. * @brief Validate overall compression swizzle.
  282. *
  283. * @param swizzle The swizzle to check.
  284. *
  285. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  286. */
  287. static astcenc_error validate_compression_swizzle(
  288. const astcenc_swizzle& swizzle
  289. ) {
  290. if (validate_compression_swz(swizzle.r) ||
  291. validate_compression_swz(swizzle.g) ||
  292. validate_compression_swz(swizzle.b) ||
  293. validate_compression_swz(swizzle.a))
  294. {
  295. return ASTCENC_ERR_BAD_SWIZZLE;
  296. }
  297. return ASTCENC_SUCCESS;
  298. }
  299. #endif
  300. /**
  301. * @brief Validate single channel decompression swizzle.
  302. *
  303. * @param swizzle The swizzle to check.
  304. *
  305. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  306. */
  307. static astcenc_error validate_decompression_swz(
  308. astcenc_swz swizzle
  309. ) {
  310. // Values in this enum are from an external user, so not guaranteed to be
  311. // bounded to the enum values
  312. switch (static_cast<int>(swizzle))
  313. {
  314. case ASTCENC_SWZ_R:
  315. case ASTCENC_SWZ_G:
  316. case ASTCENC_SWZ_B:
  317. case ASTCENC_SWZ_A:
  318. case ASTCENC_SWZ_0:
  319. case ASTCENC_SWZ_1:
  320. case ASTCENC_SWZ_Z:
  321. return ASTCENC_SUCCESS;
  322. default:
  323. return ASTCENC_ERR_BAD_SWIZZLE;
  324. }
  325. }
  326. /**
  327. * @brief Validate overall decompression swizzle.
  328. *
  329. * @param swizzle The swizzle to check.
  330. *
  331. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  332. */
  333. static astcenc_error validate_decompression_swizzle(
  334. const astcenc_swizzle& swizzle
  335. ) {
  336. if (validate_decompression_swz(swizzle.r) ||
  337. validate_decompression_swz(swizzle.g) ||
  338. validate_decompression_swz(swizzle.b) ||
  339. validate_decompression_swz(swizzle.a))
  340. {
  341. return ASTCENC_ERR_BAD_SWIZZLE;
  342. }
  343. return ASTCENC_SUCCESS;
  344. }
  345. /**
  346. * Validate that an incoming configuration is in-spec.
  347. *
  348. * This function can respond in two ways:
  349. *
  350. * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
  351. * for out-of-range inputs in this case.
  352. * * Numerical inputs and logic inputs are are logically invalid and which make no sense
  353. * algorithmically will return an error.
  354. *
  355. * @param[in,out] config The input compressor configuration.
  356. *
  357. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  358. */
  359. static astcenc_error validate_config(
  360. astcenc_config &config
  361. ) {
  362. astcenc_error status;
  363. status = validate_profile(config.profile);
  364. if (status != ASTCENC_SUCCESS)
  365. {
  366. return status;
  367. }
  368. status = validate_flags(config.flags);
  369. if (status != ASTCENC_SUCCESS)
  370. {
  371. return status;
  372. }
  373. status = validate_block_size(config.block_x, config.block_y, config.block_z);
  374. if (status != ASTCENC_SUCCESS)
  375. {
  376. return status;
  377. }
  378. #if defined(ASTCENC_DECOMPRESS_ONLY)
  379. // Decompress-only builds only support decompress-only contexts
  380. if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
  381. {
  382. return ASTCENC_ERR_BAD_PARAM;
  383. }
  384. #endif
  385. config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
  386. config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
  387. config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  388. config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
  389. config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
  390. config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
  391. config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
  392. config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f);
  393. config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f);
  394. config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f);
  395. config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f);
  396. config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f);
  397. // Specifying a zero weight color component is not allowed; force to small value
  398. float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
  399. astc::max(config.cw_b_weight, config.cw_a_weight));
  400. if (max_weight > 0.0f)
  401. {
  402. max_weight /= 1000.0f;
  403. config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
  404. config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
  405. config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
  406. config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
  407. }
  408. // If all color components error weights are zero then return an error
  409. else
  410. {
  411. return ASTCENC_ERR_BAD_PARAM;
  412. }
  413. return ASTCENC_SUCCESS;
  414. }
  415. /* See header for documentation. */
  416. astcenc_error astcenc_config_init(
  417. astcenc_profile profile,
  418. unsigned int block_x,
  419. unsigned int block_y,
  420. unsigned int block_z,
  421. float quality,
  422. unsigned int flags,
  423. astcenc_config* configp
  424. ) {
  425. astcenc_error status;
  426. astcenc_config& config = *configp;
  427. // Zero init all config fields; although most of will be over written
  428. std::memset(&config, 0, sizeof(config));
  429. // Process the block size
  430. block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
  431. status = validate_block_size(block_x, block_y, block_z);
  432. if (status != ASTCENC_SUCCESS)
  433. {
  434. return status;
  435. }
  436. config.block_x = block_x;
  437. config.block_y = block_y;
  438. config.block_z = block_z;
  439. float texels = static_cast<float>(block_x * block_y * block_z);
  440. float ltexels = logf(texels) / logf(10.0f);
  441. // Process the performance quality level or preset; note that this must be done before we
  442. // process any additional settings, such as color profile and flags, which may replace some of
  443. // these settings with more use case tuned values
  444. if (quality < ASTCENC_PRE_FASTEST ||
  445. quality > ASTCENC_PRE_EXHAUSTIVE)
  446. {
  447. return ASTCENC_ERR_BAD_QUALITY;
  448. }
  449. static const std::array<astcenc_preset_config, 5>* preset_configs;
  450. int texels_int = block_x * block_y * block_z;
  451. if (texels_int < 25)
  452. {
  453. preset_configs = &preset_configs_high;
  454. }
  455. else if (texels_int < 64)
  456. {
  457. preset_configs = &preset_configs_mid;
  458. }
  459. else
  460. {
  461. preset_configs = &preset_configs_low;
  462. }
  463. // Determine which preset to use, or which pair to interpolate
  464. size_t start;
  465. size_t end;
  466. for (end = 0; end < preset_configs->size(); end++)
  467. {
  468. if ((*preset_configs)[end].quality >= quality)
  469. {
  470. break;
  471. }
  472. }
  473. start = end == 0 ? 0 : end - 1;
  474. // Start and end node are the same - so just transfer the values.
  475. if (start == end)
  476. {
  477. config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
  478. config.tune_partition_index_limit = (*preset_configs)[start].tune_partition_index_limit;
  479. config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
  480. config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
  481. config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit,
  482. TUNE_MAX_TRIAL_CANDIDATES);
  483. config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
  484. (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
  485. config.tune_mode0_mse_overshoot = (*preset_configs)[start].tune_mode0_mse_overshoot;
  486. config.tune_refinement_mse_overshoot = (*preset_configs)[start].tune_refinement_mse_overshoot;
  487. config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
  488. config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
  489. config.tune_2_plane_early_out_limit_correlation = (*preset_configs)[start].tune_2_plane_early_out_limit_correlation;
  490. config.tune_low_weight_count_limit = (*preset_configs)[start].tune_low_weight_count_limit;
  491. }
  492. // Start and end node are not the same - so interpolate between them
  493. else
  494. {
  495. auto& node_a = (*preset_configs)[start];
  496. auto& node_b = (*preset_configs)[end];
  497. float wt_range = node_b.quality - node_a.quality;
  498. assert(wt_range > 0);
  499. // Compute interpolation factors
  500. float wt_node_a = (node_b.quality - quality) / wt_range;
  501. float wt_node_b = (quality - node_a.quality) / wt_range;
  502. #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
  503. #define LERPI(param) astc::flt2int_rtn(\
  504. (static_cast<float>(node_a.param) * wt_node_a) + \
  505. (static_cast<float>(node_b.param) * wt_node_b))
  506. #define LERPUI(param) static_cast<unsigned int>(LERPI(param))
  507. config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
  508. config.tune_partition_index_limit = LERPI(tune_partition_index_limit);
  509. config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
  510. config.tune_refinement_limit = LERPI(tune_refinement_limit);
  511. config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit),
  512. TUNE_MAX_TRIAL_CANDIDATES);
  513. config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
  514. LERP(tune_db_limit_b_base) - 19 * ltexels);
  515. config.tune_mode0_mse_overshoot = LERP(tune_mode0_mse_overshoot);
  516. config.tune_refinement_mse_overshoot = LERP(tune_refinement_mse_overshoot);
  517. config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
  518. config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);
  519. config.tune_2_plane_early_out_limit_correlation = LERP(tune_2_plane_early_out_limit_correlation);
  520. config.tune_low_weight_count_limit = LERPI(tune_low_weight_count_limit);
  521. #undef LERP
  522. #undef LERPI
  523. #undef LERPUI
  524. }
  525. // Set heuristics to the defaults for each color profile
  526. config.cw_r_weight = 1.0f;
  527. config.cw_g_weight = 1.0f;
  528. config.cw_b_weight = 1.0f;
  529. config.cw_a_weight = 1.0f;
  530. config.a_scale_radius = 0;
  531. config.rgbm_m_scale = 0.0f;
  532. config.profile = profile;
  533. // Values in this enum are from an external user, so not guaranteed to be
  534. // bounded to the enum values
  535. switch (static_cast<int>(profile))
  536. {
  537. case ASTCENC_PRF_LDR:
  538. case ASTCENC_PRF_LDR_SRGB:
  539. break;
  540. case ASTCENC_PRF_HDR_RGB_LDR_A:
  541. case ASTCENC_PRF_HDR:
  542. config.tune_db_limit = 999.0f;
  543. break;
  544. default:
  545. return ASTCENC_ERR_BAD_PROFILE;
  546. }
  547. // Flags field must not contain any unknown flag bits
  548. status = validate_flags(flags);
  549. if (status != ASTCENC_SUCCESS)
  550. {
  551. return status;
  552. }
  553. if (flags & ASTCENC_FLG_MAP_NORMAL)
  554. {
  555. // Normal map encoding uses L+A blocks, so allow one more partitioning
  556. // than normal. We need need fewer bits for endpoints, so more likely
  557. // to be able to use more partitions than an RGB/RGBA block
  558. config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);
  559. config.cw_g_weight = 0.0f;
  560. config.cw_b_weight = 0.0f;
  561. config.tune_2_partition_early_out_limit_factor *= 1.5f;
  562. config.tune_3_partition_early_out_limit_factor *= 1.5f;
  563. config.tune_2_plane_early_out_limit_correlation = 0.99f;
  564. // Normals are prone to blocking artifacts on smooth curves
  565. // so force compressor to try harder here ...
  566. config.tune_db_limit *= 1.03f;
  567. }
  568. else if (flags & ASTCENC_FLG_MAP_MASK)
  569. {
  570. // Masks are prone to blocking artifacts on mask edges
  571. // so force compressor to try harder here ...
  572. config.tune_db_limit *= 1.03f;
  573. }
  574. else if (flags & ASTCENC_FLG_MAP_RGBM)
  575. {
  576. config.rgbm_m_scale = 5.0f;
  577. config.cw_a_weight = 2.0f * config.rgbm_m_scale;
  578. }
  579. else // (This is color data)
  580. {
  581. // This is a very basic perceptual metric for RGB color data, which weights error
  582. // significance by the perceptual luminance contribution of each color channel. For
  583. // luminance the usual weights to compute luminance from a linear RGB value are as
  584. // follows:
  585. //
  586. // l = r * 0.3 + g * 0.59 + b * 0.11
  587. //
  588. // ... but we scale these up to keep a better balance between color and alpha. Note
  589. // that if the content is using alpha we'd recommend using the -a option to weight
  590. // the color contribution by the alpha transparency.
  591. if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
  592. {
  593. config.cw_r_weight = 0.30f * 2.25f;
  594. config.cw_g_weight = 0.59f * 2.25f;
  595. config.cw_b_weight = 0.11f * 2.25f;
  596. }
  597. }
  598. config.flags = flags;
  599. return ASTCENC_SUCCESS;
  600. }
  601. /* See header for documentation. */
  602. astcenc_error astcenc_context_alloc(
  603. const astcenc_config* configp,
  604. unsigned int thread_count,
  605. astcenc_context** context
  606. ) {
  607. astcenc_error status;
  608. const astcenc_config& config = *configp;
  609. status = validate_cpu_float();
  610. if (status != ASTCENC_SUCCESS)
  611. {
  612. return status;
  613. }
  614. status = validate_cpu_isa();
  615. if (status != ASTCENC_SUCCESS)
  616. {
  617. return status;
  618. }
  619. if (thread_count == 0)
  620. {
  621. return ASTCENC_ERR_BAD_PARAM;
  622. }
  623. #if defined(ASTCENC_DIAGNOSTICS)
  624. // Force single threaded compressor use in diagnostic mode.
  625. if (thread_count != 1)
  626. {
  627. return ASTCENC_ERR_BAD_PARAM;
  628. }
  629. #endif
  630. astcenc_context* ctxo = new astcenc_context;
  631. astcenc_contexti* ctx = &ctxo->context;
  632. ctx->thread_count = thread_count;
  633. ctx->config = config;
  634. ctx->working_buffers = nullptr;
  635. // These are allocated per-compress, as they depend on image size
  636. ctx->input_alpha_averages = nullptr;
  637. // Copy the config first and validate the copy (we may modify it)
  638. status = validate_config(ctx->config);
  639. if (status != ASTCENC_SUCCESS)
  640. {
  641. delete ctx;
  642. return status;
  643. }
  644. ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
  645. bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
  646. init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
  647. can_omit_modes,
  648. config.tune_partition_count_limit,
  649. static_cast<float>(config.tune_block_mode_limit) / 100.0f,
  650. *ctx->bsd);
  651. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  652. // Do setup only needed by compression
  653. if (!(status & ASTCENC_FLG_DECOMPRESS_ONLY))
  654. {
  655. // Turn a dB limit into a per-texel error for faster use later
  656. if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
  657. {
  658. ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
  659. }
  660. else
  661. {
  662. ctx->config.tune_db_limit = 0.0f;
  663. }
  664. size_t worksize = sizeof(compression_working_buffers) * thread_count;
  665. ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
  666. static_assert((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0,
  667. "compression_working_buffers size must be multiple of vector alignment");
  668. if (!ctx->working_buffers)
  669. {
  670. aligned_free<block_size_descriptor>(ctx->bsd);
  671. delete ctxo;
  672. *context = nullptr;
  673. return ASTCENC_ERR_OUT_OF_MEM;
  674. }
  675. }
  676. #endif
  677. #if defined(ASTCENC_DIAGNOSTICS)
  678. ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
  679. if (!ctx->trace_log->m_file)
  680. {
  681. return ASTCENC_ERR_DTRACE_FAILURE;
  682. }
  683. trace_add_data("block_x", config.block_x);
  684. trace_add_data("block_y", config.block_y);
  685. trace_add_data("block_z", config.block_z);
  686. #endif
  687. *context = ctxo;
  688. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  689. prepare_angular_tables();
  690. #endif
  691. return ASTCENC_SUCCESS;
  692. }
  693. /* See header dor documentation. */
  694. void astcenc_context_free(
  695. astcenc_context* ctxo
  696. ) {
  697. if (ctxo)
  698. {
  699. astcenc_contexti* ctx = &ctxo->context;
  700. aligned_free<compression_working_buffers>(ctx->working_buffers);
  701. aligned_free<block_size_descriptor>(ctx->bsd);
  702. #if defined(ASTCENC_DIAGNOSTICS)
  703. delete ctx->trace_log;
  704. #endif
  705. delete ctxo;
  706. }
  707. }
  708. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  709. /**
  710. * @brief Compress an image, after any preflight has completed.
  711. *
  712. * @param[out] ctxo The compressor context.
  713. * @param thread_index The thread index.
  714. * @param image The intput image.
  715. * @param swizzle The input swizzle.
  716. * @param[out] buffer The output array for the compressed data.
  717. */
  718. static void compress_image(
  719. astcenc_context& ctxo,
  720. unsigned int thread_index,
  721. const astcenc_image& image,
  722. const astcenc_swizzle& swizzle,
  723. uint8_t* buffer
  724. ) {
  725. astcenc_contexti& ctx = ctxo.context;
  726. const block_size_descriptor& bsd = *ctx.bsd;
  727. astcenc_profile decode_mode = ctx.config.profile;
  728. image_block blk;
  729. int block_x = bsd.xdim;
  730. int block_y = bsd.ydim;
  731. int block_z = bsd.zdim;
  732. blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
  733. int dim_x = image.dim_x;
  734. int dim_y = image.dim_y;
  735. int dim_z = image.dim_z;
  736. int xblocks = (dim_x + block_x - 1) / block_x;
  737. int yblocks = (dim_y + block_y - 1) / block_y;
  738. int zblocks = (dim_z + block_z - 1) / block_z;
  739. int block_count = zblocks * yblocks * xblocks;
  740. int row_blocks = xblocks;
  741. int plane_blocks = xblocks * yblocks;
  742. // Populate the block channel weights
  743. blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
  744. ctx.config.cw_g_weight,
  745. ctx.config.cw_b_weight,
  746. ctx.config.cw_a_weight);
  747. // Use preallocated scratch buffer
  748. auto& temp_buffers = ctx.working_buffers[thread_index];
  749. // Only the first thread actually runs the initializer
  750. ctxo.manage_compress.init(block_count);
  751. // Determine if we can use an optimized load function
  752. bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
  753. (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
  754. bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
  755. (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
  756. bool use_fast_load = !needs_swz && !needs_hdr &&
  757. block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
  758. auto load_func = load_image_block;
  759. if (use_fast_load)
  760. {
  761. load_func = load_image_block_fast_ldr;
  762. }
  763. // All threads run this processing loop until there is no work remaining
  764. while (true)
  765. {
  766. unsigned int count;
  767. unsigned int base = ctxo.manage_compress.get_task_assignment(16, count);
  768. if (!count)
  769. {
  770. break;
  771. }
  772. for (unsigned int i = base; i < base + count; i++)
  773. {
  774. // Decode i into x, y, z block indices
  775. int z = i / plane_blocks;
  776. unsigned int rem = i - (z * plane_blocks);
  777. int y = rem / row_blocks;
  778. int x = rem - (y * row_blocks);
  779. // Test if we can apply some basic alpha-scale RDO
  780. bool use_full_block = true;
  781. if (ctx.config.a_scale_radius != 0 && block_z == 1)
  782. {
  783. int start_x = x * block_x;
  784. int end_x = astc::min(dim_x, start_x + block_x);
  785. int start_y = y * block_y;
  786. int end_y = astc::min(dim_y, start_y + block_y);
  787. // SATs accumulate error, so don't test exactly zero. Test for
  788. // less than 1 alpha in the expanded block footprint that
  789. // includes the alpha radius.
  790. int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
  791. int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
  792. float footprint = static_cast<float>(x_footprint * y_footprint);
  793. float threshold = 0.9f / (255.0f * footprint);
  794. // Do we have any alpha values?
  795. use_full_block = false;
  796. for (int ay = start_y; ay < end_y; ay++)
  797. {
  798. for (int ax = start_x; ax < end_x; ax++)
  799. {
  800. float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
  801. if (a_avg > threshold)
  802. {
  803. use_full_block = true;
  804. ax = end_x;
  805. ay = end_y;
  806. }
  807. }
  808. }
  809. }
  810. // Fetch the full block for compression
  811. if (use_full_block)
  812. {
  813. load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
  814. // Scale RGB error contribution by the maximum alpha in the block
  815. // This encourages preserving alpha accuracy in regions with high
  816. // transparency, and can buy up to 0.5 dB PSNR.
  817. if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
  818. {
  819. float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
  820. blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
  821. ctx.config.cw_g_weight * alpha_scale,
  822. ctx.config.cw_b_weight * alpha_scale,
  823. ctx.config.cw_a_weight);
  824. }
  825. }
  826. // Apply alpha scale RDO - substitute constant color block
  827. else
  828. {
  829. blk.origin_texel = vfloat4::zero();
  830. blk.data_min = vfloat4::zero();
  831. blk.data_mean = vfloat4::zero();
  832. blk.data_max = vfloat4::zero();
  833. blk.grayscale = true;
  834. }
  835. int offset = ((z * yblocks + y) * xblocks + x) * 16;
  836. uint8_t *bp = buffer + offset;
  837. physical_compressed_block* pcb = reinterpret_cast<physical_compressed_block*>(bp);
  838. compress_block(ctx, blk, *pcb, temp_buffers);
  839. }
  840. ctxo.manage_compress.complete_task_assignment(count);
  841. }
  842. }
  843. /**
  844. * @brief Compute regional averages in an image.
  845. *
  846. * This function can be called by multiple threads, but only after a single
  847. * thread calls the setup function @c init_compute_averages().
  848. *
  849. * Results are written back into @c img->input_alpha_averages.
  850. *
  851. * @param[out] ctx The context.
  852. * @param ag The average and variance arguments created during setup.
  853. */
  854. static void compute_averages(
  855. astcenc_context& ctx,
  856. const avg_args &ag
  857. ) {
  858. pixel_region_args arg = ag.arg;
  859. arg.work_memory = new vfloat4[ag.work_memory_size];
  860. int size_x = ag.img_size_x;
  861. int size_y = ag.img_size_y;
  862. int size_z = ag.img_size_z;
  863. int step_xy = ag.blk_size_xy;
  864. int step_z = ag.blk_size_z;
  865. int y_tasks = (size_y + step_xy - 1) / step_xy;
  866. // All threads run this processing loop until there is no work remaining
  867. while (true)
  868. {
  869. unsigned int count;
  870. unsigned int base = ctx.manage_avg.get_task_assignment(16, count);
  871. if (!count)
  872. {
  873. break;
  874. }
  875. for (unsigned int i = base; i < base + count; i++)
  876. {
  877. int z = (i / (y_tasks)) * step_z;
  878. int y = (i - (z * y_tasks)) * step_xy;
  879. arg.size_z = astc::min(step_z, size_z - z);
  880. arg.offset_z = z;
  881. arg.size_y = astc::min(step_xy, size_y - y);
  882. arg.offset_y = y;
  883. for (int x = 0; x < size_x; x += step_xy)
  884. {
  885. arg.size_x = astc::min(step_xy, size_x - x);
  886. arg.offset_x = x;
  887. compute_pixel_region_variance(ctx.context, arg);
  888. }
  889. }
  890. ctx.manage_avg.complete_task_assignment(count);
  891. }
  892. delete[] arg.work_memory;
  893. }
  894. #endif
  895. /* See header for documentation. */
  896. astcenc_error astcenc_compress_image(
  897. astcenc_context* ctxo,
  898. astcenc_image* imagep,
  899. const astcenc_swizzle* swizzle,
  900. uint8_t* data_out,
  901. size_t data_len,
  902. unsigned int thread_index
  903. ) {
  904. #if defined(ASTCENC_DECOMPRESS_ONLY)
  905. (void)ctxo;
  906. (void)imagep;
  907. (void)swizzle;
  908. (void)data_out;
  909. (void)data_len;
  910. (void)thread_index;
  911. return ASTCENC_ERR_BAD_CONTEXT;
  912. #else
  913. astcenc_contexti* ctx = &ctxo->context;
  914. astcenc_error status;
  915. astcenc_image& image = *imagep;
  916. if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
  917. {
  918. return ASTCENC_ERR_BAD_CONTEXT;
  919. }
  920. status = validate_compression_swizzle(*swizzle);
  921. if (status != ASTCENC_SUCCESS)
  922. {
  923. return status;
  924. }
  925. if (thread_index >= ctx->thread_count)
  926. {
  927. return ASTCENC_ERR_BAD_PARAM;
  928. }
  929. unsigned int block_x = ctx->config.block_x;
  930. unsigned int block_y = ctx->config.block_y;
  931. unsigned int block_z = ctx->config.block_z;
  932. unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
  933. unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
  934. unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;
  935. // Check we have enough output space (16 bytes per block)
  936. size_t size_needed = xblocks * yblocks * zblocks * 16;
  937. if (data_len < size_needed)
  938. {
  939. return ASTCENC_ERR_OUT_OF_MEM;
  940. }
  941. // If context thread count is one then implicitly reset
  942. if (ctx->thread_count == 1)
  943. {
  944. astcenc_compress_reset(ctxo);
  945. }
  946. if (ctx->config.a_scale_radius != 0)
  947. {
  948. // First thread to enter will do setup, other threads will subsequently
  949. // enter the critical section but simply skip over the initialization
  950. auto init_avg = [ctx, &image, swizzle]() {
  951. // Perform memory allocations for the destination buffers
  952. size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
  953. ctx->input_alpha_averages = new float[texel_count];
  954. return init_compute_averages(
  955. image, ctx->config.a_scale_radius, *swizzle,
  956. ctx->avg_preprocess_args);
  957. };
  958. // Only the first thread actually runs the initializer
  959. ctxo->manage_avg.init(init_avg);
  960. // All threads will enter this function and dynamically grab work
  961. compute_averages(*ctxo, ctx->avg_preprocess_args);
  962. }
  963. // Wait for compute_averages to complete before compressing
  964. ctxo->manage_avg.wait();
  965. compress_image(*ctxo, thread_index, image, *swizzle, data_out);
  966. // Wait for compress to complete before freeing memory
  967. ctxo->manage_compress.wait();
  968. auto term_compress = [ctx]() {
  969. delete[] ctx->input_alpha_averages;
  970. ctx->input_alpha_averages = nullptr;
  971. };
  972. // Only the first thread to arrive actually runs the term
  973. ctxo->manage_compress.term(term_compress);
  974. return ASTCENC_SUCCESS;
  975. #endif
  976. }
  977. /* See header for documentation. */
  978. astcenc_error astcenc_compress_reset(
  979. astcenc_context* ctxo
  980. ) {
  981. #if defined(ASTCENC_DECOMPRESS_ONLY)
  982. (void)ctxo;
  983. return ASTCENC_ERR_BAD_CONTEXT;
  984. #else
  985. astcenc_contexti* ctx = &ctxo->context;
  986. if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
  987. {
  988. return ASTCENC_ERR_BAD_CONTEXT;
  989. }
  990. ctxo->manage_avg.reset();
  991. ctxo->manage_compress.reset();
  992. return ASTCENC_SUCCESS;
  993. #endif
  994. }
  995. /* See header for documentation. */
  996. astcenc_error astcenc_decompress_image(
  997. astcenc_context* ctxo,
  998. const uint8_t* data,
  999. size_t data_len,
  1000. astcenc_image* image_outp,
  1001. const astcenc_swizzle* swizzle,
  1002. unsigned int thread_index
  1003. ) {
  1004. astcenc_error status;
  1005. astcenc_image& image_out = *image_outp;
  1006. astcenc_contexti* ctx = &ctxo->context;
  1007. // Today this doesn't matter (working set on stack) but might in future ...
  1008. if (thread_index >= ctx->thread_count)
  1009. {
  1010. return ASTCENC_ERR_BAD_PARAM;
  1011. }
  1012. status = validate_decompression_swizzle(*swizzle);
  1013. if (status != ASTCENC_SUCCESS)
  1014. {
  1015. return status;
  1016. }
  1017. unsigned int block_x = ctx->config.block_x;
  1018. unsigned int block_y = ctx->config.block_y;
  1019. unsigned int block_z = ctx->config.block_z;
  1020. unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
  1021. unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
  1022. unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
  1023. int row_blocks = xblocks;
  1024. int plane_blocks = xblocks * yblocks;
  1025. // Check we have enough output space (16 bytes per block)
  1026. size_t size_needed = xblocks * yblocks * zblocks * 16;
  1027. if (data_len < size_needed)
  1028. {
  1029. return ASTCENC_ERR_OUT_OF_MEM;
  1030. }
  1031. image_block blk;
  1032. blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
  1033. // If context thread count is one then implicitly reset
  1034. if (ctx->thread_count == 1)
  1035. {
  1036. astcenc_decompress_reset(ctxo);
  1037. }
  1038. // Only the first thread actually runs the initializer
  1039. ctxo->manage_decompress.init(zblocks * yblocks * xblocks);
  1040. // All threads run this processing loop until there is no work remaining
  1041. while (true)
  1042. {
  1043. unsigned int count;
  1044. unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count);
  1045. if (!count)
  1046. {
  1047. break;
  1048. }
  1049. for (unsigned int i = base; i < base + count; i++)
  1050. {
  1051. // Decode i into x, y, z block indices
  1052. int z = i / plane_blocks;
  1053. unsigned int rem = i - (z * plane_blocks);
  1054. int y = rem / row_blocks;
  1055. int x = rem - (y * row_blocks);
  1056. unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16;
  1057. const uint8_t* bp = data + offset;
  1058. const physical_compressed_block& pcb = *reinterpret_cast<const physical_compressed_block*>(bp);
  1059. symbolic_compressed_block scb;
  1060. physical_to_symbolic(*ctx->bsd, pcb, scb);
  1061. decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
  1062. x * block_x, y * block_y, z * block_z,
  1063. scb, blk);
  1064. store_image_block(image_out, blk, *ctx->bsd,
  1065. x * block_x, y * block_y, z * block_z, *swizzle);
  1066. }
  1067. ctxo->manage_decompress.complete_task_assignment(count);
  1068. }
  1069. return ASTCENC_SUCCESS;
  1070. }
  1071. /* See header for documentation. */
  1072. astcenc_error astcenc_decompress_reset(
  1073. astcenc_context* ctxo
  1074. ) {
  1075. ctxo->manage_decompress.reset();
  1076. return ASTCENC_SUCCESS;
  1077. }
  1078. /* See header for documentation. */
  1079. astcenc_error astcenc_get_block_info(
  1080. astcenc_context* ctxo,
  1081. const uint8_t data[16],
  1082. astcenc_block_info* info
  1083. ) {
  1084. #if defined(ASTCENC_DECOMPRESS_ONLY)
  1085. (void)ctxo;
  1086. (void)data;
  1087. (void)info;
  1088. return ASTCENC_ERR_BAD_CONTEXT;
  1089. #else
  1090. astcenc_contexti* ctx = &ctxo->context;
  1091. // Decode the compressed data into a symbolic form
  1092. const physical_compressed_block&pcb = *reinterpret_cast<const physical_compressed_block*>(data);
  1093. symbolic_compressed_block scb;
  1094. physical_to_symbolic(*ctx->bsd, pcb, scb);
  1095. // Fetch the appropriate partition and decimation tables
  1096. block_size_descriptor& bsd = *ctx->bsd;
  1097. // Start from a clean slate
  1098. memset(info, 0, sizeof(*info));
  1099. // Basic info we can always populate
  1100. info->profile = ctx->config.profile;
  1101. info->block_x = ctx->config.block_x;
  1102. info->block_y = ctx->config.block_y;
  1103. info->block_z = ctx->config.block_z;
  1104. info->texel_count = bsd.texel_count;
  1105. // Check for error blocks first
  1106. info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
  1107. if (info->is_error_block)
  1108. {
  1109. return ASTCENC_SUCCESS;
  1110. }
  1111. // Check for constant color blocks second
  1112. info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
  1113. scb.block_type == SYM_BTYPE_CONST_U16;
  1114. if (info->is_constant_block)
  1115. {
  1116. return ASTCENC_SUCCESS;
  1117. }
  1118. // Otherwise handle a full block ; known to be valid after conditions above have been checked
  1119. int partition_count = scb.partition_count;
  1120. const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
  1121. const block_mode& bm = bsd.get_block_mode(scb.block_mode);
  1122. const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
  1123. info->weight_x = di.weight_x;
  1124. info->weight_y = di.weight_y;
  1125. info->weight_z = di.weight_z;
  1126. info->is_dual_plane_block = bm.is_dual_plane != 0;
  1127. info->partition_count = scb.partition_count;
  1128. info->partition_index = scb.partition_index;
  1129. info->dual_plane_component = scb.plane2_component;
  1130. info->color_level_count = get_quant_level(scb.get_color_quant_mode());
  1131. info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
  1132. // Unpack color endpoints for each active partition
  1133. for (unsigned int i = 0; i < scb.partition_count; i++)
  1134. {
  1135. bool rgb_hdr;
  1136. bool a_hdr;
  1137. vint4 endpnt[2];
  1138. unpack_color_endpoints(ctx->config.profile,
  1139. scb.color_formats[i],
  1140. scb.get_color_quant_mode(),
  1141. scb.color_values[i],
  1142. rgb_hdr, a_hdr,
  1143. endpnt[0], endpnt[1]);
  1144. // Store the color endpoint mode info
  1145. info->color_endpoint_modes[i] = scb.color_formats[i];
  1146. info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;
  1147. // Store the unpacked and decoded color endpoint
  1148. vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
  1149. for (int j = 0; j < 2; j++)
  1150. {
  1151. vint4 color_lns = lns_to_sf16(endpnt[j]);
  1152. vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
  1153. vint4 datai = select(color_unorm, color_lns, hdr_mask);
  1154. store(float16_to_float(datai), info->color_endpoints[i][j]);
  1155. }
  1156. }
  1157. // Unpack weights for each texel
  1158. int weight_plane1[BLOCK_MAX_TEXELS];
  1159. int weight_plane2[BLOCK_MAX_TEXELS];
  1160. unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
  1161. for (unsigned int i = 0; i < bsd.texel_count; i++)
  1162. {
  1163. info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
  1164. if (info->is_dual_plane_block)
  1165. {
  1166. info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
  1167. }
  1168. }
  1169. // Unpack partition assignments for each texel
  1170. for (unsigned int i = 0; i < bsd.texel_count; i++)
  1171. {
  1172. info->partition_assignment[i] = pi.partition_of_texel[i];
  1173. }
  1174. return ASTCENC_SUCCESS;
  1175. #endif
  1176. }
  1177. /* See header for documentation. */
  1178. const char* astcenc_get_error_string(
  1179. astcenc_error status
  1180. ) {
  1181. // Values in this enum are from an external user, so not guaranteed to be
  1182. // bounded to the enum values
  1183. switch (static_cast<int>(status))
  1184. {
  1185. case ASTCENC_SUCCESS:
  1186. return "ASTCENC_SUCCESS";
  1187. case ASTCENC_ERR_OUT_OF_MEM:
  1188. return "ASTCENC_ERR_OUT_OF_MEM";
  1189. case ASTCENC_ERR_BAD_CPU_FLOAT:
  1190. return "ASTCENC_ERR_BAD_CPU_FLOAT";
  1191. case ASTCENC_ERR_BAD_CPU_ISA:
  1192. return "ASTCENC_ERR_BAD_CPU_ISA";
  1193. case ASTCENC_ERR_BAD_PARAM:
  1194. return "ASTCENC_ERR_BAD_PARAM";
  1195. case ASTCENC_ERR_BAD_BLOCK_SIZE:
  1196. return "ASTCENC_ERR_BAD_BLOCK_SIZE";
  1197. case ASTCENC_ERR_BAD_PROFILE:
  1198. return "ASTCENC_ERR_BAD_PROFILE";
  1199. case ASTCENC_ERR_BAD_QUALITY:
  1200. return "ASTCENC_ERR_BAD_QUALITY";
  1201. case ASTCENC_ERR_BAD_FLAGS:
  1202. return "ASTCENC_ERR_BAD_FLAGS";
  1203. case ASTCENC_ERR_BAD_SWIZZLE:
  1204. return "ASTCENC_ERR_BAD_SWIZZLE";
  1205. case ASTCENC_ERR_BAD_CONTEXT:
  1206. return "ASTCENC_ERR_BAD_CONTEXT";
  1207. case ASTCENC_ERR_NOT_IMPLEMENTED:
  1208. return "ASTCENC_ERR_NOT_IMPLEMENTED";
  1209. #if defined(ASTCENC_DIAGNOSTICS)
  1210. case ASTCENC_ERR_DTRACE_FAILURE:
  1211. return "ASTCENC_ERR_DTRACE_FAILURE";
  1212. #endif
  1213. default:
  1214. return nullptr;
  1215. }
  1216. }