basisu_frontend.cpp 119 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385
  1. // basisu_frontend.cpp
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // TODO:
  17. // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here.
  18. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this.
  19. //
  20. #include "../transcoder/basisu.h"
  21. #include "basisu_frontend.h"
  22. #include "basisu_opencl.h"
  23. #include <unordered_set>
  24. #include <unordered_map>
  25. #if BASISU_SUPPORT_SSE
  26. #define CPPSPMD_NAME(a) a##_sse41
  27. #include "basisu_kernels_declares.h"
  28. #endif
  29. #define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0)
namespace basisu
{
	// Cap on the number of parallel jobs spawned while building codebooks
	// (name suggests threads; exact usage is outside this chunk — TODO confirm).
	const uint32_t cMaxCodebookCreationThreads = 8;

	// Endpoint refinement iteration count used by the higher compression levels
	// (see the m_num_endpoint_codebook_iterations assignments in init()).
	const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3;
	//const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3;

	// Sizes of the coarse "parent" codebooks used when hierarchical
	// endpoint/selector codebooks are enabled.
	const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16;
	const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32;
	const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16;
  38. // TODO - How to handle internal verifies in the basisu lib
  39. static inline void handle_verify_failure(int line)
  40. {
  41. error_printf("basisu_frontend: verify check failed at line %i!\n", line);
  42. abort();
  43. }
  44. bool basisu_frontend::init(const params &p)
  45. {
  46. debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n",
  47. p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0,
  48. p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level);
  49. if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters))
  50. return false;
  51. if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters))
  52. return false;
  53. m_source_blocks.resize(0);
  54. append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks);
  55. m_params = p;
  56. if (m_params.m_pOpenCL_context)
  57. {
  58. BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block));
  59. // Upload the RGBA pixel blocks a single time.
  60. if (!opencl_set_pixel_blocks(m_params.m_pOpenCL_context, m_source_blocks.size(), (cl_pixel_block*)m_source_blocks.data()))
  61. {
  62. // This is not fatal, we just won't use OpenCL.
  63. error_printf("basisu_frontend::init: opencl_set_pixel_blocks() failed\n");
  64. m_params.m_pOpenCL_context = nullptr;
  65. m_opencl_failed = true;
  66. }
  67. }
  68. m_encoded_blocks.resize(m_params.m_num_source_blocks);
  69. memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0]));
  70. m_num_endpoint_codebook_iterations = 1;
  71. m_num_selector_codebook_iterations = 1;
  72. switch (p.m_compression_level)
  73. {
  74. case 0:
  75. {
  76. m_endpoint_refinement = false;
  77. m_use_hierarchical_endpoint_codebooks = true;
  78. m_use_hierarchical_selector_codebooks = true;
  79. break;
  80. }
  81. case 1:
  82. {
  83. m_endpoint_refinement = true;
  84. m_use_hierarchical_endpoint_codebooks = true;
  85. m_use_hierarchical_selector_codebooks = true;
  86. break;
  87. }
  88. case 2:
  89. {
  90. m_endpoint_refinement = true;
  91. m_use_hierarchical_endpoint_codebooks = true;
  92. m_use_hierarchical_selector_codebooks = true;
  93. break;
  94. }
  95. case 3:
  96. {
  97. m_endpoint_refinement = true;
  98. m_use_hierarchical_endpoint_codebooks = false;
  99. m_use_hierarchical_selector_codebooks = false;
  100. break;
  101. }
  102. case 4:
  103. {
  104. m_endpoint_refinement = true;
  105. m_use_hierarchical_endpoint_codebooks = true;
  106. m_use_hierarchical_selector_codebooks = true;
  107. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  108. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  109. break;
  110. }
  111. case 5:
  112. {
  113. m_endpoint_refinement = true;
  114. m_use_hierarchical_endpoint_codebooks = false;
  115. m_use_hierarchical_selector_codebooks = false;
  116. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  117. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  118. break;
  119. }
  120. case 6:
  121. default:
  122. {
  123. m_endpoint_refinement = true;
  124. m_use_hierarchical_endpoint_codebooks = false;
  125. m_use_hierarchical_selector_codebooks = false;
  126. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
  127. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
  128. break;
  129. }
  130. }
  131. if (m_params.m_disable_hierarchical_endpoint_codebooks)
  132. m_use_hierarchical_endpoint_codebooks = false;
  133. debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n",
  134. m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations);
  135. return true;
  136. }
	// Top-level ETC1S compression driver. Encodes the source blocks to a high
	// quality ETC1S reference texture, then quantizes first the endpoints and
	// then the selectors into codebooks, iterating refinement as configured by
	// init(). Returns false only if output validation (m_params.m_validate) fails.
	// NOTE: the sequence of calls below is order-dependent; do not reorder.
	bool basisu_frontend::compress()
	{
		debug_printf("basisu_frontend::compress\n");
		m_total_blocks = m_params.m_num_source_blocks;
		m_total_pixels = m_total_blocks * cPixelBlockTotalPixels;
		// Encode the initial high quality ETC1S texture
		init_etc1_images();
		// First quantize the ETC1S endpoints
		if (m_params.m_pGlobal_codebooks)
		{
			// Pre-supplied global codebooks: skip training entirely and just
			// assign each block to the best existing endpoint/selector entries.
			init_global_codebooks();
		}
		else
		{
			init_endpoint_training_vectors();
			generate_endpoint_clusters();
			// Iterative endpoint codebook refinement loop.
			for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++)
			{
				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}
				// Only grow the codebook on iterations after the first.
				if (refine_endpoint_step)
				{
					introduce_new_endpoint_clusters();
				}
				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}
				generate_endpoint_codebook(refine_endpoint_step);
				if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
				{
					char buf[256];
					snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step);
					dump_endpoint_clusterization_visualization(buf, false);
				}
				bool early_out = false;
				if (m_endpoint_refinement)
				{
					//dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png");
					// If refinement made no progress we can stop iterating early
					// (after finishing this iteration's cleanup below).
					if (!refine_endpoint_clusterization())
						early_out = true;
					// Video special case: with a single iteration the codebook must
					// still be regenerated after refinement.
					if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1))
					{
						eliminate_redundant_or_empty_endpoint_clusters();
						generate_endpoint_codebook(basisu::maximum(1U, refine_endpoint_step));
					}
					if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
					{
						char buf[256];
						snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step);
						dump_endpoint_clusterization_visualization(buf, false);
						snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step);
						dump_endpoint_clusterization_visualization(buf, true);
					}
				}
				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}
				eliminate_redundant_or_empty_endpoint_clusters();
				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}
				if (m_params.m_debug_stats)
					debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size());
				if (early_out)
					break;
			}
			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
			}
			generate_block_endpoint_clusters();
			create_initial_packed_texture();
			// Now quantize the ETC1S selectors
			generate_selector_clusters();
			if (m_use_hierarchical_selector_codebooks)
				compute_selector_clusters_within_each_parent_cluster();
			if (m_params.m_compression_level == 0)
			{
				// Fastest path: single selector codebook pass, no refinement.
				create_optimized_selector_codebook(0);
				find_optimal_selector_clusters_for_each_block();
				introduce_special_selector_clusters();
			}
			else
			{
				const uint32_t num_refine_selector_steps = m_num_selector_codebook_iterations;
				for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++)
				{
					create_optimized_selector_codebook(refine_selector_steps);
					find_optimal_selector_clusters_for_each_block();
					introduce_special_selector_clusters();
					// At high levels (or for video), also re-optimize endpoints given
					// the chosen selectors; stop iterating once that makes no progress.
					if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))
					{
						if (!refine_block_endpoints_given_selectors())
							break;
					}
				}
			}
			optimize_selector_codebook();
			if (m_params.m_debug_stats)
				debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size());
		}
		finalize();
		if (m_params.m_validate)
		{
			if (!validate_output())
				return false;
		}
		debug_printf("basisu_frontend::compress: Done\n");
		return true;
	}
	// Assigns every block to the best entries of caller-supplied (pre-trained)
	// global endpoint/selector codebooks instead of training new ones.
	// Runs NUM_PASSES alternating passes: pick the best endpoint per block, then
	// the best selector per block, re-encoding m_encoded_blocks each pass so the
	// next pass scores candidates against the previous pass's result.
	// Work is split into N-block jobs on m_params.m_pJob_pool (assumed non-null
	// here — TODO confirm callers guarantee a job pool on this path).
	// Always returns true.
	bool basisu_frontend::init_global_codebooks()
	{
		const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks;
		const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints();
		const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors();
		// Import the global endpoint codebook. Only subblock 0 is populated; both
		// intensity table slots get the same value (ETC1S uses one table per block).
		m_endpoint_cluster_etc_params.resize(endpoints.size());
		for (uint32_t i = 0; i < endpoints.size(); i++)
		{
			m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5;
			m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5;
			m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255);
			m_endpoint_cluster_etc_params[i].m_color_used[0] = true;
			m_endpoint_cluster_etc_params[i].m_valid = true;
		}
		// Import the global selector codebook, one 4x4 selector grid per entry.
		m_optimized_cluster_selectors.resize(selectors.size());
		for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
		{
			for (uint32_t y = 0; y < 4; y++)
				for (uint32_t x = 0; x < 4; x++)
					m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y));
		}
		m_block_endpoint_clusters_indices.resize(m_total_blocks);
		m_orig_encoded_blocks.resize(m_total_blocks);
		m_block_selector_cluster_index.resize(m_total_blocks);
		// Disabled single-pass selector assignment, superseded by the multi-pass
		// loop below (note: references N before it is defined — dead code).
#if 0
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
			m_params.m_pJob_pool->add_job([this, first_index, last_index] {
				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const etc_block& blk = m_etc1_blocks_etc1s[block_index];
					const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
					etc_block trial_blk;
					trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]);
					trial_blk.set_flip_bit(true);
					uint64_t best_err = UINT64_MAX;
					uint32_t best_index = 0;
					for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
					{
						trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
						const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
						if (cur_err < best_err)
						{
							best_err = cur_err;
							best_index = i;
							if (!cur_err)
								break;
						}
					} // block_index
					m_block_selector_cluster_index[block_index] = best_index;
				}
			});
		}
		m_params.m_pJob_pool->wait_for_all();
		m_encoded_blocks.resize(m_total_blocks);
		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
			const uint32_t selector_index = m_block_selector_cluster_index[block_index];
			etc_block& blk = m_encoded_blocks[block_index];
			blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
			blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
			blk.set_flip_bit(true);
			blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
		}
#endif
		// HACK HACK
		const uint32_t NUM_PASSES = 3;
		for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
		{
			debug_printf("init_global_codebooks: pass %u\n", pass);
			// Blocks processed per job.
			const uint32_t N = 128;
			// Phase 1: for each block, exhaustively pick the global endpoint entry
			// with the lowest error.
			for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
			{
				const uint32_t first_index = block_index_iter;
				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
				m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {
					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
					{
						// Pass 0 scores against the original ETC1S encoding; later
						// passes score against the previous pass's re-encoded block.
						const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index];
						const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits();
						etc_block trial_blk(blk);
						trial_blk.set_raw_selector_bits(blk_raw_selector_bits);
						trial_blk.set_flip_bit(true);
						uint64_t best_err = UINT64_MAX;
						uint32_t best_index = 0;
						etc_block best_block(trial_blk);
						for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++)
						{
							// Only consider candidates whose intensity table does not
							// exceed the block's current one (search-space pruning —
							// presumably part of the HACK above; TODO confirm intent).
							if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0))
								continue;
							trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]);
							trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]);
							const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
							uint64_t cur_err;
							// Pass 0 also re-derives the best selectors for the trial
							// endpoints; later passes keep the selectors fixed.
							if (!pass)
								cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
							else
								cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual);
							if (cur_err < best_err)
							{
								best_err = cur_err;
								best_index = i;
								best_block = trial_blk;
								// Exact match: can't do better, stop searching.
								if (!cur_err)
									break;
							}
						}
						// Both subblocks share the same endpoint cluster (ETC1S).
						m_block_endpoint_clusters_indices[block_index][0] = best_index;
						m_block_endpoint_clusters_indices[block_index][1] = best_index;
						m_orig_encoded_blocks[block_index] = best_block;
					} // block_index
				});
			}
			m_params.m_pJob_pool->wait_for_all();
			// Rebuild the endpoint-cluster -> subblock-index lists from this pass's
			// assignments (2 subblock entries per block).
			m_endpoint_clusters.resize(0);
			m_endpoint_clusters.resize(endpoints.size());
			for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
			{
				const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0];
				m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2);
				m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1);
			}
			m_block_selector_cluster_index.resize(m_total_blocks);
			// Phase 2: with endpoints fixed, exhaustively pick the global selector
			// entry with the lowest error for each block.
			for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
			{
				const uint32_t first_index = block_index_iter;
				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
				m_params.m_pJob_pool->add_job([this, first_index, last_index] {
					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
					{
						const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
						etc_block trial_blk;
						trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]);
						trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]);
						trial_blk.set_flip_bit(true);
						uint64_t best_err = UINT64_MAX;
						uint32_t best_index = 0;
						for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
						{
							trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
							const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
							if (cur_err < best_err)
							{
								best_err = cur_err;
								best_index = i;
								if (!cur_err)
									break;
							}
						} // block_index
						m_block_selector_cluster_index[block_index] = best_index;
					}
				});
			}
			m_params.m_pJob_pool->wait_for_all();
			// Re-encode every block from this pass's chosen endpoint + selector
			// entries; pass+1 scores its candidates against these blocks.
			m_encoded_blocks.resize(m_total_blocks);
			for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
			{
				const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
				const uint32_t selector_index = m_block_selector_cluster_index[block_index];
				etc_block& blk = m_encoded_blocks[block_index];
				blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
				blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
				blk.set_flip_bit(true);
				blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
			}
		} // pass
		// Build the selector-cluster -> block-index lists from the final assignments.
		m_selector_cluster_block_indices.resize(selectors.size());
		for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++)
			m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index);
		return true;
	}
// Ensures the selector codebook contains a cluster for each of the 4 possible
// "flat" selector patterns (all 16 selectors equal to 0, 1, 2 or 3), then moves
// each block whose ETC1S error improves with the flat pattern into the new cluster.
void basisu_frontend::introduce_special_selector_clusters()
{
	debug_printf("introduce_special_selector_clusters\n");
	uint32_t total_blocks_relocated = 0;
	const uint32_t initial_selector_clusters = m_selector_cluster_block_indices.size_u32();
	// Tracks which blocks were moved to a newly introduced flat cluster, so the
	// old clusters' block lists can be compacted afterwards.
	bool_vec block_relocated_flags(m_total_blocks);
	// Make sure the selector codebook always has pure flat blocks for each possible selector, to avoid obvious artifacts.
	// optimize_selector_codebook() will clean up any redundant clusters we create here.
	for (uint32_t sel = 0; sel < 4; sel++)
	{
		// Build a block whose 16 selectors are all 'sel'.
		etc_block blk;
		clear_obj(blk);
		for (uint32_t j = 0; j < 16; j++)
			blk.set_selector(j & 3, j >> 2, sel);
		// Skip this selector value if an identical pattern already exists in the codebook.
		int k;
		for (k = 0; k < (int)m_optimized_cluster_selectors.size(); k++)
			if (m_optimized_cluster_selectors[k].get_raw_selector_bits() == blk.get_raw_selector_bits())
				break;
		if (k < (int)m_optimized_cluster_selectors.size())
			continue;
		debug_printf("Introducing sel %u\n", sel);
		const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32();
		m_optimized_cluster_selectors.push_back(blk);
		vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index);
		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			// Only consider blocks whose original encoding used exactly this flat pattern.
			if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits())
				continue;
			// See if using flat selectors actually decreases the block's error.
			const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index];
			etc_block cur_blk;
			const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
			cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
			cur_blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
			cur_blk.set_raw_selector_bits(get_selector_cluster_selector_bits(old_selector_cluster_index).get_raw_selector_bits());
			cur_blk.set_flip_bit(true);
			// Error with the block's current selector cluster...
			const uint64_t cur_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
			// ...vs. error with the flat selector pattern.
			cur_blk.set_raw_selector_bits(blk.get_raw_selector_bits());
			const uint64_t new_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
			if (new_err >= cur_err)
				continue;
			// Change the block to use the new cluster
			m_block_selector_cluster_index[block_index] = new_selector_cluster_index;
			m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index);
			block_relocated_flags[block_index] = true;
#if 0
			int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index);
			if (j >= 0)
				m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j);
#endif
			total_blocks_relocated++;
			m_encoded_blocks[block_index].set_raw_selector_bits(blk.get_raw_selector_bits());
		} // block_index
	} // sel
	if (total_blocks_relocated)
	{
		debug_printf("Fixing selector codebook\n");
		// Compact the original clusters' block lists in place, dropping the blocks
		// that were relocated to the new flat clusters.
		for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++)
		{
			uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index];
			uint32_t dst_ofs = 0;
			for (uint32_t i = 0; i < block_indices.size(); i++)
			{
				const uint32_t block_index = block_indices[i];
				if (!block_relocated_flags[block_index])
					block_indices[dst_ofs++] = block_index;
			}
			block_indices.resize(dst_ofs);
		}
	}
	debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated);
}
// This method will change the number and ordering of the selector codebook clusters.
// Compacts the selector codebook: drops clusters that no block references, merges
// clusters whose raw selector bits are identical, then remaps the per-block cluster
// indices (and the selector-within-parent-cluster tables) onto the smaller codebook.
void basisu_frontend::optimize_selector_codebook()
{
	debug_printf("optimize_selector_codebook\n");
	const uint32_t orig_total_selector_clusters = m_optimized_cluster_selectors.size_u32();
	// Mark which clusters are actually referenced by at least one block.
	bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
	for (uint32_t i = 0; i < m_total_blocks; i++)
		selector_cluster_was_used[m_block_selector_cluster_index[i]] = true;
	// old_to_new[i] = new index of old cluster i, or -1 if the cluster was unused.
	int_vec old_to_new(m_optimized_cluster_selectors.size());
	int_vec new_to_old;
	uint32_t total_new_entries = 0;
	// Maps raw selector bits -> new cluster index, to dedupe identical selector patterns.
	std::unordered_map<uint32_t, uint32_t> selector_hashmap;
	for (int i = 0; i < static_cast<int>(m_optimized_cluster_selectors.size()); i++)
	{
		if (!selector_cluster_was_used[i])
		{
			old_to_new[i] = -1;
			continue;
		}
		const uint32_t raw_selector_bits = m_optimized_cluster_selectors[i].get_raw_selector_bits();
		auto find_res = selector_hashmap.insert(std::make_pair(raw_selector_bits, total_new_entries));
		if (!find_res.second)
		{
			// Duplicate selector pattern - reuse the previously assigned new index.
			old_to_new[i] = (find_res.first)->second;
			continue;
		}
		old_to_new[i] = total_new_entries++;
		new_to_old.push_back(i);
	}
	debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries);
	// Remap each block's selector cluster index onto the compacted codebook.
	for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
	{
		BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries));
		m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]];
	}
	basisu::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0);
	basisu::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? total_new_entries : 0);
	for (uint32_t i = 0; i < total_new_entries; i++)
	{
		if (m_optimized_cluster_selectors.size())
			new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]];
		//if (m_selector_cluster_block_indices.size())
		//	new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]];
	}
	// Rebuild each cluster's block list from the already-remapped per-block indices.
	for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
	{
		new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i);
	}
	m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors);
	m_selector_cluster_block_indices.swap(new_selector_cluster_indices);
	// This isn't strictly necessary - doing it for completeness/future sanity.
	if (m_selector_clusters_within_each_parent_cluster.size())
	{
		for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
			for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
				m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]];
	}
	debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries);
}
// Encodes every source pixel block to ETC1S into m_etc1_blocks_etc1s, using the
// OpenCL encoder when a context is available and falling back to the multithreaded
// CPU encoder; an OpenCL failure also disables OpenCL for later stages.
void basisu_frontend::init_etc1_images()
{
	debug_printf("basisu_frontend::init_etc1_images\n");
	interval_timer tm;
	tm.start();
	m_etc1_blocks_etc1s.resize(m_total_blocks);
	bool use_cpu = true;
	if (m_params.m_pOpenCL_context)
	{
		// Number of encoding permutations to try scales with the compression level.
		uint32_t total_perms = 64;
		if (m_params.m_compression_level == 0)
			total_perms = 4;
		else if (m_params.m_compression_level == 1)
			total_perms = 16;
		else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
			total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
		bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms);
		if (status)
			use_cpu = false;
		else
		{
			// OpenCL failed - record the failure and fall through to the CPU path.
			error_printf("basisu_frontend::init_etc1_images: opencl_encode_etc1s_blocks() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}
	}
	if (use_cpu)
	{
		// Encode blocks in batches of N per job-pool job.
		const uint32_t N = 4096;
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
			m_params.m_pJob_pool->add_job([this, first_index, last_index] {
				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const pixel_block& source_blk = get_source_pixel_block(block_index);
					etc1_optimizer optimizer;
					etc1_optimizer::params optimizer_params;
					etc1_optimizer::results optimizer_results;
					// Optimizer quality follows the compression level; other levels use the params' default.
					if (m_params.m_compression_level == 0)
						optimizer_params.m_quality = cETCQualityFast;
					else if (m_params.m_compression_level == 1)
						optimizer_params.m_quality = cETCQualityMedium;
					else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
						optimizer_params.m_quality = cETCQualityUber;
					optimizer_params.m_num_src_pixels = 16;
					optimizer_params.m_pSrc_pixels = source_blk.get_ptr();
					optimizer_params.m_perceptual = m_params.m_perceptual;
					uint8_t selectors[16];
					optimizer_results.m_pSelectors = selectors;
					optimizer_results.m_n = 16;
					optimizer.init(optimizer_params, optimizer_results);
					if (!optimizer.compute())
						BASISU_FRONTEND_VERIFY(false);
					// Pack the optimizer's endpoint color, intensity table and
					// selectors into the output ETC1S block (flip bit always set).
					etc_block& blk = m_etc1_blocks_etc1s[block_index];
					memset(&blk, 0, sizeof(blk));
					blk.set_block_color5_etc1s(optimizer_results.m_block_color_unscaled);
					blk.set_inten_tables_etc1s(optimizer_results.m_block_inten_table);
					blk.set_flip_bit(true);
					for (uint32_t y = 0; y < 4; y++)
						for (uint32_t x = 0; x < 4; x++)
							blk.set_selector(x, y, selectors[x + y * 4]);
				}
			});
		}
		m_params.m_pJob_pool->wait_for_all();
	} // use_cpu
	debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
}
  628. void basisu_frontend::init_endpoint_training_vectors()
  629. {
  630. debug_printf("init_endpoint_training_vectors\n");
  631. vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs();
  632. training_vecs.resize(m_total_blocks * 2);
  633. const uint32_t N = 16384;
  634. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  635. {
  636. const uint32_t first_index = block_index_iter;
  637. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  638. m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
  639. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  640. {
  641. const etc_block &blk = m_etc1_blocks_etc1s[block_index];
  642. color_rgba block_colors[2];
  643. blk.get_block_low_high_colors(block_colors, 0);
  644. vec6F v;
  645. v[0] = block_colors[0].r * (1.0f / 255.0f);
  646. v[1] = block_colors[0].g * (1.0f / 255.0f);
  647. v[2] = block_colors[0].b * (1.0f / 255.0f);
  648. v[3] = block_colors[1].r * (1.0f / 255.0f);
  649. v[4] = block_colors[1].g * (1.0f / 255.0f);
  650. v[5] = block_colors[1].b * (1.0f / 255.0f);
  651. training_vecs[block_index * 2 + 0] = std::make_pair(v, 1);
  652. training_vecs[block_index * 2 + 1] = std::make_pair(v, 1);
  653. } // block_index;
  654. } );
  655. } // block_index_iter
  656. m_params.m_pJob_pool->wait_for_all();
  657. }
// Quantizes the endpoint training vectors into at most m_max_endpoint_clusters
// clusters (optionally under a small parent codebook), then builds and validates
// the per-block parent-cluster mapping.
void basisu_frontend::generate_endpoint_clusters()
{
	debug_printf("Begin endpoint quantization\n");
	// Only use a parent codebook when the target codebook is reasonably large.
	const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0;
	uint32_t max_threads = 0;
	max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
	// Never use more threads than the job pool actually has.
	if (m_params.m_pJob_pool)
		max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);
	debug_printf("max_threads: %u\n", max_threads);
	bool status = generate_hierarchical_codebook_threaded(m_endpoint_clusterizer,
		m_params.m_max_endpoint_clusters, m_use_hierarchical_endpoint_codebooks ? parent_codebook_size : 0,
		m_endpoint_clusters,
		m_endpoint_parent_clusters,
		max_threads, m_params.m_pJob_pool, true);
	BASISU_FRONTEND_VERIFY(status);
	if (m_use_hierarchical_endpoint_codebooks)
	{
		// If no parent clusters were produced, fall back to a single parent
		// cluster holding every subblock training vector (2 per block).
		if (!m_endpoint_parent_clusters.size())
		{
			m_endpoint_parent_clusters.resize(0);
			m_endpoint_parent_clusters.resize(1);
			for (uint32_t i = 0; i < m_total_blocks; i++)
			{
				m_endpoint_parent_clusters[0].push_back(i*2);
				m_endpoint_parent_clusters[0].push_back(i*2+1);
			}
		}
		// Parent cluster indices are stored per block in a single byte.
		BASISU_ASSUME(BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE <= UINT8_MAX);
		m_block_parent_endpoint_cluster.resize(0);
		m_block_parent_endpoint_cluster.resize(m_total_blocks);
		vector_set_all(m_block_parent_endpoint_cluster, 0xFF);   // 0xFF = "unassigned" sentinel
		for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_endpoint_parent_clusters.size(); parent_cluster_index++)
		{
			const uint_vec &cluster = m_endpoint_parent_clusters[parent_cluster_index];
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				// Cluster entries are packed as (block_index << 1) | subblock_index.
				const uint32_t block_index = cluster[j] >> 1;
				m_block_parent_endpoint_cluster[block_index] = static_cast<uint8_t>(parent_cluster_index);
			}
		}
		// Every block must have been assigned to some parent cluster.
		for (uint32_t i = 0; i < m_total_blocks; i++)
		{
			BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[i] != 0xFF);
		}
		// Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
		for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
		{
			const uint_vec &cluster = m_endpoint_clusters[cluster_index];
			uint32_t parent_cluster_index = 0;
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				const uint32_t block_index = cluster[j] >> 1;
				BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size());
				if (!j)
				{
					// First member establishes the expected parent cluster.
					parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
				}
				else
				{
					BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[block_index] == parent_cluster_index);
				}
			}
		}
	}
	if (m_params.m_debug_stats)
		debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32());
}
// Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicate which cluster index each block uses.
  726. void basisu_frontend::generate_block_endpoint_clusters()
  727. {
  728. m_block_endpoint_clusters_indices.resize(m_total_blocks);
  729. for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
  730. {
  731. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  732. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  733. {
  734. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  735. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  736. m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index;
  737. } // cluster_indices_iter
  738. }
  739. if (m_params.m_validate)
  740. {
  741. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  742. {
  743. uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0];
  744. uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1];
  745. BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1);
  746. }
  747. }
  748. }
  749. void basisu_frontend::compute_endpoint_clusters_within_each_parent_cluster()
  750. {
  751. generate_block_endpoint_clusters();
  752. m_endpoint_clusters_within_each_parent_cluster.resize(0);
  753. m_endpoint_clusters_within_each_parent_cluster.resize(m_endpoint_parent_clusters.size());
  754. // Note: It's possible that some blocks got moved into the same cluster, but live in different parent clusters.
  755. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  756. {
  757. const uint32_t cluster_index = m_block_endpoint_clusters_indices[block_index][0];
  758. const uint32_t parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
  759. m_endpoint_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
  760. }
  761. for (uint32_t i = 0; i < m_endpoint_clusters_within_each_parent_cluster.size(); i++)
  762. {
  763. uint_vec &cluster_indices = m_endpoint_clusters_within_each_parent_cluster[i];
  764. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  765. vector_sort(cluster_indices);
  766. auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
  767. cluster_indices.erase(last, cluster_indices.end());
  768. }
  769. }
  770. void basisu_frontend::compute_endpoint_subblock_error_vec()
  771. {
  772. m_subblock_endpoint_quant_err_vec.resize(0);
  773. const uint32_t N = 512;
  774. for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
  775. {
  776. const uint32_t first_index = cluster_index_iter;
  777. const uint32_t last_index = minimum<uint32_t>(m_endpoint_clusters.size_u32(), cluster_index_iter + N);
  778. m_params.m_pJob_pool->add_job( [this, first_index, last_index] {
  779. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  780. {
  781. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  782. assert(cluster_indices.size());
  783. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  784. {
  785. basisu::vector<color_rgba> cluster_pixels(8);
  786. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  787. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  788. const bool flipped = true;
  789. const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();
  790. for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
  791. {
  792. cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
  793. }
  794. const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index];
  795. assert(etc_params.m_valid);
  796. color_rgba block_colors[4];
  797. etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true);
  798. uint64_t total_err = 0;
  799. for (uint32_t i = 0; i < 8; i++)
  800. {
  801. const color_rgba &c = cluster_pixels[i];
  802. uint64_t best_err = UINT64_MAX;
  803. //uint32_t best_index = 0;
  804. for (uint32_t s = 0; s < 4; s++)
  805. {
  806. uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
  807. if (err < best_err)
  808. {
  809. best_err = err;
  810. //best_index = s;
  811. }
  812. }
  813. total_err += best_err;
  814. }
  815. subblock_endpoint_quant_err quant_err;
  816. quant_err.m_total_err = total_err;
  817. quant_err.m_cluster_index = cluster_index;
  818. quant_err.m_cluster_subblock_index = cluster_indices_iter;
  819. quant_err.m_block_index = block_index;
  820. quant_err.m_subblock_index = subblock_index;
  821. {
  822. std::lock_guard<std::mutex> lock(m_lock);
  823. m_subblock_endpoint_quant_err_vec.push_back(quant_err);
  824. }
  825. }
  826. } // cluster_index
  827. } );
  828. } // cluster_index_iter
  829. m_params.m_pJob_pool->wait_for_all();
  830. vector_sort(m_subblock_endpoint_quant_err_vec);
  831. }
  832. void basisu_frontend::introduce_new_endpoint_clusters()
  833. {
  834. debug_printf("introduce_new_endpoint_clusters\n");
  835. generate_block_endpoint_clusters();
  836. int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - m_endpoint_clusters.size_u32();
  837. if (num_new_endpoint_clusters <= 0)
  838. return;
  839. compute_endpoint_subblock_error_vec();
  840. const uint32_t num_orig_endpoint_clusters = m_endpoint_clusters.size_u32();
  841. std::unordered_set<uint32_t> training_vector_was_relocated;
  842. uint_vec cluster_sizes(num_orig_endpoint_clusters);
  843. for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
  844. cluster_sizes[i] = m_endpoint_clusters[i].size_u32();
  845. std::unordered_set<uint32_t> ignore_cluster;
  846. uint32_t total_new_clusters = 0;
  847. while (num_new_endpoint_clusters)
  848. {
  849. if (m_subblock_endpoint_quant_err_vec.size() == 0)
  850. break;
  851. subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back());
  852. m_subblock_endpoint_quant_err_vec.pop_back();
  853. if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index))
  854. continue;
  855. uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index;
  856. if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2)
  857. continue;
  858. if (unordered_set_contains(training_vector_was_relocated, training_vector_index))
  859. continue;
  860. if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1))
  861. continue;
  862. #if 0
  863. const uint32_t block_index = subblock_to_move.m_block_index;
  864. const etc_block& blk = m_etc1_blocks_etc1s[block_index];
  865. uint32_t ls, hs;
  866. blk.get_selector_range(ls, hs);
  867. if (ls != hs)
  868. continue;
  869. #endif
  870. //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size();
  871. enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index);
  872. enlarge_vector(m_endpoint_cluster_etc_params, 1);
  873. assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size());
  874. training_vector_was_relocated.insert(training_vector_index);
  875. m_endpoint_clusters.back().push_back(training_vector_index ^ 1);
  876. training_vector_was_relocated.insert(training_vector_index ^ 1);
  877. BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2);
  878. cluster_sizes[subblock_to_move.m_cluster_index] -= 2;
  879. ignore_cluster.insert(subblock_to_move.m_cluster_index);
  880. total_new_clusters++;
  881. num_new_endpoint_clusters--;
  882. }
  883. debug_printf("Introduced %i new endpoint clusters\n", total_new_clusters);
  884. for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
  885. {
  886. uint_vec &cluster_indices = m_endpoint_clusters[i];
  887. uint_vec new_cluster_indices;
  888. for (uint32_t j = 0; j < cluster_indices.size(); j++)
  889. {
  890. uint32_t training_vector_index = cluster_indices[j];
  891. if (!unordered_set_contains(training_vector_was_relocated, training_vector_index))
  892. new_cluster_indices.push_back(training_vector_index);
  893. }
  894. if (cluster_indices.size() != new_cluster_indices.size())
  895. {
  896. BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0);
  897. cluster_indices.swap(new_cluster_indices);
  898. }
  899. }
  900. generate_block_endpoint_clusters();
  901. }
  902. struct color_rgba_hasher
  903. {
  904. inline std::size_t operator()(const color_rgba& k) const
  905. {
  906. uint32_t v = *(const uint32_t*)&k;
  907. //return bitmix32(v);
  908. //v ^= (v << 10);
  909. //v ^= (v >> 12);
  910. return v;
  911. }
  912. };
  913. // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them.
  914. // TODO: Don't optimize endpoint clusters which haven't changed.
  915. // If step>=1, we check to ensure the new endpoint values actually decrease quantization error.
  916. void basisu_frontend::generate_endpoint_codebook(uint32_t step)
  917. {
  918. debug_printf("generate_endpoint_codebook\n");
  919. interval_timer tm;
  920. tm.start();
  921. m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size());
  922. bool use_cpu = true;
  923. // TODO: Get this working when step>0
  924. if (m_params.m_pOpenCL_context && !step)
  925. {
  926. const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size();
  927. basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);
  928. std::vector<color_rgba> input_pixels;
  929. input_pixels.reserve(m_total_blocks * 16);
  930. std::vector<uint32_t> pixel_weights;
  931. pixel_weights.reserve(m_total_blocks * 16);
  932. uint_vec cluster_sizes(total_clusters);
  933. //typedef basisu::hash_map<color_rgba, uint32_t, color_rgba_hasher> color_hasher_type;
  934. //color_hasher_type color_hasher;
  935. //color_hasher.reserve(2048);
  936. interval_timer hash_tm;
  937. hash_tm.start();
  938. basisu::vector<uint32_t> colors, colors2;
  939. colors.reserve(65536);
  940. colors2.reserve(65536);
  941. for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
  942. {
  943. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  944. assert((cluster_indices.size() & 1) == 0);
  945. #if 0
  946. uint64_t first_pixel_index = input_pixels.size();
  947. const uint32_t total_pixels = 16 * (cluster_indices.size() / 2);
  948. input_pixels.resize(input_pixels.size() + total_pixels);
  949. pixel_weights.resize(pixel_weights.size() + total_pixels);
  950. uint64_t dst_ofs = first_pixel_index;
  951. uint64_t total_r = 0, total_g = 0, total_b = 0;
  952. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  953. {
  954. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  955. if (subblock_index)
  956. continue;
  957. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  958. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  959. for (uint32_t i = 0; i < 16; i++)
  960. {
  961. input_pixels[dst_ofs] = pBlock_pixels[i];
  962. pixel_weights[dst_ofs] = 1;
  963. dst_ofs++;
  964. total_r += pBlock_pixels[i].r;
  965. total_g += pBlock_pixels[i].g;
  966. total_b += pBlock_pixels[i].b;
  967. }
  968. }
  969. //printf("%i %f %f %f\n", cluster_index, total_r / (float)total_pixels, total_g / (float)total_pixels, total_b / (float)total_pixels);
  970. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  971. pixel_clusters[cluster_index].m_total_pixels = total_pixels;
  972. cluster_sizes[cluster_index] = total_pixels;
  973. #elif 1
  974. colors.resize(cluster_indices.size() * 8);
  975. colors2.resize(cluster_indices.size() * 8);
  976. uint32_t dst_ofs = 0;
  977. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  978. {
  979. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  980. if (subblock_index)
  981. continue;
  982. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  983. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  984. memcpy(colors.data() + dst_ofs, pBlock_pixels, sizeof(color_rgba) * 16);
  985. dst_ofs += 16;
  986. } // cluster_indices_iter
  987. uint32_t* pSorted = radix_sort((uint32_t)colors.size(), colors.data(), colors2.data(), 0, 3);
  988. const uint64_t first_pixel_index = input_pixels.size();
  989. uint32_t prev_color = 0, cur_weight = 0;
  990. for (uint32_t i = 0; i < colors.size(); i++)
  991. {
  992. uint32_t cur_color = pSorted[i];
  993. if (cur_color == prev_color)
  994. {
  995. if (++cur_weight == 0)
  996. cur_weight--;
  997. }
  998. else
  999. {
  1000. if (cur_weight)
  1001. {
  1002. input_pixels.push_back(*(const color_rgba*)&prev_color);
  1003. pixel_weights.push_back(cur_weight);
  1004. }
  1005. prev_color = cur_color;
  1006. cur_weight = 1;
  1007. }
  1008. }
  1009. if (cur_weight)
  1010. {
  1011. input_pixels.push_back(*(const color_rgba*)&prev_color);
  1012. pixel_weights.push_back(cur_weight);
  1013. }
  1014. uint32_t total_unique_pixels = (uint32_t)(input_pixels.size() - first_pixel_index);
  1015. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  1016. pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
  1017. cluster_sizes[cluster_index] = total_unique_pixels;
  1018. #else
  1019. color_hasher.reset();
  1020. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1021. {
  1022. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1023. if (subblock_index)
  1024. continue;
  1025. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1026. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  1027. uint32_t *pPrev_weight = nullptr;
  1028. color_rgba prev_color;
  1029. {
  1030. color_rgba cur_color = pBlock_pixels[0];
  1031. auto res = color_hasher.insert(cur_color, 0);
  1032. uint32_t& weight = (res.first)->second;
  1033. if (weight != UINT32_MAX)
  1034. weight++;
  1035. prev_color = cur_color;
  1036. pPrev_weight = &(res.first)->second;
  1037. }
  1038. for (uint32_t i = 1; i < 16; i++)
  1039. {
  1040. color_rgba cur_color = pBlock_pixels[i];
  1041. if (cur_color == prev_color)
  1042. {
  1043. if (*pPrev_weight != UINT32_MAX)
  1044. *pPrev_weight = *pPrev_weight + 1;
  1045. }
  1046. else
  1047. {
  1048. auto res = color_hasher.insert(cur_color, 0);
  1049. uint32_t& weight = (res.first)->second;
  1050. if (weight != UINT32_MAX)
  1051. weight++;
  1052. prev_color = cur_color;
  1053. pPrev_weight = &(res.first)->second;
  1054. }
  1055. }
  1056. } // cluster_indices_iter
  1057. const uint64_t first_pixel_index = input_pixels.size();
  1058. uint32_t total_unique_pixels = color_hasher.size();
  1059. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  1060. pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
  1061. input_pixels.resize(first_pixel_index + total_unique_pixels);
  1062. pixel_weights.resize(first_pixel_index + total_unique_pixels);
  1063. uint32_t j = 0;
  1064. for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j)
  1065. {
  1066. input_pixels[first_pixel_index + j] = it->first;
  1067. pixel_weights[first_pixel_index + j] = it->second;
  1068. }
  1069. cluster_sizes[cluster_index] = total_unique_pixels;
  1070. #endif
  1071. } // cluster_index
  1072. debug_printf("Total hash time: %3.3f secs\n", hash_tm.get_elapsed_secs());
  1073. debug_printf("Total unique colors: %llu\n", input_pixels.size());
  1074. uint_vec sorted_cluster_indices_new_to_old(total_clusters);
  1075. indirect_sort(total_clusters, sorted_cluster_indices_new_to_old.data(), cluster_sizes.data());
  1076. //for (uint32_t i = 0; i < total_clusters; i++)
  1077. // sorted_cluster_indices_new_to_old[i] = i;
  1078. uint_vec sorted_cluster_indices_old_to_new(total_clusters);
  1079. for (uint32_t i = 0; i < total_clusters; i++)
  1080. sorted_cluster_indices_old_to_new[sorted_cluster_indices_new_to_old[i]] = i;
  1081. basisu::vector<cl_pixel_cluster> sorted_pixel_clusters(total_clusters);
  1082. for (uint32_t i = 0; i < total_clusters; i++)
  1083. sorted_pixel_clusters[i] = pixel_clusters[sorted_cluster_indices_new_to_old[i]];
  1084. uint32_t total_perms = 64;
  1085. if (m_params.m_compression_level <= 1)
  1086. total_perms = 16;
  1087. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  1088. total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
  1089. basisu::vector<etc_block> output_blocks(total_clusters);
  1090. if (opencl_encode_etc1s_pixel_clusters(
  1091. m_params.m_pOpenCL_context,
  1092. output_blocks.data(),
  1093. total_clusters,
  1094. sorted_pixel_clusters.data(),
  1095. input_pixels.size(),
  1096. input_pixels.data(),
  1097. pixel_weights.data(),
  1098. m_params.m_perceptual, total_perms))
  1099. {
  1100. for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
  1101. {
  1102. const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index];
  1103. const etc_block& blk = output_blocks[new_cluster_index];
  1104. endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index];
  1105. prev_etc_params.m_valid = true;
  1106. etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false);
  1107. prev_etc_params.m_inten_table[0] = blk.get_inten_table(0);
  1108. prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this
  1109. }
  1110. use_cpu = false;
  1111. }
  1112. else
  1113. {
  1114. error_printf("basisu_frontend::generate_endpoint_codebook: opencl_encode_etc1s_pixel_clusters() failed! Using CPU.\n");
  1115. m_params.m_pOpenCL_context = nullptr;
  1116. m_opencl_failed = true;
  1117. }
  1118. } // if (opencl_is_available() && m_params.m_use_opencl)
  1119. if (use_cpu)
  1120. {
  1121. const uint32_t N = 128;
  1122. for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
  1123. {
  1124. const uint32_t first_index = cluster_index_iter;
  1125. const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
  1126. m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {
  1127. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  1128. {
  1129. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  1130. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  1131. const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8;
  1132. basisu::vector<color_rgba> cluster_pixels(total_pixels);
  1133. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1134. {
  1135. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1136. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1137. const bool flipped = true;
  1138. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  1139. for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
  1140. {
  1141. const color_rgba& c = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
  1142. cluster_pixels[cluster_indices_iter * 8 + pixel_index] = c;
  1143. }
  1144. }
  1145. endpoint_cluster_etc_params new_subblock_params;
  1146. {
  1147. etc1_optimizer optimizer;
  1148. etc1_solution_coordinates solutions[2];
  1149. etc1_optimizer::params cluster_optimizer_params;
  1150. cluster_optimizer_params.m_num_src_pixels = total_pixels;
  1151. cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
  1152. cluster_optimizer_params.m_use_color4 = false;
  1153. cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
  1154. if (m_params.m_compression_level <= 1)
  1155. cluster_optimizer_params.m_quality = cETCQualityMedium;
  1156. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  1157. cluster_optimizer_params.m_quality = cETCQualityUber;
  1158. etc1_optimizer::results cluster_optimizer_results;
  1159. basisu::vector<uint8_t> cluster_selectors(total_pixels);
  1160. cluster_optimizer_results.m_n = total_pixels;
  1161. cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
  1162. optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
  1163. if (!optimizer.compute())
  1164. BASISU_FRONTEND_VERIFY(false);
  1165. new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
  1166. new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
  1167. new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error;
  1168. }
  1169. endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[cluster_index];
  1170. bool use_new_subblock_params = false;
  1171. if ((!step) || (!prev_etc_params.m_valid))
  1172. use_new_subblock_params = true;
  1173. else
  1174. {
  1175. assert(prev_etc_params.m_valid);
  1176. uint64_t total_prev_err = 0;
  1177. {
  1178. color_rgba block_colors[4];
  1179. etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false);
  1180. uint64_t total_err = 0;
  1181. for (uint32_t i = 0; i < total_pixels; i++)
  1182. {
  1183. const color_rgba& c = cluster_pixels[i];
  1184. uint64_t best_err = UINT64_MAX;
  1185. //uint32_t best_index = 0;
  1186. for (uint32_t s = 0; s < 4; s++)
  1187. {
  1188. uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
  1189. if (err < best_err)
  1190. {
  1191. best_err = err;
  1192. //best_index = s;
  1193. }
  1194. }
  1195. total_err += best_err;
  1196. }
  1197. total_prev_err += total_err;
  1198. }
  1199. // See if we should update this cluster's endpoints (if the error has actually fallen)
  1200. if (total_prev_err > new_subblock_params.m_color_error[0])
  1201. {
  1202. use_new_subblock_params = true;
  1203. }
  1204. }
  1205. if (use_new_subblock_params)
  1206. {
  1207. new_subblock_params.m_valid = true;
  1208. prev_etc_params = new_subblock_params;
  1209. }
  1210. } // cluster_index
  1211. });
  1212. } // cluster_index_iter
  1213. m_params.m_pJob_pool->wait_for_all();
  1214. }
  1215. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1216. }
  1217. bool basisu_frontend::check_etc1s_constraints() const
  1218. {
  1219. basisu::vector<vec2U> block_clusters(m_total_blocks);
  1220. for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
  1221. {
  1222. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  1223. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1224. {
  1225. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1226. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1227. block_clusters[block_index][subblock_index] = cluster_index;
  1228. } // cluster_indices_iter
  1229. }
  1230. for (uint32_t i = 0; i < m_total_blocks; i++)
  1231. {
  1232. if (block_clusters[i][0] != block_clusters[i][1])
  1233. return false;
  1234. }
  1235. return true;
  1236. }
// For each block, determine which ETC1S endpoint cluster can encode that block with lowest error.
// This reassigns blocks to different endpoint clusters.
// Returns the number of subblocks that moved to a different cluster.
uint32_t basisu_frontend::refine_endpoint_clusterization()
{
	debug_printf("refine_endpoint_clusterization\n");

	if (m_use_hierarchical_endpoint_codebooks)
		compute_endpoint_clusters_within_each_parent_cluster();

	// Note: It's possible that an endpoint cluster may live in more than one parent cluster after the first refinement step.

	// Build a per-block table of the endpoint cluster each subblock currently belongs to.
	// Each m_endpoint_clusters entry packs (block_index << 1) | subblock_index.
	basisu::vector<vec2U> block_clusters(m_total_blocks);

	for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
	{
		const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];

		for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
		{
			const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
			const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;

			block_clusters[block_index][subblock_index] = cluster_index;
		} // cluster_indices_iter
	}

	//----------------------------------------------------------
	// Create a new endpoint clusterization

	interval_timer tm;
	tm.start();

	// Filled in by either the OpenCL or the CPU path: best cluster for each block.
	uint_vec best_cluster_indices(m_total_blocks);

	bool use_cpu = true;

	// TODO: Support non-hierarchical endpoint codebooks here
	if (m_params.m_pOpenCL_context && m_use_hierarchical_endpoint_codebooks)
	{
		// For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
		// We also prepare an array of block info structs that point into this new parent endpoint cluster array.
		const uint32_t total_parent_clusters = (uint32_t)m_endpoint_clusters_within_each_parent_cluster.size();

		basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);

		// the size of each parent cluster, in total clusters
		uint_vec parent_cluster_sizes(total_parent_clusters);
		for (uint32_t i = 0; i < total_parent_clusters; i++)
			parent_cluster_sizes[i] = (uint32_t)m_endpoint_clusters_within_each_parent_cluster[i].size();

		// Prefix-sum the parent cluster sizes to get each parent's starting offset in the flattened cluster array.
		uint_vec first_parent_cluster_ofs(total_parent_clusters);
		uint32_t cur_ofs = 0;
		for (uint32_t i = 0; i < total_parent_clusters; i++)
		{
			first_parent_cluster_ofs[i] = cur_ofs;
			cur_ofs += parent_cluster_sizes[i];
		}

		// Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step.
		BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size());

		const uint32_t total_actual_endpoint_clusters = cur_ofs;
		basisu::vector<cl_endpoint_cluster_struct> cl_endpoint_cluster_structs(total_actual_endpoint_clusters);

		// Flatten each parent's endpoint clusters (unscaled base color, intensity table, cluster index) for the kernel.
		for (uint32_t i = 0; i < total_parent_clusters; i++)
		{
			const uint32_t dst_ofs = first_parent_cluster_ofs[i];
			const uint32_t parent_cluster_size = parent_cluster_sizes[i];

			assert(m_endpoint_clusters_within_each_parent_cluster[i].size() == parent_cluster_size);

			for (uint32_t j = 0; j < parent_cluster_size; j++)
			{
				const uint32_t endpoint_cluster_index = m_endpoint_clusters_within_each_parent_cluster[i][j];

				color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_unscaled[0]);
				uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[endpoint_cluster_index].m_inten_table[0];

				cl_endpoint_cluster_structs[dst_ofs + j].m_unscaled_color = cluster_etc_base_color;
				cl_endpoint_cluster_structs[dst_ofs + j].m_etc_inten = (uint8_t)cluster_etc_inten;
				cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index;
			}
		}

		// For each block: which slice of candidate clusters to search (its parent's slice), plus its current assignment.
		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index];

			cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]);
			cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]);

			const uint32_t block_cluster_index = block_clusters[block_index][0];
			cl_block_info_structs[block_index].m_cur_cluster_index = (uint16_t)block_cluster_index;
			cl_block_info_structs[block_index].m_cur_cluster_etc_inten = (uint8_t)m_endpoint_cluster_etc_params[block_cluster_index].m_inten_table[0];
		}

		uint_vec block_cluster_indices(m_total_blocks);
		for (uint32_t i = 0; i < m_total_blocks; i++)
			block_cluster_indices[i] = block_clusters[i][0];

		// Sort the block indices by their current cluster index before handing them to the kernel.
		uint_vec sorted_block_indices(m_total_blocks);
		indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data());

		bool status = opencl_refine_endpoint_clusterization(
			m_params.m_pOpenCL_context,
			cl_block_info_structs.data(),
			total_actual_endpoint_clusters,
			cl_endpoint_cluster_structs.data(),
			sorted_block_indices.data(),
			best_cluster_indices.data(),
			m_params.m_perceptual);

		if (status)
		{
			use_cpu = false;
		}
		else
		{
			// OpenCL failed - fall back to the CPU path and disable OpenCL for the rest of the run.
			error_printf("basisu_frontend::refine_endpoint_clusterization: opencl_refine_endpoint_clusterization() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}
	}

	if (use_cpu)
	{
		// Process blocks in batches of N on the job pool; each job finds the lowest-error cluster for its blocks.
		const uint32_t N = 1024;
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const uint32_t cluster_index = block_clusters[block_index][0];
					BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]);

					const color_rgba* pSubblock_pixels = get_source_pixel_block(block_index).get_ptr();
					const uint32_t num_subblock_pixels = 16;

					uint64_t best_cluster_err = INT64_MAX;
					uint32_t best_cluster_index = 0;

					// With hierarchical codebooks, only the clusters inside this block's parent cluster are searched;
					// otherwise every endpoint cluster is a candidate.
					const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0;
					const uint_vec* pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr;

					const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size();

					for (uint32_t i = 0; i < total_clusters; i++)
					{
						const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? (*pCluster_indices)[i] : i;

						color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]);
						uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0];

						uint64_t total_err = 0;

						const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0];
						const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0];
						color_rgba subblock_colors[4];
						// Can't assign it here - may result in too much error when selector quant occurs
						if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0])
						{
							// Skip candidates with a higher intensity table than the current cluster;
							// INT64_MAX marks the candidate as worst-possible.
							total_err = INT64_MAX;
							goto skip_cluster;
						}

						etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten);

#if 0
						for (uint32_t p = 0; p < num_subblock_pixels; p++)
						{
							uint64_t best_err = UINT64_MAX;

							for (uint32_t r = low_selector; r <= high_selector; r++)
							{
								uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false);
								best_err = minimum(best_err, err);
								if (!best_err)
									break;
							}

							total_err += best_err;
							if (total_err > best_cluster_err)
								break;
						} // p
#else
						if (m_params.m_perceptual)
						{
							if (!g_cpu_supports_sse41)
							{
								// Scalar path: per-pixel, take the best of the 4 block colors; early out
								// once this candidate is already worse than the best so far.
								for (uint32_t p = 0; p < num_subblock_pixels; p++)
								{
									uint64_t best_err = UINT64_MAX;

									for (uint32_t r = low_selector; r <= high_selector; r++)
									{
										uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false);
										best_err = minimum(best_err, err);
										if (!best_err)
											break;
									}

									total_err += best_err;
									if (total_err > best_cluster_err)
										break;
								} // p
							}
							else
							{
#if BASISU_SUPPORT_SSE
								find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
#endif
							}
						}
						else
						{
							if (!g_cpu_supports_sse41)
							{
								for (uint32_t p = 0; p < num_subblock_pixels; p++)
								{
									uint64_t best_err = UINT64_MAX;

									for (uint32_t r = low_selector; r <= high_selector; r++)
									{
										uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false);
										best_err = minimum(best_err, err);
										if (!best_err)
											break;
									}

									total_err += best_err;
									if (total_err > best_cluster_err)
										break;
								} // p
							}
							else
							{
#if BASISU_SUPPORT_SSE
								find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
#endif
							}
						}
#endif

					skip_cluster:
						// Ties go to the block's current cluster, so blocks only move on a strict improvement.
						if ((total_err < best_cluster_err) ||
							((cluster_iter == cluster_index) && (total_err == best_cluster_err)))
						{
							best_cluster_err = total_err;
							best_cluster_index = cluster_iter;

							if (!best_cluster_err)
								break;
						}
					} // j

					best_cluster_indices[block_index] = best_cluster_index;

				} // block_index

			});

		} // block_index_iter

		m_params.m_pJob_pool->wait_for_all();

	} // use_cpu

	debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs());

	// Rebuild the endpoint clusters from the new per-block assignments; both subblocks of a block
	// always land in the same (best) cluster.
	basisu::vector<typename basisu::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size());
	uint32_t total_subblocks_reassigned = 0;

	for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
	{
		const uint32_t training_vector_index = block_index * 2 + 0;

		const uint32_t orig_cluster_index = block_clusters[block_index][0];
		const uint32_t best_cluster_index = best_cluster_indices[block_index];

		optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index);
		optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1);

		if (best_cluster_index != orig_cluster_index)
		{
			total_subblocks_reassigned++;
		}
	}

	debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned);

	m_endpoint_clusters = optimized_endpoint_clusters;

	return total_subblocks_reassigned;
}
  1471. void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters()
  1472. {
  1473. debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n");
  1474. // Step 1: Sort endpoint clusters by the base colors/intens
  1475. uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size());
  1476. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  1477. sorted_endpoint_cluster_indices[i] = i;
  1478. indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]);
  1479. basisu::vector<basisu::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size());
  1480. basisu::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size());
  1481. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  1482. {
  1483. uint32_t j = sorted_endpoint_cluster_indices[i];
  1484. new_endpoint_clusters[i] = m_endpoint_clusters[j];
  1485. new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j];
  1486. }
  1487. new_endpoint_clusters.swap(m_endpoint_clusters);
  1488. new_subblock_etc_params.swap(m_endpoint_cluster_etc_params);
  1489. // Step 2: Eliminate redundant endpoint clusters, or empty endpoint clusters
  1490. new_endpoint_clusters.resize(0);
  1491. new_subblock_etc_params.resize(0);
  1492. for (int i = 0; i < (int)m_endpoint_clusters.size(); )
  1493. {
  1494. if (!m_endpoint_clusters[i].size())
  1495. {
  1496. i++;
  1497. continue;
  1498. }
  1499. int j;
  1500. for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++)
  1501. {
  1502. if (!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j]))
  1503. break;
  1504. }
  1505. new_endpoint_clusters.push_back(m_endpoint_clusters[i]);
  1506. new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]);
  1507. for (int k = i + 1; k < j; k++)
  1508. {
  1509. append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]);
  1510. }
  1511. i = j;
  1512. }
  1513. if (m_endpoint_clusters.size() != new_endpoint_clusters.size())
  1514. {
  1515. if (m_params.m_debug_stats)
  1516. debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size()));
  1517. m_endpoint_clusters.swap(new_endpoint_clusters);
  1518. m_endpoint_cluster_etc_params.swap(new_subblock_etc_params);
  1519. }
  1520. }
// Packs every block into m_encoded_blocks using its assigned endpoint cluster's
// base color/intensity, choosing per-pixel selectors to minimize error.
// A copy of the result is kept in m_orig_encoded_blocks.
void basisu_frontend::create_initial_packed_texture()
{
	debug_printf("create_initial_packed_texture\n");

	interval_timer tm;
	tm.start();

	bool use_cpu = true;

	// Try the OpenCL path first, when a context is available.
	if ((m_params.m_pOpenCL_context) && (opencl_is_available()))
	{
		// Pack each block's cluster base color (RGB) and intensity table index (4th channel) for the kernel.
		basisu::vector<color_rgba> block_etc5_color_intens(m_total_blocks);

		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];

			const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0];
			uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0];

			block_etc5_color_intens[block_index].set(color_unscaled.r, color_unscaled.g, color_unscaled.b, inten);
		}

		bool status = opencl_determine_selectors(m_params.m_pOpenCL_context, block_etc5_color_intens.data(),
			m_encoded_blocks.data(),
			m_params.m_perceptual);
		if (!status)
		{
			// OpenCL failed - fall back to the CPU path and disable OpenCL for the rest of the run.
			error_printf("basisu_frontend::create_initial_packed_texture: opencl_determine_selectors() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}
		else
		{
			use_cpu = false;
		}
	}

	if (use_cpu)
	{
		// Pack blocks in batches of N on the job pool.
		const uint32_t N = 4096;
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index] {

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					// Both subblocks must share the same endpoint cluster (the ETC1S constraint).
					uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
					uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1];
					BASISU_FRONTEND_VERIFY(cluster0 == cluster1);

					const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();

					etc_block& blk = m_encoded_blocks[block_index];

					color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] };
					uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] };

					// Write the endpoints/intensity tables, then pick the lowest-error selectors per pixel.
					blk.set_block_color5(unscaled[0], unscaled[1]);
					blk.set_flip_bit(true);

					blk.set_inten_table(0, inten[0]);
					blk.set_inten_table(1, inten[1]);

					blk.determine_selectors(pSource_pixels, m_params.m_perceptual);

				} // block_index

			});

		} // block_index_iter

		m_params.m_pJob_pool->wait_for_all();

	} // use_cpu

	// Snapshot the initial packed blocks for later reference.
	m_orig_encoded_blocks = m_encoded_blocks;

	debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
}
  1581. void basisu_frontend::compute_selector_clusters_within_each_parent_cluster()
  1582. {
  1583. uint_vec block_selector_cluster_indices(m_total_blocks);
  1584. for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_block_indices.size()); cluster_index++)
  1585. {
  1586. const basisu::vector<uint32_t>& cluster_indices = m_selector_cluster_block_indices[cluster_index];
  1587. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1588. {
  1589. const uint32_t block_index = cluster_indices[cluster_indices_iter];
  1590. block_selector_cluster_indices[block_index] = cluster_index;
  1591. } // cluster_indices_iter
  1592. } // cluster_index
  1593. m_selector_clusters_within_each_parent_cluster.resize(0);
  1594. m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size());
  1595. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1596. {
  1597. const uint32_t cluster_index = block_selector_cluster_indices[block_index];
  1598. const uint32_t parent_cluster_index = m_block_parent_selector_cluster[block_index];
  1599. m_selector_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
  1600. }
  1601. for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
  1602. {
  1603. uint_vec &cluster_indices = m_selector_clusters_within_each_parent_cluster[i];
  1604. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  1605. vector_sort(cluster_indices);
  1606. auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
  1607. cluster_indices.erase(last, cluster_indices.end());
  1608. }
  1609. }
// Builds the selector codebook: each block's 4x4 selectors become a weighted
// 16-dim training vector, which is clusterized (optionally hierarchically into
// parent clusters). Fills m_selector_cluster_block_indices,
// m_selector_parent_cluster_block_indices and m_block_parent_selector_cluster.
void basisu_frontend::generate_selector_clusters()
{
	debug_printf("generate_selector_clusters\n");

	typedef tree_vector_quant<vec16F> vec16F_clusterizer;

	// One 16-dim training vector per block (the 4x4 selector grid), plus a per-block weight.
	vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks);

	const uint32_t N = 4096;
	for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
	{
		const uint32_t first_index = block_index_iter;
		const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

		m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {

			for (uint32_t block_index = first_index; block_index < last_index; block_index++)
			{
				const etc_block &blk = m_encoded_blocks[block_index];

				// Flatten the block's 4x4 selector grid into a 16-component float vector.
				vec16F v;
				for (uint32_t y = 0; y < 4; y++)
					for (uint32_t x = 0; x < 4; x++)
						v[x + y * 4] = static_cast<float>(blk.get_selector(x, y));

				// Weight each training vector by the color distance between the subblock's
				// low/high colors (using the subblock with the larger intensity table).
				const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1;

				color_rgba block_colors[2];
				blk.get_block_low_high_colors(block_colors, subblock_index);

				const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[1], false);

				const uint32_t cColorDistToWeight = 300;
				const uint32_t cMaxWeight = 4096;
				uint32_t weight = clamp<uint32_t>(dist / cColorDistToWeight, 1, cMaxWeight);

				training_vecs[block_index].first = v;
				training_vecs[block_index].second = weight;

			} // block_index

		} );

	} // block_index_iter

	m_params.m_pJob_pool->wait_for_all();

	vec16F_clusterizer selector_clusterizer;
	for (uint32_t i = 0; i < m_total_blocks; i++)
		selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second);

	// A parent codebook is only requested when the selector codebook is large enough (>= 256 clusters).
	const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT;
	const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0;
	debug_printf("Using selector parent codebook size %u\n", parent_codebook_size);

	// Thread count: capped by hardware concurrency, cMaxCodebookCreationThreads, and the job pool size.
	uint32_t max_threads = 0;
	max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
	if (m_params.m_pJob_pool)
		max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);

	bool status = generate_hierarchical_codebook_threaded(selector_clusterizer,
		m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? parent_codebook_size : 0,
		m_selector_cluster_block_indices,
		m_selector_parent_cluster_block_indices,
		max_threads, m_params.m_pJob_pool, false);
	BASISU_FRONTEND_VERIFY(status);

	if (m_use_hierarchical_selector_codebooks)
	{
		// If no parent clusters were produced, fall back to a single parent cluster containing every block.
		if (!m_selector_parent_cluster_block_indices.size())
		{
			m_selector_parent_cluster_block_indices.resize(0);
			m_selector_parent_cluster_block_indices.resize(1);
			for (uint32_t i = 0; i < m_total_blocks; i++)
				m_selector_parent_cluster_block_indices[0].push_back(i);
		}

		// Parent cluster indices are stored below as uint8_t, so the codebook sizes must fit in a byte.
		BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX);
		BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX);

		// Build the block -> parent selector cluster map; 0xFF marks "unassigned" for the check below.
		m_block_parent_selector_cluster.resize(0);
		m_block_parent_selector_cluster.resize(m_total_blocks);
		vector_set_all(m_block_parent_selector_cluster, 0xFF);

		for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++)
		{
			const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index];
			for (uint32_t j = 0; j < cluster.size(); j++)
				m_block_parent_selector_cluster[cluster[j]] = static_cast<uint8_t>(parent_cluster_index);
		}

		// Every block must have been assigned to some parent cluster.
		for (uint32_t i = 0; i < m_total_blocks; i++)
		{
			BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[i] != 0xFF);
		}

		// Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
		for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++)
		{
			const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index];

			uint32_t parent_cluster_index = 0;
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				const uint32_t block_index = cluster[j];
				if (!j)
				{
					parent_cluster_index = m_block_parent_selector_cluster[block_index];
				}
				else
				{
					BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[block_index] == parent_cluster_index);
				}
			}
		}
	}

	debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size());
}
  1702. void basisu_frontend::create_optimized_selector_codebook(uint32_t iter)
  1703. {
  1704. debug_printf("create_optimized_selector_codebook\n");
  1705. interval_timer tm;
  1706. tm.start();
  1707. const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
  1708. debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size());
  1709. m_optimized_cluster_selectors.resize(total_selector_clusters);
  1710. // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector.
  1711. const uint32_t N = 256;
  1712. for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N)
  1713. {
  1714. const uint32_t first_index = cluster_index_iter;
  1715. const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);
  1716. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  1717. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  1718. {
  1719. const basisu::vector<uint32_t>& cluster_block_indices = m_selector_cluster_block_indices[cluster_index];
  1720. if (!cluster_block_indices.size())
  1721. continue;
  1722. uint64_t overall_best_err = 0;
  1723. (void)overall_best_err;
  1724. uint64_t total_err[4][4][4];
  1725. clear_obj(total_err);
  1726. for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++)
  1727. {
  1728. const uint32_t block_index = cluster_block_indices[cluster_block_index];
  1729. const etc_block& blk = m_encoded_blocks[block_index];
  1730. color_rgba blk_colors[4];
  1731. blk.get_block_colors(blk_colors, 0);
  1732. for (uint32_t y = 0; y < 4; y++)
  1733. {
  1734. for (uint32_t x = 0; x < 4; x++)
  1735. {
  1736. const color_rgba& orig_color = get_source_pixel_block(block_index)(x, y);
  1737. if (m_params.m_perceptual)
  1738. {
  1739. for (uint32_t s = 0; s < 4; s++)
  1740. total_err[y][x][s] += color_distance(true, blk_colors[s], orig_color, false);
  1741. }
  1742. else
  1743. {
  1744. for (uint32_t s = 0; s < 4; s++)
  1745. total_err[y][x][s] += color_distance(false, blk_colors[s], orig_color, false);
  1746. }
  1747. } // x
  1748. } // y
  1749. } // cluster_block_index
  1750. for (uint32_t y = 0; y < 4; y++)
  1751. {
  1752. for (uint32_t x = 0; x < 4; x++)
  1753. {
  1754. uint64_t best_err = total_err[y][x][0];
  1755. uint8_t best_sel = 0;
  1756. for (uint32_t s = 1; s < 4; s++)
  1757. {
  1758. if (total_err[y][x][s] < best_err)
  1759. {
  1760. best_err = total_err[y][x][s];
  1761. best_sel = (uint8_t)s;
  1762. }
  1763. }
  1764. m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_sel);
  1765. overall_best_err += best_err;
  1766. } // x
  1767. } // y
  1768. } // cluster_index
  1769. });
  1770. } // cluster_index_iter
  1771. m_params.m_pJob_pool->wait_for_all();
  1772. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1773. if (m_params.m_debug_images)
  1774. {
  1775. uint32_t max_selector_cluster_size = 0;
  1776. for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
  1777. max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size());
  1778. if ((max_selector_cluster_size * 5) < 32768)
  1779. {
  1780. const uint32_t x_spacer_len = 16;
  1781. image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5);
  1782. for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
  1783. {
  1784. const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index];
  1785. for (uint32_t y = 0; y < 4; y++)
  1786. for (uint32_t x = 0; x < 4; x++)
  1787. selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3));
  1788. for (uint32_t i = 0; i < cluster_block_indices.size(); i++)
  1789. {
  1790. uint32_t block_index = cluster_block_indices[i];
  1791. const etc_block &blk = m_orig_encoded_blocks[block_index];
  1792. for (uint32_t y = 0; y < 4; y++)
  1793. for (uint32_t x = 0; x < 4; x++)
  1794. selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3));
  1795. }
  1796. }
  1797. char buf[256];
  1798. snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter);
  1799. save_png(buf, selector_cluster_vis);
  1800. }
  1801. }
  1802. }
// For each block: Determine which quantized selectors best encode that block, given its quantized endpoints.
// Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end.
//
// Rebuilds m_block_selector_cluster_index and m_selector_cluster_block_indices so every block
// references the optimized selector codebook entry with minimum error, and writes that entry's
// raw selector bits into m_encoded_blocks. Uses an OpenCL path when available, otherwise a
// multithreaded CPU path with prefix-error early-out pruning.
void basisu_frontend::find_optimal_selector_clusters_for_each_block()
{
	debug_printf("find_optimal_selector_clusters_for_each_block\n");

	interval_timer tm;
	tm.start();

	if (m_params.m_validate)
	{
		// Sanity checks
		BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size());
		for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
		{
			for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
			{
				// Every child cluster index must reference a valid codebook entry.
				BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size());
			}
		}
	}

	m_block_selector_cluster_index.resize(m_total_blocks);

	if (m_params.m_compression_level == 0)
	{
		// Just leave the blocks in their original selector clusters.
		for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
		{
			for (uint32_t j = 0; j < m_selector_cluster_block_indices[selector_cluster_index].size(); j++)
			{
				const uint32_t block_index = m_selector_cluster_block_indices[selector_cluster_index][j];

				m_block_selector_cluster_index[block_index] = selector_cluster_index;

				etc_block& blk = m_encoded_blocks[block_index];
				blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_cluster_index].get_raw_selector_bits());
			}
		}

		debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());

		return;
	}

	bool use_cpu = true;

	// OpenCL path: requires the hierarchical selector codebook so each block only has to
	// evaluate the candidate selectors inside its parent cluster.
	if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
	{
		const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size_u32();

		basisu::vector<fosc_selector_struct> selector_structs;
		selector_structs.reserve(m_optimized_cluster_selectors.size());

		// Per-parent-cluster offset into the flattened selector array below.
		uint_vec parent_selector_cluster_offsets(num_parent_clusters);

		uint_vec selector_cluster_indices;
		selector_cluster_indices.reserve(m_optimized_cluster_selectors.size());

		uint32_t cur_ofs = 0;
		for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++)
		{
			parent_selector_cluster_offsets[parent_index] = cur_ofs;

			for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++)
			{
				const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j];

				// Pack the 4x4 selectors into one 32-bit word: 2 bits per selector,
				// pixel p lives at (p & 3, p >> 2).
				uint32_t sel_bits = 0;
				for (uint32_t p = 0; p < 16; p++)
					sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2));

				selector_structs.enlarge(1)->m_packed_selectors = sel_bits;

				selector_cluster_indices.push_back(selector_cluster_index);
			}

			cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size_u32();
		}

		const uint32_t total_input_selectors = cur_ofs;

		// Per-block kernel input: ETC1S color/intensity plus the range of candidate selectors.
		basisu::vector<fosc_block_struct> block_structs(m_total_blocks);
		for (uint32_t i = 0; i < m_total_blocks; i++)
		{
			const uint32_t parent_selector_cluster = m_block_parent_selector_cluster[i];

			const etc_block& blk = m_encoded_blocks[i];
			blk.unpack_color5(block_structs[i].m_etc_color5_inten, blk.get_base5_color(), false);

			// The intensity table index is carried in the alpha channel.
			block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
			block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
			block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size_u32();
		}

		uint_vec output_selector_cluster_indices(m_total_blocks);

		bool status = opencl_find_optimal_selector_clusters_for_each_block(
			m_params.m_pOpenCL_context,
			block_structs.data(),
			total_input_selectors,
			selector_structs.data(),
			selector_cluster_indices.data(),
			output_selector_cluster_indices.data(),
			m_params.m_perceptual);

		if (!status)
		{
			// GPU path failed: permanently disable OpenCL and fall through to the CPU path.
			error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}
		else
		{
			// Apply the GPU results: reset the cluster->block lists, then assign each block
			// its best cluster's selectors and record the mapping.
			for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
			{
				m_selector_cluster_block_indices[i].resize(0);
				m_selector_cluster_block_indices[i].reserve(128);
			}

			for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
			{
				etc_block& blk = m_encoded_blocks[block_index];

				uint32_t best_cluster_index = output_selector_cluster_indices[block_index];

				blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());

				m_block_selector_cluster_index[block_index] = best_cluster_index;

				vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
				m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
			}

			use_cpu = false;
		}
	}

	if (use_cpu)
	{
		// Unpack every codebook entry's selectors to one byte per pixel for fast lookups.
		basisu::vector<uint8_t> unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size());
		for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++)
		{
			for (uint32_t y = 0; y < 4; y++)
			{
				for (uint32_t x = 0; x < 4; x++)
				{
					unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y);
				}
			}
		}

		const uint32_t N = 2048; // blocks processed per job
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {

				int prev_best_cluster_index = 0;

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const pixel_block& block = get_source_pixel_block(block_index);

					etc_block& blk = m_encoded_blocks[block_index];

					// Fast path: a block with identical source pixels to its predecessor reuses
					// the previous result. Only taken when block_index > first_index, so
					// prev_best_cluster_index is always valid here.
					if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1)))
					{
						blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits());

						m_block_selector_cluster_index[block_index] = prev_best_cluster_index;

						continue;
					}

					const color_rgba* pBlock_pixels = block.get_ptr();

					color_rgba trial_block_colors[4];
					blk.get_block_colors_etc1s(trial_block_colors);

					// precompute errors for the i-th block pixel and selector sel: [sel][i]
					uint32_t trial_errors[4][16];

					if (m_params.m_perceptual)
					{
						for (uint32_t sel = 0; sel < 4; ++sel)
							for (uint32_t i = 0; i < 16; ++i)
								trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false);
					}
					else
					{
						for (uint32_t sel = 0; sel < 4; ++sel)
							for (uint32_t i = 0; i < 16; ++i)
								trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false);
					}

					// Compute the minimum possible errors (given any selectors) for pixels 0-15
					uint64_t min_possible_error_0_15 = 0;
					for (uint32_t i = 0; i < 16; i++)
						min_possible_error_0_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);

					// Compute the minimum possible errors (given any selectors) for pixels 4-15
					uint64_t min_possible_error_4_15 = 0;
					for (uint32_t i = 4; i < 16; i++)
						min_possible_error_4_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);

					// Compute the minimum possible errors (given any selectors) for pixels 8-15
					uint64_t min_possible_error_8_15 = 0;
					for (uint32_t i = 8; i < 16; i++)
						min_possible_error_8_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);

					// Compute the minimum possible errors (given any selectors) for pixels 12-15
					uint64_t min_possible_error_12_15 = 0;
					for (uint32_t i = 12; i < 16; i++)
						min_possible_error_12_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);

					uint64_t best_cluster_err = INT64_MAX;
					uint32_t best_cluster_index = 0;

					// With hierarchical codebooks, only the clusters inside this block's parent
					// cluster are candidates; otherwise every cluster is tried.
					const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0;
					const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr;

					const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size();

#if 0
					// Reference implementation (disabled): straightforward per-pixel scan with a
					// simple running-error early out.
					for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
					{
						const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;

						const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index];

						uint64_t trial_err = 0;

						for (int y = 0; y < 4; y++)
						{
							for (int x = 0; x < 4; x++)
							{
								const uint32_t sel = cluster_blk.get_selector(x, y);

								trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false);
								if (trial_err > best_cluster_err)
									goto early_out;
							}
						}

						if (trial_err < best_cluster_err)
						{
							best_cluster_err = trial_err;
							best_cluster_index = cluster_index;
							if (!best_cluster_err)
								break;
						}

					early_out:
						;

					}
#else
					// Optimized scan: accumulate error one row (4 pixels) at a time and bail out
					// as soon as the partial error plus the best-case error of the remaining
					// pixels can no longer beat the current best.
					for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
					{
						const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;

						const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16];

						uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3];
						if ((trial_err + min_possible_error_4_15) >= best_cluster_err)
							continue;

						trial_err += (uint64_t)trial_errors[pSels[4]][4] + trial_errors[pSels[5]][5] + trial_errors[pSels[6]][6] + trial_errors[pSels[7]][7];
						if ((trial_err + min_possible_error_8_15) >= best_cluster_err)
							continue;

						trial_err += (uint64_t)trial_errors[pSels[8]][8] + trial_errors[pSels[9]][9] + trial_errors[pSels[10]][10] + trial_errors[pSels[11]][11];
						if ((trial_err + min_possible_error_12_15) >= best_cluster_err)
							continue;

						trial_err += (uint64_t)trial_errors[pSels[12]][12] + trial_errors[pSels[13]][13] + trial_errors[pSels[14]][14] + trial_errors[pSels[15]][15];
						if (trial_err < best_cluster_err)
						{
							best_cluster_err = trial_err;
							best_cluster_index = cluster_index;

							// Can't do better than the per-pixel lower bound - stop searching.
							if (best_cluster_err == min_possible_error_0_15)
								break;
						}

					} // cluster_iter
#endif

					blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());

					m_block_selector_cluster_index[block_index] = best_cluster_index;

					prev_best_cluster_index = best_cluster_index;

				} // block_index

			} );

		} // block_index_iter

		m_params.m_pJob_pool->wait_for_all();

		// Rebuild the cluster -> block lists from the per-block results computed above.
		for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
		{
			m_selector_cluster_block_indices[i].resize(0);
			m_selector_cluster_block_indices[i].reserve(128);
		}

		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			const uint32_t best_cluster_index = m_block_selector_cluster_index[block_index];

			vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
			m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
		}

	} // if (use_cpu)

	debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
}
// TODO: Remove old ETC1 specific stuff, and thread this.
//
// Re-optimizes each endpoint cluster's endpoints and intensity tables while holding every
// subblock's current selectors fixed. Differential- and individual-mode subblocks within a
// cluster are optimized separately. Returns the number of subblocks actually refined.
uint32_t basisu_frontend::refine_block_endpoints_given_selectors()
{
	debug_printf("refine_block_endpoints_given_selectors\n");

	// Record which subblocks (encoded as block_index * 2 + subblock_index) belong to each
	// endpoint cluster.
	for (int block_index = 0; block_index < static_cast<int>(m_total_blocks); block_index++)
	{
		//uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y);

		vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index];

		m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2);

		m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1);
	}

	uint32_t total_subblocks_refined = 0;
	uint32_t total_subblocks_examined = 0;

	for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++)
	{
		endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index];

		const uint_vec &subblocks = subblock_params.m_subblocks;
		//uint32_t total_pixels = subblock.m_subblocks.size() * 8;

		// Gather source pixels and current selectors, partitioned by encoding mode
		// (index 0 = differential, 1 = individual), plus each group's current total error.
		basisu::vector<color_rgba> subblock_colors[2]; // [use_individual_mode]
		uint8_vec subblock_selectors[2];

		uint64_t cur_subblock_err[2] = { 0, 0 };

		for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
		{
			uint32_t training_vector_index = subblocks[subblock_iter];

			uint32_t block_index = training_vector_index >> 1;
			uint32_t subblock_index = training_vector_index & 1;
			const bool is_flipped = true;

			const etc_block &blk = m_encoded_blocks[block_index];

			// Diff bit clear => block uses individual (color4) mode.
			const bool use_individual_mode = !blk.get_diff_bit();

			const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();

			color_rgba unpacked_block_pixels[16];
			unpack_etc1(blk, unpacked_block_pixels);

			// Collect this subblock's 8 pixels, their current error, and their selectors.
			for (uint32_t i = 0; i < 8; i++)
			{
				const uint32_t pixel_index = g_etc1_pixel_indices[is_flipped][subblock_index][i];
				const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i];

				subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]);

				cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false);

				subblock_selectors[use_individual_mode].push_back(static_cast<uint8_t>(blk.get_selector(coords.m_x, coords.m_y)));
			}
		} // subblock_iter

		// Run the ETC1 optimizer for each mode with the selectors forced to their current values.
		etc1_optimizer::results cluster_optimizer_results[2];
		bool results_valid[2] = { false, false };

		clear_obj(cluster_optimizer_results);

		basisu::vector<uint8_t> cluster_selectors[2];

		for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
		{
			const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size();

			if (!total_pixels)
				continue;

			total_subblocks_examined += total_pixels / 8;

			etc1_optimizer optimizer;
			etc1_solution_coordinates solutions[2];

			etc1_optimizer::params cluster_optimizer_params;
			cluster_optimizer_params.m_num_src_pixels = total_pixels;
			cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0];

			cluster_optimizer_params.m_use_color4 = use_individual_mode != 0;
			cluster_optimizer_params.m_perceptual = m_params.m_perceptual;

			// Selectors are held fixed; only the endpoints/intensity table may change.
			cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0];
			cluster_optimizer_params.m_quality = cETCQualityUber;

			cluster_selectors[use_individual_mode].resize(total_pixels);

			cluster_optimizer_results[use_individual_mode].m_n = total_pixels;
			cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0];

			optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]);

			if (!optimizer.compute())
				continue;

			// Only accept the new endpoints if they actually reduce this group's error.
			if (cluster_optimizer_results[use_individual_mode].m_error < cur_subblock_err[use_individual_mode])
				results_valid[use_individual_mode] = true;

		} // use_individual_mode

		// Write any accepted endpoints back to every subblock in the cluster.
		for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
		{
			if (!results_valid[use_individual_mode])
				continue;

			// Differential mode needs two passes: pass 0 verifies every block's new color pair
			// is still encodable as base5+delta3, pass 1 commits. Individual mode commits in one.
			uint32_t num_passes = use_individual_mode ? 1 : 2;

			bool all_passed5 = true;

			for (uint32_t pass = 0; pass < num_passes; pass++)
			{
				for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
				{
					const uint32_t training_vector_index = subblocks[subblock_iter];

					const uint32_t block_index = training_vector_index >> 1;
					const uint32_t subblock_index = training_vector_index & 1;
					//const bool is_flipped = true;

					etc_block &blk = m_encoded_blocks[block_index];

					// Skip blocks encoded in the other mode.
					if (!blk.get_diff_bit() != static_cast<bool>(use_individual_mode != 0))
						continue;

					if (use_individual_mode)
					{
						blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false));
						blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table);

						subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error;
						subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table;
						subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled;

						total_subblocks_refined++;
					}
					else
					{
						const uint16_t base_color5 = blk.get_base5_color();
						const uint16_t delta_color3 = blk.get_delta3_color();

						// Unpack both subblock colors, substitute the refined color into this
						// subblock's slot, then check the pair still fits base5+delta3.
						uint32_t r[2], g[2], b[2];
						etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false);
						bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false);
						assert(success);
						BASISU_NOTE_UNUSED(success);

						r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r;
						g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g;
						b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b;

						color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) };

						if (!etc_block::try_pack_color5_delta3(colors))
						{
							all_passed5 = false;
							break;
						}

						// Commit only on pass 1, and only if every block passed the check.
						if ((pass == 1) && (all_passed5))
						{
							blk.set_block_color5(colors[0], colors[1]);
							blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table);

							subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error;
							subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table;
							subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled;

							total_subblocks_refined++;
						}
					}

				} // subblock_iter

			} // pass

		} // use_individual_mode

	} // endpoint_cluster_index

	if (m_params.m_debug_stats)
		debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined);

	return total_subblocks_refined;
}
  2178. void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors)
  2179. {
  2180. debug_printf("dump_endpoint_clusterization_visualization\n");
  2181. uint32_t max_endpoint_cluster_size = 0;
  2182. basisu::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size());
  2183. basisu::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size());
  2184. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  2185. {
  2186. max_endpoint_cluster_size = maximum<uint32_t>(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size());
  2187. cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
  2188. }
  2189. if (!max_endpoint_cluster_size)
  2190. return;
  2191. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  2192. sorted_cluster_indices[i] = i;
  2193. //indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr());
  2194. image endpoint_cluster_vis(12 + minimum<uint32_t>(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3);
  2195. for (uint32_t unsorted_cluster_iter = 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++)
  2196. {
  2197. const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter];
  2198. etc_block blk;
  2199. blk.clear();
  2200. blk.set_flip_bit(false);
  2201. blk.set_diff_bit(true);
  2202. blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]);
  2203. blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false));
  2204. color_rgba blk_colors[4];
  2205. blk.get_block_colors(blk_colors, 0);
  2206. for (uint32_t i = 0; i < 4; i++)
  2207. endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]);
  2208. for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++)
  2209. {
  2210. uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter];
  2211. const uint32_t block_index = training_vector_index >> 1;
  2212. const uint32_t subblock_index = training_vector_index & 1;
  2213. const etc_block& blk2 = m_etc1_blocks_etc1s[block_index];
  2214. const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  2215. color_rgba subblock_pixels[8];
  2216. if (vis_endpoint_colors)
  2217. {
  2218. color_rgba colors[2];
  2219. blk2.get_block_low_high_colors(colors, subblock_index);
  2220. for (uint32_t i = 0; i < 8; i++)
  2221. subblock_pixels[i] = colors[subblock_index];
  2222. }
  2223. else
  2224. {
  2225. for (uint32_t i = 0; i < 8; i++)
  2226. subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]];
  2227. }
  2228. endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2);
  2229. }
  2230. }
  2231. save_png(pFilename, endpoint_cluster_vis);
  2232. debug_printf("Wrote debug visualization file %s\n", pFilename);
  2233. }
  2234. void basisu_frontend::finalize()
  2235. {
  2236. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  2237. {
  2238. for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++)
  2239. {
  2240. const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, subblock_index);
  2241. m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[0] = true;
  2242. }
  2243. }
  2244. }
  2245. // The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook.
  2246. // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up.
  2247. // This is basically a bottom up clusterization stage, where some leaves can be combined.
  2248. void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices)
  2249. {
  2250. debug_printf("reoptimize_remapped_endpoints\n");
  2251. basisu::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size());
  2252. for (uint32_t i = 0; i < new_block_endpoints.size(); i++)
  2253. new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i);
  2254. basisu::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size());
  2255. basisu::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size());
  2256. const uint32_t N = 256;
  2257. for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N)
  2258. {
			// Queue a job that re-optimizes endpoint clusters [first_index, last_index).
			const uint32_t first_index = cluster_index_iter;
			const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);

			m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {

				for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
				{
					const basisu::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index];

					// Skip clusters that no block references; cluster_valid[] stays false for them.
					if (!cluster_block_indices.size())
						continue;

					// Each ETC1S block contributes 16 pixels (4x4).
					const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16;

					basisu::vector<color_rgba> cluster_pixels(total_pixels);
					uint8_vec force_selectors(total_pixels);

					// Build a trial block using the cluster's current endpoint color/intensity table.
					etc_block blk;
					blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false));
					blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false));
					blk.set_flip_bit(true);

					// Accumulated error of the cluster's blocks with the CURRENT endpoints and
					// each block's (fixed) quantized selectors — the baseline to beat.
					uint64_t cur_err = 0;

					for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++)
					{
						const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter];

						const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

						memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba));

						// Use the caller-provided selector cluster mapping if present, otherwise the current one.
						const uint32_t selector_cluster_index = pBlock_selector_indices ? (*pBlock_selector_indices)[block_index] : get_block_selector_cluster_index(block_index);

						const etc_block &blk_selectors = get_selector_cluster_selector_bits(selector_cluster_index);

						blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits());

						cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual);

						// Record each pixel's selector so the optimizer below is forced to keep them.
						for (uint32_t y = 0; y < 4; y++)
							for (uint32_t x = 0; x < 4; x++)
								force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast<uint8_t>(blk_selectors.get_selector(x, y));
					}

					endpoint_cluster_etc_params new_endpoint_cluster_etc_params;

					{
						// Re-optimize the cluster's endpoints over all of its pixels, holding the
						// selectors fixed (m_pForce_selectors).
						etc1_optimizer optimizer;
						etc1_solution_coordinates solutions[2];

						etc1_optimizer::params cluster_optimizer_params;
						cluster_optimizer_params.m_num_src_pixels = total_pixels;
						cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];

						cluster_optimizer_params.m_use_color4 = false;
						cluster_optimizer_params.m_perceptual = m_params.m_perceptual;

						cluster_optimizer_params.m_pForce_selectors = &force_selectors[0];

						// Spend more time searching at max compression level.
						if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
							cluster_optimizer_params.m_quality = cETCQualityUber;
						else
							cluster_optimizer_params.m_quality = cETCQualitySlow;

						etc1_optimizer::results cluster_optimizer_results;

						basisu::vector<uint8_t> cluster_selectors(total_pixels);
						cluster_optimizer_results.m_n = total_pixels;
						cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];

						optimizer.init(cluster_optimizer_params, cluster_optimizer_results);

						if (!optimizer.compute())
							BASISU_FRONTEND_VERIFY(false);

						new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
						new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
						new_endpoint_cluster_etc_params.m_color_error[0] = cluster_optimizer_results.m_error;
						new_endpoint_cluster_etc_params.m_color_used[0] = true;
						new_endpoint_cluster_etc_params.m_valid = true;
					}

					// Only accept the re-optimized endpoints if they strictly beat the baseline error.
					if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err)
					{
						m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params;

						cluster_improved[cluster_index] = true;
					}

					cluster_valid[cluster_index] = true;

				} // cluster_index

			} );

		} // cluster_index_iter

		m_params.m_pJob_pool->wait_for_all();

		uint32_t total_unused_clusters = 0;
		uint32_t total_improved_clusters = 0;

		// Compact the codebook: build a remap from old cluster indices to new ones,
		// dropping clusters that no block referenced (-1 = dropped).
		old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size());
		vector_set_all(old_to_new_endpoint_cluster_indices, -1);

		int total_new_endpoint_clusters = 0;

		for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
		{
			if (!cluster_valid[old_cluster_index])
				total_unused_clusters++;
			else
				old_to_new_endpoint_cluster_indices[old_cluster_index] = total_new_endpoint_clusters++;

			if (cluster_improved[old_cluster_index])
				total_improved_clusters++;
		}

		debug_printf("Total unused clusters: %u\n", total_unused_clusters);
		debug_printf("Total improved_clusters: %u\n", total_improved_clusters);
		debug_printf("Total endpoint clusters: %u\n", total_new_endpoint_clusters);

		if (optimize_final_codebook)
		{
			// Rebuild the per-cluster ETC params array under the compacted indexing.
			cluster_subblock_etc_params_vec new_endpoint_cluster_etc_params(total_new_endpoint_clusters);

			for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
			{
				if (old_to_new_endpoint_cluster_indices[old_cluster_index] >= 0)
					new_endpoint_cluster_etc_params[old_to_new_endpoint_cluster_indices[old_cluster_index]] = m_endpoint_cluster_etc_params[old_cluster_index];
			}

			debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n");

			// Rebuild the cluster -> subblock lists and point each block at its new cluster.
			basisu::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters);

			for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++)
			{
				const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index];

				const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index];
				BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0);

				BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_clusters.size());

				// Both subblocks of an ETC1S block share the same endpoint cluster.
				new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 0);
				new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 1);

				BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_cluster_etc_params.size());

				new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0);
				new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1);

				m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index;
				m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index;
			}

			debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n");

			m_endpoint_clusters = new_endpoint_clusters;
			m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params;

			eliminate_redundant_or_empty_endpoint_clusters();

			debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 3\n");

			// eliminate_redundant_or_empty_endpoint_clusters() may have merged/removed
			// clusters, so refresh the per-block cluster indices and the old->new remap.
			for (uint32_t new_cluster_index = 0; new_cluster_index < m_endpoint_clusters.size(); new_cluster_index++)
			{
				for (uint32_t cluster_block_iter = 0; cluster_block_iter < m_endpoint_clusters[new_cluster_index].size(); cluster_block_iter++)
				{
					const uint32_t subblock_index = m_endpoint_clusters[new_cluster_index][cluster_block_iter];
					const uint32_t block_index = subblock_index >> 1;

					m_block_endpoint_clusters_indices[block_index][0] = new_cluster_index;
					m_block_endpoint_clusters_indices[block_index][1] = new_cluster_index;

					const uint32_t old_cluster_index = new_block_endpoints[block_index];

					old_to_new_endpoint_cluster_indices[old_cluster_index] = new_cluster_index;
				}
			}

			debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 4\n");

			// Re-encode every block's endpoint data from its (possibly changed) cluster.
			for (uint32_t block_index = 0; block_index < m_encoded_blocks.size(); block_index++)
			{
				const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);

				m_encoded_blocks[block_index].set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
				m_encoded_blocks[block_index].set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
			}

			debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size());
		}

		//debug_printf("validate_output: %u\n", validate_output());
	}
  2394. // Endpoint clusterization hierarchy integrity checker.
  2395. // Note this doesn't check for empty clusters.
  2396. bool basisu_frontend::validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const
  2397. {
  2398. if (!m_endpoint_parent_clusters.size())
  2399. return true;
  2400. int_vec subblock_parent_indices(m_total_blocks * 2);
  2401. subblock_parent_indices.set_all(-1);
  2402. int_vec subblock_cluster_indices(m_total_blocks * 2);
  2403. subblock_cluster_indices.set_all(-1);
  2404. for (uint32_t parent_index = 0; parent_index < m_endpoint_parent_clusters.size(); parent_index++)
  2405. {
  2406. for (uint32_t i = 0; i < m_endpoint_parent_clusters[parent_index].size(); i++)
  2407. {
  2408. uint32_t subblock_index = m_endpoint_parent_clusters[parent_index][i];
  2409. if (subblock_index >= m_total_blocks * 2)
  2410. return false;
  2411. // If the endpoint cluster lives in more than one parent node, that's wrong.
  2412. if (subblock_parent_indices[subblock_index] != -1)
  2413. return false;
  2414. subblock_parent_indices[subblock_index] = parent_index;
  2415. }
  2416. }
  2417. // Make sure all endpoint clusters are present in the parent cluster.
  2418. for (uint32_t i = 0; i < subblock_parent_indices.size(); i++)
  2419. {
  2420. if (subblock_parent_indices[i] == -1)
  2421. return false;
  2422. }
  2423. for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
  2424. {
  2425. int parent_index = 0;
  2426. for (uint32_t i = 0; i < m_endpoint_clusters[cluster_index].size(); i++)
  2427. {
  2428. uint32_t subblock_index = m_endpoint_clusters[cluster_index][i];
  2429. if (subblock_index >= m_total_blocks * 2)
  2430. return false;
  2431. if (subblock_cluster_indices[subblock_index] != -1)
  2432. return false;
  2433. subblock_cluster_indices[subblock_index] = cluster_index;
  2434. // There are transformations on the endpoint clusters that can break the strict tree requirement
  2435. if (ensure_clusters_have_same_parents)
  2436. {
  2437. // Make sure all the subblocks are in the same parent cluster
  2438. if (!i)
  2439. parent_index = subblock_parent_indices[subblock_index];
  2440. else if (subblock_parent_indices[subblock_index] != parent_index)
  2441. return false;
  2442. }
  2443. }
  2444. }
  2445. // Make sure all endpoint clusters are present in the parent cluster.
  2446. for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++)
  2447. {
  2448. if (subblock_cluster_indices[i] == -1)
  2449. return false;
  2450. }
  2451. return true;
  2452. }
  2453. // This is very slow and only intended for debugging/development. It's enabled using the "-validate_etc1s" command line option.
  2454. bool basisu_frontend::validate_output() const
  2455. {
  2456. debug_printf("validate_output\n");
  2457. if (!check_etc1s_constraints())
  2458. return false;
  2459. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  2460. {
  2461. //#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0)
  2462. #define CHECK(x) BASISU_FRONTEND_VERIFY(x);
  2463. CHECK(get_output_block(block_index).get_flip_bit() == true);
  2464. const bool diff_flag = get_diff_flag(block_index);
  2465. CHECK(diff_flag == true);
  2466. etc_block blk;
  2467. memset(&blk, 0, sizeof(blk));
  2468. blk.set_flip_bit(true);
  2469. blk.set_diff_bit(true);
  2470. const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
  2471. const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
  2472. // basisu only supports ETC1S, so these must be equal.
  2473. CHECK(endpoint_cluster0_index == endpoint_cluster1_index);
  2474. CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)));
  2475. CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false));
  2476. blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false));
  2477. blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false));
  2478. const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
  2479. CHECK(selector_cluster_index < get_total_selector_clusters());
  2480. CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1);
  2481. blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
  2482. const etc_block &rdo_output_block = get_output_block(block_index);
  2483. CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit());
  2484. CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit());
  2485. CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0));
  2486. CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1));
  2487. CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color());
  2488. CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color());
  2489. CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits());
  2490. #undef CHECK
  2491. }
  2492. return true;
  2493. }
  2494. void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks)
  2495. {
  2496. gpu_image g;
  2497. g.init(texture_format::cETC1, num_blocks_x * 4, num_blocks_y * 4);
  2498. for (uint32_t y = 0; y < num_blocks_y; y++)
  2499. {
  2500. for (uint32_t x = 0; x < num_blocks_x; x++)
  2501. {
  2502. const uint32_t block_index = first_block + x + y * num_blocks_x;
  2503. etc_block &blk = *(etc_block *)g.get_block_ptr(x, y);
  2504. if (output_blocks)
  2505. blk = get_output_block(block_index);
  2506. else
  2507. {
  2508. const bool diff_flag = get_diff_flag(block_index);
  2509. blk.set_diff_bit(diff_flag);
  2510. blk.set_flip_bit(true);
  2511. const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
  2512. const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
  2513. if (diff_flag)
  2514. blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false));
  2515. else
  2516. blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
  2517. blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
  2518. blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
  2519. const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
  2520. blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
  2521. }
  2522. }
  2523. }
  2524. image img;
  2525. g.unpack(img);
  2526. save_png(pFilename, img);
  2527. }
  2528. } // namespace basisu