basisu_frontend.cpp 120 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462
  1. // basisu_frontend.cpp
  2. // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // TODO:
  17. // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here.
  18. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this.
  19. //
  20. #include "../transcoder/basisu.h"
  21. #include "basisu_frontend.h"
  22. #include "basisu_opencl.h"
  23. #include <unordered_set>
  24. #include <unordered_map>
  25. #if BASISU_SUPPORT_SSE
  26. #define CPPSPMD_NAME(a) a##_sse41
  27. #include "basisu_kernels_declares.h"
  28. #endif
  29. #define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0)
  30. namespace basisu
  31. {
  32. const uint32_t cMaxCodebookCreationThreads = 8;
  33. const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3;
  34. //const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3;
  35. const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16;
  36. const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32;
  37. const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16;
  38. // TODO - How to handle internal verifies in the basisu lib
  39. static inline void handle_verify_failure(int line)
  40. {
  41. error_printf("basisu_frontend: verify check failed at line %i!\n", line);
  42. abort();
  43. }
  44. bool basisu_frontend::init(const params &p)
  45. {
  46. debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n",
  47. p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0,
  48. p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level);
  49. if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters))
  50. return false;
  51. if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters))
  52. return false;
  53. m_source_blocks.resize(0);
  54. append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks);
  55. m_params = p;
  56. if (m_params.m_pOpenCL_context)
  57. {
  58. BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block));
  59. // Upload the RGBA pixel blocks a single time.
  60. if (!opencl_set_pixel_blocks(m_params.m_pOpenCL_context, m_source_blocks.size(), (cl_pixel_block*)m_source_blocks.data()))
  61. {
  62. // This is not fatal, we just won't use OpenCL.
  63. error_printf("basisu_frontend::init: opencl_set_pixel_blocks() failed\n");
  64. m_params.m_pOpenCL_context = nullptr;
  65. m_opencl_failed = true;
  66. }
  67. }
  68. m_encoded_blocks.resize(m_params.m_num_source_blocks);
  69. memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0]));
  70. m_num_endpoint_codebook_iterations = 1;
  71. m_num_selector_codebook_iterations = 1;
  72. switch (p.m_compression_level)
  73. {
  74. case 0:
  75. {
  76. m_endpoint_refinement = false;
  77. m_use_hierarchical_endpoint_codebooks = true;
  78. m_use_hierarchical_selector_codebooks = true;
  79. break;
  80. }
  81. case 1:
  82. {
  83. m_endpoint_refinement = true;
  84. m_use_hierarchical_endpoint_codebooks = true;
  85. m_use_hierarchical_selector_codebooks = true;
  86. break;
  87. }
  88. case 2:
  89. {
  90. m_endpoint_refinement = true;
  91. m_use_hierarchical_endpoint_codebooks = true;
  92. m_use_hierarchical_selector_codebooks = true;
  93. break;
  94. }
  95. case 3:
  96. {
  97. m_endpoint_refinement = true;
  98. m_use_hierarchical_endpoint_codebooks = false;
  99. m_use_hierarchical_selector_codebooks = false;
  100. break;
  101. }
  102. case 4:
  103. {
  104. m_endpoint_refinement = true;
  105. m_use_hierarchical_endpoint_codebooks = true;
  106. m_use_hierarchical_selector_codebooks = true;
  107. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  108. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  109. break;
  110. }
  111. case 5:
  112. {
  113. m_endpoint_refinement = true;
  114. m_use_hierarchical_endpoint_codebooks = false;
  115. m_use_hierarchical_selector_codebooks = false;
  116. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  117. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
  118. break;
  119. }
  120. case 6:
  121. default:
  122. {
  123. m_endpoint_refinement = true;
  124. m_use_hierarchical_endpoint_codebooks = false;
  125. m_use_hierarchical_selector_codebooks = false;
  126. m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
  127. m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
  128. break;
  129. }
  130. }
  131. if (m_params.m_disable_hierarchical_endpoint_codebooks)
  132. m_use_hierarchical_endpoint_codebooks = false;
  133. debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n",
  134. m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations);
  135. return true;
  136. }
  137. bool basisu_frontend::compress()
  138. {
  139. debug_printf("basisu_frontend::compress\n");
  140. m_total_blocks = m_params.m_num_source_blocks;
  141. m_total_pixels = m_total_blocks * cPixelBlockTotalPixels;
  142. // Encode the initial high quality ETC1S texture
  143. init_etc1_images();
  144. // First quantize the ETC1S endpoints
  145. if (m_params.m_pGlobal_codebooks)
  146. {
  147. init_global_codebooks();
  148. }
  149. else
  150. {
  151. init_endpoint_training_vectors();
  152. generate_endpoint_clusters();
  153. for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++)
  154. {
  155. if (m_params.m_validate)
  156. {
  157. BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
  158. BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
  159. }
  160. if (refine_endpoint_step)
  161. {
  162. introduce_new_endpoint_clusters();
  163. }
  164. if (m_params.m_validate)
  165. {
  166. BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
  167. }
  168. generate_endpoint_codebook(refine_endpoint_step);
  169. if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
  170. {
  171. char buf[256];
  172. snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step);
  173. dump_endpoint_clusterization_visualization(buf, false);
  174. }
  175. bool early_out = false;
  176. if (m_endpoint_refinement)
  177. {
  178. //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png");
  179. if (!refine_endpoint_clusterization())
  180. early_out = true;
  181. if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1))
  182. {
  183. eliminate_redundant_or_empty_endpoint_clusters();
  184. generate_endpoint_codebook(basisu::maximum(1U, refine_endpoint_step));
  185. }
  186. if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
  187. {
  188. char buf[256];
  189. snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step);
  190. dump_endpoint_clusterization_visualization(buf, false);
  191. snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step);
  192. dump_endpoint_clusterization_visualization(buf, true);
  193. }
  194. }
  195. if (m_params.m_validate)
  196. {
  197. BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
  198. }
  199. eliminate_redundant_or_empty_endpoint_clusters();
  200. if (m_params.m_validate)
  201. {
  202. BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
  203. }
  204. if (m_params.m_debug_stats)
  205. debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size());
  206. if (early_out)
  207. break;
  208. }
  209. if (m_params.m_validate)
  210. {
  211. BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
  212. }
  213. generate_block_endpoint_clusters();
  214. create_initial_packed_texture();
  215. // Now quantize the ETC1S selectors
  216. generate_selector_clusters();
  217. if (m_use_hierarchical_selector_codebooks)
  218. compute_selector_clusters_within_each_parent_cluster();
  219. if (m_params.m_compression_level == 0)
  220. {
  221. create_optimized_selector_codebook(0);
  222. find_optimal_selector_clusters_for_each_block();
  223. introduce_special_selector_clusters();
  224. }
  225. else
  226. {
  227. const uint32_t num_refine_selector_steps = m_num_selector_codebook_iterations;
  228. for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++)
  229. {
  230. create_optimized_selector_codebook(refine_selector_steps);
  231. find_optimal_selector_clusters_for_each_block();
  232. introduce_special_selector_clusters();
  233. if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))
  234. {
  235. if (!refine_block_endpoints_given_selectors())
  236. break;
  237. }
  238. }
  239. }
  240. optimize_selector_codebook();
  241. if (m_params.m_debug_stats)
  242. debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size());
  243. }
  244. finalize();
  245. if (m_params.m_validate)
  246. {
  247. if (!validate_output())
  248. return false;
  249. }
  250. debug_printf("basisu_frontend::compress: Done\n");
  251. return true;
  252. }
  253. bool basisu_frontend::init_global_codebooks()
  254. {
  255. const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks;
  256. const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints();
  257. const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors();
  258. m_endpoint_cluster_etc_params.resize(endpoints.size());
  259. for (uint32_t i = 0; i < endpoints.size(); i++)
  260. {
  261. m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5;
  262. m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5;
  263. m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255);
  264. m_endpoint_cluster_etc_params[i].m_color_used[0] = true;
  265. m_endpoint_cluster_etc_params[i].m_valid = true;
  266. }
  267. m_optimized_cluster_selectors.resize(selectors.size());
  268. for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
  269. {
  270. for (uint32_t y = 0; y < 4; y++)
  271. for (uint32_t x = 0; x < 4; x++)
  272. m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y));
  273. }
  274. m_block_endpoint_clusters_indices.resize(m_total_blocks);
  275. m_orig_encoded_blocks.resize(m_total_blocks);
  276. m_block_selector_cluster_index.resize(m_total_blocks);
  277. #if 0
  278. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  279. {
  280. const uint32_t first_index = block_index_iter;
  281. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  282. #ifndef __EMSCRIPTEN__
  283. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  284. #endif
  285. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  286. {
  287. const etc_block& blk = m_etc1_blocks_etc1s[block_index];
  288. const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
  289. etc_block trial_blk;
  290. trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]);
  291. trial_blk.set_flip_bit(true);
  292. uint64_t best_err = UINT64_MAX;
  293. uint32_t best_index = 0;
  294. for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
  295. {
  296. trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
  297. const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
  298. if (cur_err < best_err)
  299. {
  300. best_err = cur_err;
  301. best_index = i;
  302. if (!cur_err)
  303. break;
  304. }
  305. } // block_index
  306. m_block_selector_cluster_index[block_index] = best_index;
  307. }
  308. #ifndef __EMSCRIPTEN__
  309. });
  310. #endif
  311. }
  312. #ifndef __EMSCRIPTEN__
  313. m_params.m_pJob_pool->wait_for_all();
  314. #endif
  315. m_encoded_blocks.resize(m_total_blocks);
  316. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  317. {
  318. const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
  319. const uint32_t selector_index = m_block_selector_cluster_index[block_index];
  320. etc_block& blk = m_encoded_blocks[block_index];
  321. blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
  322. blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
  323. blk.set_flip_bit(true);
  324. blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
  325. }
  326. #endif
  327. // HACK HACK
  328. const uint32_t NUM_PASSES = 3;
  329. for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
  330. {
  331. debug_printf("init_global_codebooks: pass %u\n", pass);
  332. const uint32_t N = 128;
  333. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  334. {
  335. const uint32_t first_index = block_index_iter;
  336. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  337. #ifndef __EMSCRIPTEN__
  338. m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {
  339. #endif
  340. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  341. {
  342. const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index];
  343. const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits();
  344. etc_block trial_blk(blk);
  345. trial_blk.set_raw_selector_bits(blk_raw_selector_bits);
  346. trial_blk.set_flip_bit(true);
  347. uint64_t best_err = UINT64_MAX;
  348. uint32_t best_index = 0;
  349. etc_block best_block(trial_blk);
  350. for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++)
  351. {
  352. if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0))
  353. continue;
  354. trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]);
  355. trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]);
  356. const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
  357. uint64_t cur_err;
  358. if (!pass)
  359. cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
  360. else
  361. cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual);
  362. if (cur_err < best_err)
  363. {
  364. best_err = cur_err;
  365. best_index = i;
  366. best_block = trial_blk;
  367. if (!cur_err)
  368. break;
  369. }
  370. }
  371. m_block_endpoint_clusters_indices[block_index][0] = best_index;
  372. m_block_endpoint_clusters_indices[block_index][1] = best_index;
  373. m_orig_encoded_blocks[block_index] = best_block;
  374. } // block_index
  375. #ifndef __EMSCRIPTEN__
  376. });
  377. #endif
  378. }
  379. #ifndef __EMSCRIPTEN__
  380. m_params.m_pJob_pool->wait_for_all();
  381. #endif
  382. m_endpoint_clusters.resize(0);
  383. m_endpoint_clusters.resize(endpoints.size());
  384. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  385. {
  386. const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0];
  387. m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2);
  388. m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1);
  389. }
  390. m_block_selector_cluster_index.resize(m_total_blocks);
  391. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  392. {
  393. const uint32_t first_index = block_index_iter;
  394. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  395. #ifndef __EMSCRIPTEN__
  396. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  397. #endif
  398. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  399. {
  400. const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
  401. etc_block trial_blk;
  402. trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]);
  403. trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]);
  404. trial_blk.set_flip_bit(true);
  405. uint64_t best_err = UINT64_MAX;
  406. uint32_t best_index = 0;
  407. for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
  408. {
  409. trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
  410. const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
  411. if (cur_err < best_err)
  412. {
  413. best_err = cur_err;
  414. best_index = i;
  415. if (!cur_err)
  416. break;
  417. }
  418. } // block_index
  419. m_block_selector_cluster_index[block_index] = best_index;
  420. }
  421. #ifndef __EMSCRIPTEN__
  422. });
  423. #endif
  424. }
  425. #ifndef __EMSCRIPTEN__
  426. m_params.m_pJob_pool->wait_for_all();
  427. #endif
  428. m_encoded_blocks.resize(m_total_blocks);
  429. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  430. {
  431. const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
  432. const uint32_t selector_index = m_block_selector_cluster_index[block_index];
  433. etc_block& blk = m_encoded_blocks[block_index];
  434. blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
  435. blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
  436. blk.set_flip_bit(true);
  437. blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
  438. }
  439. } // pass
  440. m_selector_cluster_block_indices.resize(selectors.size());
  441. for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++)
  442. m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index);
  443. return true;
  444. }
  445. void basisu_frontend::introduce_special_selector_clusters()
  446. {
  447. debug_printf("introduce_special_selector_clusters\n");
  448. uint32_t total_blocks_relocated = 0;
  449. const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
  450. bool_vec block_relocated_flags(m_total_blocks);
  451. // Make sure the selector codebook always has pure flat blocks for each possible selector, to avoid obvious artifacts.
  452. // optimize_selector_codebook() will clean up any redundant clusters we create here.
  453. for (uint32_t sel = 0; sel < 4; sel++)
  454. {
  455. etc_block blk;
  456. clear_obj(blk);
  457. for (uint32_t j = 0; j < 16; j++)
  458. blk.set_selector(j & 3, j >> 2, sel);
  459. int k;
  460. for (k = 0; k < (int)m_optimized_cluster_selectors.size(); k++)
  461. if (m_optimized_cluster_selectors[k].get_raw_selector_bits() == blk.get_raw_selector_bits())
  462. break;
  463. if (k < (int)m_optimized_cluster_selectors.size())
  464. continue;
  465. debug_printf("Introducing sel %u\n", sel);
  466. const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size();
  467. m_optimized_cluster_selectors.push_back(blk);
  468. vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index);
  469. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  470. {
  471. if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits())
  472. continue;
  473. // See if using flat selectors actually decreases the block's error.
  474. const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index];
  475. etc_block cur_blk;
  476. const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
  477. cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
  478. cur_blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
  479. cur_blk.set_raw_selector_bits(get_selector_cluster_selector_bits(old_selector_cluster_index).get_raw_selector_bits());
  480. cur_blk.set_flip_bit(true);
  481. const uint64_t cur_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
  482. cur_blk.set_raw_selector_bits(blk.get_raw_selector_bits());
  483. const uint64_t new_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
  484. if (new_err >= cur_err)
  485. continue;
  486. // Change the block to use the new cluster
  487. m_block_selector_cluster_index[block_index] = new_selector_cluster_index;
  488. m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index);
  489. block_relocated_flags[block_index] = true;
  490. #if 0
  491. int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index);
  492. if (j >= 0)
  493. m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j);
  494. #endif
  495. total_blocks_relocated++;
  496. m_encoded_blocks[block_index].set_raw_selector_bits(blk.get_raw_selector_bits());
  497. } // block_index
  498. } // sel
  499. if (total_blocks_relocated)
  500. {
  501. debug_printf("Fixing selector codebook\n");
  502. for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++)
  503. {
  504. uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index];
  505. uint32_t dst_ofs = 0;
  506. for (uint32_t i = 0; i < block_indices.size(); i++)
  507. {
  508. const uint32_t block_index = block_indices[i];
  509. if (!block_relocated_flags[block_index])
  510. block_indices[dst_ofs++] = block_index;
  511. }
  512. block_indices.resize(dst_ofs);
  513. }
  514. }
  515. debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated);
  516. }
  517. // This method will change the number and ordering of the selector codebook clusters.
  518. void basisu_frontend::optimize_selector_codebook()
  519. {
  520. debug_printf("optimize_selector_codebook\n");
  521. const uint32_t orig_total_selector_clusters = (uint32_t)m_optimized_cluster_selectors.size();
  522. bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
  523. for (uint32_t i = 0; i < m_total_blocks; i++)
  524. selector_cluster_was_used[m_block_selector_cluster_index[i]] = true;
  525. int_vec old_to_new(m_optimized_cluster_selectors.size());
  526. int_vec new_to_old;
  527. uint32_t total_new_entries = 0;
  528. std::unordered_map<uint32_t, uint32_t> selector_hashmap;
  529. for (int i = 0; i < static_cast<int>(m_optimized_cluster_selectors.size()); i++)
  530. {
  531. if (!selector_cluster_was_used[i])
  532. {
  533. old_to_new[i] = -1;
  534. continue;
  535. }
  536. const uint32_t raw_selector_bits = m_optimized_cluster_selectors[i].get_raw_selector_bits();
  537. auto find_res = selector_hashmap.insert(std::make_pair(raw_selector_bits, total_new_entries));
  538. if (!find_res.second)
  539. {
  540. old_to_new[i] = (find_res.first)->second;
  541. continue;
  542. }
  543. old_to_new[i] = total_new_entries++;
  544. new_to_old.push_back(i);
  545. }
  546. debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries);
  547. for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
  548. {
  549. BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries));
  550. m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]];
  551. }
  552. basisu::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0);
  553. basisu::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? total_new_entries : 0);
  554. for (uint32_t i = 0; i < total_new_entries; i++)
  555. {
  556. if (m_optimized_cluster_selectors.size())
  557. new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]];
  558. //if (m_selector_cluster_block_indices.size())
  559. // new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]];
  560. }
  561. for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
  562. {
  563. new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i);
  564. }
  565. m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors);
  566. m_selector_cluster_block_indices.swap(new_selector_cluster_indices);
  567. // This isn't strictly necessary - doing it for completeness/future sanity.
  568. if (m_selector_clusters_within_each_parent_cluster.size())
  569. {
  570. for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
  571. for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
  572. m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]];
  573. }
  574. debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries);
  575. }
  576. void basisu_frontend::init_etc1_images()
  577. {
  578. debug_printf("basisu_frontend::init_etc1_images\n");
  579. interval_timer tm;
  580. tm.start();
  581. m_etc1_blocks_etc1s.resize(m_total_blocks);
  582. bool use_cpu = true;
  583. if (m_params.m_pOpenCL_context)
  584. {
  585. uint32_t total_perms = 64;
  586. if (m_params.m_compression_level == 0)
  587. total_perms = 4;
  588. else if (m_params.m_compression_level == 1)
  589. total_perms = 16;
  590. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  591. total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
  592. bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms);
  593. if (status)
  594. use_cpu = false;
  595. else
  596. {
  597. error_printf("basisu_frontend::init_etc1_images: opencl_encode_etc1s_blocks() failed! Using CPU.\n");
  598. m_params.m_pOpenCL_context = nullptr;
  599. m_opencl_failed = true;
  600. }
  601. }
  602. if (use_cpu)
  603. {
  604. const uint32_t N = 4096;
  605. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  606. {
  607. const uint32_t first_index = block_index_iter;
  608. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  609. #ifndef __EMSCRIPTEN__
  610. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  611. #endif
  612. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  613. {
  614. const pixel_block& source_blk = get_source_pixel_block(block_index);
  615. etc1_optimizer optimizer;
  616. etc1_optimizer::params optimizer_params;
  617. etc1_optimizer::results optimizer_results;
  618. if (m_params.m_compression_level == 0)
  619. optimizer_params.m_quality = cETCQualityFast;
  620. else if (m_params.m_compression_level == 1)
  621. optimizer_params.m_quality = cETCQualityMedium;
  622. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  623. optimizer_params.m_quality = cETCQualityUber;
  624. optimizer_params.m_num_src_pixels = 16;
  625. optimizer_params.m_pSrc_pixels = source_blk.get_ptr();
  626. optimizer_params.m_perceptual = m_params.m_perceptual;
  627. uint8_t selectors[16];
  628. optimizer_results.m_pSelectors = selectors;
  629. optimizer_results.m_n = 16;
  630. optimizer.init(optimizer_params, optimizer_results);
  631. if (!optimizer.compute())
  632. BASISU_FRONTEND_VERIFY(false);
  633. etc_block& blk = m_etc1_blocks_etc1s[block_index];
  634. memset(&blk, 0, sizeof(blk));
  635. blk.set_block_color5_etc1s(optimizer_results.m_block_color_unscaled);
  636. blk.set_inten_tables_etc1s(optimizer_results.m_block_inten_table);
  637. blk.set_flip_bit(true);
  638. for (uint32_t y = 0; y < 4; y++)
  639. for (uint32_t x = 0; x < 4; x++)
  640. blk.set_selector(x, y, selectors[x + y * 4]);
  641. }
  642. #ifndef __EMSCRIPTEN__
  643. });
  644. #endif
  645. }
  646. #ifndef __EMSCRIPTEN__
  647. m_params.m_pJob_pool->wait_for_all();
  648. #endif
  649. } // use_cpu
  650. debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  651. }
  652. void basisu_frontend::init_endpoint_training_vectors()
  653. {
  654. debug_printf("init_endpoint_training_vectors\n");
  655. vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs();
  656. training_vecs.resize(m_total_blocks * 2);
  657. const uint32_t N = 16384;
  658. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  659. {
  660. const uint32_t first_index = block_index_iter;
  661. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  662. #ifndef __EMSCRIPTEN__
  663. m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
  664. #endif
  665. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  666. {
  667. const etc_block &blk = m_etc1_blocks_etc1s[block_index];
  668. color_rgba block_colors[2];
  669. blk.get_block_low_high_colors(block_colors, 0);
  670. vec6F v;
  671. v[0] = block_colors[0].r * (1.0f / 255.0f);
  672. v[1] = block_colors[0].g * (1.0f / 255.0f);
  673. v[2] = block_colors[0].b * (1.0f / 255.0f);
  674. v[3] = block_colors[1].r * (1.0f / 255.0f);
  675. v[4] = block_colors[1].g * (1.0f / 255.0f);
  676. v[5] = block_colors[1].b * (1.0f / 255.0f);
  677. training_vecs[block_index * 2 + 0] = std::make_pair(v, 1);
  678. training_vecs[block_index * 2 + 1] = std::make_pair(v, 1);
  679. } // block_index;
  680. #ifndef __EMSCRIPTEN__
  681. } );
  682. #endif
  683. } // block_index_iter
  684. #ifndef __EMSCRIPTEN__
  685. m_params.m_pJob_pool->wait_for_all();
  686. #endif
  687. }
  688. void basisu_frontend::generate_endpoint_clusters()
  689. {
  690. debug_printf("Begin endpoint quantization\n");
  691. const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0;
  692. uint32_t max_threads = 0;
  693. max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
  694. if (m_params.m_pJob_pool)
  695. max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);
  696. debug_printf("max_threads: %u\n", max_threads);
  697. bool status = generate_hierarchical_codebook_threaded(m_endpoint_clusterizer,
  698. m_params.m_max_endpoint_clusters, m_use_hierarchical_endpoint_codebooks ? parent_codebook_size : 0,
  699. m_endpoint_clusters,
  700. m_endpoint_parent_clusters,
  701. max_threads, m_params.m_pJob_pool, true);
  702. BASISU_FRONTEND_VERIFY(status);
  703. if (m_use_hierarchical_endpoint_codebooks)
  704. {
  705. if (!m_endpoint_parent_clusters.size())
  706. {
  707. m_endpoint_parent_clusters.resize(0);
  708. m_endpoint_parent_clusters.resize(1);
  709. for (uint32_t i = 0; i < m_total_blocks; i++)
  710. {
  711. m_endpoint_parent_clusters[0].push_back(i*2);
  712. m_endpoint_parent_clusters[0].push_back(i*2+1);
  713. }
  714. }
  715. BASISU_ASSUME(BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE <= UINT8_MAX);
  716. m_block_parent_endpoint_cluster.resize(0);
  717. m_block_parent_endpoint_cluster.resize(m_total_blocks);
  718. vector_set_all(m_block_parent_endpoint_cluster, 0xFF);
  719. for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_endpoint_parent_clusters.size(); parent_cluster_index++)
  720. {
  721. const uint_vec &cluster = m_endpoint_parent_clusters[parent_cluster_index];
  722. for (uint32_t j = 0; j < cluster.size(); j++)
  723. {
  724. const uint32_t block_index = cluster[j] >> 1;
  725. m_block_parent_endpoint_cluster[block_index] = static_cast<uint8_t>(parent_cluster_index);
  726. }
  727. }
  728. for (uint32_t i = 0; i < m_total_blocks; i++)
  729. {
  730. BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[i] != 0xFF);
  731. }
  732. // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
  733. for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
  734. {
  735. const uint_vec &cluster = m_endpoint_clusters[cluster_index];
  736. uint32_t parent_cluster_index = 0;
  737. for (uint32_t j = 0; j < cluster.size(); j++)
  738. {
  739. const uint32_t block_index = cluster[j] >> 1;
  740. BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size());
  741. if (!j)
  742. {
  743. parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
  744. }
  745. else
  746. {
  747. BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[block_index] == parent_cluster_index);
  748. }
  749. }
  750. }
  751. }
  752. if (m_params.m_debug_stats)
  753. debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size());
  754. }
  755. // Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicaste which cluster index each block uses.
  756. void basisu_frontend::generate_block_endpoint_clusters()
  757. {
  758. m_block_endpoint_clusters_indices.resize(m_total_blocks);
  759. for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
  760. {
  761. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  762. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  763. {
  764. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  765. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  766. m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index;
  767. } // cluster_indices_iter
  768. }
  769. if (m_params.m_validate)
  770. {
  771. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  772. {
  773. uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0];
  774. uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1];
  775. BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1);
  776. }
  777. }
  778. }
  779. void basisu_frontend::compute_endpoint_clusters_within_each_parent_cluster()
  780. {
  781. generate_block_endpoint_clusters();
  782. m_endpoint_clusters_within_each_parent_cluster.resize(0);
  783. m_endpoint_clusters_within_each_parent_cluster.resize(m_endpoint_parent_clusters.size());
  784. // Note: It's possible that some blocks got moved into the same cluster, but live in different parent clusters.
  785. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  786. {
  787. const uint32_t cluster_index = m_block_endpoint_clusters_indices[block_index][0];
  788. const uint32_t parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
  789. m_endpoint_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
  790. }
  791. for (uint32_t i = 0; i < m_endpoint_clusters_within_each_parent_cluster.size(); i++)
  792. {
  793. uint_vec &cluster_indices = m_endpoint_clusters_within_each_parent_cluster[i];
  794. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  795. vector_sort(cluster_indices);
  796. auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
  797. cluster_indices.erase(last, cluster_indices.end());
  798. }
  799. }
  800. void basisu_frontend::compute_endpoint_subblock_error_vec()
  801. {
  802. m_subblock_endpoint_quant_err_vec.resize(0);
  803. const uint32_t N = 512;
  804. for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
  805. {
  806. const uint32_t first_index = cluster_index_iter;
  807. const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
  808. #ifndef __EMSCRIPTEN__
  809. m_params.m_pJob_pool->add_job( [this, first_index, last_index] {
  810. #endif
  811. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  812. {
  813. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  814. assert(cluster_indices.size());
  815. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  816. {
  817. basisu::vector<color_rgba> cluster_pixels(8);
  818. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  819. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  820. const bool flipped = true;
  821. const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();
  822. for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
  823. {
  824. cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
  825. }
  826. const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index];
  827. assert(etc_params.m_valid);
  828. color_rgba block_colors[4];
  829. etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true);
  830. uint64_t total_err = 0;
  831. for (uint32_t i = 0; i < 8; i++)
  832. {
  833. const color_rgba &c = cluster_pixels[i];
  834. uint64_t best_err = UINT64_MAX;
  835. //uint32_t best_index = 0;
  836. for (uint32_t s = 0; s < 4; s++)
  837. {
  838. uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
  839. if (err < best_err)
  840. {
  841. best_err = err;
  842. //best_index = s;
  843. }
  844. }
  845. total_err += best_err;
  846. }
  847. subblock_endpoint_quant_err quant_err;
  848. quant_err.m_total_err = total_err;
  849. quant_err.m_cluster_index = cluster_index;
  850. quant_err.m_cluster_subblock_index = cluster_indices_iter;
  851. quant_err.m_block_index = block_index;
  852. quant_err.m_subblock_index = subblock_index;
  853. {
  854. std::lock_guard<std::mutex> lock(m_lock);
  855. m_subblock_endpoint_quant_err_vec.push_back(quant_err);
  856. }
  857. }
  858. } // cluster_index
  859. #ifndef __EMSCRIPTEN__
  860. } );
  861. #endif
  862. } // cluster_index_iter
  863. #ifndef __EMSCRIPTEN__
  864. m_params.m_pJob_pool->wait_for_all();
  865. #endif
  866. vector_sort(m_subblock_endpoint_quant_err_vec);
  867. }
  868. void basisu_frontend::introduce_new_endpoint_clusters()
  869. {
  870. debug_printf("introduce_new_endpoint_clusters\n");
  871. generate_block_endpoint_clusters();
  872. int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - (uint32_t)m_endpoint_clusters.size();
  873. if (num_new_endpoint_clusters <= 0)
  874. return;
  875. compute_endpoint_subblock_error_vec();
  876. const uint32_t num_orig_endpoint_clusters = (uint32_t)m_endpoint_clusters.size();
  877. std::unordered_set<uint32_t> training_vector_was_relocated;
  878. uint_vec cluster_sizes(num_orig_endpoint_clusters);
  879. for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
  880. cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
  881. std::unordered_set<uint32_t> ignore_cluster;
  882. uint32_t total_new_clusters = 0;
  883. while (num_new_endpoint_clusters)
  884. {
  885. if (m_subblock_endpoint_quant_err_vec.size() == 0)
  886. break;
  887. subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back());
  888. m_subblock_endpoint_quant_err_vec.pop_back();
  889. if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index))
  890. continue;
  891. uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index;
  892. if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2)
  893. continue;
  894. if (unordered_set_contains(training_vector_was_relocated, training_vector_index))
  895. continue;
  896. if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1))
  897. continue;
  898. #if 0
  899. const uint32_t block_index = subblock_to_move.m_block_index;
  900. const etc_block& blk = m_etc1_blocks_etc1s[block_index];
  901. uint32_t ls, hs;
  902. blk.get_selector_range(ls, hs);
  903. if (ls != hs)
  904. continue;
  905. #endif
  906. //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size();
  907. enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index);
  908. enlarge_vector(m_endpoint_cluster_etc_params, 1);
  909. assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size());
  910. training_vector_was_relocated.insert(training_vector_index);
  911. m_endpoint_clusters.back().push_back(training_vector_index ^ 1);
  912. training_vector_was_relocated.insert(training_vector_index ^ 1);
  913. BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2);
  914. cluster_sizes[subblock_to_move.m_cluster_index] -= 2;
  915. ignore_cluster.insert(subblock_to_move.m_cluster_index);
  916. total_new_clusters++;
  917. num_new_endpoint_clusters--;
  918. }
  919. debug_printf("Introduced %i new endpoint clusters\n", total_new_clusters);
  920. for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
  921. {
  922. uint_vec &cluster_indices = m_endpoint_clusters[i];
  923. uint_vec new_cluster_indices;
  924. for (uint32_t j = 0; j < cluster_indices.size(); j++)
  925. {
  926. uint32_t training_vector_index = cluster_indices[j];
  927. if (!unordered_set_contains(training_vector_was_relocated, training_vector_index))
  928. new_cluster_indices.push_back(training_vector_index);
  929. }
  930. if (cluster_indices.size() != new_cluster_indices.size())
  931. {
  932. BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0);
  933. cluster_indices.swap(new_cluster_indices);
  934. }
  935. }
  936. generate_block_endpoint_clusters();
  937. }
  938. struct color_rgba_hasher
  939. {
  940. inline std::size_t operator()(const color_rgba& k) const
  941. {
  942. uint32_t v = *(const uint32_t*)&k;
  943. //return bitmix32(v);
  944. //v ^= (v << 10);
  945. //v ^= (v >> 12);
  946. return v;
  947. }
  948. };
  949. // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them.
  950. // TODO: Don't optimize endpoint clusters which haven't changed.
  951. // If step>=1, we check to ensure the new endpoint values actually decrease quantization error.
  952. void basisu_frontend::generate_endpoint_codebook(uint32_t step)
  953. {
  954. debug_printf("generate_endpoint_codebook\n");
  955. interval_timer tm;
  956. tm.start();
  957. m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size());
  958. bool use_cpu = true;
  959. // TODO: Get this working when step>0
  960. if (m_params.m_pOpenCL_context && !step)
  961. {
  962. const uint32_t total_clusters = m_endpoint_clusters.size();
  963. basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);
  964. std::vector<color_rgba> input_pixels;
  965. input_pixels.reserve(m_total_blocks * 16);
  966. std::vector<uint32_t> pixel_weights;
  967. pixel_weights.reserve(m_total_blocks * 16);
  968. uint_vec cluster_sizes(total_clusters);
  969. //typedef basisu::hash_map<color_rgba, uint32_t, color_rgba_hasher> color_hasher_type;
  970. //color_hasher_type color_hasher;
  971. //color_hasher.reserve(2048);
  972. interval_timer hash_tm;
  973. hash_tm.start();
  974. basisu::vector<uint32_t> colors, colors2;
  975. colors.reserve(65536);
  976. colors2.reserve(65536);
  977. for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
  978. {
  979. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  980. assert((cluster_indices.size() & 1) == 0);
  981. #if 0
  982. uint64_t first_pixel_index = input_pixels.size();
  983. const uint32_t total_pixels = 16 * (cluster_indices.size() / 2);
  984. input_pixels.resize(input_pixels.size() + total_pixels);
  985. pixel_weights.resize(pixel_weights.size() + total_pixels);
  986. uint64_t dst_ofs = first_pixel_index;
  987. uint64_t total_r = 0, total_g = 0, total_b = 0;
  988. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  989. {
  990. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  991. if (subblock_index)
  992. continue;
  993. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  994. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  995. for (uint32_t i = 0; i < 16; i++)
  996. {
  997. input_pixels[dst_ofs] = pBlock_pixels[i];
  998. pixel_weights[dst_ofs] = 1;
  999. dst_ofs++;
  1000. total_r += pBlock_pixels[i].r;
  1001. total_g += pBlock_pixels[i].g;
  1002. total_b += pBlock_pixels[i].b;
  1003. }
  1004. }
  1005. //printf("%i %f %f %f\n", cluster_index, total_r / (float)total_pixels, total_g / (float)total_pixels, total_b / (float)total_pixels);
  1006. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  1007. pixel_clusters[cluster_index].m_total_pixels = total_pixels;
  1008. cluster_sizes[cluster_index] = total_pixels;
  1009. #elif 1
  1010. colors.resize(cluster_indices.size() * 8);
  1011. colors2.resize(cluster_indices.size() * 8);
  1012. uint32_t dst_ofs = 0;
  1013. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1014. {
  1015. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1016. if (subblock_index)
  1017. continue;
  1018. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1019. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  1020. memcpy(colors.data() + dst_ofs, pBlock_pixels, sizeof(color_rgba) * 16);
  1021. dst_ofs += 16;
  1022. } // cluster_indices_iter
  1023. uint32_t* pSorted = radix_sort(colors.size(), colors.data(), colors2.data(), 0, 3);
  1024. const uint64_t first_pixel_index = input_pixels.size();
  1025. uint32_t prev_color = 0, cur_weight = 0;
  1026. for (uint32_t i = 0; i < colors.size(); i++)
  1027. {
  1028. uint32_t cur_color = pSorted[i];
  1029. if (cur_color == prev_color)
  1030. {
  1031. if (++cur_weight == 0)
  1032. cur_weight--;
  1033. }
  1034. else
  1035. {
  1036. if (cur_weight)
  1037. {
  1038. input_pixels.push_back(*(const color_rgba*)&prev_color);
  1039. pixel_weights.push_back(cur_weight);
  1040. }
  1041. prev_color = cur_color;
  1042. cur_weight = 1;
  1043. }
  1044. }
  1045. if (cur_weight)
  1046. {
  1047. input_pixels.push_back(*(const color_rgba*)&prev_color);
  1048. pixel_weights.push_back(cur_weight);
  1049. }
  1050. uint32_t total_unique_pixels = (uint32_t)(input_pixels.size() - first_pixel_index);
  1051. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  1052. pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
  1053. cluster_sizes[cluster_index] = total_unique_pixels;
  1054. #else
  1055. color_hasher.reset();
  1056. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1057. {
  1058. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1059. if (subblock_index)
  1060. continue;
  1061. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1062. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  1063. uint32_t *pPrev_weight = nullptr;
  1064. color_rgba prev_color;
  1065. {
  1066. color_rgba cur_color = pBlock_pixels[0];
  1067. auto res = color_hasher.insert(cur_color, 0);
  1068. uint32_t& weight = (res.first)->second;
  1069. if (weight != UINT32_MAX)
  1070. weight++;
  1071. prev_color = cur_color;
  1072. pPrev_weight = &(res.first)->second;
  1073. }
  1074. for (uint32_t i = 1; i < 16; i++)
  1075. {
  1076. color_rgba cur_color = pBlock_pixels[i];
  1077. if (cur_color == prev_color)
  1078. {
  1079. if (*pPrev_weight != UINT32_MAX)
  1080. *pPrev_weight = *pPrev_weight + 1;
  1081. }
  1082. else
  1083. {
  1084. auto res = color_hasher.insert(cur_color, 0);
  1085. uint32_t& weight = (res.first)->second;
  1086. if (weight != UINT32_MAX)
  1087. weight++;
  1088. prev_color = cur_color;
  1089. pPrev_weight = &(res.first)->second;
  1090. }
  1091. }
  1092. } // cluster_indices_iter
  1093. const uint64_t first_pixel_index = input_pixels.size();
  1094. uint32_t total_unique_pixels = color_hasher.size();
  1095. pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
  1096. pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
  1097. input_pixels.resize(first_pixel_index + total_unique_pixels);
  1098. pixel_weights.resize(first_pixel_index + total_unique_pixels);
  1099. uint32_t j = 0;
  1100. for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j)
  1101. {
  1102. input_pixels[first_pixel_index + j] = it->first;
  1103. pixel_weights[first_pixel_index + j] = it->second;
  1104. }
  1105. cluster_sizes[cluster_index] = total_unique_pixels;
  1106. #endif
  1107. } // cluster_index
  1108. debug_printf("Total hash time: %3.3f secs\n", hash_tm.get_elapsed_secs());
  1109. debug_printf("Total unique colors: %llu\n", input_pixels.size());
  1110. uint_vec sorted_cluster_indices_new_to_old(total_clusters);
  1111. indirect_sort(total_clusters, sorted_cluster_indices_new_to_old.data(), cluster_sizes.data());
  1112. //for (uint32_t i = 0; i < total_clusters; i++)
  1113. // sorted_cluster_indices_new_to_old[i] = i;
  1114. uint_vec sorted_cluster_indices_old_to_new(total_clusters);
  1115. for (uint32_t i = 0; i < total_clusters; i++)
  1116. sorted_cluster_indices_old_to_new[sorted_cluster_indices_new_to_old[i]] = i;
  1117. basisu::vector<cl_pixel_cluster> sorted_pixel_clusters(total_clusters);
  1118. for (uint32_t i = 0; i < total_clusters; i++)
  1119. sorted_pixel_clusters[i] = pixel_clusters[sorted_cluster_indices_new_to_old[i]];
  1120. uint32_t total_perms = 64;
  1121. if (m_params.m_compression_level <= 1)
  1122. total_perms = 16;
  1123. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  1124. total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
  1125. basisu::vector<etc_block> output_blocks(total_clusters);
  1126. if (opencl_encode_etc1s_pixel_clusters(
  1127. m_params.m_pOpenCL_context,
  1128. output_blocks.data(),
  1129. total_clusters,
  1130. sorted_pixel_clusters.data(),
  1131. input_pixels.size(),
  1132. input_pixels.data(),
  1133. pixel_weights.data(),
  1134. m_params.m_perceptual, total_perms))
  1135. {
  1136. for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
  1137. {
  1138. const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index];
  1139. const etc_block& blk = output_blocks[new_cluster_index];
  1140. endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index];
  1141. prev_etc_params.m_valid = true;
  1142. etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false);
  1143. prev_etc_params.m_inten_table[0] = blk.get_inten_table(0);
  1144. prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this
  1145. }
  1146. use_cpu = false;
  1147. }
  1148. else
  1149. {
  1150. error_printf("basisu_frontend::generate_endpoint_codebook: opencl_encode_etc1s_pixel_clusters() failed! Using CPU.\n");
  1151. m_params.m_pOpenCL_context = nullptr;
  1152. m_opencl_failed = true;
  1153. }
  1154. } // if (opencl_is_available() && m_params.m_use_opencl)
  1155. if (use_cpu)
  1156. {
  1157. const uint32_t N = 128;
  1158. for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
  1159. {
  1160. const uint32_t first_index = cluster_index_iter;
  1161. const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
  1162. #ifndef __EMSCRIPTEN__
  1163. m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {
  1164. #endif
  1165. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  1166. {
  1167. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  1168. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  1169. const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8;
  1170. basisu::vector<color_rgba> cluster_pixels(total_pixels);
  1171. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1172. {
  1173. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1174. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1175. const bool flipped = true;
  1176. const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  1177. for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
  1178. {
  1179. const color_rgba& c = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
  1180. cluster_pixels[cluster_indices_iter * 8 + pixel_index] = c;
  1181. }
  1182. }
  1183. endpoint_cluster_etc_params new_subblock_params;
  1184. {
  1185. etc1_optimizer optimizer;
  1186. etc1_solution_coordinates solutions[2];
  1187. etc1_optimizer::params cluster_optimizer_params;
  1188. cluster_optimizer_params.m_num_src_pixels = total_pixels;
  1189. cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
  1190. cluster_optimizer_params.m_use_color4 = false;
  1191. cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
  1192. if (m_params.m_compression_level <= 1)
  1193. cluster_optimizer_params.m_quality = cETCQualityMedium;
  1194. else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  1195. cluster_optimizer_params.m_quality = cETCQualityUber;
  1196. etc1_optimizer::results cluster_optimizer_results;
  1197. basisu::vector<uint8_t> cluster_selectors(total_pixels);
  1198. cluster_optimizer_results.m_n = total_pixels;
  1199. cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
  1200. optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
  1201. if (!optimizer.compute())
  1202. BASISU_FRONTEND_VERIFY(false);
  1203. new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
  1204. new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
  1205. new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error;
  1206. }
  1207. endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[cluster_index];
  1208. bool use_new_subblock_params = false;
  1209. if ((!step) || (!prev_etc_params.m_valid))
  1210. use_new_subblock_params = true;
  1211. else
  1212. {
  1213. assert(prev_etc_params.m_valid);
  1214. uint64_t total_prev_err = 0;
  1215. {
  1216. color_rgba block_colors[4];
  1217. etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false);
  1218. uint64_t total_err = 0;
  1219. for (uint32_t i = 0; i < total_pixels; i++)
  1220. {
  1221. const color_rgba& c = cluster_pixels[i];
  1222. uint64_t best_err = UINT64_MAX;
  1223. //uint32_t best_index = 0;
  1224. for (uint32_t s = 0; s < 4; s++)
  1225. {
  1226. uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
  1227. if (err < best_err)
  1228. {
  1229. best_err = err;
  1230. //best_index = s;
  1231. }
  1232. }
  1233. total_err += best_err;
  1234. }
  1235. total_prev_err += total_err;
  1236. }
  1237. // See if we should update this cluster's endpoints (if the error has actually fallen)
  1238. if (total_prev_err > new_subblock_params.m_color_error[0])
  1239. {
  1240. use_new_subblock_params = true;
  1241. }
  1242. }
  1243. if (use_new_subblock_params)
  1244. {
  1245. new_subblock_params.m_valid = true;
  1246. prev_etc_params = new_subblock_params;
  1247. }
  1248. } // cluster_index
  1249. #ifndef __EMSCRIPTEN__
  1250. });
  1251. #endif
  1252. } // cluster_index_iter
  1253. #ifndef __EMSCRIPTEN__
  1254. m_params.m_pJob_pool->wait_for_all();
  1255. #endif
  1256. }
  1257. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1258. }
  1259. bool basisu_frontend::check_etc1s_constraints() const
  1260. {
  1261. basisu::vector<vec2U> block_clusters(m_total_blocks);
  1262. for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
  1263. {
  1264. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  1265. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1266. {
  1267. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1268. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1269. block_clusters[block_index][subblock_index] = cluster_index;
  1270. } // cluster_indices_iter
  1271. }
  1272. for (uint32_t i = 0; i < m_total_blocks; i++)
  1273. {
  1274. if (block_clusters[i][0] != block_clusters[i][1])
  1275. return false;
  1276. }
  1277. return true;
  1278. }
  1279. // For each block, determine which ETC1S endpoint cluster can encode that block with lowest error.
  1280. // This reassigns blocks to different endpoint clusters.
  1281. uint32_t basisu_frontend::refine_endpoint_clusterization()
  1282. {
  1283. debug_printf("refine_endpoint_clusterization\n");
  1284. if (m_use_hierarchical_endpoint_codebooks)
  1285. compute_endpoint_clusters_within_each_parent_cluster();
  1286. // Note: It's possible that an endpoint cluster may live in more than one parent cluster after the first refinement step.
  1287. basisu::vector<vec2U> block_clusters(m_total_blocks);
  1288. for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
  1289. {
  1290. const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
  1291. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1292. {
  1293. const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
  1294. const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
  1295. block_clusters[block_index][subblock_index] = cluster_index;
  1296. } // cluster_indices_iter
  1297. }
  1298. //----------------------------------------------------------
  1299. // Create a new endpoint clusterization
  1300. interval_timer tm;
  1301. tm.start();
  1302. uint_vec best_cluster_indices(m_total_blocks);
  1303. bool use_cpu = true;
  1304. // TODO: Support non-hierarchical endpoint codebooks here
  1305. if (m_params.m_pOpenCL_context && m_use_hierarchical_endpoint_codebooks)
  1306. {
  1307. // For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
  1308. // We also prepare an array of block info structs that point into this new parent endpoint cluster array.
  1309. const uint32_t total_parent_clusters = m_endpoint_clusters_within_each_parent_cluster.size();
  1310. basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);
  1311. // the size of each parent cluster, in total clusters
  1312. uint_vec parent_cluster_sizes(total_parent_clusters);
  1313. for (uint32_t i = 0; i < total_parent_clusters; i++)
  1314. parent_cluster_sizes[i] = m_endpoint_clusters_within_each_parent_cluster[i].size();
  1315. uint_vec first_parent_cluster_ofs(total_parent_clusters);
  1316. uint32_t cur_ofs = 0;
  1317. for (uint32_t i = 0; i < total_parent_clusters; i++)
  1318. {
  1319. first_parent_cluster_ofs[i] = cur_ofs;
  1320. cur_ofs += parent_cluster_sizes[i];
  1321. }
  1322. // Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step.
  1323. BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size());
  1324. const uint32_t total_actual_endpoint_clusters = cur_ofs;
  1325. basisu::vector<cl_endpoint_cluster_struct> cl_endpoint_cluster_structs(total_actual_endpoint_clusters);
  1326. for (uint32_t i = 0; i < total_parent_clusters; i++)
  1327. {
  1328. const uint32_t dst_ofs = first_parent_cluster_ofs[i];
  1329. const uint32_t parent_cluster_size = parent_cluster_sizes[i];
  1330. assert(m_endpoint_clusters_within_each_parent_cluster[i].size() == parent_cluster_size);
  1331. for (uint32_t j = 0; j < parent_cluster_size; j++)
  1332. {
  1333. const uint32_t endpoint_cluster_index = m_endpoint_clusters_within_each_parent_cluster[i][j];
  1334. color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_unscaled[0]);
  1335. uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[endpoint_cluster_index].m_inten_table[0];
  1336. cl_endpoint_cluster_structs[dst_ofs + j].m_unscaled_color = cluster_etc_base_color;
  1337. cl_endpoint_cluster_structs[dst_ofs + j].m_etc_inten = (uint8_t)cluster_etc_inten;
  1338. cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index;
  1339. }
  1340. }
  1341. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1342. {
  1343. const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index];
  1344. cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]);
  1345. cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]);
  1346. const uint32_t block_cluster_index = block_clusters[block_index][0];
  1347. cl_block_info_structs[block_index].m_cur_cluster_index = (uint16_t)block_cluster_index;
  1348. cl_block_info_structs[block_index].m_cur_cluster_etc_inten = (uint8_t)m_endpoint_cluster_etc_params[block_cluster_index].m_inten_table[0];
  1349. }
  1350. uint_vec block_cluster_indices(m_total_blocks);
  1351. for (uint32_t i = 0; i < m_total_blocks; i++)
  1352. block_cluster_indices[i] = block_clusters[i][0];
  1353. uint_vec sorted_block_indices(m_total_blocks);
  1354. indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data());
  1355. bool status = opencl_refine_endpoint_clusterization(
  1356. m_params.m_pOpenCL_context,
  1357. cl_block_info_structs.data(),
  1358. total_actual_endpoint_clusters,
  1359. cl_endpoint_cluster_structs.data(),
  1360. sorted_block_indices.data(),
  1361. best_cluster_indices.data(),
  1362. m_params.m_perceptual);
  1363. if (status)
  1364. {
  1365. use_cpu = false;
  1366. }
  1367. else
  1368. {
  1369. error_printf("basisu_frontend::refine_endpoint_clusterization: opencl_refine_endpoint_clusterization() failed! Using CPU.\n");
  1370. m_params.m_pOpenCL_context = nullptr;
  1371. m_opencl_failed = true;
  1372. }
  1373. }
  1374. if (use_cpu)
  1375. {
  1376. const uint32_t N = 1024;
  1377. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  1378. {
  1379. const uint32_t first_index = block_index_iter;
  1380. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  1381. #ifndef __EMSCRIPTEN__
  1382. m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {
  1383. #endif
  1384. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  1385. {
  1386. const uint32_t cluster_index = block_clusters[block_index][0];
  1387. BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]);
  1388. const color_rgba* pSubblock_pixels = get_source_pixel_block(block_index).get_ptr();
  1389. const uint32_t num_subblock_pixels = 16;
  1390. uint64_t best_cluster_err = INT64_MAX;
  1391. uint32_t best_cluster_index = 0;
  1392. const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0;
  1393. const uint_vec* pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr;
  1394. const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size();
  1395. for (uint32_t i = 0; i < total_clusters; i++)
  1396. {
  1397. const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? (*pCluster_indices)[i] : i;
  1398. color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]);
  1399. uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0];
  1400. uint64_t total_err = 0;
  1401. const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0];
  1402. const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0];
  1403. color_rgba subblock_colors[4];
  1404. // Can't assign it here - may result in too much error when selector quant occurs
  1405. if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0])
  1406. {
  1407. total_err = INT64_MAX;
  1408. goto skip_cluster;
  1409. }
  1410. etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten);
  1411. #if 0
  1412. for (uint32_t p = 0; p < num_subblock_pixels; p++)
  1413. {
  1414. uint64_t best_err = UINT64_MAX;
  1415. for (uint32_t r = low_selector; r <= high_selector; r++)
  1416. {
  1417. uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false);
  1418. best_err = minimum(best_err, err);
  1419. if (!best_err)
  1420. break;
  1421. }
  1422. total_err += best_err;
  1423. if (total_err > best_cluster_err)
  1424. break;
  1425. } // p
  1426. #else
  1427. if (m_params.m_perceptual)
  1428. {
  1429. if (!g_cpu_supports_sse41)
  1430. {
  1431. for (uint32_t p = 0; p < num_subblock_pixels; p++)
  1432. {
  1433. uint64_t best_err = UINT64_MAX;
  1434. for (uint32_t r = low_selector; r <= high_selector; r++)
  1435. {
  1436. uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false);
  1437. best_err = minimum(best_err, err);
  1438. if (!best_err)
  1439. break;
  1440. }
  1441. total_err += best_err;
  1442. if (total_err > best_cluster_err)
  1443. break;
  1444. } // p
  1445. }
  1446. else
  1447. {
  1448. #if BASISU_SUPPORT_SSE
  1449. find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
  1450. #endif
  1451. }
  1452. }
  1453. else
  1454. {
  1455. if (!g_cpu_supports_sse41)
  1456. {
  1457. for (uint32_t p = 0; p < num_subblock_pixels; p++)
  1458. {
  1459. uint64_t best_err = UINT64_MAX;
  1460. for (uint32_t r = low_selector; r <= high_selector; r++)
  1461. {
  1462. uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false);
  1463. best_err = minimum(best_err, err);
  1464. if (!best_err)
  1465. break;
  1466. }
  1467. total_err += best_err;
  1468. if (total_err > best_cluster_err)
  1469. break;
  1470. } // p
  1471. }
  1472. else
  1473. {
  1474. #if BASISU_SUPPORT_SSE
  1475. find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
  1476. #endif
  1477. }
  1478. }
  1479. #endif
  1480. skip_cluster:
  1481. if ((total_err < best_cluster_err) ||
  1482. ((cluster_iter == cluster_index) && (total_err == best_cluster_err)))
  1483. {
  1484. best_cluster_err = total_err;
  1485. best_cluster_index = cluster_iter;
  1486. if (!best_cluster_err)
  1487. break;
  1488. }
  1489. } // j
  1490. best_cluster_indices[block_index] = best_cluster_index;
  1491. } // block_index
  1492. #ifndef __EMSCRIPTEN__
  1493. });
  1494. #endif
  1495. } // block_index_iter
  1496. #ifndef __EMSCRIPTEN__
  1497. m_params.m_pJob_pool->wait_for_all();
  1498. #endif
  1499. } // use_cpu
  1500. debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs());
  1501. basisu::vector<typename basisu::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size());
  1502. uint32_t total_subblocks_reassigned = 0;
  1503. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1504. {
  1505. const uint32_t training_vector_index = block_index * 2 + 0;
  1506. const uint32_t orig_cluster_index = block_clusters[block_index][0];
  1507. const uint32_t best_cluster_index = best_cluster_indices[block_index];
  1508. optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index);
  1509. optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1);
  1510. if (best_cluster_index != orig_cluster_index)
  1511. {
  1512. total_subblocks_reassigned++;
  1513. }
  1514. }
  1515. debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned);
  1516. m_endpoint_clusters = optimized_endpoint_clusters;
  1517. return total_subblocks_reassigned;
  1518. }
  1519. void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters()
  1520. {
  1521. debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n");
  1522. // Step 1: Sort endpoint clusters by the base colors/intens
  1523. uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size());
  1524. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  1525. sorted_endpoint_cluster_indices[i] = i;
  1526. indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]);
  1527. basisu::vector<basisu::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size());
  1528. basisu::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size());
  1529. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  1530. {
  1531. uint32_t j = sorted_endpoint_cluster_indices[i];
  1532. new_endpoint_clusters[i] = m_endpoint_clusters[j];
  1533. new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j];
  1534. }
  1535. new_endpoint_clusters.swap(m_endpoint_clusters);
  1536. new_subblock_etc_params.swap(m_endpoint_cluster_etc_params);
  1537. // Step 2: Eliminate redundant endpoint clusters, or empty endpoint clusters
  1538. new_endpoint_clusters.resize(0);
  1539. new_subblock_etc_params.resize(0);
  1540. for (int i = 0; i < (int)m_endpoint_clusters.size(); )
  1541. {
  1542. if (!m_endpoint_clusters[i].size())
  1543. {
  1544. i++;
  1545. continue;
  1546. }
  1547. int j;
  1548. for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++)
  1549. {
  1550. if (!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j]))
  1551. break;
  1552. }
  1553. new_endpoint_clusters.push_back(m_endpoint_clusters[i]);
  1554. new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]);
  1555. for (int k = i + 1; k < j; k++)
  1556. {
  1557. append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]);
  1558. }
  1559. i = j;
  1560. }
  1561. if (m_endpoint_clusters.size() != new_endpoint_clusters.size())
  1562. {
  1563. if (m_params.m_debug_stats)
  1564. debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size()));
  1565. m_endpoint_clusters.swap(new_endpoint_clusters);
  1566. m_endpoint_cluster_etc_params.swap(new_subblock_etc_params);
  1567. }
  1568. }
  1569. void basisu_frontend::create_initial_packed_texture()
  1570. {
  1571. debug_printf("create_initial_packed_texture\n");
  1572. interval_timer tm;
  1573. tm.start();
  1574. bool use_cpu = true;
  1575. if ((m_params.m_pOpenCL_context) && (opencl_is_available()))
  1576. {
  1577. basisu::vector<color_rgba> block_etc5_color_intens(m_total_blocks);
  1578. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1579. {
  1580. uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
  1581. const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0];
  1582. uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0];
  1583. block_etc5_color_intens[block_index].set(color_unscaled.r, color_unscaled.g, color_unscaled.b, inten);
  1584. }
  1585. bool status = opencl_determine_selectors(m_params.m_pOpenCL_context, block_etc5_color_intens.data(),
  1586. m_encoded_blocks.data(),
  1587. m_params.m_perceptual);
  1588. if (!status)
  1589. {
  1590. error_printf("basisu_frontend::create_initial_packed_texture: opencl_determine_selectors() failed! Using CPU.\n");
  1591. m_params.m_pOpenCL_context = nullptr;
  1592. m_opencl_failed = true;
  1593. }
  1594. else
  1595. {
  1596. use_cpu = false;
  1597. }
  1598. }
  1599. if (use_cpu)
  1600. {
  1601. const uint32_t N = 4096;
  1602. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  1603. {
  1604. const uint32_t first_index = block_index_iter;
  1605. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  1606. #ifndef __EMSCRIPTEN__
  1607. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  1608. #endif
  1609. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  1610. {
  1611. uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
  1612. uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1];
  1613. BASISU_FRONTEND_VERIFY(cluster0 == cluster1);
  1614. const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
  1615. etc_block& blk = m_encoded_blocks[block_index];
  1616. color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] };
  1617. uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] };
  1618. blk.set_block_color5(unscaled[0], unscaled[1]);
  1619. blk.set_flip_bit(true);
  1620. blk.set_inten_table(0, inten[0]);
  1621. blk.set_inten_table(1, inten[1]);
  1622. blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
  1623. } // block_index
  1624. #ifndef __EMSCRIPTEN__
  1625. });
  1626. #endif
  1627. } // block_index_iter
  1628. #ifndef __EMSCRIPTEN__
  1629. m_params.m_pJob_pool->wait_for_all();
  1630. #endif
  1631. } // use_cpu
  1632. m_orig_encoded_blocks = m_encoded_blocks;
  1633. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1634. }
  1635. void basisu_frontend::compute_selector_clusters_within_each_parent_cluster()
  1636. {
  1637. uint_vec block_selector_cluster_indices(m_total_blocks);
  1638. for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_block_indices.size()); cluster_index++)
  1639. {
  1640. const basisu::vector<uint32_t>& cluster_indices = m_selector_cluster_block_indices[cluster_index];
  1641. for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
  1642. {
  1643. const uint32_t block_index = cluster_indices[cluster_indices_iter];
  1644. block_selector_cluster_indices[block_index] = cluster_index;
  1645. } // cluster_indices_iter
  1646. } // cluster_index
  1647. m_selector_clusters_within_each_parent_cluster.resize(0);
  1648. m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size());
  1649. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1650. {
  1651. const uint32_t cluster_index = block_selector_cluster_indices[block_index];
  1652. const uint32_t parent_cluster_index = m_block_parent_selector_cluster[block_index];
  1653. m_selector_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
  1654. }
  1655. for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
  1656. {
  1657. uint_vec &cluster_indices = m_selector_clusters_within_each_parent_cluster[i];
  1658. BASISU_FRONTEND_VERIFY(cluster_indices.size());
  1659. vector_sort(cluster_indices);
  1660. auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
  1661. cluster_indices.erase(last, cluster_indices.end());
  1662. }
  1663. }
  1664. void basisu_frontend::generate_selector_clusters()
  1665. {
  1666. debug_printf("generate_selector_clusters\n");
  1667. typedef tree_vector_quant<vec16F> vec16F_clusterizer;
  1668. vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks);
  1669. const uint32_t N = 4096;
  1670. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  1671. {
  1672. const uint32_t first_index = block_index_iter;
  1673. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  1674. #ifndef __EMSCRIPTEN__
  1675. m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
  1676. #endif
  1677. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  1678. {
  1679. const etc_block &blk = m_encoded_blocks[block_index];
  1680. vec16F v;
  1681. for (uint32_t y = 0; y < 4; y++)
  1682. for (uint32_t x = 0; x < 4; x++)
  1683. v[x + y * 4] = static_cast<float>(blk.get_selector(x, y));
  1684. const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1;
  1685. color_rgba block_colors[2];
  1686. blk.get_block_low_high_colors(block_colors, subblock_index);
  1687. const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[1], false);
  1688. const uint32_t cColorDistToWeight = 300;
  1689. const uint32_t cMaxWeight = 4096;
  1690. uint32_t weight = clamp<uint32_t>(dist / cColorDistToWeight, 1, cMaxWeight);
  1691. training_vecs[block_index].first = v;
  1692. training_vecs[block_index].second = weight;
  1693. } // block_index
  1694. #ifndef __EMSCRIPTEN__
  1695. } );
  1696. #endif
  1697. } // block_index_iter
  1698. #ifndef __EMSCRIPTEN__
  1699. m_params.m_pJob_pool->wait_for_all();
  1700. #endif
  1701. vec16F_clusterizer selector_clusterizer;
  1702. for (uint32_t i = 0; i < m_total_blocks; i++)
  1703. selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second);
  1704. const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT;
  1705. const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0;
  1706. debug_printf("Using selector parent codebook size %u\n", parent_codebook_size);
  1707. uint32_t max_threads = 0;
  1708. max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
  1709. if (m_params.m_pJob_pool)
  1710. max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);
  1711. bool status = generate_hierarchical_codebook_threaded(selector_clusterizer,
  1712. m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? parent_codebook_size : 0,
  1713. m_selector_cluster_block_indices,
  1714. m_selector_parent_cluster_block_indices,
  1715. max_threads, m_params.m_pJob_pool, false);
  1716. BASISU_FRONTEND_VERIFY(status);
  1717. if (m_use_hierarchical_selector_codebooks)
  1718. {
  1719. if (!m_selector_parent_cluster_block_indices.size())
  1720. {
  1721. m_selector_parent_cluster_block_indices.resize(0);
  1722. m_selector_parent_cluster_block_indices.resize(1);
  1723. for (uint32_t i = 0; i < m_total_blocks; i++)
  1724. m_selector_parent_cluster_block_indices[0].push_back(i);
  1725. }
  1726. BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX);
  1727. BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX);
  1728. m_block_parent_selector_cluster.resize(0);
  1729. m_block_parent_selector_cluster.resize(m_total_blocks);
  1730. vector_set_all(m_block_parent_selector_cluster, 0xFF);
  1731. for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++)
  1732. {
  1733. const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index];
  1734. for (uint32_t j = 0; j < cluster.size(); j++)
  1735. m_block_parent_selector_cluster[cluster[j]] = static_cast<uint8_t>(parent_cluster_index);
  1736. }
  1737. for (uint32_t i = 0; i < m_total_blocks; i++)
  1738. {
  1739. BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[i] != 0xFF);
  1740. }
  1741. // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
  1742. for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++)
  1743. {
  1744. const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index];
  1745. uint32_t parent_cluster_index = 0;
  1746. for (uint32_t j = 0; j < cluster.size(); j++)
  1747. {
  1748. const uint32_t block_index = cluster[j];
  1749. if (!j)
  1750. {
  1751. parent_cluster_index = m_block_parent_selector_cluster[block_index];
  1752. }
  1753. else
  1754. {
  1755. BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[block_index] == parent_cluster_index);
  1756. }
  1757. }
  1758. }
  1759. }
  1760. debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size());
  1761. }
  1762. void basisu_frontend::create_optimized_selector_codebook(uint32_t iter)
  1763. {
  1764. debug_printf("create_optimized_selector_codebook\n");
  1765. interval_timer tm;
  1766. tm.start();
  1767. const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
  1768. debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size());
  1769. m_optimized_cluster_selectors.resize(total_selector_clusters);
  1770. // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector.
  1771. const uint32_t N = 256;
  1772. for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N)
  1773. {
  1774. const uint32_t first_index = cluster_index_iter;
  1775. const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);
  1776. #ifndef __EMSCRIPTEN__
  1777. m_params.m_pJob_pool->add_job([this, first_index, last_index] {
  1778. #endif
  1779. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  1780. {
  1781. const basisu::vector<uint32_t>& cluster_block_indices = m_selector_cluster_block_indices[cluster_index];
  1782. if (!cluster_block_indices.size())
  1783. continue;
  1784. uint64_t overall_best_err = 0;
  1785. uint64_t total_err[4][4][4];
  1786. clear_obj(total_err);
  1787. for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++)
  1788. {
  1789. const uint32_t block_index = cluster_block_indices[cluster_block_index];
  1790. const etc_block& blk = m_encoded_blocks[block_index];
  1791. color_rgba blk_colors[4];
  1792. blk.get_block_colors(blk_colors, 0);
  1793. for (uint32_t y = 0; y < 4; y++)
  1794. {
  1795. for (uint32_t x = 0; x < 4; x++)
  1796. {
  1797. const color_rgba& orig_color = get_source_pixel_block(block_index)(x, y);
  1798. if (m_params.m_perceptual)
  1799. {
  1800. for (uint32_t s = 0; s < 4; s++)
  1801. total_err[y][x][s] += color_distance(true, blk_colors[s], orig_color, false);
  1802. }
  1803. else
  1804. {
  1805. for (uint32_t s = 0; s < 4; s++)
  1806. total_err[y][x][s] += color_distance(false, blk_colors[s], orig_color, false);
  1807. }
  1808. } // x
  1809. } // y
  1810. } // cluster_block_index
  1811. for (uint32_t y = 0; y < 4; y++)
  1812. {
  1813. for (uint32_t x = 0; x < 4; x++)
  1814. {
  1815. uint64_t best_err = total_err[y][x][0];
  1816. uint8_t best_sel = 0;
  1817. for (uint32_t s = 1; s < 4; s++)
  1818. {
  1819. if (total_err[y][x][s] < best_err)
  1820. {
  1821. best_err = total_err[y][x][s];
  1822. best_sel = (uint8_t)s;
  1823. }
  1824. }
  1825. m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_sel);
  1826. overall_best_err += best_err;
  1827. } // x
  1828. } // y
  1829. } // cluster_index
  1830. #ifndef __EMSCRIPTEN__
  1831. });
  1832. #endif
  1833. } // cluster_index_iter
  1834. #ifndef __EMSCRIPTEN__
  1835. m_params.m_pJob_pool->wait_for_all();
  1836. #endif
  1837. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1838. if (m_params.m_debug_images)
  1839. {
  1840. uint32_t max_selector_cluster_size = 0;
  1841. for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
  1842. max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size());
  1843. if ((max_selector_cluster_size * 5) < 32768)
  1844. {
  1845. const uint32_t x_spacer_len = 16;
  1846. image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5);
  1847. for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
  1848. {
  1849. const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index];
  1850. for (uint32_t y = 0; y < 4; y++)
  1851. for (uint32_t x = 0; x < 4; x++)
  1852. selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3));
  1853. for (uint32_t i = 0; i < cluster_block_indices.size(); i++)
  1854. {
  1855. uint32_t block_index = cluster_block_indices[i];
  1856. const etc_block &blk = m_orig_encoded_blocks[block_index];
  1857. for (uint32_t y = 0; y < 4; y++)
  1858. for (uint32_t x = 0; x < 4; x++)
  1859. selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3));
  1860. }
  1861. }
  1862. char buf[256];
  1863. snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter);
  1864. save_png(buf, selector_cluster_vis);
  1865. }
  1866. }
  1867. }
  1868. // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints.
  1869. // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end.
  1870. void basisu_frontend::find_optimal_selector_clusters_for_each_block()
  1871. {
  1872. debug_printf("find_optimal_selector_clusters_for_each_block\n");
  1873. interval_timer tm;
  1874. tm.start();
  1875. if (m_params.m_validate)
  1876. {
  1877. // Sanity checks
  1878. BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size());
  1879. for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
  1880. {
  1881. for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
  1882. {
  1883. BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size());
  1884. }
  1885. }
  1886. }
  1887. m_block_selector_cluster_index.resize(m_total_blocks);
  1888. if (m_params.m_compression_level == 0)
  1889. {
  1890. // Just leave the blocks in their original selector clusters.
  1891. for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
  1892. {
  1893. for (uint32_t j = 0; j < m_selector_cluster_block_indices[selector_cluster_index].size(); j++)
  1894. {
  1895. const uint32_t block_index = m_selector_cluster_block_indices[selector_cluster_index][j];
  1896. m_block_selector_cluster_index[block_index] = selector_cluster_index;
  1897. etc_block& blk = m_encoded_blocks[block_index];
  1898. blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_cluster_index].get_raw_selector_bits());
  1899. }
  1900. }
  1901. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  1902. return;
  1903. }
  1904. bool use_cpu = true;
  1905. if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
  1906. {
  1907. const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size();
  1908. basisu::vector<fosc_selector_struct> selector_structs;
  1909. selector_structs.reserve(m_optimized_cluster_selectors.size());
  1910. uint_vec parent_selector_cluster_offsets(num_parent_clusters);
  1911. uint_vec selector_cluster_indices;
  1912. selector_cluster_indices.reserve(m_optimized_cluster_selectors.size());
  1913. uint32_t cur_ofs = 0;
  1914. for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++)
  1915. {
  1916. parent_selector_cluster_offsets[parent_index] = cur_ofs;
  1917. for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++)
  1918. {
  1919. const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j];
  1920. uint32_t sel_bits = 0;
  1921. for (uint32_t p = 0; p < 16; p++)
  1922. sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2));
  1923. selector_structs.enlarge(1)->m_packed_selectors = sel_bits;
  1924. selector_cluster_indices.push_back(selector_cluster_index);
  1925. }
  1926. cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size();
  1927. }
  1928. const uint32_t total_input_selectors = cur_ofs;
  1929. basisu::vector<fosc_block_struct> block_structs(m_total_blocks);
  1930. for (uint32_t i = 0; i < m_total_blocks; i++)
  1931. {
  1932. const uint32_t parent_selector_cluster = m_block_parent_selector_cluster[i];
  1933. const etc_block& blk = m_encoded_blocks[i];
  1934. blk.unpack_color5(block_structs[i].m_etc_color5_inten, blk.get_base5_color(), false);
  1935. block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
  1936. block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
  1937. block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size();
  1938. }
  1939. uint_vec output_selector_cluster_indices(m_total_blocks);
  1940. bool status = opencl_find_optimal_selector_clusters_for_each_block(
  1941. m_params.m_pOpenCL_context,
  1942. block_structs.data(),
  1943. total_input_selectors,
  1944. selector_structs.data(),
  1945. selector_cluster_indices.data(),
  1946. output_selector_cluster_indices.data(),
  1947. m_params.m_perceptual);
  1948. if (!status)
  1949. {
  1950. error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! Using CPU.\n");
  1951. m_params.m_pOpenCL_context = nullptr;
  1952. m_opencl_failed = true;
  1953. }
  1954. else
  1955. {
  1956. for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
  1957. {
  1958. m_selector_cluster_block_indices[i].resize(0);
  1959. m_selector_cluster_block_indices[i].reserve(128);
  1960. }
  1961. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  1962. {
  1963. etc_block& blk = m_encoded_blocks[block_index];
  1964. uint32_t best_cluster_index = output_selector_cluster_indices[block_index];
  1965. blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
  1966. m_block_selector_cluster_index[block_index] = best_cluster_index;
  1967. vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
  1968. m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
  1969. }
  1970. use_cpu = false;
  1971. }
  1972. }
  1973. if (use_cpu)
  1974. {
  1975. basisu::vector<uint8_t> unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size());
  1976. for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++)
  1977. {
  1978. for (uint32_t y = 0; y < 4; y++)
  1979. {
  1980. for (uint32_t x = 0; x < 4; x++)
  1981. {
  1982. unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y);
  1983. }
  1984. }
  1985. }
  1986. const uint32_t N = 2048;
  1987. for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
  1988. {
  1989. const uint32_t first_index = block_index_iter;
  1990. const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
  1991. #ifndef __EMSCRIPTEN__
  1992. m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {
  1993. #endif
  1994. int prev_best_cluster_index = 0;
  1995. for (uint32_t block_index = first_index; block_index < last_index; block_index++)
  1996. {
  1997. const pixel_block& block = get_source_pixel_block(block_index);
  1998. etc_block& blk = m_encoded_blocks[block_index];
  1999. if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1)))
  2000. {
  2001. blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits());
  2002. m_block_selector_cluster_index[block_index] = prev_best_cluster_index;
  2003. continue;
  2004. }
  2005. const color_rgba* pBlock_pixels = block.get_ptr();
  2006. color_rgba trial_block_colors[4];
  2007. blk.get_block_colors_etc1s(trial_block_colors);
  2008. // precompute errors for the i-th block pixel and selector sel: [sel][i]
  2009. uint32_t trial_errors[4][16];
  2010. if (m_params.m_perceptual)
  2011. {
  2012. for (uint32_t sel = 0; sel < 4; ++sel)
  2013. for (uint32_t i = 0; i < 16; ++i)
  2014. trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false);
  2015. }
  2016. else
  2017. {
  2018. for (uint32_t sel = 0; sel < 4; ++sel)
  2019. for (uint32_t i = 0; i < 16; ++i)
  2020. trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false);
  2021. }
  2022. // Compute the minimum possible errors (given any selectors) for pixels 0-15
  2023. uint64_t min_possible_error_0_15 = 0;
  2024. for (uint32_t i = 0; i < 16; i++)
  2025. min_possible_error_0_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
  2026. // Compute the minimum possible errors (given any selectors) for pixels 4-15
  2027. uint64_t min_possible_error_4_15 = 0;
  2028. for (uint32_t i = 4; i < 16; i++)
  2029. min_possible_error_4_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
  2030. // Compute the minimum possible errors (given any selectors) for pixels 8-15
  2031. uint64_t min_possible_error_8_15 = 0;
  2032. for (uint32_t i = 8; i < 16; i++)
  2033. min_possible_error_8_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
  2034. // Compute the minimum possible errors (given any selectors) for pixels 12-15
  2035. uint64_t min_possible_error_12_15 = 0;
  2036. for (uint32_t i = 12; i < 16; i++)
  2037. min_possible_error_12_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
  2038. uint64_t best_cluster_err = INT64_MAX;
  2039. uint32_t best_cluster_index = 0;
  2040. const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0;
  2041. const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr;
  2042. const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size();
  2043. #if 0
  2044. for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
  2045. {
  2046. const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
  2047. const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index];
  2048. uint64_t trial_err = 0;
  2049. for (int y = 0; y < 4; y++)
  2050. {
  2051. for (int x = 0; x < 4; x++)
  2052. {
  2053. const uint32_t sel = cluster_blk.get_selector(x, y);
  2054. trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false);
  2055. if (trial_err > best_cluster_err)
  2056. goto early_out;
  2057. }
  2058. }
  2059. if (trial_err < best_cluster_err)
  2060. {
  2061. best_cluster_err = trial_err;
  2062. best_cluster_index = cluster_index;
  2063. if (!best_cluster_err)
  2064. break;
  2065. }
  2066. early_out:
  2067. ;
  2068. }
  2069. #else
  2070. for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
  2071. {
  2072. const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
  2073. const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16];
  2074. uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3];
  2075. if ((trial_err + min_possible_error_4_15) >= best_cluster_err)
  2076. continue;
  2077. trial_err += (uint64_t)trial_errors[pSels[4]][4] + trial_errors[pSels[5]][5] + trial_errors[pSels[6]][6] + trial_errors[pSels[7]][7];
  2078. if ((trial_err + min_possible_error_8_15) >= best_cluster_err)
  2079. continue;
  2080. trial_err += (uint64_t)trial_errors[pSels[8]][8] + trial_errors[pSels[9]][9] + trial_errors[pSels[10]][10] + trial_errors[pSels[11]][11];
  2081. if ((trial_err + min_possible_error_12_15) >= best_cluster_err)
  2082. continue;
  2083. trial_err += (uint64_t)trial_errors[pSels[12]][12] + trial_errors[pSels[13]][13] + trial_errors[pSels[14]][14] + trial_errors[pSels[15]][15];
  2084. if (trial_err < best_cluster_err)
  2085. {
  2086. best_cluster_err = trial_err;
  2087. best_cluster_index = cluster_index;
  2088. if (best_cluster_err == min_possible_error_0_15)
  2089. break;
  2090. }
  2091. } // cluster_iter
  2092. #endif
  2093. blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
  2094. m_block_selector_cluster_index[block_index] = best_cluster_index;
  2095. prev_best_cluster_index = best_cluster_index;
  2096. } // block_index
  2097. #ifndef __EMSCRIPTEN__
  2098. } );
  2099. #endif
  2100. } // block_index_iter
  2101. #ifndef __EMSCRIPTEN__
  2102. m_params.m_pJob_pool->wait_for_all();
  2103. #endif
  2104. for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
  2105. {
  2106. m_selector_cluster_block_indices[i].resize(0);
  2107. m_selector_cluster_block_indices[i].reserve(128);
  2108. }
  2109. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  2110. {
  2111. const uint32_t best_cluster_index = m_block_selector_cluster_index[block_index];
  2112. vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
  2113. m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
  2114. }
  2115. } // if (use_cpu)
  2116. debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
  2117. }
  2118. // TODO: Remove old ETC1 specific stuff, and thread this.
  2119. uint32_t basisu_frontend::refine_block_endpoints_given_selectors()
  2120. {
  2121. debug_printf("refine_block_endpoints_given_selectors\n");
  2122. for (int block_index = 0; block_index < static_cast<int>(m_total_blocks); block_index++)
  2123. {
  2124. //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y);
  2125. vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index];
  2126. m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2);
  2127. m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1);
  2128. }
  2129. uint32_t total_subblocks_refined = 0;
  2130. uint32_t total_subblocks_examined = 0;
  2131. for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++)
  2132. {
  2133. endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index];
  2134. const uint_vec &subblocks = subblock_params.m_subblocks;
  2135. //uint32_t total_pixels = subblock.m_subblocks.size() * 8;
  2136. basisu::vector<color_rgba> subblock_colors[2]; // [use_individual_mode]
  2137. uint8_vec subblock_selectors[2];
  2138. uint64_t cur_subblock_err[2] = { 0, 0 };
  2139. for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
  2140. {
  2141. uint32_t training_vector_index = subblocks[subblock_iter];
  2142. uint32_t block_index = training_vector_index >> 1;
  2143. uint32_t subblock_index = training_vector_index & 1;
  2144. const bool is_flipped = true;
  2145. const etc_block &blk = m_encoded_blocks[block_index];
  2146. const bool use_individual_mode = !blk.get_diff_bit();
  2147. const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();
  2148. color_rgba unpacked_block_pixels[16];
  2149. unpack_etc1(blk, unpacked_block_pixels);
  2150. for (uint32_t i = 0; i < 8; i++)
  2151. {
  2152. const uint32_t pixel_index = g_etc1_pixel_indices[is_flipped][subblock_index][i];
  2153. const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i];
  2154. subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]);
  2155. cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false);
  2156. subblock_selectors[use_individual_mode].push_back(static_cast<uint8_t>(blk.get_selector(coords.m_x, coords.m_y)));
  2157. }
  2158. } // subblock_iter
  2159. etc1_optimizer::results cluster_optimizer_results[2];
  2160. bool results_valid[2] = { false, false };
  2161. clear_obj(cluster_optimizer_results);
  2162. basisu::vector<uint8_t> cluster_selectors[2];
  2163. for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
  2164. {
  2165. const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size();
  2166. if (!total_pixels)
  2167. continue;
  2168. total_subblocks_examined += total_pixels / 8;
  2169. etc1_optimizer optimizer;
  2170. etc1_solution_coordinates solutions[2];
  2171. etc1_optimizer::params cluster_optimizer_params;
  2172. cluster_optimizer_params.m_num_src_pixels = total_pixels;
  2173. cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0];
  2174. cluster_optimizer_params.m_use_color4 = use_individual_mode != 0;
  2175. cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
  2176. cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0];
  2177. cluster_optimizer_params.m_quality = cETCQualityUber;
  2178. cluster_selectors[use_individual_mode].resize(total_pixels);
  2179. cluster_optimizer_results[use_individual_mode].m_n = total_pixels;
  2180. cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0];
  2181. optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]);
  2182. if (!optimizer.compute())
  2183. continue;
  2184. if (cluster_optimizer_results[use_individual_mode].m_error < cur_subblock_err[use_individual_mode])
  2185. results_valid[use_individual_mode] = true;
  2186. } // use_individual_mode
  2187. for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
  2188. {
  2189. if (!results_valid[use_individual_mode])
  2190. continue;
  2191. uint32_t num_passes = use_individual_mode ? 1 : 2;
  2192. bool all_passed5 = true;
  2193. for (uint32_t pass = 0; pass < num_passes; pass++)
  2194. {
  2195. for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
  2196. {
  2197. const uint32_t training_vector_index = subblocks[subblock_iter];
  2198. const uint32_t block_index = training_vector_index >> 1;
  2199. const uint32_t subblock_index = training_vector_index & 1;
  2200. //const bool is_flipped = true;
  2201. etc_block &blk = m_encoded_blocks[block_index];
  2202. if (!blk.get_diff_bit() != static_cast<bool>(use_individual_mode != 0))
  2203. continue;
  2204. if (use_individual_mode)
  2205. {
  2206. blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false));
  2207. blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table);
  2208. subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error;
  2209. subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table;
  2210. subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled;
  2211. total_subblocks_refined++;
  2212. }
  2213. else
  2214. {
  2215. const uint16_t base_color5 = blk.get_base5_color();
  2216. const uint16_t delta_color3 = blk.get_delta3_color();
  2217. uint32_t r[2], g[2], b[2];
  2218. etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false);
  2219. bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false);
  2220. assert(success);
  2221. BASISU_NOTE_UNUSED(success);
  2222. r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r;
  2223. g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g;
  2224. b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b;
  2225. color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) };
  2226. if (!etc_block::try_pack_color5_delta3(colors))
  2227. {
  2228. all_passed5 = false;
  2229. break;
  2230. }
  2231. if ((pass == 1) && (all_passed5))
  2232. {
  2233. blk.set_block_color5(colors[0], colors[1]);
  2234. blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table);
  2235. subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error;
  2236. subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table;
  2237. subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled;
  2238. total_subblocks_refined++;
  2239. }
  2240. }
  2241. } // subblock_iter
  2242. } // pass
  2243. } // use_individual_mode
  2244. } // endpoint_cluster_index
  2245. if (m_params.m_debug_stats)
  2246. debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined);
  2247. return total_subblocks_refined;
  2248. }
  2249. void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors)
  2250. {
  2251. debug_printf("dump_endpoint_clusterization_visualization\n");
  2252. uint32_t max_endpoint_cluster_size = 0;
  2253. basisu::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size());
  2254. basisu::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size());
  2255. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  2256. {
  2257. max_endpoint_cluster_size = maximum<uint32_t>(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size());
  2258. cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
  2259. }
  2260. if (!max_endpoint_cluster_size)
  2261. return;
  2262. for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
  2263. sorted_cluster_indices[i] = i;
  2264. //indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr());
  2265. image endpoint_cluster_vis(12 + minimum<uint32_t>(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3);
  2266. for (uint32_t unsorted_cluster_iter = 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++)
  2267. {
  2268. const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter];
  2269. etc_block blk;
  2270. blk.clear();
  2271. blk.set_flip_bit(false);
  2272. blk.set_diff_bit(true);
  2273. blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]);
  2274. blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false));
  2275. color_rgba blk_colors[4];
  2276. blk.get_block_colors(blk_colors, 0);
  2277. for (uint32_t i = 0; i < 4; i++)
  2278. endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]);
  2279. for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++)
  2280. {
  2281. uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter];
  2282. const uint32_t block_index = training_vector_index >> 1;
  2283. const uint32_t subblock_index = training_vector_index & 1;
  2284. const etc_block& blk2 = m_etc1_blocks_etc1s[block_index];
  2285. const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  2286. color_rgba subblock_pixels[8];
  2287. if (vis_endpoint_colors)
  2288. {
  2289. color_rgba colors[2];
  2290. blk2.get_block_low_high_colors(colors, subblock_index);
  2291. for (uint32_t i = 0; i < 8; i++)
  2292. subblock_pixels[i] = colors[subblock_index];
  2293. }
  2294. else
  2295. {
  2296. for (uint32_t i = 0; i < 8; i++)
  2297. subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]];
  2298. }
  2299. endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2);
  2300. }
  2301. }
  2302. save_png(pFilename, endpoint_cluster_vis);
  2303. debug_printf("Wrote debug visualization file %s\n", pFilename);
  2304. }
  2305. void basisu_frontend::finalize()
  2306. {
  2307. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  2308. {
  2309. for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++)
  2310. {
  2311. const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, subblock_index);
  2312. m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[0] = true;
  2313. }
  2314. }
  2315. }
  2316. // The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook.
  2317. // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up.
  2318. // This is basically a bottom up clusterization stage, where some leaves can be combined.
  2319. void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices)
  2320. {
  2321. debug_printf("reoptimize_remapped_endpoints\n");
  2322. basisu::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size());
  2323. for (uint32_t i = 0; i < new_block_endpoints.size(); i++)
  2324. new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i);
  2325. basisu::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size());
  2326. basisu::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size());
  2327. const uint32_t N = 256;
  2328. for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N)
  2329. {
  2330. const uint32_t first_index = cluster_index_iter;
  2331. const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);
  2332. #ifndef __EMSCRIPTEN__
  2333. m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {
  2334. #endif
  2335. for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
  2336. {
  2337. const basisu::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index];
  2338. if (!cluster_block_indices.size())
  2339. continue;
  2340. const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16;
  2341. basisu::vector<color_rgba> cluster_pixels(total_pixels);
  2342. uint8_vec force_selectors(total_pixels);
  2343. etc_block blk;
  2344. blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false));
  2345. blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false));
  2346. blk.set_flip_bit(true);
  2347. uint64_t cur_err = 0;
  2348. for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++)
  2349. {
  2350. const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter];
  2351. const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
  2352. memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba));
  2353. const uint32_t selector_cluster_index = pBlock_selector_indices ? (*pBlock_selector_indices)[block_index] : get_block_selector_cluster_index(block_index);
  2354. const etc_block &blk_selectors = get_selector_cluster_selector_bits(selector_cluster_index);
  2355. blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits());
  2356. cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual);
  2357. for (uint32_t y = 0; y < 4; y++)
  2358. for (uint32_t x = 0; x < 4; x++)
  2359. force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast<uint8_t>(blk_selectors.get_selector(x, y));
  2360. }
  2361. endpoint_cluster_etc_params new_endpoint_cluster_etc_params;
  2362. {
  2363. etc1_optimizer optimizer;
  2364. etc1_solution_coordinates solutions[2];
  2365. etc1_optimizer::params cluster_optimizer_params;
  2366. cluster_optimizer_params.m_num_src_pixels = total_pixels;
  2367. cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
  2368. cluster_optimizer_params.m_use_color4 = false;
  2369. cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
  2370. cluster_optimizer_params.m_pForce_selectors = &force_selectors[0];
  2371. if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
  2372. cluster_optimizer_params.m_quality = cETCQualityUber;
  2373. else
  2374. cluster_optimizer_params.m_quality = cETCQualitySlow;
  2375. etc1_optimizer::results cluster_optimizer_results;
  2376. basisu::vector<uint8_t> cluster_selectors(total_pixels);
  2377. cluster_optimizer_results.m_n = total_pixels;
  2378. cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
  2379. optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
  2380. if (!optimizer.compute())
  2381. BASISU_FRONTEND_VERIFY(false);
  2382. new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
  2383. new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
  2384. new_endpoint_cluster_etc_params.m_color_error[0] = cluster_optimizer_results.m_error;
  2385. new_endpoint_cluster_etc_params.m_color_used[0] = true;
  2386. new_endpoint_cluster_etc_params.m_valid = true;
  2387. }
  2388. if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err)
  2389. {
  2390. m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params;
  2391. cluster_improved[cluster_index] = true;
  2392. }
  2393. cluster_valid[cluster_index] = true;
  2394. } // cluster_index
  2395. #ifndef __EMSCRIPTEN__
  2396. } );
  2397. #endif
  2398. } // cluster_index_iter
  2399. #ifndef __EMSCRIPTEN__
  2400. m_params.m_pJob_pool->wait_for_all();
  2401. #endif
  2402. uint32_t total_unused_clusters = 0;
  2403. uint32_t total_improved_clusters = 0;
  2404. old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size());
  2405. vector_set_all(old_to_new_endpoint_cluster_indices, -1);
  2406. int total_new_endpoint_clusters = 0;
  2407. for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
  2408. {
  2409. if (!cluster_valid[old_cluster_index])
  2410. total_unused_clusters++;
  2411. else
  2412. old_to_new_endpoint_cluster_indices[old_cluster_index] = total_new_endpoint_clusters++;
  2413. if (cluster_improved[old_cluster_index])
  2414. total_improved_clusters++;
  2415. }
  2416. debug_printf("Total unused clusters: %u\n", total_unused_clusters);
  2417. debug_printf("Total improved_clusters: %u\n", total_improved_clusters);
  2418. debug_printf("Total endpoint clusters: %u\n", total_new_endpoint_clusters);
  2419. if (optimize_final_codebook)
  2420. {
  2421. cluster_subblock_etc_params_vec new_endpoint_cluster_etc_params(total_new_endpoint_clusters);
  2422. for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
  2423. {
  2424. if (old_to_new_endpoint_cluster_indices[old_cluster_index] >= 0)
  2425. new_endpoint_cluster_etc_params[old_to_new_endpoint_cluster_indices[old_cluster_index]] = m_endpoint_cluster_etc_params[old_cluster_index];
  2426. }
  2427. debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n");
  2428. basisu::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters);
  2429. for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++)
  2430. {
  2431. const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index];
  2432. const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index];
  2433. BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0);
  2434. BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_clusters.size());
  2435. new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 0);
  2436. new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 1);
  2437. BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_cluster_etc_params.size());
  2438. new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0);
  2439. new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1);
  2440. m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index;
  2441. m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index;
  2442. }
  2443. debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n");
  2444. m_endpoint_clusters = new_endpoint_clusters;
  2445. m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params;
  2446. eliminate_redundant_or_empty_endpoint_clusters();
  2447. debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 3\n");
  2448. for (uint32_t new_cluster_index = 0; new_cluster_index < m_endpoint_clusters.size(); new_cluster_index++)
  2449. {
  2450. for (uint32_t cluster_block_iter = 0; cluster_block_iter < m_endpoint_clusters[new_cluster_index].size(); cluster_block_iter++)
  2451. {
  2452. const uint32_t subblock_index = m_endpoint_clusters[new_cluster_index][cluster_block_iter];
  2453. const uint32_t block_index = subblock_index >> 1;
  2454. m_block_endpoint_clusters_indices[block_index][0] = new_cluster_index;
  2455. m_block_endpoint_clusters_indices[block_index][1] = new_cluster_index;
  2456. const uint32_t old_cluster_index = new_block_endpoints[block_index];
  2457. old_to_new_endpoint_cluster_indices[old_cluster_index] = new_cluster_index;
  2458. }
  2459. }
  2460. debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 4\n");
  2461. for (uint32_t block_index = 0; block_index < m_encoded_blocks.size(); block_index++)
  2462. {
  2463. const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
  2464. m_encoded_blocks[block_index].set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
  2465. m_encoded_blocks[block_index].set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
  2466. }
  2467. debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size());
  2468. }
  2469. //debug_printf("validate_output: %u\n", validate_output());
  2470. }
  2471. // Endpoint clusterization hierarchy integrity checker.
  2472. // Note this doesn't check for empty clusters.
  2473. bool basisu_frontend::validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const
  2474. {
  2475. if (!m_endpoint_parent_clusters.size())
  2476. return true;
  2477. int_vec subblock_parent_indices(m_total_blocks * 2);
  2478. subblock_parent_indices.set_all(-1);
  2479. int_vec subblock_cluster_indices(m_total_blocks * 2);
  2480. subblock_cluster_indices.set_all(-1);
  2481. for (uint32_t parent_index = 0; parent_index < m_endpoint_parent_clusters.size(); parent_index++)
  2482. {
  2483. for (uint32_t i = 0; i < m_endpoint_parent_clusters[parent_index].size(); i++)
  2484. {
  2485. uint32_t subblock_index = m_endpoint_parent_clusters[parent_index][i];
  2486. if (subblock_index >= m_total_blocks * 2)
  2487. return false;
  2488. // If the endpoint cluster lives in more than one parent node, that's wrong.
  2489. if (subblock_parent_indices[subblock_index] != -1)
  2490. return false;
  2491. subblock_parent_indices[subblock_index] = parent_index;
  2492. }
  2493. }
  2494. // Make sure all endpoint clusters are present in the parent cluster.
  2495. for (uint32_t i = 0; i < subblock_parent_indices.size(); i++)
  2496. {
  2497. if (subblock_parent_indices[i] == -1)
  2498. return false;
  2499. }
  2500. for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
  2501. {
  2502. int parent_index = 0;
  2503. for (uint32_t i = 0; i < m_endpoint_clusters[cluster_index].size(); i++)
  2504. {
  2505. uint32_t subblock_index = m_endpoint_clusters[cluster_index][i];
  2506. if (subblock_index >= m_total_blocks * 2)
  2507. return false;
  2508. if (subblock_cluster_indices[subblock_index] != -1)
  2509. return false;
  2510. subblock_cluster_indices[subblock_index] = cluster_index;
  2511. // There are transformations on the endpoint clusters that can break the strict tree requirement
  2512. if (ensure_clusters_have_same_parents)
  2513. {
  2514. // Make sure all the subblocks are in the same parent cluster
  2515. if (!i)
  2516. parent_index = subblock_parent_indices[subblock_index];
  2517. else if (subblock_parent_indices[subblock_index] != parent_index)
  2518. return false;
  2519. }
  2520. }
  2521. }
  2522. // Make sure all endpoint clusters are present in the parent cluster.
  2523. for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++)
  2524. {
  2525. if (subblock_cluster_indices[i] == -1)
  2526. return false;
  2527. }
  2528. return true;
  2529. }
  2530. // This is very slow and only intended for debugging/development. It's enabled using the "-validate_etc1s" command line option.
  2531. bool basisu_frontend::validate_output() const
  2532. {
  2533. debug_printf("validate_output\n");
  2534. if (!check_etc1s_constraints())
  2535. return false;
  2536. for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
  2537. {
  2538. //#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0)
  2539. #define CHECK(x) BASISU_FRONTEND_VERIFY(x);
  2540. CHECK(get_output_block(block_index).get_flip_bit() == true);
  2541. const bool diff_flag = get_diff_flag(block_index);
  2542. CHECK(diff_flag == true);
  2543. etc_block blk;
  2544. memset(&blk, 0, sizeof(blk));
  2545. blk.set_flip_bit(true);
  2546. blk.set_diff_bit(true);
  2547. const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
  2548. const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
  2549. // basisu only supports ETC1S, so these must be equal.
  2550. CHECK(endpoint_cluster0_index == endpoint_cluster1_index);
  2551. CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)));
  2552. CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false));
  2553. blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false));
  2554. blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false));
  2555. const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
  2556. CHECK(selector_cluster_index < get_total_selector_clusters());
  2557. CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1);
  2558. blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
  2559. const etc_block &rdo_output_block = get_output_block(block_index);
  2560. CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit());
  2561. CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit());
  2562. CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0));
  2563. CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1));
  2564. CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color());
  2565. CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color());
  2566. CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits());
  2567. #undef CHECK
  2568. }
  2569. return true;
  2570. }
  2571. void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks)
  2572. {
  2573. gpu_image g;
  2574. g.init(texture_format::cETC1, num_blocks_x * 4, num_blocks_y * 4);
  2575. for (uint32_t y = 0; y < num_blocks_y; y++)
  2576. {
  2577. for (uint32_t x = 0; x < num_blocks_x; x++)
  2578. {
  2579. const uint32_t block_index = first_block + x + y * num_blocks_x;
  2580. etc_block &blk = *(etc_block *)g.get_block_ptr(x, y);
  2581. if (output_blocks)
  2582. blk = get_output_block(block_index);
  2583. else
  2584. {
  2585. const bool diff_flag = get_diff_flag(block_index);
  2586. blk.set_diff_bit(diff_flag);
  2587. blk.set_flip_bit(true);
  2588. const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
  2589. const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
  2590. if (diff_flag)
  2591. blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false));
  2592. else
  2593. blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
  2594. blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
  2595. blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
  2596. const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
  2597. blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
  2598. }
  2599. }
  2600. }
  2601. image img;
  2602. g.unpack(img);
  2603. save_png(pFilename, img);
  2604. }
  2605. } // namespace basisu