image_compress_betsy.cpp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914
  1. /**************************************************************************/
  2. /* image_compress_betsy.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "image_compress_betsy.h"
  31. #include "core/config/project_settings.h"
  32. #include "betsy_bc1.h"
  33. #include "alpha_stitch.glsl.gen.h"
  34. #include "bc1.glsl.gen.h"
  35. #include "bc4.glsl.gen.h"
  36. #include "bc6h.glsl.gen.h"
  37. #include "rgb_to_rgba.glsl.gen.h"
  38. #include "servers/display/display_server.h"
  39. static Mutex betsy_mutex;
  40. static BetsyCompressor *betsy = nullptr;
  41. static const BetsyShaderType FORMAT_TO_TYPE[BETSY_FORMAT_MAX] = {
  42. BETSY_SHADER_BC1_STANDARD,
  43. BETSY_SHADER_BC1_DITHER,
  44. BETSY_SHADER_BC1_STANDARD,
  45. BETSY_SHADER_BC4_SIGNED,
  46. BETSY_SHADER_BC4_UNSIGNED,
  47. BETSY_SHADER_BC4_SIGNED,
  48. BETSY_SHADER_BC4_UNSIGNED,
  49. BETSY_SHADER_BC6_SIGNED,
  50. BETSY_SHADER_BC6_UNSIGNED,
  51. };
  52. static const RD::DataFormat BETSY_TO_RD_FORMAT[BETSY_FORMAT_MAX] = {
  53. RD::DATA_FORMAT_R32G32_UINT,
  54. RD::DATA_FORMAT_R32G32_UINT,
  55. RD::DATA_FORMAT_R32G32_UINT,
  56. RD::DATA_FORMAT_R32G32_UINT,
  57. RD::DATA_FORMAT_R32G32_UINT,
  58. RD::DATA_FORMAT_R32G32_UINT,
  59. RD::DATA_FORMAT_R32G32_UINT,
  60. RD::DATA_FORMAT_R32G32B32A32_UINT,
  61. RD::DATA_FORMAT_R32G32B32A32_UINT,
  62. };
  63. static const Image::Format BETSY_TO_IMAGE_FORMAT[BETSY_FORMAT_MAX] = {
  64. Image::FORMAT_DXT1,
  65. Image::FORMAT_DXT1,
  66. Image::FORMAT_DXT5,
  67. Image::FORMAT_RGTC_R,
  68. Image::FORMAT_RGTC_R,
  69. Image::FORMAT_RGTC_RG,
  70. Image::FORMAT_RGTC_RG,
  71. Image::FORMAT_BPTC_RGBF,
  72. Image::FORMAT_BPTC_RGBFU,
  73. };
  74. void BetsyCompressor::_init() {
  75. if (!DisplayServer::can_create_rendering_device()) {
  76. return;
  77. }
  78. // Create local RD.
  79. RenderingContextDriver *rcd = nullptr;
  80. RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
  81. if (rd == nullptr) {
  82. #if defined(RD_ENABLED)
  83. #if defined(METAL_ENABLED)
  84. rcd = memnew(RenderingContextDriverMetal);
  85. rd = memnew(RenderingDevice);
  86. #endif
  87. #if defined(VULKAN_ENABLED)
  88. if (rcd == nullptr) {
  89. rcd = memnew(RenderingContextDriverVulkan);
  90. rd = memnew(RenderingDevice);
  91. }
  92. #endif
  93. #endif
  94. if (rcd != nullptr && rd != nullptr) {
  95. Error err = rcd->initialize();
  96. if (err == OK) {
  97. err = rd->initialize(rcd);
  98. }
  99. if (err != OK) {
  100. memdelete(rd);
  101. memdelete(rcd);
  102. rd = nullptr;
  103. rcd = nullptr;
  104. }
  105. }
  106. }
  107. ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");
  108. compress_rd = rd;
  109. compress_rcd = rcd;
  110. // Create the sampler state.
  111. RD::SamplerState src_sampler_state;
  112. {
  113. src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  114. src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  115. src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
  116. src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
  117. src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
  118. }
  119. src_sampler = compress_rd->sampler_create(src_sampler_state);
  120. // Initialize RDShaderFiles.
  121. {
  122. Ref<RDShaderFile> bc1_shader;
  123. bc1_shader.instantiate();
  124. Error err = bc1_shader->parse_versions_from_text(bc1_shader_glsl);
  125. if (err != OK) {
  126. bc1_shader->print_errors("Betsy BC1 compress shader");
  127. }
  128. // Standard BC1 compression.
  129. cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("standard"));
  130. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled.is_null());
  131. cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled);
  132. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline.is_null());
  133. // Dither BC1 variant. Unused, so comment out for now.
  134. //cached_shaders[BETSY_SHADER_BC1_DITHER].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("dithered"));
  135. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled.is_null());
  136. //cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled);
  137. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline.is_null());
  138. }
  139. {
  140. Ref<RDShaderFile> bc4_shader;
  141. bc4_shader.instantiate();
  142. Error err = bc4_shader->parse_versions_from_text(bc4_shader_glsl);
  143. if (err != OK) {
  144. bc4_shader->print_errors("Betsy BC4 compress shader");
  145. }
  146. // Signed BC4 compression. Unused, so comment out for now.
  147. //cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("signed"));
  148. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled.is_null());
  149. //cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled);
  150. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline.is_null());
  151. // Unsigned BC4 compression.
  152. cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("unsigned"));
  153. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled.is_null());
  154. cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled);
  155. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline.is_null());
  156. }
  157. {
  158. Ref<RDShaderFile> bc6h_shader;
  159. bc6h_shader.instantiate();
  160. Error err = bc6h_shader->parse_versions_from_text(bc6h_shader_glsl);
  161. if (err != OK) {
  162. bc6h_shader->print_errors("Betsy BC6 compress shader");
  163. }
  164. // Signed BC6 compression.
  165. cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("signed"));
  166. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled.is_null());
  167. cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled);
  168. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline.is_null());
  169. // Unsigned BC6 compression.
  170. cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("unsigned"));
  171. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled.is_null());
  172. cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled);
  173. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline.is_null());
  174. }
  175. {
  176. Ref<RDShaderFile> alpha_stitch_shader;
  177. alpha_stitch_shader.instantiate();
  178. Error err = alpha_stitch_shader->parse_versions_from_text(alpha_stitch_shader_glsl);
  179. if (err != OK) {
  180. alpha_stitch_shader->print_errors("Betsy alpha stitch shader");
  181. }
  182. cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled = compress_rd->shader_create_from_spirv(alpha_stitch_shader->get_spirv_stages());
  183. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled.is_null());
  184. cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
  185. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
  186. }
  187. {
  188. Ref<RDShaderFile> rgb_to_rgba_shader;
  189. rgb_to_rgba_shader.instantiate();
  190. Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);
  191. if (err != OK) {
  192. rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");
  193. }
  194. // Float32.
  195. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));
  196. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());
  197. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);
  198. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());
  199. // Float16.
  200. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));
  201. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());
  202. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);
  203. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());
  204. // Unorm8.
  205. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));
  206. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());
  207. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);
  208. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());
  209. // Unorm16.
  210. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));
  211. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());
  212. cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);
  213. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());
  214. }
  215. }
  216. void BetsyCompressor::init() {
  217. WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true, "Betsy pump task", true);
  218. command_queue.set_pump_task_id(tid);
  219. command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);
  220. command_queue.push_and_sync(this, &BetsyCompressor::_init);
  221. DEV_ASSERT(task_id == tid);
  222. }
  223. void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {
  224. task_id = p_pump_task_id;
  225. }
  226. // Yield thread to WTP so other tasks can be done on it.
  227. // Automatically regains control as soon a task is pushed to the command queue.
  228. void BetsyCompressor::_thread_loop() {
  229. while (!exit) {
  230. WorkerThreadPool::get_singleton()->yield();
  231. command_queue.flush_all();
  232. }
  233. }
  234. void BetsyCompressor::_thread_exit() {
  235. exit = true;
  236. if (compress_rd != nullptr) {
  237. if (dxt1_encoding_table_buffer.is_valid()) {
  238. compress_rd->free_rid(dxt1_encoding_table_buffer);
  239. }
  240. compress_rd->free_rid(src_sampler);
  241. // Clear the shader cache, pipelines will be unreferenced automatically.
  242. for (int i = 0; i < BETSY_SHADER_MAX; i++) {
  243. if (cached_shaders[i].compiled.is_valid()) {
  244. compress_rd->free_rid(cached_shaders[i].compiled);
  245. }
  246. }
  247. // Free the RD (and RCD if necessary).
  248. memdelete(compress_rd);
  249. compress_rd = nullptr;
  250. if (compress_rcd != nullptr) {
  251. memdelete(compress_rcd);
  252. compress_rcd = nullptr;
  253. }
  254. }
  255. }
  256. void BetsyCompressor::finish() {
  257. command_queue.push(this, &BetsyCompressor::_thread_exit);
  258. if (task_id != WorkerThreadPool::INVALID_TASK_ID) {
  259. WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
  260. task_id = WorkerThreadPool::INVALID_TASK_ID;
  261. }
  262. }
  263. // Helper functions.
  264. static int get_next_multiple(int n, int m) {
  265. return n + (m - (n % m));
  266. }
  267. static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {
  268. r_is_rgb = false;
  269. switch (r_img->get_format()) {
  270. case Image::FORMAT_L8:
  271. r_img->convert(Image::FORMAT_RGBA8);
  272. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  273. break;
  274. case Image::FORMAT_LA8:
  275. r_img->convert(Image::FORMAT_RGBA8);
  276. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  277. break;
  278. case Image::FORMAT_R8:
  279. r_format = RD::DATA_FORMAT_R8_UNORM;
  280. break;
  281. case Image::FORMAT_RG8:
  282. r_format = RD::DATA_FORMAT_R8G8_UNORM;
  283. break;
  284. case Image::FORMAT_RGB8:
  285. r_is_rgb = true;
  286. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  287. break;
  288. case Image::FORMAT_RGBA8:
  289. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  290. break;
  291. case Image::FORMAT_RH:
  292. r_format = RD::DATA_FORMAT_R16_SFLOAT;
  293. break;
  294. case Image::FORMAT_RGH:
  295. r_format = RD::DATA_FORMAT_R16G16_SFLOAT;
  296. break;
  297. case Image::FORMAT_RGBH:
  298. r_is_rgb = true;
  299. r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  300. break;
  301. case Image::FORMAT_RGBAH:
  302. r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  303. break;
  304. case Image::FORMAT_RF:
  305. r_format = RD::DATA_FORMAT_R32_SFLOAT;
  306. break;
  307. case Image::FORMAT_RGF:
  308. r_format = RD::DATA_FORMAT_R32G32_SFLOAT;
  309. break;
  310. case Image::FORMAT_RGBF:
  311. r_is_rgb = true;
  312. r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  313. break;
  314. case Image::FORMAT_RGBAF:
  315. r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  316. break;
  317. case Image::FORMAT_RGBE9995:
  318. r_format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
  319. break;
  320. case Image::FORMAT_R16:
  321. r_format = RD::DATA_FORMAT_R16_UNORM;
  322. break;
  323. case Image::FORMAT_RG16:
  324. r_format = RD::DATA_FORMAT_R16G16_UNORM;
  325. break;
  326. case Image::FORMAT_RGB16:
  327. r_is_rgb = true;
  328. r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
  329. break;
  330. case Image::FORMAT_RGBA16:
  331. r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
  332. break;
  333. default: {
  334. return ERR_UNAVAILABLE;
  335. }
  336. }
  337. return OK;
  338. }
  339. Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
  340. uint64_t start_time = OS::get_singleton()->get_ticks_msec();
  341. // Return an error so that the compression can fall back to cpu compression
  342. if (compress_rd == nullptr) {
  343. return ERR_CANT_CREATE;
  344. }
  345. if (r_img->is_compressed()) {
  346. return ERR_INVALID_DATA;
  347. }
  348. int img_width = r_img->get_width();
  349. int img_height = r_img->get_height();
  350. if (img_width % 4 != 0 || img_height % 4 != 0) {
  351. img_width = img_width <= 2 ? img_width : (img_width + 3) & ~3;
  352. img_height = img_height <= 2 ? img_height : (img_height + 3) & ~3;
  353. }
  354. Error err = OK;
  355. // Destination format.
  356. Image::Format dest_format = BETSY_TO_IMAGE_FORMAT[p_format];
  357. RD::DataFormat dst_rd_format = BETSY_TO_RD_FORMAT[p_format];
  358. BetsyShaderType shader_type = FORMAT_TO_TYPE[p_format];
  359. BetsyShader shader = cached_shaders[shader_type];
  360. BetsyShader secondary_shader; // The secondary shader is used for alpha blocks. For BC it's BC4U and for ETC it's ETC2_RU (8-bit variant).
  361. BetsyShader stitch_shader;
  362. bool needs_alpha_block = false;
  363. switch (p_format) {
  364. case BETSY_FORMAT_BC3:
  365. case BETSY_FORMAT_BC5_UNSIGNED:
  366. needs_alpha_block = true;
  367. secondary_shader = cached_shaders[BETSY_SHADER_BC4_UNSIGNED];
  368. stitch_shader = cached_shaders[BETSY_SHADER_ALPHA_STITCH];
  369. break;
  370. default:
  371. break;
  372. }
  373. // src_texture format information.
  374. RD::TextureFormat src_texture_format;
  375. {
  376. src_texture_format.array_layers = 1;
  377. src_texture_format.depth = 1;
  378. src_texture_format.mipmaps = 1;
  379. src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
  380. src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
  381. }
  382. bool needs_rgb_to_rgba = false;
  383. err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);
  384. if (err != OK) {
  385. return err;
  386. }
  387. // For the destination format just copy the source format and change the usage bits.
  388. RD::TextureFormat dst_texture_format = src_texture_format;
  389. dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
  390. dst_texture_format.format = dst_rd_format;
  391. RD::TextureFormat dst_texture_format_alpha;
  392. RD::TextureFormat dst_texture_format_combined;
  393. if (needs_alpha_block) {
  394. dst_texture_format_combined = dst_texture_format;
  395. dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
  396. dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT;
  397. dst_texture_format_alpha = dst_texture_format;
  398. dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT;
  399. }
  400. // Encoding table setup.
  401. if ((dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) && dxt1_encoding_table_buffer.is_null()) {
  402. LocalVector<float> dxt1_encoding_table;
  403. dxt1_encoding_table.resize(256 * 4);
  404. for (int i = 0; i < 256; i++) {
  405. dxt1_encoding_table[i * 2 + 0] = static_cast<float>(stb__OMatch5[i][0]);
  406. dxt1_encoding_table[i * 2 + 1] = static_cast<float>(stb__OMatch5[i][1]);
  407. dxt1_encoding_table[512 + (i * 2 + 0)] = static_cast<float>(stb__OMatch6[i][0]);
  408. dxt1_encoding_table[512 + (i * 2 + 1)] = static_cast<float>(stb__OMatch6[i][1]);
  409. }
  410. dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(dxt1_encoding_table.size() * sizeof(float), Span<float>(dxt1_encoding_table).reinterpret<uint8_t>());
  411. }
  412. const int mip_count = r_img->get_mipmap_count() + 1;
  413. // Container for the compressed data.
  414. Vector<uint8_t> dst_data;
  415. dst_data.resize(Image::get_image_data_size(img_width, img_height, dest_format, r_img->has_mipmaps()));
  416. uint8_t *dst_data_ptr = dst_data.ptrw();
  417. Vector<Vector<uint8_t>> src_images;
  418. src_images.push_back(Vector<uint8_t>());
  419. Vector<uint8_t> *src_image_ptr = src_images.ptrw();
  420. // Compress each mipmap.
  421. for (int i = 0; i < mip_count; i++) {
  422. int width, height;
  423. Image::get_image_mipmap_offset_and_dimensions(img_width, img_height, dest_format, i, width, height);
  424. int64_t src_mip_ofs, src_mip_size;
  425. int src_mip_w, src_mip_h;
  426. r_img->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);
  427. // Set the source texture width and size.
  428. src_texture_format.height = height;
  429. src_texture_format.width = width;
  430. // Set the destination texture width and size.
  431. dst_texture_format.height = (height + 3) >> 2;
  432. dst_texture_format.width = (width + 3) >> 2;
  433. // Pad textures to nearest block by smearing.
  434. if (width != src_mip_w || height != src_mip_h) {
  435. const uint8_t *src_mip_read = r_img->ptr() + src_mip_ofs;
  436. // Reserve the buffer for padded image data.
  437. int px_size = Image::get_format_pixel_size(r_img->get_format());
  438. src_image_ptr[0].resize(width * height * px_size);
  439. uint8_t *ptrw = src_image_ptr[0].ptrw();
  440. int x = 0, y = 0;
  441. for (y = 0; y < src_mip_h; y++) {
  442. for (x = 0; x < src_mip_w; x++) {
  443. memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);
  444. }
  445. // First, smear in x.
  446. for (; x < width; x++) {
  447. memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);
  448. }
  449. }
  450. // Then, smear in y.
  451. for (; y < height; y++) {
  452. for (x = 0; x < width; x++) {
  453. memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);
  454. }
  455. }
  456. } else {
  457. // Create a buffer filled with the source mip layer data.
  458. src_image_ptr[0].resize(src_mip_size);
  459. memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + src_mip_ofs, src_mip_size);
  460. }
  461. // Create the textures on the GPU.
  462. RID src_texture;
  463. RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
  464. if (needs_rgb_to_rgba) {
  465. // RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader
  466. // which takes the input data as an SSBO and converts it directly into an RGBA image.
  467. BetsyShaderType rgb_shader_type = BETSY_SHADER_MAX;
  468. switch (r_img->get_format()) {
  469. case Image::FORMAT_RGB8:
  470. rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;
  471. break;
  472. case Image::FORMAT_RGBH:
  473. rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;
  474. break;
  475. case Image::FORMAT_RGBF:
  476. rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;
  477. break;
  478. case Image::FORMAT_RGB16:
  479. rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;
  480. break;
  481. default:
  482. break;
  483. }
  484. // The source 'RGB' buffer.
  485. RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());
  486. RD::TextureFormat rgba_texture_format = src_texture_format;
  487. rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
  488. src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());
  489. Vector<RD::Uniform> uniforms;
  490. {
  491. {
  492. RD::Uniform u;
  493. u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
  494. u.binding = 0;
  495. u.append_id(source_buffer);
  496. uniforms.push_back(u);
  497. }
  498. {
  499. RD::Uniform u;
  500. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  501. u.binding = 1;
  502. u.append_id(src_texture);
  503. uniforms.push_back(u);
  504. }
  505. }
  506. BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];
  507. RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);
  508. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  509. compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);
  510. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  511. // Prepare the push constant with the mipmap's resolution.
  512. RGBToRGBAPushConstant push_constant;
  513. push_constant.width = width;
  514. push_constant.height = height;
  515. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));
  516. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);
  517. compress_rd->compute_list_end();
  518. compress_rd->free_rid(source_buffer);
  519. } else {
  520. src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
  521. }
  522. {
  523. Vector<RD::Uniform> uniforms;
  524. {
  525. {
  526. RD::Uniform u;
  527. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  528. u.binding = 0;
  529. u.append_id(src_sampler);
  530. u.append_id(src_texture);
  531. uniforms.push_back(u);
  532. }
  533. {
  534. RD::Uniform u;
  535. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  536. u.binding = 1;
  537. u.append_id(dst_texture_primary);
  538. uniforms.push_back(u);
  539. }
  540. if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) {
  541. RD::Uniform u;
  542. u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
  543. u.binding = 2;
  544. u.append_id(dxt1_encoding_table_buffer);
  545. uniforms.push_back(u);
  546. }
  547. }
  548. RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);
  549. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  550. compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);
  551. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  552. switch (shader_type) {
  553. case BETSY_SHADER_BC6_SIGNED:
  554. case BETSY_SHADER_BC6_UNSIGNED: {
  555. BC6PushConstant push_constant;
  556. push_constant.sizeX = 1.0f / width;
  557. push_constant.sizeY = 1.0f / height;
  558. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
  559. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  560. } break;
  561. case BETSY_SHADER_BC1_STANDARD: {
  562. BC1PushConstant push_constant;
  563. push_constant.num_refines = 2;
  564. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));
  565. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  566. } break;
  567. case BETSY_SHADER_BC4_UNSIGNED: {
  568. BC4PushConstant push_constant;
  569. push_constant.channel_idx = 0;
  570. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
  571. compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
  572. } break;
  573. default: {
  574. } break;
  575. }
  576. compress_rd->compute_list_end();
  577. if (!needs_alpha_block) {
  578. compress_rd->submit();
  579. compress_rd->sync();
  580. }
  581. }
  582. RID dst_texture_rid = dst_texture_primary;
  583. if (needs_alpha_block) {
  584. // Set the destination texture width and size.
  585. dst_texture_format_alpha.height = (height + 3) >> 2;
  586. dst_texture_format_alpha.width = (width + 3) >> 2;
  587. RID dst_texture_alpha = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView());
  588. {
  589. Vector<RD::Uniform> uniforms;
  590. {
  591. {
  592. RD::Uniform u;
  593. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  594. u.binding = 0;
  595. u.append_id(src_sampler);
  596. u.append_id(src_texture);
  597. uniforms.push_back(u);
  598. }
  599. {
  600. RD::Uniform u;
  601. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  602. u.binding = 1;
  603. u.append_id(dst_texture_alpha);
  604. uniforms.push_back(u);
  605. }
  606. }
  607. RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0);
  608. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  609. compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline);
  610. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  611. BC4PushConstant push_constant;
  612. push_constant.channel_idx = dest_format == Image::FORMAT_DXT5 ? 3 : 1;
  613. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
  614. compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
  615. compress_rd->compute_list_end();
  616. }
  617. // Stitching
  618. // Set the destination texture width and size.
  619. dst_texture_format_combined.height = (height + 3) >> 2;
  620. dst_texture_format_combined.width = (width + 3) >> 2;
  621. RID dst_texture_combined = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView());
  622. {
  623. Vector<RD::Uniform> uniforms;
  624. {
  625. {
  626. RD::Uniform u;
  627. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  628. u.binding = 0;
  629. u.append_id(src_sampler);
  630. u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_alpha : dst_texture_primary);
  631. uniforms.push_back(u);
  632. }
  633. {
  634. RD::Uniform u;
  635. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  636. u.binding = 1;
  637. u.append_id(src_sampler);
  638. u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_primary : dst_texture_alpha);
  639. uniforms.push_back(u);
  640. }
  641. {
  642. RD::Uniform u;
  643. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  644. u.binding = 2;
  645. u.append_id(dst_texture_combined);
  646. uniforms.push_back(u);
  647. }
  648. }
  649. RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0);
  650. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  651. compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline);
  652. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  653. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  654. compress_rd->compute_list_end();
  655. compress_rd->submit();
  656. compress_rd->sync();
  657. }
  658. dst_texture_rid = dst_texture_combined;
  659. compress_rd->free_rid(dst_texture_primary);
  660. compress_rd->free_rid(dst_texture_alpha);
  661. }
  662. // Copy data from the GPU to the buffer.
  663. const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture_rid, 0);
  664. int64_t dst_ofs = Image::get_image_mipmap_offset(img_width, img_height, dest_format, i);
  665. memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
  666. // Free the source and dest texture.
  667. compress_rd->free_rid(src_texture);
  668. compress_rd->free_rid(dst_texture_rid);
  669. }
  670. src_images.clear();
  671. // Set the compressed data to the image.
  672. r_img->set_data(img_width, img_height, r_img->has_mipmaps(), dest_format, dst_data);
  673. print_verbose(
  674. vformat("Betsy: Encoding a %dx%d image with %d mipmaps as %s took %d ms.",
  675. img_width,
  676. img_height,
  677. r_img->get_mipmap_count(),
  678. Image::get_format_name(dest_format),
  679. OS::get_singleton()->get_ticks_msec() - start_time));
  680. return OK;
  681. }
  682. void ensure_betsy_exists() {
  683. betsy_mutex.lock();
  684. if (betsy == nullptr) {
  685. betsy = memnew(BetsyCompressor);
  686. betsy->init();
  687. }
  688. betsy_mutex.unlock();
  689. }
  690. Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
  691. ensure_betsy_exists();
  692. Image::Format format = r_img->get_format();
  693. Error result = ERR_UNAVAILABLE;
  694. if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
  695. if (r_img->detect_signed()) {
  696. result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);
  697. } else {
  698. result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);
  699. }
  700. }
  701. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  702. free_device();
  703. }
  704. return result;
  705. }
  706. Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {
  707. ensure_betsy_exists();
  708. Error result = ERR_UNAVAILABLE;
  709. switch (p_channels) {
  710. case Image::USED_CHANNELS_RGB:
  711. case Image::USED_CHANNELS_L:
  712. result = betsy->compress(BETSY_FORMAT_BC1, r_img);
  713. break;
  714. case Image::USED_CHANNELS_RGBA:
  715. case Image::USED_CHANNELS_LA:
  716. result = betsy->compress(BETSY_FORMAT_BC3, r_img);
  717. break;
  718. case Image::USED_CHANNELS_R:
  719. result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img);
  720. break;
  721. case Image::USED_CHANNELS_RG:
  722. result = betsy->compress(BETSY_FORMAT_BC5_UNSIGNED, r_img);
  723. break;
  724. default:
  725. break;
  726. }
  727. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  728. free_device();
  729. }
  730. return result;
  731. }
  732. void free_device() {
  733. if (betsy != nullptr) {
  734. betsy->finish();
  735. memdelete(betsy);
  736. }
  737. }