/*************************************************************************/
/*  rasterizer_canvas_batcher.h                                          */
/*************************************************************************/
/*                       This file is part of:                           */
/*                           GODOT ENGINE                                */
/*                      https://godotengine.org                          */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
/*                                                                       */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the       */
/* "Software"), to deal in the Software without restriction, including   */
/* without limitation the rights to use, copy, modify, merge, publish,   */
/* distribute, sublicense, and/or sell copies of the Software, and to    */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions:                                             */
/*                                                                       */
/* The above copyright notice and this permission notice shall be        */
/* included in all copies or substantial portions of the Software.       */
/*                                                                       */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
/*************************************************************************/
  30. #ifndef RASTERIZER_CANVAS_BATCHER_H
  31. #define RASTERIZER_CANVAS_BATCHER_H
  32. #include "core/os/os.h"
  33. #include "core/templates/local_vector.h"
  34. #include "rasterizer_array.h"
  35. #include "rasterizer_asserts.h"
  36. #include "rasterizer_storage_common.h"
  37. #include "core/config/project_settings.h"
  38. #include "servers/rendering/renderer_compositor.h"
// We are using the curiously recurring template pattern
// https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
// for static polymorphism.

// This makes it super easy to access
// data / call funcs in the derived rasterizers from the base without writing and
// maintaining a boatload of virtual functions.
// In addition it assures that the vtable will not be used and the function calls can be optimized,
// because it gives compile time static polymorphism.

// These macros make it simpler and less verbose to define (and redefine) the inline functions.
  48. // template preamble
  49. #define T_PREAMBLE template <class T, typename T_STORAGE>
  50. // class preamble
  51. #define C_PREAMBLE RasterizerCanvasBatcher<T, T_STORAGE>
  52. // generic preamble
  53. #define PREAMBLE(RET_T) \
  54. T_PREAMBLE \
  55. RET_T C_PREAMBLE
  56. template <class T, typename T_STORAGE>
  57. class RasterizerCanvasBatcher {
  58. public:
  59. // used to determine whether we use hardware transform (none)
  60. // software transform all verts, or software transform just a translate
  61. // (no rotate or scale)
  62. enum TransformMode {
  63. TM_NONE,
  64. TM_ALL,
  65. TM_TRANSLATE,
  66. };
  67. // pod versions of vector and color and RID, need to be 32 bit for vertex format
  68. struct BatchVector2 {
  69. float x, y;
  70. void set(float xx, float yy) {
  71. x = xx;
  72. y = yy;
  73. }
  74. void set(const Vector2 &p_o) {
  75. x = p_o.x;
  76. y = p_o.y;
  77. }
  78. void to(Vector2 &r_o) const {
  79. r_o.x = x;
  80. r_o.y = y;
  81. }
  82. };
  83. struct BatchColor {
  84. float r, g, b, a;
  85. void set_white() {
  86. r = 1.0f;
  87. g = 1.0f;
  88. b = 1.0f;
  89. a = 1.0f;
  90. }
  91. void set(const Color &p_c) {
  92. r = p_c.r;
  93. g = p_c.g;
  94. b = p_c.b;
  95. a = p_c.a;
  96. }
  97. void set(float rr, float gg, float bb, float aa) {
  98. r = rr;
  99. g = gg;
  100. b = bb;
  101. a = aa;
  102. }
  103. bool operator==(const BatchColor &p_c) const {
  104. return (r == p_c.r) && (g == p_c.g) && (b == p_c.b) && (a == p_c.a);
  105. }
  106. bool operator!=(const BatchColor &p_c) const { return (*this == p_c) == false; }
  107. bool equals(const Color &p_c) const {
  108. return (r == p_c.r) && (g == p_c.g) && (b == p_c.b) && (a == p_c.a);
  109. }
  110. const float *get_data() const { return &r; }
  111. String to_string() const {
  112. String sz = "{";
  113. const float *data = get_data();
  114. for (int c = 0; c < 4; c++) {
  115. float f = data[c];
  116. int val = ((f * 255.0f) + 0.5f);
  117. sz += String(Variant(val)) + " ";
  118. }
  119. sz += "}";
  120. return sz;
  121. }
  122. };
  123. // simplest FVF - local or baked position
  124. struct BatchVertex {
  125. // must be 32 bit pod
  126. BatchVector2 pos;
  127. BatchVector2 uv;
  128. };
  129. // simple FVF but also incorporating baked color
  130. struct BatchVertexColored : public BatchVertex {
  131. // must be 32 bit pod
  132. BatchColor col;
  133. };
  134. // if we are using normal mapping, we need light angles to be sent
  135. struct BatchVertexLightAngled : public BatchVertexColored {
  136. // must be pod
  137. float light_angle;
  138. };
  139. // CUSTOM SHADER vertex formats. These are larger but will probably
  140. // be needed with custom shaders in order to have the data accessible in the shader.
  141. // if we are using COLOR in vertex shader but not position (VERTEX)
  142. struct BatchVertexModulated : public BatchVertexLightAngled {
  143. BatchColor modulate;
  144. };
  145. struct BatchTransform {
  146. BatchVector2 translate;
  147. BatchVector2 basis[2];
  148. };
  149. // last resort, specially for custom shader, we put everything possible into a huge FVF
  150. // not very efficient, but better than no batching at all.
  151. struct BatchVertexLarge : public BatchVertexModulated {
  152. // must be pod
  153. BatchTransform transform;
  154. };
  155. // Batch should be as small as possible, and ideally nicely aligned (is 32 bytes at the moment)
  156. struct Batch {
  157. RasterizerStorageCommon::BatchType type; // should be 16 bit
  158. uint16_t batch_texture_id;
  159. // also item reference number
  160. uint32_t first_command;
  161. // in the case of DEFAULT, this is num commands.
  162. // with rects, is number of command and rects.
  163. // with lines, is number of lines
  164. uint32_t num_commands;
  165. // first vertex of this batch in the vertex lists
  166. uint32_t first_vert;
  167. BatchColor color;
  168. };
  169. struct BatchTex {
  170. enum TileMode : uint32_t {
  171. TILE_OFF,
  172. TILE_NORMAL,
  173. TILE_FORCE_REPEAT,
  174. };
  175. RID RID_texture;
  176. RID RID_normal;
  177. TileMode tile_mode;
  178. BatchVector2 tex_pixel_size;
  179. uint32_t flags;
  180. };
  181. // items in a list to be sorted prior to joining
  182. struct BSortItem {
  183. // have a function to keep as pod, rather than operator
  184. void assign(const BSortItem &o) {
  185. item = o.item;
  186. z_index = o.z_index;
  187. }
  188. RendererCanvasRender::Item *item;
  189. int z_index;
  190. };
  191. // batch item may represent 1 or more items
  192. struct BItemJoined {
  193. uint32_t first_item_ref;
  194. uint32_t num_item_refs;
  195. Rect2 bounding_rect;
  196. // note the z_index may only be correct for the first of the joined item references
  197. // this has implications for light culling with z ranged lights.
  198. int16_t z_index;
  199. // these are defined in RasterizerStorageCommon::BatchFlags
  200. uint16_t flags;
  201. // we are always splitting items with lots of commands,
  202. // and items with unhandled primitives (default)
  203. bool use_hardware_transform() const { return num_item_refs == 1; }
  204. };
  205. struct BItemRef {
  206. RendererCanvasRender::Item *item;
  207. Color final_modulate;
  208. };
  209. struct BLightRegion {
  210. void reset() {
  211. light_bitfield = 0;
  212. shadow_bitfield = 0;
  213. too_many_lights = false;
  214. }
  215. uint64_t light_bitfield;
  216. uint64_t shadow_bitfield;
  217. bool too_many_lights; // we can only do light region optimization if there are 64 or less lights
  218. };
  219. struct BatchData {
  220. BatchData() {
  221. reset_flush();
  222. reset_joined_item();
  223. gl_vertex_buffer = 0;
  224. gl_index_buffer = 0;
  225. max_quads = 0;
  226. vertex_buffer_size_units = 0;
  227. vertex_buffer_size_bytes = 0;
  228. index_buffer_size_units = 0;
  229. index_buffer_size_bytes = 0;
  230. use_colored_vertices = false;
  231. settings_use_batching = false;
  232. settings_max_join_item_commands = 0;
  233. settings_colored_vertex_format_threshold = 0.0f;
  234. settings_batch_buffer_num_verts = 0;
  235. scissor_threshold_area = 0.0f;
  236. joined_item_batch_flags = 0;
  237. diagnose_frame = false;
  238. next_diagnose_tick = 10000;
  239. diagnose_frame_number = 9999999999; // some high number
  240. join_across_z_indices = true;
  241. settings_item_reordering_lookahead = 0;
  242. settings_use_batching_original_choice = false;
  243. settings_flash_batching = false;
  244. settings_diagnose_frame = false;
  245. settings_scissor_lights = false;
  246. settings_scissor_threshold = -1.0f;
  247. settings_use_single_rect_fallback = false;
  248. settings_use_software_skinning = true;
  249. settings_ninepatch_mode = 0; // default
  250. settings_light_max_join_items = 16;
  251. settings_uv_contract = false;
  252. settings_uv_contract_amount = 0.0f;
  253. buffer_mode_batch_upload_send_null = true;
  254. buffer_mode_batch_upload_flag_stream = false;
  255. stats_items_sorted = 0;
  256. stats_light_items_joined = 0;
  257. }
  258. // called for each joined item
  259. void reset_joined_item() {
  260. // noop but left in as a stub
  261. }
  262. // called after each flush
  263. void reset_flush() {
  264. batches.reset();
  265. batch_textures.reset();
  266. vertices.reset();
  267. light_angles.reset();
  268. vertex_colors.reset();
  269. vertex_modulates.reset();
  270. vertex_transforms.reset();
  271. total_quads = 0;
  272. total_verts = 0;
  273. total_color_changes = 0;
  274. use_light_angles = false;
  275. use_modulate = false;
  276. use_large_verts = false;
  277. fvf = RasterizerStorageCommon::FVF_REGULAR;
  278. }
  279. unsigned int gl_vertex_buffer;
  280. unsigned int gl_index_buffer;
  281. uint32_t max_quads;
  282. uint32_t vertex_buffer_size_units;
  283. uint32_t vertex_buffer_size_bytes;
  284. uint32_t index_buffer_size_units;
  285. uint32_t index_buffer_size_bytes;
  286. // small vertex FVF type - pos and UV.
  287. // This will always be written to initially, but can be translated
  288. // to larger FVFs if necessary.
  289. RasterizerArray<BatchVertex> vertices;
  290. // extra data which can be stored during prefilling, for later translation to larger FVFs
  291. RasterizerArray<float> light_angles;
  292. RasterizerArray<BatchColor> vertex_colors; // these aren't usually used, but are for polys
  293. RasterizerArray<BatchColor> vertex_modulates;
  294. RasterizerArray<BatchTransform> vertex_transforms;
  295. // instead of having a different buffer for each vertex FVF type
  296. // we have a special array big enough for the biggest FVF
  297. // which can have a changeable unit size, and reuse it.
  298. RasterizerUnitArray unit_vertices;
  299. RasterizerArray<Batch> batches;
  300. RasterizerArray<Batch> batches_temp; // used for translating to colored vertex batches
  301. RasterizerArray_non_pod<BatchTex> batch_textures; // the only reason this is non-POD is because of RIDs
  302. // SHOULD THESE BE IN FILLSTATE?
  303. // flexible vertex format.
  304. // all verts have pos and UV.
  305. // some have color, some light angles etc.
  306. RasterizerStorageCommon::FVF fvf;
  307. bool use_colored_vertices;
  308. bool use_light_angles;
  309. bool use_modulate;
  310. bool use_large_verts;
  311. // if the shader is using MODULATE, we prevent baking color so the final_modulate can
  312. // be read in the shader.
  313. // if the shader is reading VERTEX, we prevent baking vertex positions with extra matrices etc
  314. // to prevent the read position being incorrect.
  315. // These flags are defined in RasterizerStorageCommon::BatchFlags
  316. uint32_t joined_item_batch_flags;
  317. RasterizerArray<BItemJoined> items_joined;
  318. RasterizerArray<BItemRef> item_refs;
  319. // items are sorted prior to joining
  320. RasterizerArray<BSortItem> sort_items;
  321. // new for Godot 4 .. the client outputs a linked list so we need to convert this
  322. // to a linear array
  323. LocalVector<RendererCanvasRender::Item::Command *> command_shortlist;
  324. // counts
  325. int total_quads;
  326. int total_verts;
  327. // we keep a record of how many color changes caused new batches
  328. // if the colors are causing an excessive number of batches, we switch
  329. // to alternate batching method and add color to the vertex format.
  330. int total_color_changes;
  331. // measured in pixels, recalculated each frame
  332. float scissor_threshold_area;
  333. // diagnose this frame, every nTh frame when settings_diagnose_frame is on
  334. bool diagnose_frame;
  335. String frame_string;
  336. uint32_t next_diagnose_tick;
  337. uint64_t diagnose_frame_number;
  338. // whether to join items across z_indices - this can interfere with z ranged lights,
  339. // so has to be disabled in some circumstances
  340. bool join_across_z_indices;
  341. // global settings
  342. bool settings_use_batching; // the current use_batching (affected by flash)
  343. bool settings_use_batching_original_choice; // the choice entered in project settings
  344. bool settings_flash_batching; // for regression testing, flash between non-batched and batched renderer
  345. bool settings_diagnose_frame; // print out batches to help optimize / regression test
  346. int settings_max_join_item_commands;
  347. float settings_colored_vertex_format_threshold;
  348. int settings_batch_buffer_num_verts;
  349. bool settings_scissor_lights;
  350. float settings_scissor_threshold; // 0.0 to 1.0
  351. int settings_item_reordering_lookahead;
  352. bool settings_use_single_rect_fallback;
  353. bool settings_use_software_skinning;
  354. int settings_light_max_join_items;
  355. int settings_ninepatch_mode;
  356. // buffer orphaning modes
  357. bool buffer_mode_batch_upload_send_null;
  358. bool buffer_mode_batch_upload_flag_stream;
  359. // uv contraction
  360. bool settings_uv_contract;
  361. float settings_uv_contract_amount;
  362. // only done on diagnose frame
  363. void reset_stats() {
  364. stats_items_sorted = 0;
  365. stats_light_items_joined = 0;
  366. }
  367. // frame stats (just for monitoring and debugging)
  368. int stats_items_sorted;
  369. int stats_light_items_joined;
  370. } bdata;
  371. struct FillState {
  372. void reset_flush() {
  373. // don't reset members that need to be preserved after flushing
  374. // half way through a list of commands
  375. curr_batch = 0;
  376. batch_tex_id = -1;
  377. texpixel_size = Vector2(1, 1);
  378. contract_uvs = false;
  379. sequence_batch_type_flags = 0;
  380. }
  381. void reset_joined_item(bool p_use_hardware_transform) {
  382. reset_flush();
  383. use_hardware_transform = p_use_hardware_transform;
  384. extra_matrix_sent = false;
  385. }
  386. // for batching multiple types, we don't allow mixing RECTs / LINEs etc.
  387. // using flags allows quicker rejection of sequences with different batch types
  388. uint32_t sequence_batch_type_flags;
  389. Batch *curr_batch;
  390. int batch_tex_id;
  391. bool use_hardware_transform;
  392. bool contract_uvs;
  393. Vector2 texpixel_size;
  394. Color final_modulate;
  395. TransformMode transform_mode;
  396. TransformMode orig_transform_mode;
  397. // support for extra matrices
  398. bool extra_matrix_sent; // whether sent on this item (in which case software transform can't be used untl end of item)
  399. int transform_extra_command_number_p1; // plus one to allow fast checking against zero
  400. Transform2D transform_combined; // final * extra
  401. };
  402. // used during try_join
  403. struct RenderItemState {
  404. RenderItemState() { reset(); }
  405. void reset() {
  406. current_clip = nullptr;
  407. shader_cache = nullptr;
  408. rebind_shader = true;
  409. prev_use_skeleton = false;
  410. last_blend_mode = -1;
  411. canvas_last_material = RID();
  412. item_group_z = 0;
  413. item_group_light = nullptr;
  414. final_modulate = Color(-1.0, -1.0, -1.0, -1.0); // just something unlikely
  415. joined_item_batch_type_flags_curr = 0;
  416. joined_item_batch_type_flags_prev = 0;
  417. joined_item = nullptr;
  418. }
  419. RendererCanvasRender::Item *current_clip;
  420. typename T_STORAGE::Shader *shader_cache;
  421. bool rebind_shader;
  422. bool prev_use_skeleton;
  423. bool prev_distance_field;
  424. int last_blend_mode;
  425. RID canvas_last_material;
  426. Color final_modulate;
  427. // used for joining items only
  428. BItemJoined *joined_item;
  429. bool join_batch_break;
  430. BLightRegion light_region;
  431. // we need some logic to prevent joining items that have vastly different batch types
  432. // these are defined in RasterizerStorageCommon::BatchTypeFlags
  433. uint32_t joined_item_batch_type_flags_curr;
  434. uint32_t joined_item_batch_type_flags_prev;
  435. // 'item group' is data over a single call to canvas_render_items
  436. int item_group_z;
  437. Color item_group_modulate;
  438. RendererCanvasRender::Light *item_group_light;
  439. Transform2D item_group_base_transform;
  440. } _render_item_state;
  441. bool use_nvidia_rect_workaround;
  442. //////////////////////////////////////////////////////////////////////////////
  443. // End of structs used by the batcher. Beginning of funcs.
  444. private:
  445. // curiously recurring template pattern - allows access to functions in the DERIVED class
  446. // this is kind of like using virtual functions but more efficient as they are resolved at compile time
  447. T_STORAGE *get_storage() { return static_cast<const T *>(this)->storage; }
  448. const T_STORAGE *get_storage() const { return static_cast<const T *>(this)->storage; }
  449. T *get_this() { return static_cast<T *>(this); }
  450. const T *get_this() const { return static_cast<const T *>(this); }
  451. protected:
  452. // main functions called from the rasterizer canvas
  453. void batch_constructor();
  454. void batch_initialize();
  455. void batch_canvas_begin();
  456. void batch_canvas_end();
  457. void batch_canvas_render_items_begin(const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform);
  458. void batch_canvas_render_items_end();
  459. void batch_canvas_render_items(RendererCanvasRender::Item *p_item_list, int p_z, const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform);
  460. // recording and sorting items from the initial pass
  461. void record_items(RendererCanvasRender::Item *p_item_list, int p_z);
  462. void join_sorted_items();
  463. void sort_items();
  464. bool _sort_items_match(const BSortItem &p_a, const BSortItem &p_b) const;
  465. bool sort_items_from(int p_start);
  466. // joining logic
  467. bool _disallow_item_join_if_batch_types_too_different(RenderItemState &r_ris, uint32_t btf_allowed);
  468. bool _detect_item_batch_break(RenderItemState &r_ris, RendererCanvasRender::Item *p_ci, bool &r_batch_break);
  469. // drives the loop filling batches and flushing
  470. void render_joined_item_commands(const BItemJoined &p_bij, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, bool p_lit);
  471. private:
  472. // flush once full or end of joined item
  473. void flush_render_batches(RendererCanvasRender::Item *p_first_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, uint32_t p_sequence_batch_type_flags);
  474. // a single joined item can contain multiple itemrefs, and thus create lots of batches
  475. // command start given a separate name to make easier to tell apart godot 3 and 4
  476. bool prefill_joined_item(FillState &r_fill_state, RendererCanvasRender::Item::Command **r_first_command, RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material);
  477. // prefilling different types of batch
  478. // default batch is an 'unhandled' legacy type batch that will be drawn with the legacy path,
  479. // all other batches are accelerated.
  480. void _prefill_default_batch(FillState &r_fill_state, int p_command_num, const RendererCanvasRender::Item &p_item);
  481. // accelerated batches
  482. bool _prefill_rect(RendererCanvasRender::Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, RendererCanvasRender::Item::Command *const *commands, RendererCanvasRender::Item *p_item, bool multiply_final_modulate);
  483. // dealing with textures
  484. int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match);
  485. protected:
  486. // legacy support for non batched mode
  487. void _legacy_canvas_item_render_commands(RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material);
  488. // light scissoring
  489. bool _light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const;
  490. bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const;
  491. void _calculate_scissor_threshold_area();
  492. private:
  493. // translating vertex formats prior to rendering
  494. void _translate_batches_to_vertex_colored_FVF();
  495. template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES, bool INCLUDE_MODULATE, bool INCLUDE_LARGE>
  496. void _translate_batches_to_larger_FVF(uint32_t p_sequence_batch_type_flags);
  497. protected:
  498. // accessory funcs
  499. void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const;
  500. void _software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const;
  501. TransformMode _find_transform_mode(const Transform2D &p_tr) const {
  502. // decided whether to do translate only for software transform
  503. if ((p_tr.elements[0].x == 1.0f) &&
  504. (p_tr.elements[0].y == 0.0f) &&
  505. (p_tr.elements[1].x == 0.0f) &&
  506. (p_tr.elements[1].y == 1.0f)) {
  507. return TM_TRANSLATE;
  508. }
  509. return TM_ALL;
  510. }
  511. typename T_STORAGE::Texture *_get_canvas_texture(const RID &p_texture) const {
  512. if (p_texture.is_valid()) {
  513. typename T_STORAGE::Texture *texture = get_storage()->texture_owner.get_or_null(p_texture);
  514. if (texture) {
  515. return texture->get_ptr();
  516. }
  517. }
  518. return 0;
  519. }
  520. public:
  521. Batch *_batch_request_new(bool p_blank = true) {
  522. Batch *batch = bdata.batches.request();
  523. if (!batch) {
  524. // grow the batches
  525. bdata.batches.grow();
  526. // and the temporary batches (used for color verts)
  527. bdata.batches_temp.reset();
  528. bdata.batches_temp.grow();
  529. // this should always succeed after growing
  530. batch = bdata.batches.request();
  531. RAST_DEBUG_ASSERT(batch);
  532. }
  533. if (p_blank)
  534. memset(batch, 0, sizeof(Batch));
  535. return batch;
  536. }
  537. BatchVertex *_batch_vertex_request_new() {
  538. return bdata.vertices.request();
  539. }
  540. protected:
  541. int godot4_commands_count(RendererCanvasRender::Item::Command *p_comm) const {
  542. int count = 0;
  543. while (p_comm) {
  544. count++;
  545. p_comm = p_comm->next;
  546. }
  547. return count;
  548. }
  549. unsigned int godot4_commands_to_vector(RendererCanvasRender::Item::Command *p_comm, LocalVector<RendererCanvasRender::Item::Command *> &p_list) {
  550. p_list.clear();
  551. while (p_comm) {
  552. p_list.push_back(p_comm);
  553. p_comm = p_comm->next;
  554. }
  555. return p_list.size();
  556. }
  557. };
  558. PREAMBLE(void)::batch_canvas_begin() {
  559. // diagnose_frame?
  560. bdata.frame_string = ""; // just in case, always set this as we don't want a string leak in release...
  561. #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
  562. if (bdata.settings_diagnose_frame) {
  563. bdata.diagnose_frame = false;
  564. uint32_t tick = OS::get_singleton()->get_ticks_msec();
  565. uint64_t frame = Engine::get_singleton()->get_frames_drawn();
  566. if (tick >= bdata.next_diagnose_tick) {
  567. bdata.next_diagnose_tick = tick + 10000;
  568. // the plus one is prevent starting diagnosis half way through frame
  569. bdata.diagnose_frame_number = frame + 1;
  570. }
  571. if (frame == bdata.diagnose_frame_number) {
  572. bdata.diagnose_frame = true;
  573. bdata.reset_stats();
  574. }
  575. if (bdata.diagnose_frame) {
  576. bdata.frame_string = "canvas_begin FRAME " + itos(frame) + "\n";
  577. }
  578. }
  579. #endif
  580. }
  581. PREAMBLE(void)::batch_canvas_end() {
  582. #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
  583. if (bdata.diagnose_frame) {
  584. bdata.frame_string += "canvas_end\n";
  585. if (bdata.stats_items_sorted) {
  586. bdata.frame_string += "\titems reordered: " + itos(bdata.stats_items_sorted) + "\n";
  587. }
  588. if (bdata.stats_light_items_joined) {
  589. bdata.frame_string += "\tlight items joined: " + itos(bdata.stats_light_items_joined) + "\n";
  590. }
  591. print_line(bdata.frame_string);
  592. }
  593. #endif
  594. }
// Per item-group setup, called once before rendering a group of canvas items.
// Resets the shared render item state, stores the group modulate / light list /
// base transform, and scans the lights to decide whether items may be joined
// across z indices (z ranged lights make that unsafe) and whether the light
// region bitfield optimization can be used (<= 64 lights).
PREAMBLE(void)::batch_canvas_render_items_begin(const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform) {
	// if we are debugging, flash each frame between batching renderer and old version to compare for regressions
	if (bdata.settings_flash_batching) {
		if ((Engine::get_singleton()->get_frames_drawn() % 2) == 0)
			bdata.settings_use_batching = true;
		else
			bdata.settings_use_batching = false;
	}

	// Nothing to prepare when batching is off for this frame.
	if (!bdata.settings_use_batching) {
		return;
	}

	// this only needs to be done when screen size changes, but this should be
	// infrequent enough
	_calculate_scissor_threshold_area();

	// set up render item state for all the z_indexes (this is common to all z_indexes)
	_render_item_state.reset();
	_render_item_state.item_group_modulate = p_modulate;
	_render_item_state.item_group_light = p_light;
	_render_item_state.item_group_base_transform = p_base_transform;
	_render_item_state.light_region.reset();

	// batch break must be preserved over the different z indices,
	// to prevent joining to an item on a previous index if not allowed
	_render_item_state.join_batch_break = false;

	// whether to join across z indices depends on whether there are z ranged lights.
	// joined z_index items can be wrongly classified with z ranged lights.
	bdata.join_across_z_indices = true;

	// Walk the light linked list: count lights and detect any z ranged light.
	int light_count = 0;
	while (p_light) {
		light_count++;

		if ((p_light->z_min != RS::CANVAS_ITEM_Z_MIN) || (p_light->z_max != RS::CANVAS_ITEM_Z_MAX)) {
			// prevent joining across z indices. This would have caused visual regressions
			bdata.join_across_z_indices = false;
		}
		p_light = p_light->next_ptr;
	}

	// can't use the light region bitfield if there are too many lights
	// hopefully most games won't blow this limit..
	// if they do they will work but it won't batch join items just in case
	if (light_count > 64) {
		_render_item_state.light_region.too_many_lights = true;
	}
}
  637. PREAMBLE(void)::batch_canvas_render_items_end() {
  638. if (!bdata.settings_use_batching) {
  639. return;
  640. }
  641. join_sorted_items();
  642. #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
  643. if (bdata.diagnose_frame) {
  644. bdata.frame_string += "items\n";
  645. }
  646. #endif
  647. // batching render is deferred until after going through all the z_indices, joining all the items
  648. get_this()->canvas_render_items_implementation(0, 0, _render_item_state.item_group_modulate,
  649. _render_item_state.item_group_light,
  650. _render_item_state.item_group_base_transform);
  651. bdata.items_joined.reset();
  652. bdata.item_refs.reset();
  653. bdata.sort_items.reset();
  654. }
  655. PREAMBLE(void)::batch_canvas_render_items(RendererCanvasRender::Item *p_item_list, int p_z, const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform) {
  656. // stage 1 : join similar items, so that their state changes are not repeated,
  657. // and commands from joined items can be batched together
  658. if (bdata.settings_use_batching) {
  659. record_items(p_item_list, p_z);
  660. return;
  661. }
  662. // only legacy renders at this stage, batched renderer doesn't render until canvas_render_items_end()
  663. get_this()->canvas_render_items_implementation(p_item_list, p_z, p_modulate, p_light, p_base_transform);
  664. }
// Default batches will not occur in software transform only items
// EXCEPT IN THE CASE OF SINGLE RECTS (and this may well not occur, check the logic in prefill_join_item TYPE_RECT)
// but can occur where transform commands have been sent during hardware batch
//
// Adds command p_command_num of p_item to the current batch, either extending
// an existing BT_DEFAULT batch or starting a new one. Handles flushing a
// deferred "extra matrix" transform command (tracked plus-1-based in
// r_fill_state.transform_extra_command_number_p1, 0 meaning "none pending"),
// which may need to be emitted alongside this command.
PREAMBLE(void)::_prefill_default_batch(FillState &r_fill_state, int p_command_num, const RendererCanvasRender::Item &p_item) {
	if (r_fill_state.curr_batch->type == RasterizerStorageCommon::BT_DEFAULT) {
		// don't need to flush an extra transform command?
		if (!r_fill_state.transform_extra_command_number_p1) {
			// another default command, just add to the existing batch
			r_fill_state.curr_batch->num_commands++;
		} else {
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
			// sanity check: the pending extra transform should be the command
			// immediately preceding this one (p1 is plus-1-based)
			if (r_fill_state.transform_extra_command_number_p1 != p_command_num) {
				WARN_PRINT_ONCE("_prefill_default_batch : transform_extra_command_number_p1 != p_command_num");
			}
#endif
			// if the first member of the batch is a transform we have to be careful
			if (!r_fill_state.curr_batch->num_commands) {
				// there can be leading useless extra transforms (sometimes happens with debug collision polys)
				// we need to rejig the first_command for the first useful transform
				r_fill_state.curr_batch->first_command += r_fill_state.transform_extra_command_number_p1 - 1;
			}
			// we do have a pending extra transform command to flush
			// either the extra transform is in the prior command, or not, in which case we need 2 batches
			// (+2 covers both the transform command and this default command)
			r_fill_state.curr_batch->num_commands += 2;
			r_fill_state.transform_extra_command_number_p1 = 0; // mark as sent
			r_fill_state.extra_matrix_sent = true;
			// the original mode should always be hardware transform ..
			// test this assumption
			//CRASH_COND(r_fill_state.orig_transform_mode != TM_NONE);
			r_fill_state.transform_mode = r_fill_state.orig_transform_mode;
			// do we need to restore anything else?
		}
	} else {
		// end of previous different type batch, so start new default batch
		// first consider whether there is a dirty extra matrix to send
		if (r_fill_state.transform_extra_command_number_p1) {
			// get which command the extra is in, and blank all the records as it no longer is stored CPU side
			int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based
			r_fill_state.transform_extra_command_number_p1 = 0;
			r_fill_state.extra_matrix_sent = true;
			// send the extra to the GPU in a batch
			r_fill_state.curr_batch = _batch_request_new();
			r_fill_state.curr_batch->type = RasterizerStorageCommon::BT_DEFAULT;
			r_fill_state.curr_batch->first_command = extra_command;
			r_fill_state.curr_batch->num_commands = 1;
			// revert to the original transform mode
			// e.g. go back to NONE if we were in hardware transform mode
			r_fill_state.transform_mode = r_fill_state.orig_transform_mode;
			// reset the original transform if we are going back to software mode,
			// because the extra is now done on the GPU...
			// (any subsequent extras are sent directly to the GPU, no deferring)
			if (r_fill_state.orig_transform_mode != TM_NONE) {
				r_fill_state.transform_combined = p_item.final_transform;
			}
			// can possibly combine batch with the next one in some cases
			// this is more efficient than having an extra batch especially for the extra
			if ((extra_command + 1) == p_command_num) {
				// merge: the new batch covers both the extra transform and this command
				r_fill_state.curr_batch->num_commands = 2;
				return;
			}
		}
		// start default batch
		r_fill_state.curr_batch = _batch_request_new();
		r_fill_state.curr_batch->type = RasterizerStorageCommon::BT_DEFAULT;
		r_fill_state.curr_batch->first_command = p_command_num;
		r_fill_state.curr_batch->num_commands = 1;
	}
}
  733. PREAMBLE(int)::_batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match) {
  734. // optimization .. in 99% cases the last matched value will be the same, so no need to traverse the list
  735. if (p_previous_match > 0) // if it is zero, it will get hit first in the linear search anyway
  736. {
  737. const BatchTex &batch_texture = bdata.batch_textures[p_previous_match];
  738. // note for future reference, if RID implementation changes, this could become more expensive
  739. if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) {
  740. // tiling mode must also match
  741. bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF;
  742. if (tiles == p_tile)
  743. // match!
  744. return p_previous_match;
  745. }
  746. }
  747. // not the previous match .. we will do a linear search ... slower, but should happen
  748. // not very often except with non-batchable runs, which are going to be slow anyway
  749. // n.b. could possibly be replaced later by a fast hash table
  750. for (int n = 0; n < bdata.batch_textures.size(); n++) {
  751. const BatchTex &batch_texture = bdata.batch_textures[n];
  752. if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) {
  753. // tiling mode must also match
  754. bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF;
  755. if (tiles == p_tile)
  756. // match!
  757. return n;
  758. }
  759. }
  760. // pushing back from local variable .. not ideal but has to use a Vector because non pod
  761. // due to RIDs
  762. BatchTex new_batch_tex;
  763. new_batch_tex.RID_texture = p_texture;
  764. new_batch_tex.RID_normal = p_normal;
  765. // get the texture
  766. typename T_STORAGE::Texture *texture = _get_canvas_texture(p_texture);
  767. if (texture) {
  768. // special case, there can be textures with no width or height
  769. int w = texture->width;
  770. int h = texture->height;
  771. if (!w || !h) {
  772. w = 1;
  773. h = 1;
  774. }
  775. new_batch_tex.tex_pixel_size.x = 1.0 / w;
  776. new_batch_tex.tex_pixel_size.y = 1.0 / h;
  777. new_batch_tex.flags = texture->flags;
  778. } else {
  779. // maybe doesn't need doing...
  780. new_batch_tex.tex_pixel_size.x = 1.0f;
  781. new_batch_tex.tex_pixel_size.y = 1.0f;
  782. new_batch_tex.flags = 0;
  783. }
  784. if (p_tile) {
  785. if (texture) {
  786. // default
  787. new_batch_tex.tile_mode = BatchTex::TILE_NORMAL;
  788. // no hardware support for non power of 2 tiling
  789. if (!get_storage()->config.support_npot_repeat_mipmap) {
  790. if (next_power_of_2(texture->alloc_width) != (unsigned int)texture->alloc_width && next_power_of_2(texture->alloc_height) != (unsigned int)texture->alloc_height) {
  791. new_batch_tex.tile_mode = BatchTex::TILE_FORCE_REPEAT;
  792. }
  793. }
  794. } else {
  795. // this should not happen?
  796. new_batch_tex.tile_mode = BatchTex::TILE_OFF;
  797. }
  798. } else {
  799. new_batch_tex.tile_mode = BatchTex::TILE_OFF;
  800. }
  801. // push back
  802. bdata.batch_textures.push_back(new_batch_tex);
  803. return bdata.batch_textures.size() - 1;
  804. }
// Construction-time setup for the batcher. Batching stays disabled until
// batch_initialize() reads the project settings.
PREAMBLE(void)::batch_constructor() {
	bdata.settings_use_batching = false;
#ifdef GLES_OVER_GL
	// Desktop GL: read whether the NVIDIA rect flicker workaround is wanted.
	use_nvidia_rect_workaround = GLOBAL_GET("rendering/quality/2d/use_nvidia_rect_flicker_workaround");
#else
	// Not needed (a priori) on GLES devices
	use_nvidia_rect_workaround = false;
#endif
}
// Reads all batching-related project settings, sanitizes them, and allocates
// the vertex / index / batch buffers accordingly. Called once at startup.
PREAMBLE(void)::batch_initialize() {
#define BATCHING_LOAD_PROJECT_SETTINGS
#ifdef BATCHING_LOAD_PROJECT_SETTINGS
	bdata.settings_use_batching = GLOBAL_GET("rendering/batching/options/use_batching");
	bdata.settings_max_join_item_commands = GLOBAL_GET("rendering/batching/parameters/max_join_item_commands");
	bdata.settings_colored_vertex_format_threshold = GLOBAL_GET("rendering/batching/parameters/colored_vertex_format_threshold");
	bdata.settings_item_reordering_lookahead = GLOBAL_GET("rendering/batching/parameters/item_reordering_lookahead");
	bdata.settings_light_max_join_items = GLOBAL_GET("rendering/batching/lights/max_join_items");
	bdata.settings_use_single_rect_fallback = GLOBAL_GET("rendering/batching/options/single_rect_fallback");
	bdata.settings_use_software_skinning = GLOBAL_GET("rendering/quality/2d/use_software_skinning");
	bdata.settings_ninepatch_mode = GLOBAL_GET("rendering/quality/2d/ninepatch_mode");
	// alternatively only enable uv contract if pixel snap in use,
	// but with this enable bool, it should not be necessary
	bdata.settings_uv_contract = GLOBAL_GET("rendering/batching/precision/uv_contract");
	// setting is stored as an integer in millionths
	bdata.settings_uv_contract_amount = (float)GLOBAL_GET("rendering/batching/precision/uv_contract_amount") / 1000000.0f;
	// we can use the threshold to determine whether to turn scissoring off or on
	bdata.settings_scissor_threshold = GLOBAL_GET("rendering/batching/lights/scissor_area_threshold");
#endif
	// threshold of ~1.0 means "never worth scissoring" -> disable entirely
	if (bdata.settings_scissor_threshold > 0.999f) {
		bdata.settings_scissor_lights = false;
	} else {
		bdata.settings_scissor_lights = true;
		// apply power of 4 relationship for the area, as most of the important changes
		// will be happening at low values of scissor threshold
		bdata.settings_scissor_threshold *= bdata.settings_scissor_threshold;
		bdata.settings_scissor_threshold *= bdata.settings_scissor_threshold;
	}
	// The sweet spot on my desktop for cache is actually smaller than the max, and this
	// is the default. This saves memory too so we will use it for now, needs testing to see whether this varies according
	// to device / platform.
#ifdef BATCHING_LOAD_PROJECT_SETTINGS
	bdata.settings_batch_buffer_num_verts = GLOBAL_GET("rendering/batching/parameters/batch_buffer_size");
	// override the use_batching setting in the editor
	// (note that if the editor can't start, you can't change the use_batching project setting!)
	if (Engine::get_singleton()->is_editor_hint()) {
		bool use_in_editor = GLOBAL_GET("rendering/batching/options/use_batching_in_editor");
		bdata.settings_use_batching = use_in_editor;
		// fix some settings in the editor, as the performance not worth the risk
		bdata.settings_use_single_rect_fallback = false;
	}
#endif
	// if we are using batching, we will purposefully disable the nvidia workaround.
	// This is because the only reason to use the single rect fallback is the approx 2x speed
	// of the uniform drawing technique. If we used nvidia workaround, speed would be
	// approx equal to the batcher drawing technique (indexed primitive + VB).
	if (bdata.settings_use_batching) {
		use_nvidia_rect_workaround = false;
	}
	// For debugging, if flash is set in project settings, it will flash on alternate frames
	// between the non-batched renderer and the batched renderer,
	// in order to find regressions.
	// This should not be used except during development.
	// make a note of the original choice in case we are flashing on and off the batching
	bdata.settings_use_batching_original_choice = bdata.settings_use_batching;
#ifdef BATCHING_LOAD_PROJECT_SETTINGS
	bdata.settings_flash_batching = GLOBAL_GET("rendering/batching/debug/flash_batching");
#endif
	if (!bdata.settings_use_batching) {
		// no flash when batching turned off
		bdata.settings_flash_batching = false;
	}
	// frame diagnosis. print out the batches every nth frame
	// (only outside the editor, and only when batching is actually on)
	bdata.settings_diagnose_frame = false;
	if (!Engine::get_singleton()->is_editor_hint() && bdata.settings_use_batching) {
#ifdef BATCHING_LOAD_PROJECT_SETTINGS
		bdata.settings_diagnose_frame = GLOBAL_GET("rendering/batching/debug/diagnose_frame");
#endif
	}
	// the maximum num quads in a batch is limited by GLES2. We can have only 16 bit indices,
	// which means we can address a vertex buffer of max size 65535. 4 vertices are needed per quad.
	// Note this determines the memory use by the vertex buffer vector. max quads (65536/4)-1
	// but can be reduced to save memory if really required (will result in more batches though)
	const int max_possible_quads = (65536 / 4) - 1;
	const int min_possible_quads = 8; // some reasonable small value
	// value from project settings
	int max_quads = bdata.settings_batch_buffer_num_verts / 4;
	// sanity checks
	max_quads = CLAMP(max_quads, min_possible_quads, max_possible_quads);
	bdata.settings_max_join_item_commands = CLAMP(bdata.settings_max_join_item_commands, 0, 65535);
	bdata.settings_colored_vertex_format_threshold = CLAMP(bdata.settings_colored_vertex_format_threshold, 0.0f, 1.0f);
	bdata.settings_scissor_threshold = CLAMP(bdata.settings_scissor_threshold, 0.0f, 1.0f);
	bdata.settings_light_max_join_items = CLAMP(bdata.settings_light_max_join_items, 0, 65535);
	bdata.settings_item_reordering_lookahead = CLAMP(bdata.settings_item_reordering_lookahead, 0, 65535);
	// allow user to override the api usage techniques using project settings
	// bdata.buffer_mode_batch_upload_send_null = GLOBAL_GET("rendering/options/api_usage_batching/send_null");
	// bdata.buffer_mode_batch_upload_flag_stream = GLOBAL_GET("rendering/options/api_usage_batching/flag_stream");
	// for debug purposes, output a string with the batching options
	String batching_options_string = "OpenGL ES Batching: ";
	if (bdata.settings_use_batching) {
		batching_options_string += "ON";
		if (OS::get_singleton()->is_stdout_verbose()) {
			batching_options_string += "\n\tOPTIONS\n";
			batching_options_string += "\tmax_join_item_commands " + itos(bdata.settings_max_join_item_commands) + "\n";
			batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n";
			batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n";
			batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n";
			batching_options_string += "\titem_reordering_lookahead " + itos(bdata.settings_item_reordering_lookahead) + "\n";
			batching_options_string += "\tlight_max_join_items " + itos(bdata.settings_light_max_join_items) + "\n";
			batching_options_string += "\tsingle_rect_fallback " + String(Variant(bdata.settings_use_single_rect_fallback)) + "\n";
			batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n";
			batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame));
		}
		print_line(batching_options_string);
	}
	// special case, for colored vertex format threshold.
	// as the comparison is >=, we want to be able to totally turn on or off
	// conversion to colored vertex format at the extremes, so we will force
	// 1.0 to be just above 1.0
	if (bdata.settings_colored_vertex_format_threshold > 0.995f) {
		bdata.settings_colored_vertex_format_threshold = 1.01f;
	}
	// save memory when batching off
	if (!bdata.settings_use_batching) {
		max_quads = 0;
	}
	uint32_t sizeof_batch_vert = sizeof(BatchVertex);
	bdata.max_quads = max_quads;
	// 4 verts per quad
	bdata.vertex_buffer_size_units = max_quads * 4;
	// the index buffer can be longer than 65535, only the indices need to be within this range
	bdata.index_buffer_size_units = max_quads * 6;
	const int max_verts = bdata.vertex_buffer_size_units;
	// this comes out at approx 64K for non-colored vertex buffer, and 128K for colored vertex buffer
	bdata.vertex_buffer_size_bytes = max_verts * sizeof_batch_vert;
	bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds
	// create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF)
	bdata.vertices.create(max_verts); // 512k
	bdata.unit_vertices.create(max_verts, sizeof(BatchVertexLarge));
	// extra data per vert needed for larger FVFs
	bdata.light_angles.create(max_verts);
	bdata.vertex_colors.create(max_verts);
	bdata.vertex_modulates.create(max_verts);
	bdata.vertex_transforms.create(max_verts);
	// num batches will be auto increased dynamically if required
	bdata.batches.create(1024);
	bdata.batches_temp.create(bdata.batches.max_size());
	// batch textures can also be increased dynamically
	bdata.batch_textures.create(32);
}
  953. PREAMBLE(bool)::_light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const {
  954. float area_item = p_item_rect.size.x * p_item_rect.size.y; // double check these are always positive
  955. // quick reject .. the area of pixels saved can never be more than the area of the item
  956. if (area_item < bdata.scissor_threshold_area) {
  957. return false;
  958. }
  959. Rect2 cliprect;
  960. if (!_light_find_intersection(p_item_rect, p_light_xform, p_light_rect, cliprect)) {
  961. // should not really occur .. but just in case
  962. cliprect = Rect2(0, 0, 0, 0);
  963. } else {
  964. // some conditions not to scissor
  965. // determine the area (fill rate) that will be saved
  966. float area_cliprect = cliprect.size.x * cliprect.size.y;
  967. float area_saved = area_item - area_cliprect;
  968. // if area saved is too small, don't scissor
  969. if (area_saved < bdata.scissor_threshold_area) {
  970. return false;
  971. }
  972. }
  973. int rh = get_storage()->frame.current_rt->height;
  974. int y = rh - (cliprect.position.y + cliprect.size.y);
  975. get_this()->gl_enable_scissor(cliprect.position.x, y, cliprect.size.width, cliprect.size.height);
  976. return true;
  977. }
  978. PREAMBLE(bool)::_light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const {
  979. // transform light to world space (note this is done in the earlier intersection test, so could
  980. // be made more efficient)
  981. Vector2 pts[4] = {
  982. p_light_xform.xform(p_light_rect.position),
  983. p_light_xform.xform(Vector2(p_light_rect.position.x + p_light_rect.size.x, p_light_rect.position.y)),
  984. p_light_xform.xform(Vector2(p_light_rect.position.x, p_light_rect.position.y + p_light_rect.size.y)),
  985. p_light_xform.xform(Vector2(p_light_rect.position.x + p_light_rect.size.x, p_light_rect.position.y + p_light_rect.size.y)),
  986. };
  987. // calculate the light bound rect in world space
  988. Rect2 lrect(pts[0].x, pts[0].y, 0, 0);
  989. for (int n = 1; n < 4; n++) {
  990. lrect.expand_to(pts[n]);
  991. }
  992. // intersection between the 2 rects
  993. // they should probably always intersect, because of earlier check, but just in case...
  994. if (!p_item_rect.intersects(lrect))
  995. return false;
  996. // note this does almost the same as Rect2.clip but slightly more efficient for our use case
  997. r_cliprect.position.x = MAX(p_item_rect.position.x, lrect.position.x);
  998. r_cliprect.position.y = MAX(p_item_rect.position.y, lrect.position.y);
  999. Point2 item_rect_end = p_item_rect.position + p_item_rect.size;
  1000. Point2 lrect_end = lrect.position + lrect.size;
  1001. r_cliprect.size.x = MIN(item_rect_end.x, lrect_end.x) - r_cliprect.position.x;
  1002. r_cliprect.size.y = MIN(item_rect_end.y, lrect_end.y) - r_cliprect.position.y;
  1003. return true;
  1004. }
  1005. PREAMBLE(void)::_calculate_scissor_threshold_area() {
  1006. if (!bdata.settings_scissor_lights) {
  1007. return;
  1008. }
  1009. // scissor area threshold is 0.0 to 1.0 in the settings for ease of use.
  1010. // we need to translate to an absolute area to determine quickly whether
  1011. // to scissor.
  1012. if (bdata.settings_scissor_threshold < 0.0001f) {
  1013. bdata.scissor_threshold_area = -1.0f; // will always pass
  1014. } else {
  1015. // in pixels
  1016. int w = get_storage()->frame.current_rt->width;
  1017. int h = get_storage()->frame.current_rt->height;
  1018. int screen_area = w * h;
  1019. bdata.scissor_threshold_area = bdata.settings_scissor_threshold * screen_area;
  1020. }
  1021. }
// Renders one joined item: walks every referenced source item, prefilling
// batches from its command list and flushing whenever the vertex buffer
// fills. p_lit selects the modulate source: the precomputed final modulate
// (unlit) vs the raw item modulate (lit pass, canvas modulate ignored).
PREAMBLE(void)::render_joined_item_commands(const BItemJoined &p_bij, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, bool p_lit) {
	RendererCanvasRender::Item *item = 0;
	// commands for default batches always reference the first item of the join
	RendererCanvasRender::Item *first_item = bdata.item_refs[p_bij.first_item_ref].item;
	// fill_state and bdata have once off setup per joined item, and a smaller reset on flush
	FillState fill_state;
	fill_state.reset_joined_item(p_bij.use_hardware_transform());
	bdata.reset_joined_item();
	// should this joined item be using large FVF?
	if (p_bij.flags & RasterizerStorageCommon::USE_MODULATE_FVF) {
		bdata.use_modulate = true;
		bdata.fvf = RasterizerStorageCommon::FVF_MODULATED;
	}
	if (p_bij.flags & RasterizerStorageCommon::USE_LARGE_FVF) {
		// large FVF also carries modulate
		bdata.use_modulate = true;
		bdata.use_large_verts = true;
		bdata.fvf = RasterizerStorageCommon::FVF_LARGE;
	}
	// in the special case of custom shaders that read from VERTEX (i.e. vertex position)
	// we want to disable software transform of extra matrix
	if (bdata.joined_item_batch_flags & RasterizerStorageCommon::PREVENT_VERTEX_BAKING) {
		fill_state.extra_matrix_sent = true;
	}
	for (unsigned int i = 0; i < p_bij.num_item_refs; i++) {
		const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i];
		item = ref.item;
		if (!p_lit) {
			// if not lit we use the complex calculated final modulate
			fill_state.final_modulate = ref.final_modulate;
		} else {
			// if lit we ignore canvas modulate and just use the item modulate
			fill_state.final_modulate = item->final_modulate;
		}
		// ONCE OFF fill state setup, that will be retained over multiple calls to
		// prefill_joined_item()
		fill_state.transform_combined = item->final_transform;
		// decide the initial transform mode, and make a backup
		// in orig_transform_mode in case we need to switch back
		if (!fill_state.use_hardware_transform) {
			fill_state.transform_mode = _find_transform_mode(fill_state.transform_combined);
		} else {
			fill_state.transform_mode = TM_NONE;
		}
		fill_state.orig_transform_mode = fill_state.transform_mode;
		// keep track of when we added an extra matrix
		// so we can defer sending until we see a default command
		fill_state.transform_extra_command_number_p1 = 0;
		RendererCanvasRender::Item::Command *current_command = item->commands;
		while (current_command) {
			// fill as many batches as possible (until all done, or the vertex buffer is full)
			// NOTE(review): loop termination relies on prefill_joined_item advancing
			// current_command (passed by reference) -- confirm in its signature.
			bool bFull = get_this()->prefill_joined_item(fill_state, current_command, item, p_current_clip, r_reclip, p_material);
			if (bFull) {
				// always pass first item (commands for default are always first item)
				flush_render_batches(first_item, p_current_clip, r_reclip, p_material, fill_state.sequence_batch_type_flags);
				// zero all the batch data ready for a new run
				bdata.reset_flush();
				// don't zero all the fill state, some may need to be preserved
				fill_state.reset_flush();
			}
		}
	}
	// flush if any left
	flush_render_batches(first_item, p_current_clip, r_reclip, p_material, fill_state.sequence_batch_type_flags);
	// zero all the batch data ready for a new run
	bdata.reset_flush();
}
  1087. PREAMBLE(void)::_legacy_canvas_item_render_commands(RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material) {
  1088. // reuse the same list each time to prevent needless dynamic allocations
  1089. unsigned int command_count = godot4_commands_to_vector(p_item->commands, bdata.command_shortlist);
  1090. RendererCanvasRender::Item::Command *const *commands = nullptr;
  1091. if (command_count) {
  1092. commands = &bdata.command_shortlist[0];
  1093. }
  1094. // legacy .. just create one massive batch and render everything as before
  1095. bdata.batches.reset();
  1096. Batch *batch = _batch_request_new();
  1097. batch->type = RasterizerStorageCommon::BT_DEFAULT;
  1098. batch->num_commands = command_count;
  1099. get_this()->render_batches(commands, p_current_clip, r_reclip, p_material);
  1100. bdata.reset_flush();
  1101. }
  1102. PREAMBLE(void)::record_items(RendererCanvasRender::Item *p_item_list, int p_z) {
  1103. while (p_item_list) {
  1104. BSortItem *s = bdata.sort_items.request_with_grow();
  1105. s->item = p_item_list;
  1106. s->z_index = p_z;
  1107. p_item_list = p_item_list->next;
  1108. }
  1109. }
// No-op in this implementation. Kept as the hook called by
// batch_canvas_render_items_end() before the deferred render.
PREAMBLE(void)::join_sorted_items() {
}
  1112. PREAMBLE(void)::_software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const {
  1113. Vector2 vc(r_v.x, r_v.y);
  1114. vc = p_tr.xform(vc);
  1115. r_v.set(vc);
  1116. }
  1117. PREAMBLE(void)::_software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const {
  1118. r_v = p_tr.xform(r_v);
  1119. }
  1120. PREAMBLE(void)::_translate_batches_to_vertex_colored_FVF() {
  1121. // zeros the size and sets up how big each unit is
  1122. bdata.unit_vertices.prepare(sizeof(BatchVertexColored));
  1123. const BatchColor *source_vertex_colors = &bdata.vertex_colors[0];
  1124. RAST_DEBUG_ASSERT(bdata.vertex_colors.size() == bdata.vertices.size());
  1125. int num_verts = bdata.vertices.size();
  1126. for (int n = 0; n < num_verts; n++) {
  1127. const BatchVertex &bv = bdata.vertices[n];
  1128. BatchVertexColored *cv = (BatchVertexColored *)bdata.unit_vertices.request();
  1129. cv->pos = bv.pos;
  1130. cv->uv = bv.uv;
  1131. cv->col = *source_vertex_colors++;
  1132. }
  1133. }
  1134. // Translation always involved adding color to the FVF, which enables
  1135. // joining of batches that have different colors.
  1136. // There is a trade off. Non colored verts are smaller so work faster, but
  1137. // there comes a point where it is better to just use colored verts to avoid lots of
  1138. // batches.
  1139. // In addition this can optionally add light angles to the FVF, necessary for normal mapping.
  1140. T_PREAMBLE
  1141. template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES, bool INCLUDE_MODULATE, bool INCLUDE_LARGE>
// Re-encodes the already-filled small-FVF batch data into the larger vertex
// format BATCH_VERTEX_TYPE, writing into bdata.unit_vertices and rebuilding the
// batch list in bdata.batches_temp (copied back over bdata.batches at the end).
// Per-batch color is expanded to per-vertex color, and — depending on which of
// the INCLUDE_* template flags are compiled in — light angle, modulate color
// and per-vertex transform attributes are appended. Adjacent RECT batches that
// share a texture are merged while translating.
void C_PREAMBLE::_translate_batches_to_larger_FVF(uint32_t p_sequence_batch_type_flags) {
	bool include_poly_color = false;

	// we ONLY want to include the color verts in translation when using polys,
	// as rects do not write vertex colors, only colors per batch.
	if (p_sequence_batch_type_flags & RasterizerStorageCommon::BTF_POLY) {
		// NOTE(review): this is a boolean OR of the compile-time template flags,
		// i.e. true whenever ANY extended attribute (light angle / modulate /
		// large transform) is part of BATCH_VERTEX_TYPE. Presumably intentional
		// (this translation only runs for larger formats), but confirm it
		// shouldn't simply be `true`.
		include_poly_color = INCLUDE_LIGHT_ANGLES | INCLUDE_MODULATE | INCLUDE_LARGE;
	}

	// zeros the size and sets up how big each unit is
	bdata.unit_vertices.prepare(sizeof(BATCH_VERTEX_TYPE));
	bdata.batches_temp.reset();

	// As the vertices_colored and batches_temp are 'mirrors' of the non-colored version,
	// the sizes should be equal, and allocations should never fail. Hence the use of debug
	// asserts to check program flow, these should not occur at runtime unless the allocation
	// code has been altered.
	RAST_DEBUG_ASSERT(bdata.unit_vertices.max_size() == bdata.vertices.max_size());
	RAST_DEBUG_ASSERT(bdata.batches_temp.max_size() == bdata.batches.max_size());

	// Sentinel "impossible" color.
	// NOTE(review): curr_col appears unused in this function — possibly a
	// leftover from the colored-vertex variant of this routine; confirm before
	// removing.
	Color curr_col(-1.0f, -1.0f, -1.0f, -1.0f);

	Batch *dest_batch = nullptr;

	// The source attribute streams are consumed strictly sequentially; each
	// pointer only advances when its attribute is actually written, so the
	// order of the blocks below must not be changed.
	const BatchColor *source_vertex_colors = &bdata.vertex_colors[0];
	const float *source_light_angles = &bdata.light_angles[0];
	const BatchColor *source_vertex_modulates = &bdata.vertex_modulates[0];
	const BatchTransform *source_vertex_transforms = &bdata.vertex_transforms[0];

	// translate the batches into vertex colored batches
	for (int n = 0; n < bdata.batches.size(); n++) {
		const Batch &source_batch = bdata.batches[n];

		// does source batch use light angles?
		// (light angles are only generated when the batch texture has a normal map)
		const BatchTex &btex = bdata.batch_textures[source_batch.batch_texture_id];
		bool source_batch_uses_light_angles = btex.RID_normal != RID();

		bool needs_new_batch = true;

		if (dest_batch) {
			if (dest_batch->type == source_batch.type) {
				if (source_batch.type == RasterizerStorageCommon::BT_RECT) {
					if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
						// add to previous batch
						dest_batch->num_commands += source_batch.num_commands;
						needs_new_batch = false;

						// create the colored verts (only if not default)
						//int first_vert = source_batch.first_quad * 4;
						//int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
						// rects always use 4 vertices per command
						int first_vert = source_batch.first_vert;
						int end_vert = first_vert + (4 * source_batch.num_commands);

						for (int v = first_vert; v < end_vert; v++) {
							RAST_DEV_DEBUG_ASSERT(bdata.vertices.size());
							const BatchVertex &bv = bdata.vertices[v];
							BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request();
							RAST_DEBUG_ASSERT(cv);
							cv->pos = bv.pos;
							cv->uv = bv.uv;
							// rects take the per-batch color, never a per-vertex color
							cv->col = source_batch.color;

							// INCLUDE_* are compile-time template flags, so the
							// unused branches below should be compiled out.
							if (INCLUDE_LIGHT_ANGLES) {
								RAST_DEV_DEBUG_ASSERT(bdata.light_angles.size());
								// this is required to allow compilation with non light angle vertex.
								// it should be compiled out.
								BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
								if (source_batch_uses_light_angles)
									lv->light_angle = *source_light_angles++;
								else
									lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
							} // if including light angles

							if (INCLUDE_MODULATE) {
								RAST_DEV_DEBUG_ASSERT(bdata.vertex_modulates.size());
								BatchVertexModulated *mv = (BatchVertexModulated *)cv;
								mv->modulate = *source_vertex_modulates++;
							} // including modulate

							if (INCLUDE_LARGE) {
								RAST_DEV_DEBUG_ASSERT(bdata.vertex_transforms.size());
								BatchVertexLarge *lv = (BatchVertexLarge *)cv;
								lv->transform = *source_vertex_transforms++;
							} // if including large
						}
					} // textures match
				} else {
					// default
					// we can still join, but only under special circumstances
					// does this ever happen? not sure at this stage, but left for future expansion
					// NOTE(review): this tests source-end == dest-start, which reads
					// as the source batch PRECEDING dest_batch even though batches
					// are iterated in order. Verify the intended condition isn't
					// dest_batch->first_command + dest_batch->num_commands ==
					// source_batch.first_command.
					uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
					if (source_last_command == dest_batch->first_command) {
						dest_batch->num_commands += source_batch.num_commands;
						needs_new_batch = false;
					} // if the commands line up exactly
				}
			} // if both batches are the same type
		} // if dest batch is valid

		if (needs_new_batch) {
			dest_batch = bdata.batches_temp.request();
			RAST_DEBUG_ASSERT(dest_batch);
			*dest_batch = source_batch;

			// create the colored verts (only if not default)
			if (source_batch.type != RasterizerStorageCommon::BT_DEFAULT) {
				// int first_vert = source_batch.first_quad * 4;
				// int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
				int first_vert = source_batch.first_vert;
				int end_vert = first_vert + (4 * source_batch.num_commands);

				for (int v = first_vert; v < end_vert; v++) {
					RAST_DEV_DEBUG_ASSERT(bdata.vertices.size());
					const BatchVertex &bv = bdata.vertices[v];
					BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request();
					RAST_DEBUG_ASSERT(cv);
					cv->pos = bv.pos;
					cv->uv = bv.uv;

					// polys are special, they can have per vertex colors
					if (!include_poly_color) {
						cv->col = source_batch.color;
					} else {
						RAST_DEV_DEBUG_ASSERT(bdata.vertex_colors.size());
						cv->col = *source_vertex_colors++;
					}

					if (INCLUDE_LIGHT_ANGLES) {
						RAST_DEV_DEBUG_ASSERT(bdata.light_angles.size());
						// this is required to allow compilation with non light angle vertex.
						// it should be compiled out.
						BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
						if (source_batch_uses_light_angles)
							lv->light_angle = *source_light_angles++;
						else
							lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
					} // if using light angles

					if (INCLUDE_MODULATE) {
						RAST_DEV_DEBUG_ASSERT(bdata.vertex_modulates.size());
						BatchVertexModulated *mv = (BatchVertexModulated *)cv;
						mv->modulate = *source_vertex_modulates++;
					} // including modulate

					if (INCLUDE_LARGE) {
						RAST_DEV_DEBUG_ASSERT(bdata.vertex_transforms.size());
						BatchVertexLarge *lv = (BatchVertexLarge *)cv;
						lv->transform = *source_vertex_transforms++;
					} // if including large
				}
			}
		}
	}

	// copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
	bdata.batches.copy_from(bdata.batches_temp);
}
  1276. PREAMBLE(bool)::_disallow_item_join_if_batch_types_too_different(RenderItemState &r_ris, uint32_t btf_allowed) {
  1277. r_ris.joined_item_batch_type_flags_curr |= btf_allowed;
  1278. bool disallow = false;
  1279. if (r_ris.joined_item_batch_type_flags_prev & (~btf_allowed))
  1280. disallow = true;
  1281. return disallow;
  1282. }
  1283. #undef PREAMBLE
  1284. #undef T_PREAMBLE
  1285. #undef C_PREAMBLE
  1286. #endif // RASTERIZER_CANVAS_BATCHER_H