12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560 |
- /*************************************************************************/
- /* rasterizer_canvas_batcher.h */
- /*************************************************************************/
- /* This file is part of: */
- /* GODOT ENGINE */
- /* https://godotengine.org */
- /*************************************************************************/
- /* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
- /* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
- /* */
- /* Permission is hereby granted, free of charge, to any person obtaining */
- /* a copy of this software and associated documentation files (the */
- /* "Software"), to deal in the Software without restriction, including */
- /* without limitation the rights to use, copy, modify, merge, publish, */
- /* distribute, sublicense, and/or sell copies of the Software, and to */
- /* permit persons to whom the Software is furnished to do so, subject to */
- /* the following conditions: */
- /* */
- /* The above copyright notice and this permission notice shall be */
- /* included in all copies or substantial portions of the Software. */
- /* */
- /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
- /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
- /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
- /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
- /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
- /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
- /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
- /*************************************************************************/
- #ifndef RASTERIZER_CANVAS_BATCHER_H
- #define RASTERIZER_CANVAS_BATCHER_H
- #include "core/os/os.h"
- #include "core/templates/local_vector.h"
- #include "rasterizer_array.h"
- #include "rasterizer_asserts.h"
- #include "rasterizer_storage_common.h"
- #include "core/config/project_settings.h"
- #include "servers/rendering/renderer_compositor.h"
- // We are using the curiously recurring template pattern
- // https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
- // For static polymorphism.
- // This makes it super easy to access
- // data / call funcs in the derived rasterizers from the base without writing and
- // maintaining a boatload of virtual functions.
- // In addition it assures that vtable will not be used and the function calls can be optimized,
- // because it gives compile time static polymorphism.
- // These macros make it simpler and less verbose to define (and redefine) the inline functions
- // template preamble
- #define T_PREAMBLE template <class T, typename T_STORAGE>
- // class preamble
- #define C_PREAMBLE RasterizerCanvasBatcher<T, T_STORAGE>
- // generic preamble
- #define PREAMBLE(RET_T) \
- T_PREAMBLE \
- RET_T C_PREAMBLE
- template <class T, typename T_STORAGE>
- class RasterizerCanvasBatcher {
- public:
// Selects how vertices are transformed while filling batches:
//   TM_NONE      - hardware transform (nothing is transformed in software)
//   TM_ALL       - software transform of all verts
//   TM_TRANSLATE - software transform that is just a translate (no rotate or scale)
enum TransformMode {
	TM_NONE = 0,
	TM_ALL = 1,
	TM_TRANSLATE = 2,
};
- // pod versions of vector and color and RID, need to be 32 bit for vertex format
- struct BatchVector2 {
- float x, y;
- void set(float xx, float yy) {
- x = xx;
- y = yy;
- }
- void set(const Vector2 &p_o) {
- x = p_o.x;
- y = p_o.y;
- }
- void to(Vector2 &r_o) const {
- r_o.x = x;
- r_o.y = y;
- }
- };
- struct BatchColor {
- float r, g, b, a;
- void set_white() {
- r = 1.0f;
- g = 1.0f;
- b = 1.0f;
- a = 1.0f;
- }
- void set(const Color &p_c) {
- r = p_c.r;
- g = p_c.g;
- b = p_c.b;
- a = p_c.a;
- }
- void set(float rr, float gg, float bb, float aa) {
- r = rr;
- g = gg;
- b = bb;
- a = aa;
- }
- bool operator==(const BatchColor &p_c) const {
- return (r == p_c.r) && (g == p_c.g) && (b == p_c.b) && (a == p_c.a);
- }
- bool operator!=(const BatchColor &p_c) const { return (*this == p_c) == false; }
- bool equals(const Color &p_c) const {
- return (r == p_c.r) && (g == p_c.g) && (b == p_c.b) && (a == p_c.a);
- }
- const float *get_data() const { return &r; }
- String to_string() const {
- String sz = "{";
- const float *data = get_data();
- for (int c = 0; c < 4; c++) {
- float f = data[c];
- int val = ((f * 255.0f) + 0.5f);
- sz += String(Variant(val)) + " ";
- }
- sz += "}";
- return sz;
- }
- };
- // simplest FVF - local or baked position
- struct BatchVertex {
- // must be 32 bit pod
- BatchVector2 pos;
- BatchVector2 uv;
- };
- // simple FVF but also incorporating baked color
- struct BatchVertexColored : public BatchVertex {
- // must be 32 bit pod
- BatchColor col;
- };
- // if we are using normal mapping, we need light angles to be sent
- struct BatchVertexLightAngled : public BatchVertexColored {
- // must be pod
- float light_angle;
- };
- // CUSTOM SHADER vertex formats. These are larger but will probably
- // be needed with custom shaders in order to have the data accessible in the shader.
- // if we are using COLOR in vertex shader but not position (VERTEX)
- struct BatchVertexModulated : public BatchVertexLightAngled {
- BatchColor modulate;
- };
- struct BatchTransform {
- BatchVector2 translate;
- BatchVector2 basis[2];
- };
- // last resort, specially for custom shader, we put everything possible into a huge FVF
- // not very efficient, but better than no batching at all.
- struct BatchVertexLarge : public BatchVertexModulated {
- // must be pod
- BatchTransform transform;
- };
- // Batch should be as small as possible, and ideally nicely aligned (is 32 bytes at the moment)
- struct Batch {
- RasterizerStorageCommon::BatchType type; // should be 16 bit
- uint16_t batch_texture_id;
- // also item reference number
- uint32_t first_command;
- // in the case of DEFAULT, this is num commands.
- // with rects, is number of command and rects.
- // with lines, is number of lines
- uint32_t num_commands;
- // first vertex of this batch in the vertex lists
- uint32_t first_vert;
- BatchColor color;
- };
- struct BatchTex {
- enum TileMode : uint32_t {
- TILE_OFF,
- TILE_NORMAL,
- TILE_FORCE_REPEAT,
- };
- RID RID_texture;
- RID RID_normal;
- TileMode tile_mode;
- BatchVector2 tex_pixel_size;
- uint32_t flags;
- };
- // items in a list to be sorted prior to joining
- struct BSortItem {
- // have a function to keep as pod, rather than operator
- void assign(const BSortItem &o) {
- item = o.item;
- z_index = o.z_index;
- }
- RendererCanvasRender::Item *item;
- int z_index;
- };
- // batch item may represent 1 or more items
- struct BItemJoined {
- uint32_t first_item_ref;
- uint32_t num_item_refs;
- Rect2 bounding_rect;
- // note the z_index may only be correct for the first of the joined item references
- // this has implications for light culling with z ranged lights.
- int16_t z_index;
- // these are defined in RasterizerStorageCommon::BatchFlags
- uint16_t flags;
- // we are always splitting items with lots of commands,
- // and items with unhandled primitives (default)
- bool use_hardware_transform() const { return num_item_refs == 1; }
- };
- struct BItemRef {
- RendererCanvasRender::Item *item;
- Color final_modulate;
- };
// Bitfields of lights and shadows, used for the light region optimization.
struct BLightRegion {
	uint64_t light_bitfield;
	uint64_t shadow_bitfield;

	// We can only do the light region optimization if there are 64 or less lights.
	bool too_many_lights;

	// Clear both bitfields and the overflow flag.
	void reset() {
		too_many_lights = false;
		shadow_bitfield = 0;
		light_bitfield = 0;
	}
};
- struct BatchData {
- BatchData() {
- reset_flush();
- reset_joined_item();
- gl_vertex_buffer = 0;
- gl_index_buffer = 0;
- max_quads = 0;
- vertex_buffer_size_units = 0;
- vertex_buffer_size_bytes = 0;
- index_buffer_size_units = 0;
- index_buffer_size_bytes = 0;
- use_colored_vertices = false;
- settings_use_batching = false;
- settings_max_join_item_commands = 0;
- settings_colored_vertex_format_threshold = 0.0f;
- settings_batch_buffer_num_verts = 0;
- scissor_threshold_area = 0.0f;
- joined_item_batch_flags = 0;
- diagnose_frame = false;
- next_diagnose_tick = 10000;
- diagnose_frame_number = 9999999999; // some high number
- join_across_z_indices = true;
- settings_item_reordering_lookahead = 0;
- settings_use_batching_original_choice = false;
- settings_flash_batching = false;
- settings_diagnose_frame = false;
- settings_scissor_lights = false;
- settings_scissor_threshold = -1.0f;
- settings_use_single_rect_fallback = false;
- settings_use_software_skinning = true;
- settings_ninepatch_mode = 0; // default
- settings_light_max_join_items = 16;
- settings_uv_contract = false;
- settings_uv_contract_amount = 0.0f;
- buffer_mode_batch_upload_send_null = true;
- buffer_mode_batch_upload_flag_stream = false;
- stats_items_sorted = 0;
- stats_light_items_joined = 0;
- }
- // called for each joined item
- void reset_joined_item() {
- // noop but left in as a stub
- }
- // called after each flush
- void reset_flush() {
- batches.reset();
- batch_textures.reset();
- vertices.reset();
- light_angles.reset();
- vertex_colors.reset();
- vertex_modulates.reset();
- vertex_transforms.reset();
- total_quads = 0;
- total_verts = 0;
- total_color_changes = 0;
- use_light_angles = false;
- use_modulate = false;
- use_large_verts = false;
- fvf = RasterizerStorageCommon::FVF_REGULAR;
- }
- unsigned int gl_vertex_buffer;
- unsigned int gl_index_buffer;
- uint32_t max_quads;
- uint32_t vertex_buffer_size_units;
- uint32_t vertex_buffer_size_bytes;
- uint32_t index_buffer_size_units;
- uint32_t index_buffer_size_bytes;
- // small vertex FVF type - pos and UV.
- // This will always be written to initially, but can be translated
- // to larger FVFs if necessary.
- RasterizerArray<BatchVertex> vertices;
- // extra data which can be stored during prefilling, for later translation to larger FVFs
- RasterizerArray<float> light_angles;
- RasterizerArray<BatchColor> vertex_colors; // these aren't usually used, but are for polys
- RasterizerArray<BatchColor> vertex_modulates;
- RasterizerArray<BatchTransform> vertex_transforms;
- // instead of having a different buffer for each vertex FVF type
- // we have a special array big enough for the biggest FVF
- // which can have a changeable unit size, and reuse it.
- RasterizerUnitArray unit_vertices;
- RasterizerArray<Batch> batches;
- RasterizerArray<Batch> batches_temp; // used for translating to colored vertex batches
- RasterizerArray_non_pod<BatchTex> batch_textures; // the only reason this is non-POD is because of RIDs
- // SHOULD THESE BE IN FILLSTATE?
- // flexible vertex format.
- // all verts have pos and UV.
- // some have color, some light angles etc.
- RasterizerStorageCommon::FVF fvf;
- bool use_colored_vertices;
- bool use_light_angles;
- bool use_modulate;
- bool use_large_verts;
- // if the shader is using MODULATE, we prevent baking color so the final_modulate can
- // be read in the shader.
- // if the shader is reading VERTEX, we prevent baking vertex positions with extra matrices etc
- // to prevent the read position being incorrect.
- // These flags are defined in RasterizerStorageCommon::BatchFlags
- uint32_t joined_item_batch_flags;
- RasterizerArray<BItemJoined> items_joined;
- RasterizerArray<BItemRef> item_refs;
- // items are sorted prior to joining
- RasterizerArray<BSortItem> sort_items;
- // new for Godot 4 .. the client outputs a linked list so we need to convert this
- // to a linear array
- LocalVector<RendererCanvasRender::Item::Command *> command_shortlist;
- // counts
- int total_quads;
- int total_verts;
- // we keep a record of how many color changes caused new batches
- // if the colors are causing an excessive number of batches, we switch
- // to alternate batching method and add color to the vertex format.
- int total_color_changes;
- // measured in pixels, recalculated each frame
- float scissor_threshold_area;
- // diagnose this frame, every nTh frame when settings_diagnose_frame is on
- bool diagnose_frame;
- String frame_string;
- uint32_t next_diagnose_tick;
- uint64_t diagnose_frame_number;
- // whether to join items across z_indices - this can interfere with z ranged lights,
- // so has to be disabled in some circumstances
- bool join_across_z_indices;
- // global settings
- bool settings_use_batching; // the current use_batching (affected by flash)
- bool settings_use_batching_original_choice; // the choice entered in project settings
- bool settings_flash_batching; // for regression testing, flash between non-batched and batched renderer
- bool settings_diagnose_frame; // print out batches to help optimize / regression test
- int settings_max_join_item_commands;
- float settings_colored_vertex_format_threshold;
- int settings_batch_buffer_num_verts;
- bool settings_scissor_lights;
- float settings_scissor_threshold; // 0.0 to 1.0
- int settings_item_reordering_lookahead;
- bool settings_use_single_rect_fallback;
- bool settings_use_software_skinning;
- int settings_light_max_join_items;
- int settings_ninepatch_mode;
- // buffer orphaning modes
- bool buffer_mode_batch_upload_send_null;
- bool buffer_mode_batch_upload_flag_stream;
- // uv contraction
- bool settings_uv_contract;
- float settings_uv_contract_amount;
- // only done on diagnose frame
- void reset_stats() {
- stats_items_sorted = 0;
- stats_light_items_joined = 0;
- }
- // frame stats (just for monitoring and debugging)
- int stats_items_sorted;
- int stats_light_items_joined;
- } bdata;
- struct FillState {
- void reset_flush() {
- // don't reset members that need to be preserved after flushing
- // half way through a list of commands
- curr_batch = 0;
- batch_tex_id = -1;
- texpixel_size = Vector2(1, 1);
- contract_uvs = false;
- sequence_batch_type_flags = 0;
- }
- void reset_joined_item(bool p_use_hardware_transform) {
- reset_flush();
- use_hardware_transform = p_use_hardware_transform;
- extra_matrix_sent = false;
- }
- // for batching multiple types, we don't allow mixing RECTs / LINEs etc.
- // using flags allows quicker rejection of sequences with different batch types
- uint32_t sequence_batch_type_flags;
- Batch *curr_batch;
- int batch_tex_id;
- bool use_hardware_transform;
- bool contract_uvs;
- Vector2 texpixel_size;
- Color final_modulate;
- TransformMode transform_mode;
- TransformMode orig_transform_mode;
- // support for extra matrices
- bool extra_matrix_sent; // whether sent on this item (in which case software transform can't be used untl end of item)
- int transform_extra_command_number_p1; // plus one to allow fast checking against zero
- Transform2D transform_combined; // final * extra
- };
- // used during try_join
- struct RenderItemState {
- RenderItemState() { reset(); }
- void reset() {
- current_clip = nullptr;
- shader_cache = nullptr;
- rebind_shader = true;
- prev_use_skeleton = false;
- last_blend_mode = -1;
- canvas_last_material = RID();
- item_group_z = 0;
- item_group_light = nullptr;
- final_modulate = Color(-1.0, -1.0, -1.0, -1.0); // just something unlikely
- joined_item_batch_type_flags_curr = 0;
- joined_item_batch_type_flags_prev = 0;
- joined_item = nullptr;
- }
- RendererCanvasRender::Item *current_clip;
- typename T_STORAGE::Shader *shader_cache;
- bool rebind_shader;
- bool prev_use_skeleton;
- bool prev_distance_field;
- int last_blend_mode;
- RID canvas_last_material;
- Color final_modulate;
- // used for joining items only
- BItemJoined *joined_item;
- bool join_batch_break;
- BLightRegion light_region;
- // we need some logic to prevent joining items that have vastly different batch types
- // these are defined in RasterizerStorageCommon::BatchTypeFlags
- uint32_t joined_item_batch_type_flags_curr;
- uint32_t joined_item_batch_type_flags_prev;
- // 'item group' is data over a single call to canvas_render_items
- int item_group_z;
- Color item_group_modulate;
- RendererCanvasRender::Light *item_group_light;
- Transform2D item_group_base_transform;
- } _render_item_state;
- bool use_nvidia_rect_workaround;
- //////////////////////////////////////////////////////////////////////////////
- // End of structs used by the batcher. Beginning of funcs.
- private:
- // curiously recurring template pattern - allows access to functions in the DERIVED class
- // this is kind of like using virtual functions but more efficient as they are resolved at compile time
- T_STORAGE *get_storage() { return static_cast<const T *>(this)->storage; }
- const T_STORAGE *get_storage() const { return static_cast<const T *>(this)->storage; }
- T *get_this() { return static_cast<T *>(this); }
- const T *get_this() const { return static_cast<const T *>(this); }
- protected:
- // main functions called from the rasterizer canvas
- void batch_constructor();
- void batch_initialize();
- void batch_canvas_begin();
- void batch_canvas_end();
- void batch_canvas_render_items_begin(const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform);
- void batch_canvas_render_items_end();
- void batch_canvas_render_items(RendererCanvasRender::Item *p_item_list, int p_z, const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform);
- // recording and sorting items from the initial pass
- void record_items(RendererCanvasRender::Item *p_item_list, int p_z);
- void join_sorted_items();
- void sort_items();
- bool _sort_items_match(const BSortItem &p_a, const BSortItem &p_b) const;
- bool sort_items_from(int p_start);
- // joining logic
- bool _disallow_item_join_if_batch_types_too_different(RenderItemState &r_ris, uint32_t btf_allowed);
- bool _detect_item_batch_break(RenderItemState &r_ris, RendererCanvasRender::Item *p_ci, bool &r_batch_break);
- // drives the loop filling batches and flushing
- void render_joined_item_commands(const BItemJoined &p_bij, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, bool p_lit);
- private:
- // flush once full or end of joined item
- void flush_render_batches(RendererCanvasRender::Item *p_first_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, uint32_t p_sequence_batch_type_flags);
- // a single joined item can contain multiple itemrefs, and thus create lots of batches
- // command start given a separate name to make easier to tell apart godot 3 and 4
- bool prefill_joined_item(FillState &r_fill_state, RendererCanvasRender::Item::Command **r_first_command, RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material);
- // prefilling different types of batch
- // default batch is an 'unhandled' legacy type batch that will be drawn with the legacy path,
- // all other batches are accelerated.
- void _prefill_default_batch(FillState &r_fill_state, int p_command_num, const RendererCanvasRender::Item &p_item);
- // accelerated batches
- bool _prefill_rect(RendererCanvasRender::Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, RendererCanvasRender::Item::Command *const *commands, RendererCanvasRender::Item *p_item, bool multiply_final_modulate);
- // dealing with textures
- int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match);
- protected:
- // legacy support for non batched mode
- void _legacy_canvas_item_render_commands(RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material);
- // light scissoring
- bool _light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const;
- bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const;
- void _calculate_scissor_threshold_area();
- private:
- // translating vertex formats prior to rendering
- void _translate_batches_to_vertex_colored_FVF();
- template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES, bool INCLUDE_MODULATE, bool INCLUDE_LARGE>
- void _translate_batches_to_larger_FVF(uint32_t p_sequence_batch_type_flags);
- protected:
- // accessory funcs
- void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const;
- void _software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const;
- TransformMode _find_transform_mode(const Transform2D &p_tr) const {
- // decided whether to do translate only for software transform
- if ((p_tr.elements[0].x == 1.0f) &&
- (p_tr.elements[0].y == 0.0f) &&
- (p_tr.elements[1].x == 0.0f) &&
- (p_tr.elements[1].y == 1.0f)) {
- return TM_TRANSLATE;
- }
- return TM_ALL;
- }
- typename T_STORAGE::Texture *_get_canvas_texture(const RID &p_texture) const {
- if (p_texture.is_valid()) {
- typename T_STORAGE::Texture *texture = get_storage()->texture_owner.get_or_null(p_texture);
- if (texture) {
- return texture->get_ptr();
- }
- }
- return 0;
- }
- public:
- Batch *_batch_request_new(bool p_blank = true) {
- Batch *batch = bdata.batches.request();
- if (!batch) {
- // grow the batches
- bdata.batches.grow();
- // and the temporary batches (used for color verts)
- bdata.batches_temp.reset();
- bdata.batches_temp.grow();
- // this should always succeed after growing
- batch = bdata.batches.request();
- RAST_DEBUG_ASSERT(batch);
- }
- if (p_blank)
- memset(batch, 0, sizeof(Batch));
- return batch;
- }
- BatchVertex *_batch_vertex_request_new() {
- return bdata.vertices.request();
- }
- protected:
- int godot4_commands_count(RendererCanvasRender::Item::Command *p_comm) const {
- int count = 0;
- while (p_comm) {
- count++;
- p_comm = p_comm->next;
- }
- return count;
- }
- unsigned int godot4_commands_to_vector(RendererCanvasRender::Item::Command *p_comm, LocalVector<RendererCanvasRender::Item::Command *> &p_list) {
- p_list.clear();
- while (p_comm) {
- p_list.push_back(p_comm);
- p_comm = p_comm->next;
- }
- return p_list.size();
- }
- };
- PREAMBLE(void)::batch_canvas_begin() {
- // diagnose_frame?
- bdata.frame_string = ""; // just in case, always set this as we don't want a string leak in release...
- #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
- if (bdata.settings_diagnose_frame) {
- bdata.diagnose_frame = false;
- uint32_t tick = OS::get_singleton()->get_ticks_msec();
- uint64_t frame = Engine::get_singleton()->get_frames_drawn();
- if (tick >= bdata.next_diagnose_tick) {
- bdata.next_diagnose_tick = tick + 10000;
- // the plus one is prevent starting diagnosis half way through frame
- bdata.diagnose_frame_number = frame + 1;
- }
- if (frame == bdata.diagnose_frame_number) {
- bdata.diagnose_frame = true;
- bdata.reset_stats();
- }
- if (bdata.diagnose_frame) {
- bdata.frame_string = "canvas_begin FRAME " + itos(frame) + "\n";
- }
- }
- #endif
- }
- PREAMBLE(void)::batch_canvas_end() {
- #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
- if (bdata.diagnose_frame) {
- bdata.frame_string += "canvas_end\n";
- if (bdata.stats_items_sorted) {
- bdata.frame_string += "\titems reordered: " + itos(bdata.stats_items_sorted) + "\n";
- }
- if (bdata.stats_light_items_joined) {
- bdata.frame_string += "\tlight items joined: " + itos(bdata.stats_light_items_joined) + "\n";
- }
- print_line(bdata.frame_string);
- }
- #endif
- }
- PREAMBLE(void)::batch_canvas_render_items_begin(const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform) {
- // if we are debugging, flash each frame between batching renderer and old version to compare for regressions
- if (bdata.settings_flash_batching) {
- if ((Engine::get_singleton()->get_frames_drawn() % 2) == 0)
- bdata.settings_use_batching = true;
- else
- bdata.settings_use_batching = false;
- }
- if (!bdata.settings_use_batching) {
- return;
- }
- // this only needs to be done when screen size changes, but this should be
- // infrequent enough
- _calculate_scissor_threshold_area();
- // set up render item state for all the z_indexes (this is common to all z_indexes)
- _render_item_state.reset();
- _render_item_state.item_group_modulate = p_modulate;
- _render_item_state.item_group_light = p_light;
- _render_item_state.item_group_base_transform = p_base_transform;
- _render_item_state.light_region.reset();
- // batch break must be preserved over the different z indices,
- // to prevent joining to an item on a previous index if not allowed
- _render_item_state.join_batch_break = false;
- // whether to join across z indices depends on whether there are z ranged lights.
- // joined z_index items can be wrongly classified with z ranged lights.
- bdata.join_across_z_indices = true;
- int light_count = 0;
- while (p_light) {
- light_count++;
- if ((p_light->z_min != RS::CANVAS_ITEM_Z_MIN) || (p_light->z_max != RS::CANVAS_ITEM_Z_MAX)) {
- // prevent joining across z indices. This would have caused visual regressions
- bdata.join_across_z_indices = false;
- }
- p_light = p_light->next_ptr;
- }
- // can't use the light region bitfield if there are too many lights
- // hopefully most games won't blow this limit..
- // if they do they will work but it won't batch join items just in case
- if (light_count > 64) {
- _render_item_state.light_region.too_many_lights = true;
- }
- }
- PREAMBLE(void)::batch_canvas_render_items_end() {
- if (!bdata.settings_use_batching) {
- return;
- }
- join_sorted_items();
- #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
- if (bdata.diagnose_frame) {
- bdata.frame_string += "items\n";
- }
- #endif
- // batching render is deferred until after going through all the z_indices, joining all the items
- get_this()->canvas_render_items_implementation(0, 0, _render_item_state.item_group_modulate,
- _render_item_state.item_group_light,
- _render_item_state.item_group_base_transform);
- bdata.items_joined.reset();
- bdata.item_refs.reset();
- bdata.sort_items.reset();
- }
- PREAMBLE(void)::batch_canvas_render_items(RendererCanvasRender::Item *p_item_list, int p_z, const Color &p_modulate, RendererCanvasRender::Light *p_light, const Transform2D &p_base_transform) {
- // stage 1 : join similar items, so that their state changes are not repeated,
- // and commands from joined items can be batched together
- if (bdata.settings_use_batching) {
- record_items(p_item_list, p_z);
- return;
- }
- // only legacy renders at this stage, batched renderer doesn't render until canvas_render_items_end()
- get_this()->canvas_render_items_implementation(p_item_list, p_z, p_modulate, p_light, p_base_transform);
- }
- // Default batches will not occur in software transform only items
- // EXCEPT IN THE CASE OF SINGLE RECTS (and this may well not occur, check the logic in prefill_join_item TYPE_RECT)
- // but can occur where transform commands have been sent during hardware batch
- PREAMBLE(void)::_prefill_default_batch(FillState &r_fill_state, int p_command_num, const RendererCanvasRender::Item &p_item) {
- if (r_fill_state.curr_batch->type == RasterizerStorageCommon::BT_DEFAULT) {
- // don't need to flush an extra transform command?
- if (!r_fill_state.transform_extra_command_number_p1) {
- // another default command, just add to the existing batch
- r_fill_state.curr_batch->num_commands++;
- } else {
- #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
- if (r_fill_state.transform_extra_command_number_p1 != p_command_num) {
- WARN_PRINT_ONCE("_prefill_default_batch : transform_extra_command_number_p1 != p_command_num");
- }
- #endif
- // if the first member of the batch is a transform we have to be careful
- if (!r_fill_state.curr_batch->num_commands) {
- // there can be leading useless extra transforms (sometimes happens with debug collision polys)
- // we need to rejig the first_command for the first useful transform
- r_fill_state.curr_batch->first_command += r_fill_state.transform_extra_command_number_p1 - 1;
- }
- // we do have a pending extra transform command to flush
- // either the extra transform is in the prior command, or not, in which case we need 2 batches
- r_fill_state.curr_batch->num_commands += 2;
- r_fill_state.transform_extra_command_number_p1 = 0; // mark as sent
- r_fill_state.extra_matrix_sent = true;
- // the original mode should always be hardware transform ..
- // test this assumption
- //CRASH_COND(r_fill_state.orig_transform_mode != TM_NONE);
- r_fill_state.transform_mode = r_fill_state.orig_transform_mode;
- // do we need to restore anything else?
- }
- } else {
- // end of previous different type batch, so start new default batch
- // first consider whether there is a dirty extra matrix to send
- if (r_fill_state.transform_extra_command_number_p1) {
- // get which command the extra is in, and blank all the records as it no longer is stored CPU side
- int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based
- r_fill_state.transform_extra_command_number_p1 = 0;
- r_fill_state.extra_matrix_sent = true;
- // send the extra to the GPU in a batch
- r_fill_state.curr_batch = _batch_request_new();
- r_fill_state.curr_batch->type = RasterizerStorageCommon::BT_DEFAULT;
- r_fill_state.curr_batch->first_command = extra_command;
- r_fill_state.curr_batch->num_commands = 1;
- // revert to the original transform mode
- // e.g. go back to NONE if we were in hardware transform mode
- r_fill_state.transform_mode = r_fill_state.orig_transform_mode;
- // reset the original transform if we are going back to software mode,
- // because the extra is now done on the GPU...
- // (any subsequent extras are sent directly to the GPU, no deferring)
- if (r_fill_state.orig_transform_mode != TM_NONE) {
- r_fill_state.transform_combined = p_item.final_transform;
- }
- // can possibly combine batch with the next one in some cases
- // this is more efficient than having an extra batch especially for the extra
- if ((extra_command + 1) == p_command_num) {
- r_fill_state.curr_batch->num_commands = 2;
- return;
- }
- }
- // start default batch
- r_fill_state.curr_batch = _batch_request_new();
- r_fill_state.curr_batch->type = RasterizerStorageCommon::BT_DEFAULT;
- r_fill_state.curr_batch->first_command = p_command_num;
- r_fill_state.curr_batch->num_commands = 1;
- }
- }
- PREAMBLE(int)::_batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match) {
- // optimization .. in 99% cases the last matched value will be the same, so no need to traverse the list
- if (p_previous_match > 0) // if it is zero, it will get hit first in the linear search anyway
- {
- const BatchTex &batch_texture = bdata.batch_textures[p_previous_match];
- // note for future reference, if RID implementation changes, this could become more expensive
- if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) {
- // tiling mode must also match
- bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF;
- if (tiles == p_tile)
- // match!
- return p_previous_match;
- }
- }
- // not the previous match .. we will do a linear search ... slower, but should happen
- // not very often except with non-batchable runs, which are going to be slow anyway
- // n.b. could possibly be replaced later by a fast hash table
- for (int n = 0; n < bdata.batch_textures.size(); n++) {
- const BatchTex &batch_texture = bdata.batch_textures[n];
- if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) {
- // tiling mode must also match
- bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF;
- if (tiles == p_tile)
- // match!
- return n;
- }
- }
- // pushing back from local variable .. not ideal but has to use a Vector because non pod
- // due to RIDs
- BatchTex new_batch_tex;
- new_batch_tex.RID_texture = p_texture;
- new_batch_tex.RID_normal = p_normal;
- // get the texture
- typename T_STORAGE::Texture *texture = _get_canvas_texture(p_texture);
- if (texture) {
- // special case, there can be textures with no width or height
- int w = texture->width;
- int h = texture->height;
- if (!w || !h) {
- w = 1;
- h = 1;
- }
- new_batch_tex.tex_pixel_size.x = 1.0 / w;
- new_batch_tex.tex_pixel_size.y = 1.0 / h;
- new_batch_tex.flags = texture->flags;
- } else {
- // maybe doesn't need doing...
- new_batch_tex.tex_pixel_size.x = 1.0f;
- new_batch_tex.tex_pixel_size.y = 1.0f;
- new_batch_tex.flags = 0;
- }
- if (p_tile) {
- if (texture) {
- // default
- new_batch_tex.tile_mode = BatchTex::TILE_NORMAL;
- // no hardware support for non power of 2 tiling
- if (!get_storage()->config.support_npot_repeat_mipmap) {
- if (next_power_of_2(texture->alloc_width) != (unsigned int)texture->alloc_width && next_power_of_2(texture->alloc_height) != (unsigned int)texture->alloc_height) {
- new_batch_tex.tile_mode = BatchTex::TILE_FORCE_REPEAT;
- }
- }
- } else {
- // this should not happen?
- new_batch_tex.tile_mode = BatchTex::TILE_OFF;
- }
- } else {
- new_batch_tex.tile_mode = BatchTex::TILE_OFF;
- }
- // push back
- bdata.batch_textures.push_back(new_batch_tex);
- return bdata.batch_textures.size() - 1;
- }
- PREAMBLE(void)::batch_constructor() {
- bdata.settings_use_batching = false;
- #ifdef GLES_OVER_GL
- use_nvidia_rect_workaround = GLOBAL_GET("rendering/quality/2d/use_nvidia_rect_flicker_workaround");
- #else
- // Not needed (a priori) on GLES devices
- use_nvidia_rect_workaround = false;
- #endif
- }
- PREAMBLE(void)::batch_initialize() {
- #define BATCHING_LOAD_PROJECT_SETTINGS
- #ifdef BATCHING_LOAD_PROJECT_SETTINGS
- bdata.settings_use_batching = GLOBAL_GET("rendering/batching/options/use_batching");
- bdata.settings_max_join_item_commands = GLOBAL_GET("rendering/batching/parameters/max_join_item_commands");
- bdata.settings_colored_vertex_format_threshold = GLOBAL_GET("rendering/batching/parameters/colored_vertex_format_threshold");
- bdata.settings_item_reordering_lookahead = GLOBAL_GET("rendering/batching/parameters/item_reordering_lookahead");
- bdata.settings_light_max_join_items = GLOBAL_GET("rendering/batching/lights/max_join_items");
- bdata.settings_use_single_rect_fallback = GLOBAL_GET("rendering/batching/options/single_rect_fallback");
- bdata.settings_use_software_skinning = GLOBAL_GET("rendering/quality/2d/use_software_skinning");
- bdata.settings_ninepatch_mode = GLOBAL_GET("rendering/quality/2d/ninepatch_mode");
- // alternatively only enable uv contract if pixel snap in use,
- // but with this enable bool, it should not be necessary
- bdata.settings_uv_contract = GLOBAL_GET("rendering/batching/precision/uv_contract");
- bdata.settings_uv_contract_amount = (float)GLOBAL_GET("rendering/batching/precision/uv_contract_amount") / 1000000.0f;
- // we can use the threshold to determine whether to turn scissoring off or on
- bdata.settings_scissor_threshold = GLOBAL_GET("rendering/batching/lights/scissor_area_threshold");
- #endif
- if (bdata.settings_scissor_threshold > 0.999f) {
- bdata.settings_scissor_lights = false;
- } else {
- bdata.settings_scissor_lights = true;
- // apply power of 4 relationship for the area, as most of the important changes
- // will be happening at low values of scissor threshold
- bdata.settings_scissor_threshold *= bdata.settings_scissor_threshold;
- bdata.settings_scissor_threshold *= bdata.settings_scissor_threshold;
- }
- // The sweet spot on my desktop for cache is actually smaller than the max, and this
- // is the default. This saves memory too so we will use it for now, needs testing to see whether this varies according
- // to device / platform.
- #ifdef BATCHING_LOAD_PROJECT_SETTINGS
- bdata.settings_batch_buffer_num_verts = GLOBAL_GET("rendering/batching/parameters/batch_buffer_size");
- // override the use_batching setting in the editor
- // (note that if the editor can't start, you can't change the use_batching project setting!)
- if (Engine::get_singleton()->is_editor_hint()) {
- bool use_in_editor = GLOBAL_GET("rendering/batching/options/use_batching_in_editor");
- bdata.settings_use_batching = use_in_editor;
- // fix some settings in the editor, as the performance not worth the risk
- bdata.settings_use_single_rect_fallback = false;
- }
- #endif
- // if we are using batching, we will purposefully disable the nvidia workaround.
- // This is because the only reason to use the single rect fallback is the approx 2x speed
- // of the uniform drawing technique. If we used nvidia workaround, speed would be
- // approx equal to the batcher drawing technique (indexed primitive + VB).
- if (bdata.settings_use_batching) {
- use_nvidia_rect_workaround = false;
- }
- // For debugging, if flash is set in project settings, it will flash on alternate frames
- // between the non-batched renderer and the batched renderer,
- // in order to find regressions.
- // This should not be used except during development.
- // make a note of the original choice in case we are flashing on and off the batching
- bdata.settings_use_batching_original_choice = bdata.settings_use_batching;
- #ifdef BATCHING_LOAD_PROJECT_SETTINGS
- bdata.settings_flash_batching = GLOBAL_GET("rendering/batching/debug/flash_batching");
- #endif
- if (!bdata.settings_use_batching) {
- // no flash when batching turned off
- bdata.settings_flash_batching = false;
- }
- // frame diagnosis. print out the batches every nth frame
- bdata.settings_diagnose_frame = false;
- if (!Engine::get_singleton()->is_editor_hint() && bdata.settings_use_batching) {
- #ifdef BATCHING_LOAD_PROJECT_SETTINGS
- bdata.settings_diagnose_frame = GLOBAL_GET("rendering/batching/debug/diagnose_frame");
- #endif
- }
- // the maximum num quads in a batch is limited by GLES2. We can have only 16 bit indices,
- // which means we can address a vertex buffer of max size 65535. 4 vertices are needed per quad.
- // Note this determines the memory use by the vertex buffer vector. max quads (65536/4)-1
- // but can be reduced to save memory if really required (will result in more batches though)
- const int max_possible_quads = (65536 / 4) - 1;
- const int min_possible_quads = 8; // some reasonable small value
- // value from project settings
- int max_quads = bdata.settings_batch_buffer_num_verts / 4;
- // sanity checks
- max_quads = CLAMP(max_quads, min_possible_quads, max_possible_quads);
- bdata.settings_max_join_item_commands = CLAMP(bdata.settings_max_join_item_commands, 0, 65535);
- bdata.settings_colored_vertex_format_threshold = CLAMP(bdata.settings_colored_vertex_format_threshold, 0.0f, 1.0f);
- bdata.settings_scissor_threshold = CLAMP(bdata.settings_scissor_threshold, 0.0f, 1.0f);
- bdata.settings_light_max_join_items = CLAMP(bdata.settings_light_max_join_items, 0, 65535);
- bdata.settings_item_reordering_lookahead = CLAMP(bdata.settings_item_reordering_lookahead, 0, 65535);
- // allow user to override the api usage techniques using project settings
- // bdata.buffer_mode_batch_upload_send_null = GLOBAL_GET("rendering/options/api_usage_batching/send_null");
- // bdata.buffer_mode_batch_upload_flag_stream = GLOBAL_GET("rendering/options/api_usage_batching/flag_stream");
- // for debug purposes, output a string with the batching options
- String batching_options_string = "OpenGL ES Batching: ";
- if (bdata.settings_use_batching) {
- batching_options_string += "ON";
- if (OS::get_singleton()->is_stdout_verbose()) {
- batching_options_string += "\n\tOPTIONS\n";
- batching_options_string += "\tmax_join_item_commands " + itos(bdata.settings_max_join_item_commands) + "\n";
- batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n";
- batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n";
- batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n";
- batching_options_string += "\titem_reordering_lookahead " + itos(bdata.settings_item_reordering_lookahead) + "\n";
- batching_options_string += "\tlight_max_join_items " + itos(bdata.settings_light_max_join_items) + "\n";
- batching_options_string += "\tsingle_rect_fallback " + String(Variant(bdata.settings_use_single_rect_fallback)) + "\n";
- batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n";
- batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame));
- }
- print_line(batching_options_string);
- }
- // special case, for colored vertex format threshold.
- // as the comparison is >=, we want to be able to totally turn on or off
- // conversion to colored vertex format at the extremes, so we will force
- // 1.0 to be just above 1.0
- if (bdata.settings_colored_vertex_format_threshold > 0.995f) {
- bdata.settings_colored_vertex_format_threshold = 1.01f;
- }
- // save memory when batching off
- if (!bdata.settings_use_batching) {
- max_quads = 0;
- }
- uint32_t sizeof_batch_vert = sizeof(BatchVertex);
- bdata.max_quads = max_quads;
- // 4 verts per quad
- bdata.vertex_buffer_size_units = max_quads * 4;
- // the index buffer can be longer than 65535, only the indices need to be within this range
- bdata.index_buffer_size_units = max_quads * 6;
- const int max_verts = bdata.vertex_buffer_size_units;
- // this comes out at approx 64K for non-colored vertex buffer, and 128K for colored vertex buffer
- bdata.vertex_buffer_size_bytes = max_verts * sizeof_batch_vert;
- bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds
- // create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF)
- bdata.vertices.create(max_verts); // 512k
- bdata.unit_vertices.create(max_verts, sizeof(BatchVertexLarge));
- // extra data per vert needed for larger FVFs
- bdata.light_angles.create(max_verts);
- bdata.vertex_colors.create(max_verts);
- bdata.vertex_modulates.create(max_verts);
- bdata.vertex_transforms.create(max_verts);
- // num batches will be auto increased dynamically if required
- bdata.batches.create(1024);
- bdata.batches_temp.create(bdata.batches.max_size());
- // batch textures can also be increased dynamically
- bdata.batch_textures.create(32);
- }
- PREAMBLE(bool)::_light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const {
- float area_item = p_item_rect.size.x * p_item_rect.size.y; // double check these are always positive
- // quick reject .. the area of pixels saved can never be more than the area of the item
- if (area_item < bdata.scissor_threshold_area) {
- return false;
- }
- Rect2 cliprect;
- if (!_light_find_intersection(p_item_rect, p_light_xform, p_light_rect, cliprect)) {
- // should not really occur .. but just in case
- cliprect = Rect2(0, 0, 0, 0);
- } else {
- // some conditions not to scissor
- // determine the area (fill rate) that will be saved
- float area_cliprect = cliprect.size.x * cliprect.size.y;
- float area_saved = area_item - area_cliprect;
- // if area saved is too small, don't scissor
- if (area_saved < bdata.scissor_threshold_area) {
- return false;
- }
- }
- int rh = get_storage()->frame.current_rt->height;
- int y = rh - (cliprect.position.y + cliprect.size.y);
- get_this()->gl_enable_scissor(cliprect.position.x, y, cliprect.size.width, cliprect.size.height);
- return true;
- }
- PREAMBLE(bool)::_light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const {
- // transform light to world space (note this is done in the earlier intersection test, so could
- // be made more efficient)
- Vector2 pts[4] = {
- p_light_xform.xform(p_light_rect.position),
- p_light_xform.xform(Vector2(p_light_rect.position.x + p_light_rect.size.x, p_light_rect.position.y)),
- p_light_xform.xform(Vector2(p_light_rect.position.x, p_light_rect.position.y + p_light_rect.size.y)),
- p_light_xform.xform(Vector2(p_light_rect.position.x + p_light_rect.size.x, p_light_rect.position.y + p_light_rect.size.y)),
- };
- // calculate the light bound rect in world space
- Rect2 lrect(pts[0].x, pts[0].y, 0, 0);
- for (int n = 1; n < 4; n++) {
- lrect.expand_to(pts[n]);
- }
- // intersection between the 2 rects
- // they should probably always intersect, because of earlier check, but just in case...
- if (!p_item_rect.intersects(lrect))
- return false;
- // note this does almost the same as Rect2.clip but slightly more efficient for our use case
- r_cliprect.position.x = MAX(p_item_rect.position.x, lrect.position.x);
- r_cliprect.position.y = MAX(p_item_rect.position.y, lrect.position.y);
- Point2 item_rect_end = p_item_rect.position + p_item_rect.size;
- Point2 lrect_end = lrect.position + lrect.size;
- r_cliprect.size.x = MIN(item_rect_end.x, lrect_end.x) - r_cliprect.position.x;
- r_cliprect.size.y = MIN(item_rect_end.y, lrect_end.y) - r_cliprect.position.y;
- return true;
- }
- PREAMBLE(void)::_calculate_scissor_threshold_area() {
- if (!bdata.settings_scissor_lights) {
- return;
- }
- // scissor area threshold is 0.0 to 1.0 in the settings for ease of use.
- // we need to translate to an absolute area to determine quickly whether
- // to scissor.
- if (bdata.settings_scissor_threshold < 0.0001f) {
- bdata.scissor_threshold_area = -1.0f; // will always pass
- } else {
- // in pixels
- int w = get_storage()->frame.current_rt->width;
- int h = get_storage()->frame.current_rt->height;
- int screen_area = w * h;
- bdata.scissor_threshold_area = bdata.settings_scissor_threshold * screen_area;
- }
- }
- PREAMBLE(void)::render_joined_item_commands(const BItemJoined &p_bij, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material, bool p_lit) {
- RendererCanvasRender::Item *item = 0;
- RendererCanvasRender::Item *first_item = bdata.item_refs[p_bij.first_item_ref].item;
- // fill_state and bdata have once off setup per joined item, and a smaller reset on flush
- FillState fill_state;
- fill_state.reset_joined_item(p_bij.use_hardware_transform());
- bdata.reset_joined_item();
- // should this joined item be using large FVF?
- if (p_bij.flags & RasterizerStorageCommon::USE_MODULATE_FVF) {
- bdata.use_modulate = true;
- bdata.fvf = RasterizerStorageCommon::FVF_MODULATED;
- }
- if (p_bij.flags & RasterizerStorageCommon::USE_LARGE_FVF) {
- bdata.use_modulate = true;
- bdata.use_large_verts = true;
- bdata.fvf = RasterizerStorageCommon::FVF_LARGE;
- }
- // in the special case of custom shaders that read from VERTEX (i.e. vertex position)
- // we want to disable software transform of extra matrix
- if (bdata.joined_item_batch_flags & RasterizerStorageCommon::PREVENT_VERTEX_BAKING) {
- fill_state.extra_matrix_sent = true;
- }
- for (unsigned int i = 0; i < p_bij.num_item_refs; i++) {
- const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i];
- item = ref.item;
- if (!p_lit) {
- // if not lit we use the complex calculated final modulate
- fill_state.final_modulate = ref.final_modulate;
- } else {
- // if lit we ignore canvas modulate and just use the item modulate
- fill_state.final_modulate = item->final_modulate;
- }
- // ONCE OFF fill state setup, that will be retained over multiple calls to
- // prefill_joined_item()
- fill_state.transform_combined = item->final_transform;
- // decide the initial transform mode, and make a backup
- // in orig_transform_mode in case we need to switch back
- if (!fill_state.use_hardware_transform) {
- fill_state.transform_mode = _find_transform_mode(fill_state.transform_combined);
- } else {
- fill_state.transform_mode = TM_NONE;
- }
- fill_state.orig_transform_mode = fill_state.transform_mode;
- // keep track of when we added an extra matrix
- // so we can defer sending until we see a default command
- fill_state.transform_extra_command_number_p1 = 0;
- RendererCanvasRender::Item::Command *current_command = item->commands;
- while (current_command) {
- // fill as many batches as possible (until all done, or the vertex buffer is full)
- bool bFull = get_this()->prefill_joined_item(fill_state, current_command, item, p_current_clip, r_reclip, p_material);
- if (bFull) {
- // always pass first item (commands for default are always first item)
- flush_render_batches(first_item, p_current_clip, r_reclip, p_material, fill_state.sequence_batch_type_flags);
- // zero all the batch data ready for a new run
- bdata.reset_flush();
- // don't zero all the fill state, some may need to be preserved
- fill_state.reset_flush();
- }
- }
- }
- // flush if any left
- flush_render_batches(first_item, p_current_clip, r_reclip, p_material, fill_state.sequence_batch_type_flags);
- // zero all the batch data ready for a new run
- bdata.reset_flush();
- }
- PREAMBLE(void)::_legacy_canvas_item_render_commands(RendererCanvasRender::Item *p_item, RendererCanvasRender::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material) {
- // reuse the same list each time to prevent needless dynamic allocations
- unsigned int command_count = godot4_commands_to_vector(p_item->commands, bdata.command_shortlist);
- RendererCanvasRender::Item::Command *const *commands = nullptr;
- if (command_count) {
- commands = &bdata.command_shortlist[0];
- }
- // legacy .. just create one massive batch and render everything as before
- bdata.batches.reset();
- Batch *batch = _batch_request_new();
- batch->type = RasterizerStorageCommon::BT_DEFAULT;
- batch->num_commands = command_count;
- get_this()->render_batches(commands, p_current_clip, r_reclip, p_material);
- bdata.reset_flush();
- }
- PREAMBLE(void)::record_items(RendererCanvasRender::Item *p_item_list, int p_z) {
- while (p_item_list) {
- BSortItem *s = bdata.sort_items.request_with_grow();
- s->item = p_item_list;
- s->z_index = p_z;
- p_item_list = p_item_list->next;
- }
- }
- PREAMBLE(void)::join_sorted_items() {
- }
- PREAMBLE(void)::_software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const {
- Vector2 vc(r_v.x, r_v.y);
- vc = p_tr.xform(vc);
- r_v.set(vc);
- }
- PREAMBLE(void)::_software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const {
- r_v = p_tr.xform(r_v);
- }
- PREAMBLE(void)::_translate_batches_to_vertex_colored_FVF() {
- // zeros the size and sets up how big each unit is
- bdata.unit_vertices.prepare(sizeof(BatchVertexColored));
- const BatchColor *source_vertex_colors = &bdata.vertex_colors[0];
- RAST_DEBUG_ASSERT(bdata.vertex_colors.size() == bdata.vertices.size());
- int num_verts = bdata.vertices.size();
- for (int n = 0; n < num_verts; n++) {
- const BatchVertex &bv = bdata.vertices[n];
- BatchVertexColored *cv = (BatchVertexColored *)bdata.unit_vertices.request();
- cv->pos = bv.pos;
- cv->uv = bv.uv;
- cv->col = *source_vertex_colors++;
- }
- }
- // Translation always involved adding color to the FVF, which enables
- // joining of batches that have different colors.
- // There is a trade off. Non colored verts are smaller so work faster, but
- // there comes a point where it is better to just use colored verts to avoid lots of
- // batches.
- // In addition this can optionally add light angles to the FVF, necessary for normal mapping.
- T_PREAMBLE
- template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES, bool INCLUDE_MODULATE, bool INCLUDE_LARGE>
- void C_PREAMBLE::_translate_batches_to_larger_FVF(uint32_t p_sequence_batch_type_flags) {
- bool include_poly_color = false;
- // we ONLY want to include the color verts in translation when using polys,
- // as rects do not write vertex colors, only colors per batch.
- if (p_sequence_batch_type_flags & RasterizerStorageCommon::BTF_POLY) {
- include_poly_color = INCLUDE_LIGHT_ANGLES | INCLUDE_MODULATE | INCLUDE_LARGE;
- }
- // zeros the size and sets up how big each unit is
- bdata.unit_vertices.prepare(sizeof(BATCH_VERTEX_TYPE));
- bdata.batches_temp.reset();
- // As the vertices_colored and batches_temp are 'mirrors' of the non-colored version,
- // the sizes should be equal, and allocations should never fail. Hence the use of debug
- // asserts to check program flow, these should not occur at runtime unless the allocation
- // code has been altered.
- RAST_DEBUG_ASSERT(bdata.unit_vertices.max_size() == bdata.vertices.max_size());
- RAST_DEBUG_ASSERT(bdata.batches_temp.max_size() == bdata.batches.max_size());
- Color curr_col(-1.0f, -1.0f, -1.0f, -1.0f);
- Batch *dest_batch = nullptr;
- const BatchColor *source_vertex_colors = &bdata.vertex_colors[0];
- const float *source_light_angles = &bdata.light_angles[0];
- const BatchColor *source_vertex_modulates = &bdata.vertex_modulates[0];
- const BatchTransform *source_vertex_transforms = &bdata.vertex_transforms[0];
- // translate the batches into vertex colored batches
- for (int n = 0; n < bdata.batches.size(); n++) {
- const Batch &source_batch = bdata.batches[n];
- // does source batch use light angles?
- const BatchTex &btex = bdata.batch_textures[source_batch.batch_texture_id];
- bool source_batch_uses_light_angles = btex.RID_normal != RID();
- bool needs_new_batch = true;
- if (dest_batch) {
- if (dest_batch->type == source_batch.type) {
- if (source_batch.type == RasterizerStorageCommon::BT_RECT) {
- if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
- // add to previous batch
- dest_batch->num_commands += source_batch.num_commands;
- needs_new_batch = false;
- // create the colored verts (only if not default)
- //int first_vert = source_batch.first_quad * 4;
- //int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
- int first_vert = source_batch.first_vert;
- int end_vert = first_vert + (4 * source_batch.num_commands);
- for (int v = first_vert; v < end_vert; v++) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertices.size());
- const BatchVertex &bv = bdata.vertices[v];
- BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request();
- RAST_DEBUG_ASSERT(cv);
- cv->pos = bv.pos;
- cv->uv = bv.uv;
- cv->col = source_batch.color;
- if (INCLUDE_LIGHT_ANGLES) {
- RAST_DEV_DEBUG_ASSERT(bdata.light_angles.size());
- // this is required to allow compilation with non light angle vertex.
- // it should be compiled out.
- BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
- if (source_batch_uses_light_angles)
- lv->light_angle = *source_light_angles++;
- else
- lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
- } // if including light angles
- if (INCLUDE_MODULATE) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertex_modulates.size());
- BatchVertexModulated *mv = (BatchVertexModulated *)cv;
- mv->modulate = *source_vertex_modulates++;
- } // including modulate
- if (INCLUDE_LARGE) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertex_transforms.size());
- BatchVertexLarge *lv = (BatchVertexLarge *)cv;
- lv->transform = *source_vertex_transforms++;
- } // if including large
- }
- } // textures match
- } else {
- // default
- // we can still join, but only under special circumstances
- // does this ever happen? not sure at this stage, but left for future expansion
- uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
- if (source_last_command == dest_batch->first_command) {
- dest_batch->num_commands += source_batch.num_commands;
- needs_new_batch = false;
- } // if the commands line up exactly
- }
- } // if both batches are the same type
- } // if dest batch is valid
- if (needs_new_batch) {
- dest_batch = bdata.batches_temp.request();
- RAST_DEBUG_ASSERT(dest_batch);
- *dest_batch = source_batch;
- // create the colored verts (only if not default)
- if (source_batch.type != RasterizerStorageCommon::BT_DEFAULT) {
- // int first_vert = source_batch.first_quad * 4;
- // int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
- int first_vert = source_batch.first_vert;
- int end_vert = first_vert + (4 * source_batch.num_commands);
- for (int v = first_vert; v < end_vert; v++) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertices.size());
- const BatchVertex &bv = bdata.vertices[v];
- BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request();
- RAST_DEBUG_ASSERT(cv);
- cv->pos = bv.pos;
- cv->uv = bv.uv;
- // polys are special, they can have per vertex colors
- if (!include_poly_color) {
- cv->col = source_batch.color;
- } else {
- RAST_DEV_DEBUG_ASSERT(bdata.vertex_colors.size());
- cv->col = *source_vertex_colors++;
- }
- if (INCLUDE_LIGHT_ANGLES) {
- RAST_DEV_DEBUG_ASSERT(bdata.light_angles.size());
- // this is required to allow compilation with non light angle vertex.
- // it should be compiled out.
- BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
- if (source_batch_uses_light_angles)
- lv->light_angle = *source_light_angles++;
- else
- lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
- } // if using light angles
- if (INCLUDE_MODULATE) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertex_modulates.size());
- BatchVertexModulated *mv = (BatchVertexModulated *)cv;
- mv->modulate = *source_vertex_modulates++;
- } // including modulate
- if (INCLUDE_LARGE) {
- RAST_DEV_DEBUG_ASSERT(bdata.vertex_transforms.size());
- BatchVertexLarge *lv = (BatchVertexLarge *)cv;
- lv->transform = *source_vertex_transforms++;
- } // if including large
- }
- }
- }
- }
- // copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
- bdata.batches.copy_from(bdata.batches_temp);
- }
- PREAMBLE(bool)::_disallow_item_join_if_batch_types_too_different(RenderItemState &r_ris, uint32_t btf_allowed) {
- r_ris.joined_item_batch_type_flags_curr |= btf_allowed;
- bool disallow = false;
- if (r_ris.joined_item_batch_type_flags_prev & (~btf_allowed))
- disallow = true;
- return disallow;
- }
- #undef PREAMBLE
- #undef T_PREAMBLE
- #undef C_PREAMBLE
- #endif // RASTERIZER_CANVAS_BATCHER_H
|