Browse Source

Merge pull request #86339 from lawnjelly/vertex_cache_optimizer

[3.x] Vertex cache optimizer
Rémi Verschelde 1 year ago
parent
commit
dbe3eca69b

+ 304 - 0
core/math/vertex_cache_optimizer.cpp

@@ -0,0 +1,304 @@
+/**************************************************************************/
+/*  vertex_cache_optimizer.cpp                                            */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "vertex_cache_optimizer.h"
+
+#include "core/math/math_funcs.h"
+
+// Precalculate the two score lookup tables:
+// - _cache_position_score, indexed by a vertex's position in the simulated LRU cache.
+// - _valence_score, indexed by the number of not-yet-emitted triangles using a vertex.
+// Scores are stored as fixed point values (float score * SCORE_SCALING).
+void VertexCacheOptimizer::init() {
+	for (int i = 0; i < Constants::CACHE_SCORE_TABLE_SIZE; i++) {
+		float score = 0;
+		if (i < 3) {
+			// This vertex was used in the last triangle,
+			// so it has a fixed score, whichever of the three
+			// it's in. Otherwise, you can get very different
+			// answers depending on whether you add
+			// the triangle 1,2,3 or 3,1,2 - which is silly.
+			score = Constants::LAST_TRI_SCORE;
+		} else {
+			// Points for being high in the cache.
+			const float scaler = 1.0f / (Constants::CACHE_FUNCTION_LENGTH - 3);
+			score = 1.0f - (i - 3) * scaler;
+			score = Math::pow(score, Constants::CACHE_DECAY_POWER);
+		}
+		_cache_position_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score);
+	}
+
+	// Entry 0 cannot be computed by the formula below (zero raised to a
+	// negative power), so the loop starts at 1. Give the entry a defined
+	// value anyway, so the table never contains indeterminate data.
+	_valence_score[0] = 0;
+
+	for (int i = 1; i < Constants::VALENCE_SCORE_TABLE_SIZE; i++) {
+		// Bonus points for having a low number of tris still to
+		// use the vert, so we get rid of lone verts quickly.
+		float valence_boost = Math::pow(i, -Constants::VALENCE_BOOST_POWER);
+		float score = Constants::VALENCE_BOOST_SCALE * valence_boost;
+		_valence_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score);
+	}
+}
+
+// Returns the fixed point score of a vertex, given how many unemitted
+// triangles still reference it and its position in the simulated cache
+// (a negative position means the vertex is not cached).
+VertexCacheOptimizer::SCORE_TYPE VertexCacheOptimizer::find_vertex_score(int p_num_active_tris, int p_cache_position) {
+	// A vertex that no remaining triangle needs is worth nothing.
+	if (p_num_active_tris == 0) {
+		return 0;
+	}
+
+	// Cache contribution: only vertices currently in the LRU cache earn points.
+	const SCORE_TYPE cache_part = (p_cache_position >= 0) ? _cache_position_score[p_cache_position] : (SCORE_TYPE)0;
+
+	// Valence contribution: only tabulated for small triangle counts.
+	const SCORE_TYPE valence_part = (p_num_active_tris < Constants::VALENCE_SCORE_TABLE_SIZE) ? _valence_score[p_num_active_tris] : (SCORE_TYPE)0;
+
+	return cache_part + valence_part;
+}
+
+// The main reordering function.
+//
+// Reads p_num_triangles triangles (3 indices each) from p_source_indices and
+// writes the same triangles, in a new order, to r_dest_indices. Both buffers
+// must hold at least 3 * p_num_triangles entries, and every source index must
+// be smaller than p_num_vertices.
+//
+// Returns r_dest_indices on success. Returns nullptr, leaving r_dest_indices
+// untouched, when the mesh is not supported: negative counts, an index out of
+// range, or a vertex shared by more than MAX_ADJACENCY triangles.
+VertexCacheOptimizer::VERTEX_INDEX_TYPE *VertexCacheOptimizer::_reorder_indices(VERTEX_INDEX_TYPE *r_dest_indices, const VERTEX_INDEX_TYPE *p_source_indices, int p_num_triangles, int p_num_vertices) {
+	// The public entry points take unsigned counts, so a negative value here
+	// means the count did not fit in an int. Refuse rather than allocate
+	// with a bogus size.
+	if (p_num_triangles < 0 || p_num_vertices < 0) {
+		return nullptr;
+	}
+
+	ADJACENCY_TYPE *num_active_tris = (ADJACENCY_TYPE *)memalloc(sizeof(ADJACENCY_TYPE) * p_num_vertices);
+	memset(num_active_tris, 0, sizeof(ADJACENCY_TYPE) * p_num_vertices);
+
+	// First scan over the vertex data, count the total number of
+	// occurrences of each vertex.
+	for (int i = 0; i < 3 * p_num_triangles; i++) {
+		const VERTEX_INDEX_TYPE source_vertex = p_source_indices[i];
+		if (source_vertex >= (VERTEX_INDEX_TYPE)p_num_vertices) {
+			// Invalid mesh,
+			// index refers to a vertex that does not exist. Using it
+			// would access the per-vertex arrays out of bounds.
+			memfree(num_active_tris);
+			return nullptr;
+		}
+		if (num_active_tris[source_vertex] == Constants::MAX_ADJACENCY) {
+			// Unsupported mesh,
+			// vertex shared by too many triangles.
+			memfree(num_active_tris);
+			return nullptr;
+		}
+		num_active_tris[source_vertex]++;
+	}
+
+	// Allocate the rest of the arrays.
+	ARRAY_INDEX_TYPE *offsets = (ARRAY_INDEX_TYPE *)memalloc(sizeof(ARRAY_INDEX_TYPE) * p_num_vertices);
+	SCORE_TYPE *last_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_vertices);
+	CACHE_POS_TYPE *cache_tag = (CACHE_POS_TYPE *)memalloc(sizeof(CACHE_POS_TYPE) * p_num_vertices);
+
+	// One bit per triangle, set once the triangle has been emitted.
+	uint8_t *triangle_added = (uint8_t *)memalloc((p_num_triangles + 7) / 8);
+	SCORE_TYPE *triangle_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_triangles);
+	// For each vertex, the list of triangles using it (see offsets).
+	TRIANGLE_INDEX_TYPE *triangle_indices = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles);
+	memset(triangle_added, 0, sizeof(uint8_t) * ((p_num_triangles + 7) / 8));
+	memset(triangle_score, 0, sizeof(SCORE_TYPE) * p_num_triangles);
+	memset(triangle_indices, 0, sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles);
+
+	// Count the triangle array offset for each vertex,
+	// initialize the rest of the data.
+	int sum = 0;
+	for (int i = 0; i < p_num_vertices; i++) {
+		offsets[i] = sum;
+		sum += num_active_tris[i];
+		// Reset the counter, it is rebuilt while filling the lists below.
+		num_active_tris[i] = 0;
+		cache_tag[i] = -1;
+	}
+
+	// Fill the vertex data structures with indices to the triangles
+	// using each vertex.
+	for (int i = 0; i < p_num_triangles; i++) {
+		for (int j = 0; j < 3; j++) {
+			int v = p_source_indices[3 * i + j];
+			triangle_indices[offsets[v] + num_active_tris[v]] = i;
+			num_active_tris[v]++;
+		}
+	}
+
+	// Initialize the score for all vertices.
+	for (int i = 0; i < p_num_vertices; i++) {
+		last_score[i] = find_vertex_score(num_active_tris[i], cache_tag[i]);
+		for (int j = 0; j < num_active_tris[i]; j++) {
+			triangle_score[triangle_indices[offsets[i] + j]] += last_score[i];
+		}
+	}
+
+	// Find the best triangle.
+	int best_triangle = -1;
+	int best_score = -1;
+
+	for (int i = 0; i < p_num_triangles; i++) {
+		if (triangle_score[i] > best_score) {
+			best_score = triangle_score[i];
+			best_triangle = i;
+		}
+	}
+
+	// Allocate the output array.
+	TRIANGLE_INDEX_TYPE *out_triangles = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * p_num_triangles);
+	int out_pos = 0;
+
+	// Initialize the cache.
+	// The 3 extra slots hold vertices pushed out by the current triangle
+	// until they are evicted in the score update pass.
+	int cache[Constants::VERTEX_CACHE_SIZE + 3];
+	for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) {
+		cache[i] = -1;
+	}
+
+	int scan_pos = 0;
+
+	// Output the currently best triangle, as long as there
+	// are triangles left to output.
+	while (best_triangle >= 0) {
+		// Mark the triangle as added.
+		set_added(triangle_added, best_triangle);
+		// Output this triangle.
+		out_triangles[out_pos++] = best_triangle;
+		for (int i = 0; i < 3; i++) {
+			// Update this vertex.
+			int v = p_source_indices[3 * best_triangle + i];
+
+			// Check the current cache position, if it
+			// is in the cache.
+			int endpos = cache_tag[v];
+			if (endpos < 0) {
+				endpos = Constants::VERTEX_CACHE_SIZE + i;
+			}
+			if (endpos > i) {
+				// Move all cache entries from the previous position
+				// in the cache to the new target position (i) one
+				// step backwards.
+				for (int j = endpos; j > i; j--) {
+					cache[j] = cache[j - 1];
+					// If this cache slot contains a real
+					// vertex, update its cache tag.
+					if (cache[j] >= 0) {
+						cache_tag[cache[j]]++;
+					}
+				}
+				// Insert the current vertex into its new target
+				// slot.
+				cache[i] = v;
+				cache_tag[v] = i;
+			}
+
+			// Find the current triangle in the list of active
+			// triangles and remove it (moving the last
+			// triangle in the list to the slot of this triangle).
+			for (int j = 0; j < num_active_tris[v]; j++) {
+				if (triangle_indices[offsets[v] + j] == best_triangle) {
+					triangle_indices[offsets[v] + j] = triangle_indices[offsets[v] + num_active_tris[v] - 1];
+					break;
+				}
+			}
+			// Shorten the list.
+			num_active_tris[v]--;
+		}
+		// Update the scores of all triangles in the cache.
+		for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) {
+			int v = cache[i];
+			if (v < 0) {
+				break;
+			}
+			// This vertex has been pushed outside of the
+			// actual cache.
+			if (i >= Constants::VERTEX_CACHE_SIZE) {
+				cache_tag[v] = -1;
+				cache[i] = -1;
+			}
+			SCORE_TYPE new_score = find_vertex_score(num_active_tris[v], cache_tag[v]);
+			// Unsigned difference, relies on wrap-around when the score drops.
+			SCORE_TYPE diff = new_score - last_score[v];
+			for (int j = 0; j < num_active_tris[v]; j++) {
+				triangle_score[triangle_indices[offsets[v] + j]] += diff;
+			}
+			last_score[v] = new_score;
+		}
+		// Find the best triangle referenced by vertices in the cache.
+		best_triangle = -1;
+		best_score = -1;
+		for (int i = 0; i < Constants::VERTEX_CACHE_SIZE; i++) {
+			if (cache[i] < 0) {
+				break;
+			}
+			int v = cache[i];
+			for (int j = 0; j < num_active_tris[v]; j++) {
+				int t = triangle_indices[offsets[v] + j];
+				if (triangle_score[t] > best_score) {
+					best_triangle = t;
+					best_score = triangle_score[t];
+				}
+			}
+		}
+		// If no active triangle was found at all, continue
+		// scanning the whole list of triangles.
+		if (best_triangle < 0) {
+			for (; scan_pos < p_num_triangles; scan_pos++) {
+				if (!is_added(triangle_added, scan_pos)) {
+					best_triangle = scan_pos;
+					break;
+				}
+			}
+		}
+	}
+
+	// Convert the triangle index array into a full triangle list.
+	out_pos = 0;
+	for (int i = 0; i < p_num_triangles; i++) {
+		int t = out_triangles[i];
+		for (int j = 0; j < 3; j++) {
+			int v = p_source_indices[3 * t + j];
+			r_dest_indices[out_pos++] = v;
+		}
+	}
+
+	// Clean up.
+	memfree(triangle_indices);
+	memfree(offsets);
+	memfree(last_score);
+	memfree(num_active_tris);
+	memfree(cache_tag);
+	memfree(triangle_added);
+	memfree(triangle_score);
+	memfree(out_triangles);
+
+	return r_dest_indices;
+}
+
+// PoolVector flavor of reorder_indices(). The indices are copied into a
+// LocalVector, reordered there, and written back only on success.
+// Returns true if r_indices was reordered, false if it was left unchanged.
+bool VertexCacheOptimizer::reorder_indices_pool(PoolVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) {
+	LocalVector<int> local_indices;
+	local_indices = r_indices;
+
+	const bool reordered = reorder_indices(local_indices, p_num_triangles, p_num_verts);
+	if (reordered) {
+		r_indices = local_indices;
+	}
+	return reordered;
+}
+
+// Reorders the first p_num_triangles triangles (3 indices each) of r_indices
+// in place. p_num_verts is the number of vertices the indices refer to.
+// Returns true if r_indices was reordered, false if it was left unchanged
+// (inconsistent counts, or a mesh the optimizer does not support).
+bool VertexCacheOptimizer::reorder_indices(LocalVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) {
+	// The core routine reads and writes 3 * p_num_triangles entries without
+	// any bounds checking, so make sure the vector really is that large.
+	if ((uint64_t)p_num_triangles * 3 > (uint64_t)r_indices.size()) {
+		return false;
+	}
+
+	// Start from a copy rather than an uninitialized buffer, so that any
+	// indices beyond 3 * p_num_triangles are preserved when copying back.
+	LocalVector<int> temp;
+	temp = r_indices;
+	if (_reorder_indices((VERTEX_INDEX_TYPE *)temp.ptr(), (VERTEX_INDEX_TYPE *)r_indices.ptr(), p_num_triangles, p_num_verts)) {
+#if 0
+		uint32_t show = MIN(r_indices.size(), 16);
+		for (uint32_t n = 0; n < show; n++) {
+			print_line(itos(n) + " : " + itos(r_indices[n]) + " to " + itos(temp[n]));
+		}
+#endif
+
+		r_indices = temp;
+		return true;
+	}
+	return false;
+}

+ 120 - 0
core/math/vertex_cache_optimizer.h

@@ -0,0 +1,120 @@
+/**************************************************************************/
+/*  vertex_cache_optimizer.h                                              */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef VERTEX_CACHE_OPTIMIZER_H
+#define VERTEX_CACHE_OPTIMIZER_H
+
+// This class is derived from
+// https://www.martin.st/thesis/
+// Based on Tom Forsyth's vertex cache optimizer
+
+/*
+  Copyright (C) 2008 Martin Storsjo
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+  1. The origin of this software must not be misrepresented; you must not
+	 claim that you wrote the original software. If you use this software
+	 in a product, an acknowledgment in the product documentation would be
+	 appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+	 misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "core/local_vector.h"
+#include "core/math/math_defs.h"
+
+#include <stdint.h>
+
+// Reorders the triangles of an indexed mesh, using precalculated
+// per-vertex score tables and a simulated LRU vertex cache.
+class VertexCacheOptimizer {
+	using VERTEX_INDEX_TYPE = uint32_t;
+
+	// The size of these data types affect the memory usage.
+	using SCORE_TYPE = uint16_t;
+	using ADJACENCY_TYPE = uint8_t;
+	using CACHE_POS_TYPE = int8_t;
+	using TRIANGLE_INDEX_TYPE = int32_t;
+	using ARRAY_INDEX_TYPE = int32_t;
+
+	struct Constants {
+		// Number of entries in each precalculated table.
+		static const int CACHE_SCORE_TABLE_SIZE = 32;
+		static const int VALENCE_SCORE_TABLE_SIZE = 32;
+		// Largest number of triangles that may share a single vertex.
+		static const int MAX_ADJACENCY = UINT8_MAX;
+		// Factor applied to float scores before storing them as SCORE_TYPE.
+		static const int SCORE_SCALING = 7281;
+
+		// Tuning parameters of the score function.
+		static constexpr float CACHE_DECAY_POWER = 1.5;
+		static constexpr float LAST_TRI_SCORE = 0.75;
+		static constexpr float VALENCE_BOOST_SCALE = 2.0;
+		static constexpr float VALENCE_BOOST_POWER = 0.5;
+
+		// Adjust these to trade performance against result quality.
+		static const int VERTEX_CACHE_SIZE = 24;
+		static const int CACHE_FUNCTION_LENGTH = 32;
+
+		static_assert(CACHE_SCORE_TABLE_SIZE >= VERTEX_CACHE_SIZE, "Vertex score table too small");
+	};
+
+	// Lookup tables filled in by init().
+	SCORE_TYPE _cache_position_score[Constants::CACHE_SCORE_TABLE_SIZE];
+	SCORE_TYPE _valence_score[Constants::VALENCE_SCORE_TABLE_SIZE];
+
+	// Tests bit p_x of the packed bit array (non-zero if set).
+	int is_added(const uint8_t *p_triangle_added, int p_x) const {
+		const int bit_mask = 1 << (p_x & 7);
+		return p_triangle_added[p_x >> 3] & bit_mask;
+	}
+
+	// Sets bit p_x of the packed bit array.
+	void set_added(uint8_t *p_triangle_added, int p_x) const {
+		const int bit_mask = 1 << (p_x & 7);
+		p_triangle_added[p_x >> 3] |= bit_mask;
+	}
+
+	// Fills the precalculated score tables.
+	void init();
+
+	// Computes the score of a single vertex.
+	SCORE_TYPE find_vertex_score(int p_num_active_tris, int p_cache_position);
+
+	// Core routine, works on raw index buffers.
+	VERTEX_INDEX_TYPE *_reorder_indices(VERTEX_INDEX_TYPE *r_dest_indices, const VERTEX_INDEX_TYPE *p_source_indices, int p_num_triangles, int p_num_vertices);
+
+public:
+	VertexCacheOptimizer() {
+		init();
+	}
+
+	// Both return true when r_indices was reordered.
+	bool reorder_indices(LocalVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts);
+	bool reorder_indices_pool(PoolVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts);
+};
+
+#endif // VERTEX_CACHE_OPTIMIZER_H

+ 4 - 0
doc/classes/Mesh.xml

@@ -192,6 +192,10 @@
 		<constant name="ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION" value="2097152" enum="ArrayFormat">
 			Flag used to mark that the array uses an octahedral representation of normal and tangent vectors rather than cartesian.
 		</constant>
+		<constant name="ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION" value="4194304" enum="ArrayFormat">
+			Flag used to request vertex cache optimization.
+			This reorders indices to make the best use of GPU vertex caches, which can improve rendering performance, particularly with high-poly models. Only implemented for triangle primitives.
+		</constant>
 		<constant name="ARRAY_COMPRESS_DEFAULT" value="2194432" enum="ArrayFormat">
 			Used to set flags [constant ARRAY_COMPRESS_VERTEX], [constant ARRAY_COMPRESS_NORMAL], [constant ARRAY_COMPRESS_TANGENT], [constant ARRAY_COMPRESS_COLOR], [constant ARRAY_COMPRESS_TEX_UV], [constant ARRAY_COMPRESS_TEX_UV2], [constant ARRAY_COMPRESS_WEIGHTS], and [constant ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION] quickly.
 			[b]Note:[/b] Since this flag enables [constant ARRAY_COMPRESS_COLOR], vertex colors will be stored as 8-bit unsigned integers. This will clamp overbright colors to [code]Color(1, 1, 1, 1)[/code] and reduce colors' precision.

+ 6 - 2
editor/import/resource_importer_obj.cpp

@@ -495,11 +495,12 @@ String ResourceImporterOBJ::get_preset_name(int p_idx) const {
 }
 
 void ResourceImporterOBJ::get_import_options(List<ImportOption> *r_options, int p_preset) const {
-	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "scale_mesh"), Vector3(1, 1, 1)));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "offset_mesh"), Vector3(0, 0, 0)));
-	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "octahedral_compression"), true));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "optimize_mesh_flags", PROPERTY_HINT_FLAGS, "Vertex,Normal,Tangent,Color,TexUV,TexUV2,Bones,Weights,Index"), VS::ARRAY_COMPRESS_DEFAULT >> VS::ARRAY_COMPRESS_BASE));
+	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true));
+	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "vertex_cache_optimization"), true));
+	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "octahedral_compression"), true));
 }
 bool ResourceImporterOBJ::get_option_visibility(const String &p_option, const Map<StringName, Variant> &p_options) const {
 	return true;
@@ -512,6 +513,9 @@ Error ResourceImporterOBJ::import(const String &p_source_file, const String &p_s
 	if (bool(p_options["octahedral_compression"])) {
 		compress_flags |= VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION;
 	}
+	if (bool(p_options["vertex_cache_optimization"])) {
+		compress_flags |= VS::ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION;
+	}
 	Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], compress_flags, p_options["scale_mesh"], p_options["offset_mesh"], nullptr);
 
 	ERR_FAIL_COND_V(err != OK, err);

+ 5 - 1
editor/import/resource_importer_scene.cpp

@@ -1109,9 +1109,10 @@ void ResourceImporterScene::get_import_options(List<ImportOption> *r_options, in
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "materials/location", PROPERTY_HINT_ENUM, "Node,Mesh"), (meshes_out || materials_out) ? 1 : 0));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "materials/storage", PROPERTY_HINT_ENUM, "Built-In,Files (.material),Files (.tres)", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), materials_out ? 1 : 0));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "materials/keep_on_reimport"), materials_out));
-	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/octahedral_compression"), true));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/compress", PROPERTY_HINT_FLAGS, "Vertex,Normal,Tangent,Color,TexUV,TexUV2,Bones,Weights,Index"), VS::ARRAY_COMPRESS_DEFAULT >> VS::ARRAY_COMPRESS_BASE));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/ensure_tangents"), true));
+	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/octahedral_compression"), true));
+	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/vertex_cache_optimization"), true));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/storage", PROPERTY_HINT_ENUM, "Built-In,Files (.mesh),Files (.tres)"), meshes_out ? 1 : 0));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/light_baking", PROPERTY_HINT_ENUM, "Disabled,Enable,Gen Lightmaps", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), 0));
 	r_options->push_back(ImportOption(PropertyInfo(Variant::REAL, "meshes/lightmap_texel_size", PROPERTY_HINT_RANGE, "0.001,100,0.001"), 0.1));
@@ -1257,6 +1258,9 @@ Error ResourceImporterScene::import(const String &p_source_file, const String &p
 	if (bool(p_options["meshes/octahedral_compression"])) {
 		compress_flags |= VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION;
 	}
+	if (bool(p_options["meshes/vertex_cache_optimization"])) {
+		compress_flags |= VS::ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION;
+	}
 	if (bool(p_options["meshes/ensure_tangents"])) {
 		import_flags |= EditorSceneImporter::IMPORT_GENERATE_TANGENT_ARRAYS;
 	}

+ 1 - 0
scene/resources/mesh.cpp

@@ -603,6 +603,7 @@ void Mesh::_bind_methods() {
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_2D_VERTICES);
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_16_BIT_BONES);
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION);
+	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION);
 
 	BIND_ENUM_CONSTANT(ARRAY_COMPRESS_DEFAULT);
 

+ 1 - 0
scene/resources/mesh.h

@@ -98,6 +98,7 @@ public:
 		ARRAY_FLAG_USE_16_BIT_BONES = ARRAY_COMPRESS_INDEX << 2,
 		ARRAY_FLAG_USE_DYNAMIC_UPDATE = ARRAY_COMPRESS_INDEX << 3,
 		ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION = ARRAY_COMPRESS_INDEX << 4,
+		ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION = ARRAY_COMPRESS_INDEX << 5,
 
 		ARRAY_COMPRESS_DEFAULT = ARRAY_COMPRESS_NORMAL | ARRAY_COMPRESS_TANGENT | ARRAY_COMPRESS_COLOR | ARRAY_COMPRESS_TEX_UV | ARRAY_COMPRESS_TEX_UV2 | ARRAY_COMPRESS_WEIGHTS | ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION
 

+ 14 - 0
servers/visual_server.cpp

@@ -31,6 +31,7 @@
 #include "visual_server.h"
 
 #include "core/engine.h"
+#include "core/math/vertex_cache_optimizer.h"
 #include "core/method_bind_ext.gen.inc"
 #include "core/project_settings.h"
 
@@ -768,6 +769,14 @@ Error VisualServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint32_
 				ERR_FAIL_COND_V(indices.size() == 0, ERR_INVALID_PARAMETER);
 				ERR_FAIL_COND_V(indices.size() != p_index_array_len, ERR_INVALID_PARAMETER);
 
+				// Vertex cache optimization?
+				if (p_format & ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION) {
+					// Expecting triangles.
+					ERR_FAIL_COND_V((indices.size() % 3) != 0, ERR_INVALID_PARAMETER);
+					VertexCacheOptimizer opt;
+					opt.reorder_indices_pool(indices, indices.size() / 3, p_vertex_array_len);
+				}
+
 				/* determine whether using 16 or 32 bits indices */
 
 				PoolVector<int>::Read read = indices.read();
@@ -1276,6 +1285,11 @@ void VisualServer::mesh_add_surface_from_arrays(RID p_mesh, PrimitiveType p_prim
 	int index_array_len = 0;
 	int array_len = 0;
 
+	// Only implemented for triangles.
+	if (p_primitive != PrimitiveType::PRIMITIVE_TRIANGLES) {
+		p_compress_format &= ~ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION;
+	}
+
 	bool res = _mesh_find_format(p_primitive, p_arrays, p_blend_shapes, p_compress_format, use_split_stream, offsets, attributes_base_offset, attributes_stride, positions_stride, format, index_array_len, array_len);
 	ERR_FAIL_COND(!res);
 

+ 1 - 0
servers/visual_server.h

@@ -273,6 +273,7 @@ public:
 		ARRAY_FLAG_USE_16_BIT_BONES = ARRAY_COMPRESS_INDEX << 2,
 		ARRAY_FLAG_USE_DYNAMIC_UPDATE = ARRAY_COMPRESS_INDEX << 3,
 		ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION = ARRAY_COMPRESS_INDEX << 4,
+		ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION = ARRAY_COMPRESS_INDEX << 5,
 
 		ARRAY_COMPRESS_DEFAULT = ARRAY_COMPRESS_NORMAL | ARRAY_COMPRESS_TANGENT | ARRAY_COMPRESS_COLOR | ARRAY_COMPRESS_TEX_UV | ARRAY_COMPRESS_TEX_UV2 | ARRAY_COMPRESS_WEIGHTS | ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION