Selaa lähdekoodia

Merge pull request #98801 from zeux/my-final-form

Rewrite index optimization code for maximum efficiency
Thaddeus Crews 9 kuukautta sitten
vanhempi
commit
88a48d52f0

+ 2 - 2
editor/import/3d/resource_importer_obj.cpp

@@ -535,8 +535,6 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
 		}
 	}
 
-	mesh->optimize_indices_for_cache();
-
 	if (p_generate_lods) {
 		// Use normal merge/split angles that match the defaults used for 3D scene importing.
 		mesh->generate_lods(60.0f, {});
@@ -546,6 +544,8 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
 		mesh->create_shadow_mesh();
 	}
 
+	mesh->optimize_indices();
+
 	if (p_single_mesh && mesh->get_surface_count() > 0) {
 		r_meshes.push_back(mesh);
 	}

+ 2 - 2
editor/import/3d/resource_importer_scene.cpp

@@ -2567,8 +2567,6 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
 					}
 				}
 
-				src_mesh_node->get_mesh()->optimize_indices_for_cache();
-
 				if (generate_lods) {
 					Array skin_pose_transform_array = _get_skinned_pose_transforms(src_mesh_node);
 					src_mesh_node->get_mesh()->generate_lods(merge_angle, skin_pose_transform_array);
@@ -2578,6 +2576,8 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
 					src_mesh_node->get_mesh()->create_shadow_mesh();
 				}
 
+				src_mesh_node->get_mesh()->optimize_indices();
+
 				if (!save_to_file.is_empty()) {
 					Ref<Mesh> existing = ResourceCache::get_ref(save_to_file);
 					if (existing.is_valid()) {

+ 2 - 2
modules/meshoptimizer/register_types.cpp

@@ -40,10 +40,10 @@ void initialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
 	}
 
 	SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
+	SurfaceTool::optimize_vertex_fetch_remap_func = meshopt_optimizeVertexFetchRemap;
 	SurfaceTool::simplify_func = meshopt_simplify;
 	SurfaceTool::simplify_with_attrib_func = meshopt_simplifyWithAttributes;
 	SurfaceTool::simplify_scale_func = meshopt_simplifyScale;
-	SurfaceTool::simplify_sloppy_func = meshopt_simplifySloppy;
 	SurfaceTool::generate_remap_func = meshopt_generateVertexRemap;
 	SurfaceTool::remap_vertex_func = meshopt_remapVertexBuffer;
 	SurfaceTool::remap_index_func = meshopt_remapIndexBuffer;
@@ -55,9 +55,9 @@ void uninitialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
 	}
 
 	SurfaceTool::optimize_vertex_cache_func = nullptr;
+	SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
 	SurfaceTool::simplify_func = nullptr;
 	SurfaceTool::simplify_scale_func = nullptr;
-	SurfaceTool::simplify_sloppy_func = nullptr;
 	SurfaceTool::generate_remap_func = nullptr;
 	SurfaceTool::remap_vertex_func = nullptr;
 	SurfaceTool::remap_index_func = nullptr;

+ 85 - 18
scene/resources/3d/importer_mesh.cpp

@@ -168,10 +168,56 @@ void ImporterMesh::set_surface_material(int p_surface, const Ref<Material> &p_ma
 	mesh.unref();
 }
 
-void ImporterMesh::optimize_indices_for_cache() {
+template <typename T> // T is the element type stored in the packed array being remapped.
+static Vector<T> _remap_array(Vector<T> p_array, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) { // Reorders p_array through p_remap and returns it resized to p_vertex_count vertices' worth of elements.
+	ERR_FAIL_COND_V(p_array.size() % p_remap.size() != 0, p_array); // Length must be a whole multiple of the remap length; presumably the macro returns p_array untouched otherwise. NOTE(review): an empty p_remap would divide by zero here - confirm callers never pass one.
+	int num_elements = p_array.size() / p_remap.size(); // Number of T elements stored per remap entry.
+	T *data = p_array.ptrw();
+	SurfaceTool::remap_vertex_func(data, data, p_remap.size(), sizeof(T) * num_elements, p_remap.ptr()); // The same buffer is passed as destination and source, so the remap is requested in place.
+	p_array.resize(p_vertex_count * num_elements); // Keep only p_vertex_count entries' worth of elements.
+	return p_array;
+}
+
+static void _remap_arrays(Array &r_arrays, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) { // Applies _remap_array to every slot of r_arrays except the index slot, writing results back in place.
+	for (int i = 0; i < r_arrays.size(); i++) {
+		if (i == RS::ARRAY_INDEX) { // The index slot is not remapped by this helper.
+			continue;
+		}
+
+		switch (r_arrays[i].get_type()) { // Dispatch on the Variant type to pick the matching element type for _remap_array.
+			case Variant::NIL: // Empty slot: nothing to remap.
+				break;
+			case Variant::PACKED_VECTOR3_ARRAY:
+				r_arrays[i] = _remap_array<Vector3>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			case Variant::PACKED_VECTOR2_ARRAY:
+				r_arrays[i] = _remap_array<Vector2>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			case Variant::PACKED_FLOAT32_ARRAY:
+				r_arrays[i] = _remap_array<float>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			case Variant::PACKED_INT32_ARRAY:
+				r_arrays[i] = _remap_array<int32_t>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			case Variant::PACKED_BYTE_ARRAY:
+				r_arrays[i] = _remap_array<uint8_t>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			case Variant::PACKED_COLOR_ARRAY:
+				r_arrays[i] = _remap_array<Color>(r_arrays[i], p_remap, p_vertex_count);
+				break;
+			default: // Any other Variant type is reported as an error.
+				ERR_FAIL_MSG("Unhandled array type."); // NOTE(review): presumably returns from the function here, leaving later slots unremapped - confirm.
+		}
+	}
+}
+
+void ImporterMesh::optimize_indices() {
 	if (!SurfaceTool::optimize_vertex_cache_func) {
 		return;
 	}
+	if (!SurfaceTool::optimize_vertex_fetch_remap_func || !SurfaceTool::remap_vertex_func || !SurfaceTool::remap_index_func) {
+		return;
+	}
 
 	for (int i = 0; i < surfaces.size(); i++) {
 		if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) {
@@ -188,10 +234,48 @@ void ImporterMesh::optimize_indices_for_cache() {
 			continue;
 		}
 
+		// Optimize indices for vertex cache to establish final triangle order.
 		int *indices_ptr = indices.ptrw();
 		SurfaceTool::optimize_vertex_cache_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, vertex_count);
+		surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
+
+		for (int j = 0; j < surfaces[i].lods.size(); ++j) {
+			Surface::LOD &lod = surfaces.write[i].lods.write[j];
+			int *lod_indices_ptr = lod.indices.ptrw();
+			SurfaceTool::optimize_vertex_cache_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), vertex_count);
+		}
 
+		// Concatenate indices for all LODs in the order of coarse->fine; this establishes the effective order of vertices,
+		// and is important to optimize for vertex fetch (all GPUs) and shading (Mali GPUs)
+		PackedInt32Array merged_indices;
+		for (int j = surfaces[i].lods.size() - 1; j >= 0; --j) {
+			merged_indices.append_array(surfaces[i].lods[j].indices);
+		}
+		merged_indices.append_array(indices);
+
+		// Generate remap array that establishes optimal vertex order according to the order of indices above.
+		Vector<uint32_t> remap;
+		remap.resize(vertex_count);
+		unsigned int new_vertex_count = SurfaceTool::optimize_vertex_fetch_remap_func(remap.ptrw(), (const unsigned int *)merged_indices.ptr(), merged_indices.size(), vertex_count);
+
+		// We need to remap all vertex and index arrays in lockstep according to the remap.
+		SurfaceTool::remap_index_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, remap.ptr());
 		surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
+
+		for (int j = 0; j < surfaces[i].lods.size(); ++j) {
+			Surface::LOD &lod = surfaces.write[i].lods.write[j];
+			int *lod_indices_ptr = lod.indices.ptrw();
+			SurfaceTool::remap_index_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), remap.ptr());
+		}
+
+		_remap_arrays(surfaces.write[i].arrays, remap, new_vertex_count);
+		for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) {
+			_remap_arrays(surfaces.write[i].blend_shape_data.write[j].arrays, remap, new_vertex_count);
+		}
+	}
+
+	if (shadow_mesh.is_valid()) {
+		shadow_mesh->optimize_indices();
 	}
 }
 
@@ -215,9 +299,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
 	if (!SurfaceTool::simplify_with_attrib_func) {
 		return;
 	}
-	if (!SurfaceTool::optimize_vertex_cache_func) {
-		return;
-	}
 
 	LocalVector<Transform3D> bone_transform_vector;
 	for (int i = 0; i < p_bone_transform_array.size(); i++) {
@@ -431,12 +512,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
 		}
 
 		surfaces.write[i].lods.sort_custom<Surface::LODComparator>();
-
-		for (int j = 0; j < surfaces.write[i].lods.size(); j++) {
-			Surface::LOD &lod = surfaces.write[i].lods.write[j];
-			unsigned int *lod_indices_ptr = (unsigned int *)lod.indices.ptrw();
-			SurfaceTool::optimize_vertex_cache_func(lod_indices_ptr, lod_indices_ptr, lod.indices.size(), vertex_count);
-		}
 	}
 }
 
@@ -574,10 +649,6 @@ void ImporterMesh::create_shadow_mesh() {
 				index_wptr[j] = vertex_remap[index];
 			}
 
-			if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
-				SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
-			}
-
 			new_surface[RS::ARRAY_INDEX] = new_indices;
 
 			// Make sure the same LODs as the full version are used.
@@ -596,10 +667,6 @@ void ImporterMesh::create_shadow_mesh() {
 					index_wptr[k] = vertex_remap[index];
 				}
 
-				if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
-					SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
-				}
-
 				lods[surfaces[i].lods[j].distance] = new_indices;
 			}
 		}

+ 1 - 1
scene/resources/3d/importer_mesh.h

@@ -113,7 +113,7 @@ public:
 
 	void set_surface_material(int p_surface, const Ref<Material> &p_material);
 
-	void optimize_indices_for_cache();
+	void optimize_indices();
 
 	void generate_lods(float p_normal_merge_angle, Array p_skin_pose_transform_array);
 

+ 1 - 1
scene/resources/surface_tool.cpp

@@ -33,10 +33,10 @@
 #define EQ_VERTEX_DIST 0.00001
 
 SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr;
+SurfaceTool::OptimizeVertexFetchRemapFunc SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
 SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr;
 SurfaceTool::SimplifyWithAttribFunc SurfaceTool::simplify_with_attrib_func = nullptr;
 SurfaceTool::SimplifyScaleFunc SurfaceTool::simplify_scale_func = nullptr;
-SurfaceTool::SimplifySloppyFunc SurfaceTool::simplify_sloppy_func = nullptr;
 SurfaceTool::GenerateRemapFunc SurfaceTool::generate_remap_func = nullptr;
 SurfaceTool::RemapVertexFunc SurfaceTool::remap_vertex_func = nullptr;
 SurfaceTool::RemapIndexFunc SurfaceTool::remap_index_func = nullptr;

+ 5 - 3
scene/resources/surface_tool.h

@@ -90,14 +90,14 @@ public:
 
 	typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
 	static OptimizeVertexCacheFunc optimize_vertex_cache_func;
+	typedef size_t (*OptimizeVertexFetchRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
+	static OptimizeVertexFetchRemapFunc optimize_vertex_fetch_remap_func;
 	typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error);
 	static SimplifyFunc simplify_func;
 	typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, const float *attributes, size_t attribute_stride, const float *attribute_weights, size_t attribute_count, const unsigned char *vertex_lock, size_t target_index_count, float target_error, unsigned int options, float *result_error);
 	static SimplifyWithAttribFunc simplify_with_attrib_func;
 	typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 	static SimplifyScaleFunc simplify_scale_func;
-	typedef size_t (*SimplifySloppyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *out_result_error);
-	static SimplifySloppyFunc simplify_sloppy_func;
 	typedef size_t (*GenerateRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const void *vertices, size_t vertex_count, size_t vertex_size);
 	static GenerateRemapFunc generate_remap_func;
 	typedef void (*RemapVertexFunc)(void *destination, const void *vertices, size_t vertex_count, size_t vertex_size, const unsigned int *remap);
@@ -222,7 +222,9 @@ public:
 
 	void clear();
 
-	LocalVector<Vertex> &get_vertex_array() { return vertex_array; }
+	LocalVector<Vertex> &get_vertex_array() {
+		return vertex_array;
+	}
 
 	void create_from_triangle_arrays(const Array &p_arrays);
 	void create_from_arrays(const Array &p_arrays, Mesh::PrimitiveType p_primitive_type = Mesh::PRIMITIVE_TRIANGLES);