2 年之前 · e28868e30c
--- a/core/object/worker_thread_pool.h
+++ b/core/object/worker_thread_pool.h
@@ -202,4 +202,25 @@ public:
 
															 	~WorkerThreadPool();
														
 
															 };
														
 
															+// Calls f(i) once for every i in the half-open range [i_begin, i_end).
															+//
															+// - parallel == false: the calls are made in ascending order on the calling thread.
															+// - parallel == true: the range is submitted to the WorkerThreadPool singleton as a
															+//   single group task with (i_end - i_begin) elements, labelled with `name`, and this
															+//   function then waits on that group via wait_for_group_task_completion().
															+//   f is reached through a wrapper that captures it by reference, so f must be safe
															+//   to invoke from the pool (presumably concurrently; confirm against the pool's contract).
															+//
															+// An empty or inverted range (i_end <= i_begin) is a no-op in both modes.
															+template <typename F>
															+static _FORCE_INLINE_ void for_range(int i_begin, int i_end, bool parallel, String name, F f) {
															+	// Nothing to iterate: return before touching the pool, so the parallel path never
															+	// requests a zero or negative element count and matches the serial loop, which
															+	// simply runs zero iterations for such a range.
															+	if (i_begin >= i_end) {
															+		return;
															+	}
															+
															+	if (!parallel) {
															+		for (int i = i_begin; i < i_end; i++) {
															+			f(i);
															+		}
															+		return;
															+	}
															+
															+	// The wrapper shifts the element index it receives by i_begin before forwarding
															+	// it to f (the pool is expected to number elements from 0). The userdata pointer
															+	// is not used. Capturing by reference is safe because this function does not
															+	// return until the group task has been waited on below.
															+	auto wrapper = [&](int i, void *unused) {
															+		f(i + i_begin);
															+	};
															+
															+	WorkerThreadPool *wtp = WorkerThreadPool::get_singleton();
															+	WorkerThreadPool::GroupID gid = wtp->add_template_group_task(
															+			&wrapper, &decltype(wrapper)::operator(), nullptr,
															+			i_end - i_begin, -1,
															+			true, name);
															+	wtp->wait_for_group_task_completion(gid);
															+}
														
 
															+
														
 
															 #endif // WORKER_THREAD_POOL_H
														
--- a/modules/raycast/raycast_occlusion_cull.cpp
+++ b/modules/raycast/raycast_occlusion_cull.cpp
@@ -354,41 +354,14 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
 
															 	// Embree requires the last element to be readable by a 16-byte SSE load instruction, so we add padding to be safe.
														
 
															 	occ_inst->xformed_vertices.resize(vertices_size + 1);
														
 
															-	const Vector3 *read_ptr = occ->vertices.ptr();
														
 
															-	Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
														
 
															-
														
 
															-	if (vertices_size > 1024) {
														
 
															-		TransformThreadData td;
														
 
															-		td.xform = occ_inst->xform;
														
 
															-		td.read = read_ptr;
														
 
															-		td.write = write_ptr;
														
 
															-		td.vertex_count = vertices_size;
														
 
															-		td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
														
 
															-		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull"));
														
 
															-		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
														
 
															-
														
 
															-	} else {
														
 
															-		_transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size);
														
 
															-	}
														
 
															+	for_range(0, vertices_size, vertices_size > 1024, SNAME("RaycastOcclusionCull"), [&](const int i) {
														
 
															+		occ_inst->xformed_vertices[i] = occ_inst->xform.xform(occ->vertices[i]);
														
 
															+	});
														
 
															 	occ_inst->indices.resize(occ->indices.size());
														
 
															 	memcpy(occ_inst->indices.ptr(), occ->indices.ptr(), occ->indices.size() * sizeof(int32_t));
														
 
															 }
														
 
															-void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data) {
														
 
															-	uint32_t vertex_total = p_data->vertex_count;
														
 
															-	uint32_t total_threads = p_data->thread_count;
														
 
															-	uint32_t from = p_thread * vertex_total / total_threads;
														
 
															-	uint32_t to = (p_thread + 1 == total_threads) ? vertex_total : ((p_thread + 1) * vertex_total / total_threads);
														
 
															-	_transform_vertices_range(p_data->read, p_data->write, p_data->xform, from, to);
														
 
															-}
														
 
															-
														
 
															-void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) {
														
 
															-	for (int i = p_from; i < p_to; i++) {
														
 
															-		p_write[i] = p_xform.xform(p_read[i]);
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															 void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) {
														
 
															 	Scenario *scenario = (Scenario *)p_ud;
														
 
															 	int commit_idx = 1 - (scenario->current_scene_idx);
														
--- a/modules/raycast/raycast_occlusion_cull.h
+++ b/modules/raycast/raycast_occlusion_cull.h
@@ -121,14 +121,6 @@ private:
 
															 			const uint32_t *masks;
														
 
															 		};
														
 
															-		struct TransformThreadData {
														
 
															-			uint32_t thread_count;
														
 
															-			uint32_t vertex_count;
														
 
															-			Transform3D xform;
														
 
															-			const Vector3 *read;
														
 
															-			Vector3 *write = nullptr;
														
 
															-		};
														
 
															-
														
 
															 		Thread *commit_thread = nullptr;
														
 
															 		bool commit_done = true;
														
 
															 		bool dirty = false;
														
@@ -144,8 +136,6 @@ private:
 
															 		void _update_dirty_instance_thread(int p_idx, RID *p_instances);
														
 
															 		void _update_dirty_instance(int p_idx, RID *p_instances);
														
 
															-		void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
														
 
															-		void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
														
 
															 		static void _commit_scene(void *p_ud);
														
 
															 		bool update();
														
--- a/tests/core/threads/test_worker_thread_pool.h
+++ b/tests/core/threads/test_worker_thread_pool.h
@@ -106,6 +106,32 @@ TEST_CASE("[WorkerThreadPool] Process elements using group tasks") {
 
															 	}
														
 
															 }
														
 
															+// Exercises for_range() in both serial and parallel mode, using every split point
															+// of a fixed-size buffer as the boundary between the two modes.
															+TEST_CASE("[WorkerThreadPool] Parallel foreach") {
															+	const int count_max = 256;
															+
															+	for (int split = 0; split < count_max; split++) {
															+		LocalVector<int> c;
															+		c.resize(count_max);
															+
															+		// Pass 1: write 1 into every slot through the parallel path.
															+		for_range(0, count_max, true, String(), [&](int idx) {
															+			c[idx] = 1;
															+		});
															+		// After sorting, the two ends of the buffer are its extremes, so checking
															+		// the first value and first == last covers every element.
															+		c.sort();
															+		CHECK(c[0] == 1);
															+		CHECK(c[0] == c[count_max - 1]);
															+
															+		// Pass 2: increment [0, split) serially and [split, count_max) in parallel;
															+		// together the two calls must touch each slot exactly once.
															+		const auto bump = [&](int idx) {
															+			c[idx]++;
															+		};
															+		for_range(0, split, false, String(), bump);
															+		for_range(split, count_max, true, String(), bump);
															+		c.sort();
															+		CHECK(c[0] == 2);
															+		CHECK(c[0] == c[count_max - 1]);
															+	}
															+}
														
 
															+
														
 
															 } // namespace TestWorkerThreadPool
														
 
															 #endif // TEST_WORKER_THREAD_POOL_H