Ver código fonte

Merge pull request #48708 from nekomatata/heightmap-raycast-acceleration

Optimize raycast with large Heightmap shape data
Camille Mohr-Daurat 4 anos atrás
pai
commit
29bdd000f0
2 arquivos alterados com 297 adições e 111 exclusões
  1. 276 111
      servers/physics_3d/shape_3d_sw.cpp
  2. 21 0
      servers/physics_3d/shape_3d_sw.h

+ 276 - 111
servers/physics_3d/shape_3d_sw.cpp

@@ -1676,6 +1676,17 @@ struct _HeightmapSegmentCullParams {
 	FaceShape3DSW *face = nullptr;
 };
 
+struct _HeightmapGridCullState {
+	real_t length = 0.0;
+	real_t length_flat = 0.0;
+
+	real_t dist = 0.0;
+	real_t prev_dist = 0.0;
+
+	int x = 0;
+	int z = 0;
+};
+
 _FORCE_INLINE_ bool _heightmap_face_cull_segment(_HeightmapSegmentCullParams &p_params) {
 	Vector3 res;
 	Vector3 normal;
@@ -1688,11 +1699,11 @@ _FORCE_INLINE_ bool _heightmap_face_cull_segment(_HeightmapSegmentCullParams &p_
 	return false;
 }
 
-_FORCE_INLINE_ bool _heightmap_cell_cull_segment(_HeightmapSegmentCullParams &p_params, int p_x, int p_z) {
+_FORCE_INLINE_ bool _heightmap_cell_cull_segment(_HeightmapSegmentCullParams &p_params, const _HeightmapGridCullState &p_state) {
 	// First triangle.
-	p_params.heightmap->_get_point(p_x, p_z, p_params.face->vertex[0]);
-	p_params.heightmap->_get_point(p_x + 1, p_z, p_params.face->vertex[1]);
-	p_params.heightmap->_get_point(p_x, p_z + 1, p_params.face->vertex[2]);
+	p_params.heightmap->_get_point(p_state.x, p_state.z, p_params.face->vertex[0]);
+	p_params.heightmap->_get_point(p_state.x + 1, p_state.z, p_params.face->vertex[1]);
+	p_params.heightmap->_get_point(p_state.x, p_state.z + 1, p_params.face->vertex[2]);
 	p_params.face->normal = Plane(p_params.face->vertex[0], p_params.face->vertex[1], p_params.face->vertex[2]).normal;
 	if (_heightmap_face_cull_segment(p_params)) {
 		return true;
@@ -1700,7 +1711,7 @@ _FORCE_INLINE_ bool _heightmap_cell_cull_segment(_HeightmapSegmentCullParams &p_
 
 	// Second triangle.
 	p_params.face->vertex[0] = p_params.face->vertex[1];
-	p_params.heightmap->_get_point(p_x + 1, p_z + 1, p_params.face->vertex[1]);
+	p_params.heightmap->_get_point(p_state.x + 1, p_state.z + 1, p_params.face->vertex[1]);
 	p_params.face->normal = Plane(p_params.face->vertex[0], p_params.face->vertex[1], p_params.face->vertex[2]).normal;
 	if (_heightmap_face_cull_segment(p_params)) {
 		return true;
@@ -1709,13 +1720,51 @@ _FORCE_INLINE_ bool _heightmap_cell_cull_segment(_HeightmapSegmentCullParams &p_
 	return false;
 }
 
-bool HeightMapShape3DSW::intersect_segment(const Vector3 &p_begin, const Vector3 &p_end, Vector3 &r_point, Vector3 &r_normal) const {
-	if (heights.is_empty()) {
+_FORCE_INLINE_ bool _heightmap_chunk_cull_segment(_HeightmapSegmentCullParams &p_params, const _HeightmapGridCullState &p_state) {
+	const HeightMapShape3DSW::Range &chunk = p_params.heightmap->_get_bounds_chunk(p_state.x, p_state.z);
+
+	Vector3 enter_pos;
+	Vector3 exit_pos;
+
+	if (p_state.length_flat > CMP_EPSILON) {
+		real_t flat_to_3d = p_state.length / p_state.length_flat;
+		real_t enter_param = p_state.prev_dist * flat_to_3d;
+		real_t exit_param = p_state.dist * flat_to_3d;
+		enter_pos = p_params.from + p_params.dir * enter_param;
+		exit_pos = p_params.from + p_params.dir * exit_param;
+	} else {
+		// Consider the ray vertical.
+		// (though we shouldn't reach this often because there is an early check up-front)
+		enter_pos = p_params.from;
+		exit_pos = p_params.to;
+	}
+
+	// Transform positions to heightmap space.
+	enter_pos *= HeightMapShape3DSW::BOUNDS_CHUNK_SIZE;
+	exit_pos *= HeightMapShape3DSW::BOUNDS_CHUNK_SIZE;
+
+	// We did enter the flat projection of the AABB,
+	// but we have to check if we intersect it on the vertical axis.
+	if ((enter_pos.y > chunk.max) && (exit_pos.y > chunk.max)) {
+		return false;
+	}
+	if ((enter_pos.y < chunk.min) && (exit_pos.y < chunk.min)) {
 		return false;
 	}
 
-	Vector3 local_begin = p_begin + local_origin;
-	Vector3 local_end = p_end + local_origin;
+	return p_params.heightmap->_intersect_grid_segment(_heightmap_cell_cull_segment, enter_pos, exit_pos, p_params.heightmap->width, p_params.heightmap->depth, p_params.heightmap->local_origin, p_params.result, p_params.normal);
+}
+
+template <typename ProcessFunction>
+bool HeightMapShape3DSW::_intersect_grid_segment(ProcessFunction &p_process, const Vector3 &p_begin, const Vector3 &p_end, int p_width, int p_depth, const Vector3 &offset, Vector3 &r_point, Vector3 &r_normal) const {
+	Vector3 delta = (p_end - p_begin);
+	real_t length = delta.length();
+
+	if (length < CMP_EPSILON) {
+		return false;
+	}
+
+	Vector3 local_begin = p_begin + offset;
 
 	FaceShape3DSW face;
 	face.backface_collision = false;
@@ -1723,136 +1772,181 @@ bool HeightMapShape3DSW::intersect_segment(const Vector3 &p_begin, const Vector3
 	_HeightmapSegmentCullParams params;
 	params.from = p_begin;
 	params.to = p_end;
-	params.dir = (p_end - p_begin).normalized();
+	params.dir = delta / length;
 	params.heightmap = this;
 	params.face = &face;
 
-	// Quantize the ray begin/end.
-	int begin_x = floor(local_begin.x);
-	int begin_z = floor(local_begin.z);
-	int end_x = floor(local_end.x);
-	int end_z = floor(local_end.z);
+	_HeightmapGridCullState state;
 
-	if ((begin_x == end_x) && (begin_z == end_z)) {
-		// Simple case for rays that don't traverse the grid horizontally.
-		// Just perform a test on the given cell.
-		int x = CLAMP(begin_x, 0, width - 2);
-		int z = CLAMP(begin_z, 0, depth - 2);
-		if (_heightmap_cell_cull_segment(params, x, z)) {
-			r_point = params.result;
-			r_normal = params.normal;
-			return true;
-		}
-	} else {
-		// Perform grid query from projected ray.
-		Vector2 ray_dir_proj(local_end.x - local_begin.x, local_end.z - local_begin.z);
-		real_t ray_dist_proj = ray_dir_proj.length();
+	// Perform grid query from projected ray.
+	Vector2 ray_dir_flat(delta.x, delta.z);
+	state.length = length;
+	state.length_flat = ray_dir_flat.length();
 
-		if (ray_dist_proj < CMP_EPSILON) {
-			ray_dir_proj = Vector2();
-		} else {
-			ray_dir_proj /= ray_dist_proj;
-		}
+	if (state.length_flat < CMP_EPSILON) {
+		ray_dir_flat = Vector2();
+	} else {
+		ray_dir_flat /= state.length_flat;
+	}
 
-		const int x_step = (ray_dir_proj.x > CMP_EPSILON) ? 1 : ((ray_dir_proj.x < -CMP_EPSILON) ? -1 : 0);
-		const int z_step = (ray_dir_proj.y > CMP_EPSILON) ? 1 : ((ray_dir_proj.y < -CMP_EPSILON) ? -1 : 0);
+	const int x_step = (ray_dir_flat.x > CMP_EPSILON) ? 1 : ((ray_dir_flat.x < -CMP_EPSILON) ? -1 : 0);
+	const int z_step = (ray_dir_flat.y > CMP_EPSILON) ? 1 : ((ray_dir_flat.y < -CMP_EPSILON) ? -1 : 0);
 
-		const real_t infinite = 1e20;
-		const real_t delta_x = (x_step != 0) ? 1.f / Math::abs(ray_dir_proj.x) : infinite;
-		const real_t delta_z = (z_step != 0) ? 1.f / Math::abs(ray_dir_proj.y) : infinite;
+	const real_t infinite = 1e20;
+	const real_t delta_x = (x_step != 0) ? 1.f / Math::abs(ray_dir_flat.x) : infinite;
+	const real_t delta_z = (z_step != 0) ? 1.f / Math::abs(ray_dir_flat.y) : infinite;
 
-		real_t cross_x; // At which value of `param` we will cross a x-axis lane?
-		real_t cross_z; // At which value of `param` we will cross a z-axis lane?
+	real_t cross_x; // At which value of `param` we will cross a x-axis lane?
+	real_t cross_z; // At which value of `param` we will cross a z-axis lane?
 
-		// X initialization.
-		if (x_step != 0) {
-			if (x_step == 1) {
-				cross_x = (ceil(local_begin.x) - local_begin.x) * delta_x;
-			} else {
-				cross_x = (local_begin.x - floor(local_begin.x)) * delta_x;
-			}
+	// X initialization.
+	if (x_step != 0) {
+		if (x_step == 1) {
+			cross_x = (Math::ceil(local_begin.x) - local_begin.x) * delta_x;
 		} else {
-			cross_x = infinite; // Will never cross on X.
+			cross_x = (local_begin.x - Math::floor(local_begin.x)) * delta_x;
 		}
+	} else {
+		cross_x = infinite; // Will never cross on X.
+	}
 
-		// Z initialization.
-		if (z_step != 0) {
-			if (z_step == 1) {
-				cross_z = (ceil(local_begin.z) - local_begin.z) * delta_z;
-			} else {
-				cross_z = (local_begin.z - floor(local_begin.z)) * delta_z;
-			}
+	// Z initialization.
+	if (z_step != 0) {
+		if (z_step == 1) {
+			cross_z = (Math::ceil(local_begin.z) - local_begin.z) * delta_z;
 		} else {
-			cross_z = infinite; // Will never cross on Z.
+			cross_z = (local_begin.z - Math::floor(local_begin.z)) * delta_z;
 		}
+	} else {
+		cross_z = infinite; // Will never cross on Z.
+	}
 
-		int x = floor(local_begin.x);
-		int z = floor(local_begin.z);
+	int x = Math::floor(local_begin.x);
+	int z = Math::floor(local_begin.z);
 
-		// Workaround cases where the ray starts at an integer position.
-		if (Math::is_zero_approx(cross_x)) {
-			cross_x += delta_x;
-			// If going backwards, we should ignore the position we would get by the above flooring,
-			// because the ray is not heading in that direction.
-			if (x_step == -1) {
-				x -= 1;
-			}
+	// Workaround cases where the ray starts at an integer position.
+	if (Math::is_zero_approx(cross_x)) {
+		cross_x += delta_x;
+		// If going backwards, we should ignore the position we would get by the above flooring,
+		// because the ray is not heading in that direction.
+		if (x_step == -1) {
+			x -= 1;
 		}
+	}
 
-		if (Math::is_zero_approx(cross_z)) {
-			cross_z += delta_z;
-			if (z_step == -1) {
-				z -= 1;
-			}
+	if (Math::is_zero_approx(cross_z)) {
+		cross_z += delta_z;
+		if (z_step == -1) {
+			z -= 1;
 		}
+	}
+
+	// Start inside the grid.
+	int x_start = MAX(MIN(x, p_width - 2), 0);
+	int z_start = MAX(MIN(z, p_depth - 2), 0);
 
-		// Start inside the grid.
-		int x_start = CLAMP(x, 0, width - 2);
-		int z_start = CLAMP(z, 0, depth - 2);
+	// Adjust initial cross values.
+	cross_x += delta_x * x_step * (x_start - x);
+	cross_z += delta_z * z_step * (z_start - z);
 
-		// Adjust initial cross values.
-		cross_x += delta_x * x_step * (x_start - x);
-		cross_z += delta_z * z_step * (z_start - z);
+	x = x_start;
+	z = z_start;
 
-		x = x_start;
-		z = z_start;
+	while (true) {
+		state.prev_dist = state.dist;
+		state.x = x;
+		state.z = z;
 
-		if (_heightmap_cell_cull_segment(params, x, z)) {
+		if (cross_x < cross_z) {
+			// X lane.
+			x += x_step;
+			// Assign before advancing the param,
+			// to be in sync with the initialization step.
+			state.dist = cross_x;
+			cross_x += delta_x;
+		} else {
+			// Z lane.
+			z += z_step;
+			state.dist = cross_z;
+			cross_z += delta_z;
+		}
+
+		if (state.dist > state.length_flat) {
+			state.dist = state.length_flat;
+			if (p_process(params, state)) {
+				r_point = params.result;
+				r_normal = params.normal;
+				return true;
+			}
+			break;
+		}
+
+		if (p_process(params, state)) {
 			r_point = params.result;
 			r_normal = params.normal;
 			return true;
 		}
 
-		real_t dist = 0.0;
-		while (true) {
-			if (cross_x < cross_z) {
-				// X lane.
-				x += x_step;
-				// Assign before advancing the param,
-				// to be in sync with the initialization step.
-				dist = cross_x;
-				cross_x += delta_x;
-			} else {
-				// Z lane.
-				z += z_step;
-				dist = cross_z;
-				cross_z += delta_z;
-			}
+		// Stop when outside the grid.
+		if ((x < 0) || (z < 0) || (x >= p_width - 1) || (z >= p_depth - 1)) {
+			break;
+		}
+	}
 
-			// Stop when outside the grid.
-			if ((x < 0) || (z < 0) || (x >= width - 1) || (z >= depth - 1)) {
-				break;
-			}
+	return false;
+}
 
-			if (_heightmap_cell_cull_segment(params, x, z)) {
-				r_point = params.result;
-				r_normal = params.normal;
-				return true;
-			}
+bool HeightMapShape3DSW::intersect_segment(const Vector3 &p_begin, const Vector3 &p_end, Vector3 &r_point, Vector3 &r_normal) const {
+	if (heights.is_empty()) {
+		return false;
+	}
 
-			if (dist > ray_dist_proj) {
-				break;
-			}
+	Vector3 local_begin = p_begin + local_origin;
+	Vector3 local_end = p_end + local_origin;
+
+	// Quantize the ray begin/end.
+	int begin_x = Math::floor(local_begin.x);
+	int begin_z = Math::floor(local_begin.z);
+	int end_x = Math::floor(local_end.x);
+	int end_z = Math::floor(local_end.z);
+
+	if ((begin_x == end_x) && (begin_z == end_z)) {
+		// Simple case for rays that don't traverse the grid horizontally.
+		// Just perform a test on the given cell.
+		FaceShape3DSW face;
+		face.backface_collision = false;
+
+		_HeightmapSegmentCullParams params;
+		params.from = p_begin;
+		params.to = p_end;
+		params.dir = (p_end - p_begin).normalized();
+
+		params.heightmap = this;
+		params.face = &face;
+
+		_HeightmapGridCullState state;
+		state.x = MAX(MIN(begin_x, width - 2), 0);
+		state.z = MAX(MIN(begin_z, depth - 2), 0);
+		if (_heightmap_cell_cull_segment(params, state)) {
+			r_point = params.result;
+			r_normal = params.normal;
+			return true;
+		}
+	} else if (bounds_grid.is_empty()) {
+		// Process all cells intersecting the flat projection of the ray.
+		return _intersect_grid_segment(_heightmap_cell_cull_segment, p_begin, p_end, width, depth, local_origin, r_point, r_normal);
+	} else {
+		Vector3 ray_diff = (p_end - p_begin);
+		real_t length_flat_sqr = ray_diff.x * ray_diff.x + ray_diff.z * ray_diff.z;
+		if (length_flat_sqr < BOUNDS_CHUNK_SIZE * BOUNDS_CHUNK_SIZE) {
+			// Don't use chunks, the ray is too short in the plane.
+			return _intersect_grid_segment(_heightmap_cell_cull_segment, p_begin, p_end, width, depth, local_origin, r_point, r_normal);
+		} else {
+			// The ray is long, run raycast on a higher-level grid.
+			Vector3 bounds_from = p_begin / BOUNDS_CHUNK_SIZE;
+			Vector3 bounds_to = p_end / BOUNDS_CHUNK_SIZE;
+			Vector3 bounds_offset = local_origin / BOUNDS_CHUNK_SIZE;
+			return _intersect_grid_segment(_heightmap_chunk_cull_segment, bounds_from, bounds_to, bounds_grid_width, bounds_grid_depth, bounds_offset, r_point, r_normal);
 		}
 	}
 
@@ -1917,7 +2011,7 @@ void HeightMapShape3DSW::cull(const AABB &p_local_aabb, QueryCallback p_callback
 			_get_point(x, z, face.vertex[0]);
 			_get_point(x + 1, z, face.vertex[1]);
 			_get_point(x, z + 1, face.vertex[2]);
-			face.normal = Plane(face.vertex[0], face.vertex[2], face.vertex[1]).normal;
+			face.normal = Plane(face.vertex[0], face.vertex[1], face.vertex[2]).normal;
 			if (p_callback(p_userdata, &face)) {
 				return;
 			}
@@ -1925,7 +2019,7 @@ void HeightMapShape3DSW::cull(const AABB &p_local_aabb, QueryCallback p_callback
 			// Second triangle.
 			face.vertex[0] = face.vertex[1];
 			_get_point(x + 1, z + 1, face.vertex[1]);
-			face.normal = Plane(face.vertex[0], face.vertex[2], face.vertex[1]).normal;
+			face.normal = Plane(face.vertex[0], face.vertex[1], face.vertex[2]).normal;
 			if (p_callback(p_userdata, &face)) {
 				return;
 			}
@@ -1943,6 +2037,75 @@ Vector3 HeightMapShape3DSW::get_moment_of_inertia(real_t p_mass) const {
 			(p_mass / 3.0) * (extents.x * extents.x + extents.y * extents.y));
 }
 
+void HeightMapShape3DSW::_build_accelerator() {
+	bounds_grid.clear();
+
+	bounds_grid_width = width / BOUNDS_CHUNK_SIZE;
+	bounds_grid_depth = depth / BOUNDS_CHUNK_SIZE;
+
+	if (width % BOUNDS_CHUNK_SIZE > 0) {
+		++bounds_grid_width; // In case terrain size isn't dividable by chunk size.
+	}
+
+	if (depth % BOUNDS_CHUNK_SIZE > 0) {
+		++bounds_grid_depth;
+	}
+
+	uint32_t bound_grid_size = (uint32_t)(bounds_grid_width * bounds_grid_depth);
+
+	if (bound_grid_size < 2) {
+		// Grid is empty or just one chunk.
+		return;
+	}
+
+	bounds_grid.resize(bound_grid_size);
+
+	// Compute min and max height for all chunks.
+	for (int cz = 0; cz < bounds_grid_depth; ++cz) {
+		int z0 = cz * BOUNDS_CHUNK_SIZE;
+
+		for (int cx = 0; cx < bounds_grid_width; ++cx) {
+			int x0 = cx * BOUNDS_CHUNK_SIZE;
+
+			Range r;
+
+			r.min = _get_height(x0, z0);
+			r.max = r.min;
+
+			// Compute min and max height for this chunk.
+			// We have to include one extra cell to account for neighbors.
+			// Here is why:
+			// Say we have a flat terrain, and a plateau that fits a chunk perfectly.
+			//
+			//   Left        Right
+			// 0---0---0---1---1---1
+			// |   |   |   |   |   |
+			// 0---0---0---1---1---1
+			// |   |   |   |   |   |
+			// 0---0---0---1---1---1
+			//           x
+			//
+			// If the AABB for the Left chunk did not share vertices with the Right,
+			// then we would fail collision tests at x due to a gap.
+			//
+			int z_max = MIN(z0 + BOUNDS_CHUNK_SIZE + 1, depth);
+			int x_max = MIN(x0 + BOUNDS_CHUNK_SIZE + 1, width);
+			for (int z = z0; z < z_max; ++z) {
+				for (int x = x0; x < x_max; ++x) {
+					real_t height = _get_height(x, z);
+					if (height < r.min) {
+						r.min = height;
+					} else if (height > r.max) {
+						r.max = height;
+					}
+				}
+			}
+
+			bounds_grid[cx + cz * bounds_grid_width] = r;
+		}
+	}
+}
+
 void HeightMapShape3DSW::_setup(const Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
 	heights = p_heights;
 	width = p_width;
@@ -1959,6 +2122,8 @@ void HeightMapShape3DSW::_setup(const Vector<real_t> &p_heights, int p_width, in
 
 	aabb.position -= local_origin;
 
+	_build_accelerator();
+
 	configure(aabb);
 }
 
@@ -2017,7 +2182,7 @@ void HeightMapShape3DSW::set_data(const Variant &p_data) {
 	} else {
 		int heights_size = heights.size();
 		for (int i = 0; i < heights_size; ++i) {
-			float h = heights[i];
+			real_t h = heights[i];
 			if (h < min_height) {
 				min_height = h;
 			} else if (h > max_height) {

+ 21 - 0
servers/physics_3d/shape_3d_sw.h

@@ -32,6 +32,7 @@
 #define SHAPE_SW_H
 
 #include "core/math/geometry_3d.h"
+#include "core/templates/local_vector.h"
 #include "servers/physics_server_3d.h"
 
 class Shape3DSW;
@@ -385,6 +386,21 @@ struct HeightMapShape3DSW : public ConcaveShape3DSW {
 	int depth = 0;
 	Vector3 local_origin;
 
+	// Accelerator.
+	struct Range {
+		real_t min = 0.0;
+		real_t max = 0.0;
+	};
+	LocalVector<Range> bounds_grid;
+	int bounds_grid_width = 0;
+	int bounds_grid_depth = 0;
+
+	static const int BOUNDS_CHUNK_SIZE = 16;
+
+	_FORCE_INLINE_ const Range &_get_bounds_chunk(int p_x, int p_z) const {
+		return bounds_grid[(p_z * bounds_grid_width) + p_x];
+	}
+
 	_FORCE_INLINE_ real_t _get_height(int p_x, int p_z) const {
 		return heights[(p_z * width) + p_x];
 	}
@@ -397,6 +413,11 @@ struct HeightMapShape3DSW : public ConcaveShape3DSW {
 
 	void _get_cell(const Vector3 &p_point, int &r_x, int &r_y, int &r_z) const;
 
+	void _build_accelerator();
+
+	template <typename ProcessFunction>
+	bool _intersect_grid_segment(ProcessFunction &p_process, const Vector3 &p_begin, const Vector3 &p_end, int p_width, int p_depth, const Vector3 &offset, Vector3 &r_point, Vector3 &r_normal) const;
+
 	void _setup(const Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
 
 public: