Jelajahi Sumber

FTI - Optimize `SceneTree` traversal

lawnjelly 2 bulan lalu
induk
melakukan
c7764ef26b

+ 6 - 0
doc/classes/ProjectSettings.xml

@@ -2482,6 +2482,12 @@
 			[b]Dummy[/b] is a 3D physics server that does nothing and returns only dummy values, effectively disabling all 3D physics functionality.
 			Third-party extensions and modules can add other physics engines to select with this setting.
 		</member>
+		<member name="physics/3d/physics_interpolation/scene_traversal" type="String" setter="" getter="" default="&quot;DEFAULT&quot;">
+			The approach used for 3D scene traversal when physics interpolation is enabled.
+			- [code]DEFAULT[/code]: The default optimized method.
+			- [code]Legacy[/code]: The previous reference method used for scene tree traversal, which is slower.
+			- [code]Debug[/code]: Swaps between [code]DEFAULT[/code] and [code]Legacy[/code] methods on alternating frames, and provides logging information (which in turn makes it slower). Intended for debugging only; you should use the [code]DEFAULT[/code] method in most cases.
+		</member>
 		<member name="physics/3d/run_on_separate_thread" type="bool" setter="" getter="" default="false">
 			If [code]true[/code], the 3D physics server runs on a separate thread, making better use of multi-core CPUs. If [code]false[/code], the 3D physics server runs on the main thread. Running the physics server on a separate thread can increase performance, but restricts API access to only physics process.
 			[b]Note:[/b] When [member physics/3d/physics_engine] is set to [code]Jolt Physics[/code], enabling this setting will prevent the 3D physics server from being able to provide any context when reporting errors and warnings, and will instead always refer to nodes as [code]&lt;unknown&gt;[/code].

+ 6 - 0
scene/3d/node_3d.cpp

@@ -270,6 +270,10 @@ void Node3D::_notification(int p_what) {
 			// unless they need to perform specific tasks (like changing process modes).
 			fti_pump_xform();
 			fti_pump_property();
+
+			// Detect whether we are using an identity transform.
+			// This is an optimization for faster tree transform concatenation.
+			data.fti_is_identity_xform = data.local_transform == Transform3D();
 		} break;
 		case NOTIFICATION_SUSPENDED:
 		case NOTIFICATION_PAUSED: {
@@ -1448,6 +1452,8 @@ Node3D::Node3D() :
 	data.fti_on_tick_property_list = false;
 	data.fti_global_xform_interp_set = false;
 	data.fti_frame_xform_force_update = false;
+	data.fti_is_identity_xform = false;
+	data.fti_processed = false;
 
 #ifdef TOOLS_ENABLED
 	data.gizmos_disabled = false;

+ 3 - 0
scene/3d/node_3d.h

@@ -51,6 +51,7 @@ class Node3D : public Node {
 	GDCLASS(Node3D, Node);
 
 	friend class SceneTreeFTI;
+	friend class SceneTreeFTITests;
 
 public:
 	// Edit mode for the rotation.
@@ -143,6 +144,8 @@ private:
 		bool fti_on_tick_property_list : 1;
 		bool fti_global_xform_interp_set : 1;
 		bool fti_frame_xform_force_update : 1;
+		bool fti_is_identity_xform : 1;
+		bool fti_processed : 1;
 
 		RID visibility_parent;
 

+ 2 - 1
scene/main/node.h

@@ -176,7 +176,7 @@ private:
 		mutable int internal_children_back_count_cache = 0;
 		mutable int external_children_count_cache = 0;
 		mutable int index = -1; // relative to front, normal or back.
-		int depth = -1;
+		int32_t depth = -1;
 		int blocked = 0; // Safeguard that throws an error when attempting to modify the tree in a harmful way while being traversed.
 		StringName name;
 		SceneTree *tree = nullptr;
@@ -379,6 +379,7 @@ protected:
 
 	void _set_use_identity_transform(bool p_enable) { data.use_identity_transform = p_enable; }
 	bool _is_using_identity_transform() const { return data.use_identity_transform; }
+	// Returns the depth stored in `data.depth` (which is initialized to -1).
+	int32_t _get_scene_tree_depth() const { return data.depth; }
 
 	//call from SceneTree
 	void _call_input(const Ref<InputEvent> &p_event);

+ 348 - 39
scene/main/scene_tree_fti.cpp

@@ -33,14 +33,26 @@
 #include "scene_tree_fti.h"
 
 #include "core/config/engine.h"
+#include "core/config/project_settings.h"
 #include "core/math/transform_interpolator.h"
 #include "core/os/os.h"
 #include "scene/3d/visual_instance_3d.h"
 
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+#include "scene_tree_fti_tests.h"
+#endif
+
+#ifdef DEV_ENABLED
+
 // Uncomment this to enable some slow extra DEV_ENABLED
 // checks to ensure there aren't more than one object added to the lists.
 // #define GODOT_SCENE_TREE_FTI_EXTRA_CHECKS
 
+// Uncomment this to regularly print the tree that is being interpolated.
+// #define GODOT_SCENE_TREE_FTI_PRINT_TREE
+
+#endif
+
 void SceneTreeFTI::_reset_node3d_flags(Node3D &r_node) {
 	r_node.data.fti_on_tick_xform_list = false;
 	r_node.data.fti_on_tick_property_list = false;
@@ -48,6 +60,7 @@ void SceneTreeFTI::_reset_node3d_flags(Node3D &r_node) {
 	r_node.data.fti_on_frame_property_list = false;
 	r_node.data.fti_global_xform_interp_set = false;
 	r_node.data.fti_frame_xform_force_update = false;
+	r_node.data.fti_processed = false;
 }
 
 void SceneTreeFTI::_reset_flags(Node *p_node) {
@@ -104,18 +117,23 @@ void SceneTreeFTI::tick_update() {
 			// Needs a reset so jittering will stop.
 			s->fti_pump_xform();
 
+			// Optimization - detect whether we have rested at identity xform.
+			s->data.fti_is_identity_xform = s->data.local_transform == Transform3D();
+
 			// This may not get updated so set it to the same as global xform.
 			// TODO: double check this is the best value.
 			s->data.global_transform_interpolated = s->get_global_transform();
 
 			// Remove from interpolation list.
 			if (s->data.fti_on_frame_xform_list) {
-				s->data.fti_on_frame_xform_list = false;
+				_node_remove_from_frame_list(*s, false);
 			}
 
-			// Ensure that the spatial gets at least ONE further
+			// Ensure that the node gets at least ONE further
 			// update in the resting position in the next frame update.
-			s->data.fti_frame_xform_force_update = true;
+			if (!s->data.fti_frame_xform_force_update) {
+				_node_add_to_frame_list(*s, true);
+			}
 		}
 	}
 
@@ -232,16 +250,117 @@ void SceneTreeFTI::_node_3d_notify_set_property(Node3D &r_node) {
 	}
 }
 
+// Distributes the nodes queued on the frame lists into
+// `data.dirty_node_depth_lists`, bucketed by scene tree depth, so that
+// `frame_update()` can visit the buckets from the shallowest depth upwards.
+//
+// At the frame start both `frame_xform_list` and `frame_xform_list_forced`
+// are read; otherwise only the forced list is read.
+// Every visited node has its `fti_processed` flag cleared.
+void SceneTreeFTI::_create_depth_lists() {
+	// Highest valid bucket index. The array holds `scene_tree_depth_limit`
+	// entries, so nodes at or beyond the limit all share the last bucket.
+	const int32_t max_depth_index = (int32_t)data.scene_tree_depth_limit - 1;
+
+	uint32_t first_list = data.frame_start ? 0 : 1;
+
+	for (uint32_t l = first_list; l < 2; l++) {
+		LocalVector<Node3D *> &source_list = l == 0 ? data.frame_xform_list : data.frame_xform_list_forced;
+
+#ifdef DEBUG_ENABLED
+		bool log_nodes_moved_on_frame = (data.traversal_mode == TM_DEBUG) && !data.frame_start && data.periodic_debug_log;
+		if (log_nodes_moved_on_frame) {
+			if (source_list.size()) {
+				print_line(String("\n") + itos(source_list.size()) + " nodes moved during frame:");
+			} else {
+				print_line("0 nodes moved during frame.");
+			}
+		}
+#endif
+
+		for (uint32_t n = 0; n < source_list.size(); n++) {
+			Node3D *s = source_list[n];
+			s->data.fti_processed = false;
+
+			int32_t depth = s->_get_scene_tree_depth();
+
+			// This shouldn't happen, but wouldn't be terrible if it did.
+			DEV_ASSERT(depth >= 0);
+
+			// Clamp into the valid index range of the bucket array.
+			// Clamping to the limit itself would index one past the end,
+			// and a negative depth must never be used as an index.
+			depth = CLAMP(depth, 0, max_depth_index);
+
+			LocalVector<Node3D *> &dest_list = data.dirty_node_depth_lists[depth];
+#ifdef GODOT_SCENE_TREE_FTI_EXTRA_CHECKS
+			// Shouldn't really happen, but duplicates don't really matter that much.
+			if (dest_list.find(s) != -1) {
+				ERR_FAIL_COND(dest_list.find(s) != -1);
+			}
+#endif
+
+#ifdef DEBUG_ENABLED
+			if (log_nodes_moved_on_frame) {
+				print_line("\t" + s->get_name());
+			}
+#endif
+
+			// While reading the regular list, skip nodes flagged for a forced
+			// update: they are expected to be picked up from the forced list
+			// on the next pass, which avoids adding them to a bucket twice.
+			if ((l == 0) && s->data.fti_frame_xform_force_update) {
+				continue;
+			}
+
+			dest_list.push_back(s);
+		}
+	}
+}
+
+// Empties every per-depth bucket in `data.dirty_node_depth_lists`.
+void SceneTreeFTI::_clear_depth_lists() {
+	for (LocalVector<Node3D *> &depth_list : data.dirty_node_depth_lists) {
+		depth_list.clear();
+	}
+}
+
+// Appends `r_node` to one of the frame lists and raises the flag that
+// mirrors membership of that list:
+// - `p_forced == false`: `frame_xform_list` / `fti_on_frame_xform_list`.
+// - `p_forced == true`: `frame_xform_list_forced` / `fti_frame_xform_force_update`.
+// The matching flag is asserted to be clear on entry (dev builds only).
+void SceneTreeFTI::_node_add_to_frame_list(Node3D &r_node, bool p_forced) {
+	if (!p_forced) {
+		DEV_ASSERT(!r_node.data.fti_on_frame_xform_list);
+#ifdef GODOT_SCENE_TREE_FTI_EXTRA_CHECKS
+		// Slow sanity check that the node is not already listed.
+		int64_t found = data.frame_xform_list.find(&r_node);
+		ERR_FAIL_COND(found != -1);
+#endif
+		data.frame_xform_list.push_back(&r_node);
+		r_node.data.fti_on_frame_xform_list = true;
+		return;
+	}
+
+	DEV_ASSERT(!r_node.data.fti_frame_xform_force_update);
+#ifdef GODOT_SCENE_TREE_FTI_EXTRA_CHECKS
+	// Slow sanity check that the node is not already listed.
+	int64_t found = data.frame_xform_list_forced.find(&r_node);
+	ERR_FAIL_COND(found != -1);
+#endif
+	data.frame_xform_list_forced.push_back(&r_node);
+	r_node.data.fti_frame_xform_force_update = true;
+}
+
+// Removes `r_node` from one of the frame lists (element order is not
+// preserved) and lowers the flag that mirrors membership of that list.
+// `p_forced` selects the forced list, otherwise the regular list is used.
+// The matching flag is asserted to be set on entry (dev builds only).
+void SceneTreeFTI::_node_remove_from_frame_list(Node3D &r_node, bool p_forced) {
+	if (!p_forced) {
+		DEV_ASSERT(r_node.data.fti_on_frame_xform_list);
+		data.frame_xform_list.erase_unordered(&r_node);
+		r_node.data.fti_on_frame_xform_list = false;
+		return;
+	}
+
+	DEV_ASSERT(r_node.data.fti_frame_xform_force_update);
+	data.frame_xform_list_forced.erase_unordered(&r_node);
+	r_node.data.fti_frame_xform_force_update = false;
+}
+
 void SceneTreeFTI::_node_3d_notify_set_xform(Node3D &r_node) {
 	DEV_CHECK_ONCE(data.enabled);
 
 	if (!r_node.is_physics_interpolated()) {
 		// Force an update of non-interpolated to servers
 		// on the next traversal.
-		r_node.data.fti_frame_xform_force_update = true;
+		if (!r_node.data.fti_frame_xform_force_update) {
+			_node_add_to_frame_list(r_node, true);
+		}
+
+		// ToDo: Double check this is a win,
+		// non-interpolated nodes we always check for identity,
+		// *just in case*.
+		r_node.data.fti_is_identity_xform = r_node.get_transform() == Transform3D();
 		return;
 	}
 
+	r_node.data.fti_is_identity_xform = false;
+
 	if (!r_node.data.fti_on_tick_xform_list) {
 		r_node.data.fti_on_tick_xform_list = true;
 
@@ -257,11 +376,19 @@ void SceneTreeFTI::_node_3d_notify_set_xform(Node3D &r_node) {
 		// however there is probably no downside to leaving it set
 		// as it will be cleared on the next frame anyway.
 		// This line is left for reference.
-		// r_spatial.data.fti_frame_xform_force_update = false;
+		// r_node.data.fti_frame_xform_force_update = false;
 	}
 
 	if (!r_node.data.fti_on_frame_xform_list) {
-		r_node.data.fti_on_frame_xform_list = true;
+		_node_add_to_frame_list(r_node, false);
+	}
+
+	// If we are in the second half of a frame, always add to the force update list,
+	// because we ignore the tick update list during the second update.
+	if (data.in_frame) {
+		if (!r_node.data.fti_frame_xform_force_update) {
+			_node_add_to_frame_list(r_node, true);
+		}
 	}
 }
 
@@ -274,6 +401,14 @@ void SceneTreeFTI::node_3d_notify_delete(Node3D *p_node) {
 
 	MutexLock(data.mutex);
 
+	// Remove from frame lists.
+	if (p_node->data.fti_on_frame_xform_list) {
+		_node_remove_from_frame_list(*p_node, false);
+	}
+	if (p_node->data.fti_frame_xform_force_update) {
+		_node_remove_from_frame_list(*p_node, true);
+	}
+
 	// Ensure this is kept in sync with the lists, in case a node
 	// is removed and re-added to the scene tree multiple times
 	// on the same frame / tick.
@@ -305,12 +440,19 @@ void SceneTreeFTI::node_3d_notify_delete(Node3D *p_node) {
 
 	DEV_CHECK_ONCE(data.frame_property_list.find(p_node) == -1);
 	DEV_CHECK_ONCE(data.request_reset_list.find(p_node) == -1);
+
+	DEV_CHECK_ONCE(data.frame_xform_list.find(p_node) == -1);
+	DEV_CHECK_ONCE(data.frame_xform_list_forced.find(p_node) == -1);
 #endif
 }
 
-void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform, int p_depth) {
+void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_half_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform, int p_depth) {
 	Node3D *s = Object::cast_to<Node3D>(p_node);
 
+#ifdef DEBUG_ENABLED
+	data.debug_node_count++;
+#endif
+
 	// Don't recurse into hidden branches.
 	if (s && !s->is_visible()) {
 		// NOTE : If we change from recursing entire tree, we should do an is_visible_in_tree()
@@ -330,7 +472,7 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 	// so we should still recurse to children.
 	if (!s) {
 		for (uint32_t n = 0; n < num_children; n++) {
-			_update_dirty_nodes(children.ptr()[n], p_current_frame, p_interpolation_fraction, p_active, nullptr, p_depth + 1);
+			_update_dirty_nodes(children.ptr()[n], p_current_half_frame, p_interpolation_fraction, p_active, nullptr, p_depth + 1);
 		}
 		return;
 	}
@@ -355,13 +497,20 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 		} else {
 			// On the frame end, we want to re-interpolate *anything* that has moved
 			// since the frame start.
-
 			if (s->_test_dirty_bits(Node3D::DIRTY_GLOBAL_INTERPOLATED_TRANSFORM)) {
 				p_active = true;
+
+#if 0
+				if (data.periodic_debug_log) {
+					print_line("activating on : " + s->get_name());
+				}
+#endif
 			}
 		}
 	}
 
+	// ToDo : Check global_xform_interp is up to date for nodes
+	// that are not traversed by the depth lists.
 	if (data.frame_start) {
 		// Mark on the Node3D whether we have set global_transform_interp.
 		// This can later be used when calling `get_global_transform_interpolated()`
@@ -370,15 +519,15 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 	}
 
 	if (p_active) {
-#if 0
-		bool dirty = s->data.dirty & Node3D::DIRTY_GLOBAL_INTERP;
+#ifdef GODOT_SCENE_TREE_FTI_PRINT_TREE
+		bool dirty = s->data.dirty & Node3D::DIRTY_GLOBAL_INTERPOLATED;
 
-		if (data.debug) {
+		if (data.periodic_debug_log && !data.use_optimized_traversal_method && !data.frame_start) {
 			String sz;
 			for (int n = 0; n < p_depth; n++) {
 				sz += "\t";
 			}
-			print_line(sz + p_node->get_name() + (dirty ? " DIRTY" : ""));
+			print_line(sz + p_node->get_name() + (dirty ? " DIRTY" : "") + (s->get_transform() == Transform() ? "\t[IDENTITY]" : ""));
 		}
 #endif
 
@@ -386,9 +535,15 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 		// This will either use interpolation, or just use the current local if not interpolated.
 		Transform3D local_interp;
 		if (s->is_physics_interpolated()) {
-			// Make sure to call `get_transform()` rather than using local_transform directly, because
-			// local_transform may be dirty and need updating from rotation / scale.
-			TransformInterpolator::interpolate_transform_3d(s->data.local_transform_prev, s->get_transform(), local_interp, p_interpolation_fraction);
+			// There may be no need to interpolate if the node has not been moved recently
+			// and is therefore not on the tick list...
+			if (s->data.fti_on_tick_xform_list) {
+				// Make sure to call `get_transform()` rather than using local_transform directly, because
+				// local_transform may be dirty and need updating from rotation / scale.
+				TransformInterpolator::interpolate_transform_3d(s->data.local_transform_prev, s->get_transform(), local_interp, p_interpolation_fraction);
+			} else {
+				local_interp = s->get_transform();
+			}
 		} else {
 			local_interp = s->get_transform();
 		}
@@ -396,13 +551,13 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 		// Concatenate parent xform.
 		if (!s->is_set_as_top_level()) {
 			if (p_parent_global_xform) {
-				s->data.global_transform_interpolated = (*p_parent_global_xform) * local_interp;
+				s->data.global_transform_interpolated = s->data.fti_is_identity_xform ? *p_parent_global_xform : ((*p_parent_global_xform) * local_interp);
 			} else {
 				const Node3D *parent = s->get_parent_node_3d();
 
 				if (parent) {
-					const Transform3D &parent_glob = parent->data.fti_global_xform_interp_set ? parent->data.global_transform_interpolated : parent->data.global_transform;
-					s->data.global_transform_interpolated = parent_glob * local_interp;
+					const Transform3D &parent_glob = parent->data.fti_global_xform_interp_set ? parent->data.global_transform_interpolated : parent->get_global_transform();
+					s->data.global_transform_interpolated = s->data.fti_is_identity_xform ? parent_glob : parent_glob * local_interp;
 				} else {
 					s->data.global_transform_interpolated = local_interp;
 				}
@@ -425,6 +580,12 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 		// that have a deferred frame update.
 		s->data.fti_frame_xform_force_update = false;
 
+		// Ensure branches are only processed once on each traversal.
+		s->data.fti_processed = true;
+
+#ifdef DEBUG_ENABLED
+		data.debug_nodes_processed++;
+#endif
 	} // if active.
 
 	// Remove the dirty interp flag from EVERYTHING as we go.
@@ -432,7 +593,7 @@ void SceneTreeFTI::_update_dirty_nodes(Node *p_node, uint32_t p_current_frame, f
 
 	// Recurse to children.
 	for (uint32_t n = 0; n < num_children; n++) {
-		_update_dirty_nodes(children.ptr()[n], p_current_frame, p_interpolation_fraction, p_active, s->data.fti_global_xform_interp_set ? &s->data.global_transform_interpolated : &s->data.global_transform, p_depth + 1);
+		_update_dirty_nodes(p_node->get_child(n), p_current_half_frame, p_interpolation_fraction, p_active, s->data.fti_global_xform_interp_set ? &s->data.global_transform_interpolated : &s->data.global_transform, p_depth + 1);
 	}
 }
 
@@ -442,37 +603,127 @@ void SceneTreeFTI::frame_update(Node *p_root, bool p_frame_start) {
 	}
 	MutexLock(data.mutex);
 
-	_update_request_resets();
-
 	data.frame_start = p_frame_start;
+	data.in_frame = true;
+
+	_update_request_resets();
 
-	float f = Engine::get_singleton()->get_physics_interpolation_fraction();
+	float interpolation_fraction = Engine::get_singleton()->get_physics_interpolation_fraction();
 	uint32_t frame = Engine::get_singleton()->get_frames_drawn();
 
-// #define SCENE_TREE_FTI_TAKE_TIMINGS
-#ifdef SCENE_TREE_FTI_TAKE_TIMINGS
-	uint64_t before = OS::get_singleton()->get_ticks_usec();
-#endif
+	uint64_t before = 0;
+#ifdef DEBUG_ENABLED
+	if (data.traversal_mode == TM_DEBUG) {
+		before = OS::get_singleton()->get_ticks_usec();
+
+		if (p_frame_start && ((frame % ((60 * 15) - 3)) == 0)) {
+			data.periodic_debug_log = true;
+		}
+	}
 
-	if (data.debug) {
+#ifdef GODOT_SCENE_TREE_FTI_PRINT_TREE
+	if (data.periodic_debug_log) {
 		print_line(String("\nScene: ") + (data.frame_start ? "start" : "end") + "\n");
 	}
+#endif
+#endif
+
+	data.debug_node_count = 0;
+	data.debug_nodes_processed = 0;
+
+	uint32_t half_frame = p_frame_start ? (frame * 2) : ((frame * 2) + 1);
+
+	bool print_debug_stats = false;
+	switch (data.traversal_mode) {
+		case TM_LEGACY: {
+			data.use_optimized_traversal_method = false;
+		} break;
+		case TM_DEBUG: {
+			// Switch on alternate frames between the two methods.
+			data.use_optimized_traversal_method = (frame % 2) == 1;
+
+			// Odd number ensures we debug stats for both methods.
+			print_debug_stats = (frame % ((60 * 8) - 1)) == 0;
+		} break;
+		default: {
+			data.use_optimized_traversal_method = true;
+		} break;
+	}
+
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+	_tests->frame_update(p_root, half_frame, interpolation_fraction);
+#else
+
+	uint32_t skipped = 0;
+
+	if (!data.use_optimized_traversal_method) {
+		// Reference approach.
+		// Traverse the entire scene tree.
+		// Slow, but robust.
+		_update_dirty_nodes(p_root, half_frame, interpolation_fraction, false);
+	} else {
+		// Optimized approach.
+		// Traverse from depth lists.
+		// Be sure to check against the reference
+		// implementation when making changes.
+		_create_depth_lists();
+
+		for (uint32_t d = 0; d < data.scene_tree_depth_limit; d++) {
+			const LocalVector<Node3D *> &list = data.dirty_node_depth_lists[d];
 
-	// Probably not the most optimal approach as we traverse the entire SceneTree
-	// but simple and foolproof.
-	// Can be optimized later.
-	_update_dirty_nodes(p_root, frame, f, false);
+#if 0
+			if (list.size() > 0) {
+				print_line("depth " + itos(d) + ", contains " + itos(list.size()));
+			}
+#endif
+
+			for (uint32_t n = 0; n < list.size(); n++) {
+				// Already processed this frame?
+				Node3D *s = list[n];
 
-	if (!p_frame_start && data.debug) {
-		data.debug = false;
+				if (s->data.fti_processed) {
+#ifdef DEBUG_ENABLED
+					skipped++;
+#endif
+					continue;
+				}
+
+				// The first node requires a recursive visibility check
+				// up the tree, because `is_visible()` only returns the node
+				// local flag.
+				if (Object::cast_to<VisualInstance3D>(s)) {
+					if (!s->_is_vi_visible()) {
+#ifdef DEBUG_ENABLED
+						skipped++;
+#endif
+						continue;
+					}
+				} else if (!s->is_visible_in_tree()) {
+#ifdef DEBUG_ENABLED
+					skipped++;
+#endif
+					continue;
+				}
+
+				_update_dirty_nodes(s, half_frame, interpolation_fraction, true);
+			}
+		}
+
+		_clear_depth_lists();
 	}
 
-#ifdef SCENE_TREE_FTI_TAKE_TIMINGS
-	uint64_t after = OS::get_singleton()->get_ticks_usec();
-	if ((Engine::get_singleton()->get_frames_drawn() % 60) == 0) {
-		print_line("Took " + itos(after - before) + " usec " + (data.frame_start ? "start" : "end"));
+	if (print_debug_stats) {
+		uint64_t after = OS::get_singleton()->get_ticks_usec();
+		print_line(String(data.use_optimized_traversal_method ? "FTI optimized" : "FTI reference") + " nodes traversed : " + itos(data.debug_node_count) + (skipped == 0 ? "" : ", skipped " + itos(skipped)) + ", processed : " + itos(data.debug_nodes_processed) + ", took " + itos(after - before) + " usec " + (data.frame_start ? "(start)" : "(end)"));
+	}
+
+#endif //  not GODOT_SCENE_TREE_FTI_VERIFY
+
+	data.frame_xform_list_forced.clear();
+
+	if (!p_frame_start && data.periodic_debug_log) {
+		data.periodic_debug_log = false;
 	}
-#endif
 
 	// Update the properties once off at the end of the frame.
 	// No need for two passes for properties.
@@ -482,6 +733,64 @@ void SceneTreeFTI::frame_update(Node *p_root, bool p_frame_start) {
 			s->fti_update_servers_property();
 		}
 	}
+
+	// Marks the end of the frame.
+	// Enables us to recognize when change notifications
+	// come in _during_ a frame (they get treated differently).
+	if (!data.frame_start) {
+		data.in_frame = false;
+	}
+}
+
+// Registers the `physics/3d/physics_interpolation/scene_traversal` project
+// setting, reads its value and stores the resulting traversal mode in
+// `data.traversal_mode`. The chosen mode is reported via `print_verbose`.
+SceneTreeFTI::SceneTreeFTI() {
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+	_tests = memnew(SceneTreeFTITests(*this));
+#endif
+
+	// Define the setting with its default, then attach the enum hint.
+	Variant mode_setting = GLOBAL_DEF("physics/3d/physics_interpolation/scene_traversal", "DEFAULT");
+	ProjectSettings::get_singleton()->set_custom_property_info(PropertyInfo(Variant::STRING, "physics/3d/physics_interpolation/scene_traversal", PROPERTY_HINT_ENUM, "DEFAULT,Legacy,Debug"));
+
+	// Any value other than "Legacy" or "Debug" selects the default method.
+	data.traversal_mode = TM_DEFAULT;
+
+	if (mode_setting == "Legacy") {
+		data.traversal_mode = TM_LEGACY;
+	} else if (mode_setting == "Debug") {
+		// Don't allow debug mode in final exports,
+		// it will almost certainly be a mistake.
+#ifdef DEBUG_ENABLED
+		data.traversal_mode = TM_DEBUG;
+#else
+		data.traversal_mode = TM_DEFAULT;
+#endif
+	}
+
+	// Report which method ended up selected.
+	if (data.traversal_mode == TM_LEGACY) {
+		print_verbose("SceneTreeFTI: traversal method Legacy");
+	} else if (data.traversal_mode == TM_DEBUG) {
+		print_verbose("SceneTreeFTI: traversal method Debug");
+	} else {
+		print_verbose("SceneTreeFTI: traversal method DEFAULT");
+	}
+
+	// Make it obvious when the optional developer defines are active.
+#ifdef GODOT_SCENE_TREE_FTI_EXTRA_CHECKS
+	print_line("SceneTreeFTI : GODOT_SCENE_TREE_FTI_EXTRA_CHECKS defined");
+#endif
+#ifdef GODOT_SCENE_TREE_FTI_PRINT_TREE
+	print_line("SceneTreeFTI : GODOT_SCENE_TREE_FTI_PRINT_TREE defined");
+#endif
+}
+
+// Releases the verification helper, which only exists when
+// GODOT_SCENE_TREE_FTI_VERIFY is defined.
+SceneTreeFTI::~SceneTreeFTI() {
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+	if (_tests) {
+		// The helper is allocated with `memnew`, so it must be released with
+		// `memdelete` for its destructor to run; `memfree` would only hand
+		// the raw memory back.
+		memdelete(_tests);
+		_tests = nullptr;
+	}
+#endif
+}
 
 #endif // ndef _3D_DISABLED

+ 48 - 4
scene/main/scene_tree_fti.h

@@ -36,6 +36,12 @@
 class Node3D;
 class Node;
 struct Transform3D;
+class SceneTreeFTITests;
+
+#ifdef DEV_ENABLED
+// Uncomment this to verify traversal method results.
+// #define GODOT_SCENE_TREE_FTI_VERIFY
+#endif
 
 #ifdef _3D_DISABLED
 // Stubs
@@ -62,19 +68,37 @@ public:
 // but this covers the custom case of multiple scene trees.
 
 class SceneTreeFTI {
+	friend class SceneTreeFTITests;
+
+	// Strategy used by `frame_update()` to find the nodes to update.
+	enum TraversalMode : unsigned {
+		// Optimized method, traversing from the per-depth dirty lists.
+		TM_DEFAULT,
+		// Reference method, traversing the entire scene tree.
+		TM_LEGACY,
+		// Alternates between both methods on successive frames and prints stats.
+		TM_DEBUG,
+	};
+
 	struct Data {
+		static const uint32_t scene_tree_depth_limit = 32;
+
 		// Prev / Curr lists of Node3Ds having local xforms pumped.
 		LocalVector<Node3D *> tick_xform_list[2];
 
+		// The frame lists are changed nodes that need to start traversal,
+		// either longterm (on the tick list) or single frame forced.
+		LocalVector<Node3D *> frame_xform_list;
+		LocalVector<Node3D *> frame_xform_list_forced;
+
 		// Prev / Curr lists of Node3Ds having actively interpolated properties.
 		LocalVector<Node3D *> tick_property_list[2];
 
 		LocalVector<Node3D *> frame_property_list;
-
 		LocalVector<Node3D *> request_reset_list;
+		LocalVector<Node3D *> dirty_node_depth_lists[scene_tree_depth_limit];
 
+		// When we are using two alternating lists,
+		// which one is current.
 		uint32_t mirror = 0;
 
+		// Global on / off switch for SceneTreeFTI.
 		bool enabled = false;
 
 		// Whether we are in physics ticks, or in a frame.
@@ -85,10 +109,21 @@ class SceneTreeFTI {
 
 		Mutex mutex;
 
-		bool debug = false;
+		TraversalMode traversal_mode = TM_DEFAULT;
+		bool use_optimized_traversal_method = true;
+
+		// DEBUGGING
+		bool periodic_debug_log = false;
+		uint32_t debug_node_count = 0;
+		uint32_t debug_nodes_processed = 0;
+
 	} data;
 
-	void _update_dirty_nodes(Node *p_node, uint32_t p_current_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform = nullptr, int p_depth = 0);
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+	SceneTreeFTITests *_tests = nullptr;
+#endif
+
+	void _update_dirty_nodes(Node *p_node, uint32_t p_current_half_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform = nullptr, int p_depth = 0);
 	void _update_request_resets();
 
 	void _reset_flags(Node *p_node);
@@ -96,6 +131,12 @@ class SceneTreeFTI {
 	void _node_3d_notify_set_xform(Node3D &r_node);
 	void _node_3d_notify_set_property(Node3D &r_node);
 
+	void _node_add_to_frame_list(Node3D &r_node, bool p_forced);
+	void _node_remove_from_frame_list(Node3D &r_node, bool p_forced);
+
+	void _create_depth_lists();
+	void _clear_depth_lists();
+
 public:
 	// Hottest function, allow inlining the data.enabled check.
 	void node_3d_notify_changed(Node3D &r_node, bool p_transform_changed) {
@@ -123,7 +164,10 @@ public:
 	void set_enabled(Node *p_root, bool p_enabled);
 	bool is_enabled() const { return data.enabled; }
 
-	void set_debug_next_frame() { data.debug = true; }
+	void set_debug_next_frame() { data.periodic_debug_log = true; }
+
+	SceneTreeFTI();
+	~SceneTreeFTI();
 };
 
 #endif // ndef _3D_DISABLED

+ 246 - 0
scene/main/scene_tree_fti_tests.cpp

@@ -0,0 +1,246 @@
+/**************************************************************************/
+/*  scene_tree_fti_tests.cpp                                              */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef _3D_DISABLED
+
+#ifdef GODOT_SCENE_TREE_FTI_VERIFY
+#include "scene_tree_fti_tests.h"
+
+#include "scene/3d/node_3d.h"
+#include "scene/3d/visual_instance_3d.h"
+#include "scene/main/scene_tree_fti.h"
+
+void SceneTreeFTITests::debug_verify_failed(const Node3D *p_node_3d, const Transform3D &p_test) { // Logs p_test plus a per-node dump for p_node_3d and each Node3D ancestor.
+	print_line("VERIFY FAILED\n");
+	print_line("test xform : " + String(Variant(p_test)));
+	// The expected transform is repeated only in the entry of the first (starting) node.
+	bool first = true;
+	// Walk upwards via get_parent_node_3d() until there is no Node3D parent left.
+	while (p_node_3d) {
+		int32_t depth = MAX(p_node_3d->_get_scene_tree_depth(), 0); // Negative depths are clamped to zero.
+		String tabs; // One tab per depth level, used to indent this node's output.
+		for (int32_t n = 0; n < depth; n++) {
+			tabs += "\t";
+		}
+		// A trailing " ***" in the output marks a transform that compares equal to p_test.
+		bool interp_equal = p_node_3d->_get_cached_global_transform_interpolated() == p_test;
+		bool glob_equal = p_node_3d->get_global_transform() == p_test;
+
+		String sz = tabs + p_node_3d->get_name() + " [ " + p_node_3d->get_class_name() + " ]\n";
+
+		if (first) {
+			sz += tabs + "... " + String(Variant(p_test)) + "\n";
+		}
+		// "[I]" / "[i]": fti_global_xform_interp_set is true / false; "[g]": value of get_global_transform().
+		sz += tabs + (p_node_3d->data.fti_global_xform_interp_set ? "[I] " : "[i] ") + String(Variant(p_node_3d->_get_cached_global_transform_interpolated())) + (interp_equal ? " ***" : "") + "\n";
+		sz += tabs + "[g] " + String(Variant(p_node_3d->get_global_transform())) + (glob_equal ? " ***" : "");
+
+		print_line(sz);
+
+		p_node_3d = p_node_3d->get_parent_node_3d();
+		first = false;
+	}
+}
+
+void SceneTreeFTITests::update_dirty_nodes(Node *p_node, uint32_t p_current_half_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform, int p_depth) { // Recursive traversal of p_node's subtree; p_current_half_frame and p_depth are only forwarded to the recursive calls.
+	SceneTreeFTI::Data &data = _fti.data;
+
+	// There are two runs going on here.
+	// FIRST the naive entire scene tree (reference), where we are
+	// setting state (i.e. writing out xforms, and other state)
+	// SECOND the optimized run, where we are NOT
+	// writing state, but only verifying that the xforms calculated
+	// match those from the reference approach.
+	bool should_verify = (data.traversal_mode == SceneTreeFTI::TM_DEBUG) && data.use_optimized_traversal_method;
+	bool set_state = !should_verify;
+
+	Node3D *s = Object::cast_to<Node3D>(p_node);
+	// A Node3D that is not visible is skipped together with its whole subtree.
+	if (s && !s->is_visible()) {
+		return;
+	}
+	// Non-Node3D nodes only recurse: p_active is passed through, but no parent xform (nullptr) is handed down.
+	if (!s) {
+		for (int n = 0; n < p_node->get_child_count(); n++) {
+			update_dirty_nodes(p_node->get_child(n), p_current_half_frame, p_interpolation_fraction, p_active, nullptr, p_depth + 1);
+		}
+		return;
+	}
+	// The getter result is discarded; presumably the call refreshes the cached global transform - TODO confirm.
+	if (s->_test_dirty_bits(Node3D::DIRTY_GLOBAL_TRANSFORM)) {
+		_ALLOW_DISCARD_ s->get_global_transform();
+	}
+	// If the caller did not mark this branch active, check whether this node activates it.
+	if (!p_active) {
+		if (data.frame_start) {
+			if (s->data.fti_on_frame_xform_list || s->data.fti_frame_xform_force_update) {
+				p_active = true;
+			}
+		} else {
+			if (s->_test_dirty_bits(Node3D::DIRTY_GLOBAL_INTERPOLATED_TRANSFORM)) {
+				p_active = true;
+			}
+		}
+	}
+	// Note: this flag is written on both runs; it is not guarded by set_state.
+	if (data.frame_start) {
+		s->data.fti_global_xform_interp_set = p_active;
+	}
+
+	if (p_active) {
+		Transform3D local_interp; // Local transform used for this frame (interpolated or current).
+		if (s->is_physics_interpolated()) {
+			if (s->data.fti_on_tick_xform_list) {
+				TransformInterpolator::interpolate_transform_3d(s->data.local_transform_prev, s->get_transform(), local_interp, p_interpolation_fraction); // Blend from the previous local xform to the current one.
+			} else {
+				local_interp = s->get_transform();
+			}
+		} else {
+			local_interp = s->get_transform();
+		}
+		// Combine with the parent's global xform unless the node is set as top level.
+		if (!s->is_set_as_top_level()) {
+			if (p_parent_global_xform) {
+				if (should_verify) {
+					Transform3D test = (*p_parent_global_xform) * local_interp; // Recompute and compare exactly against the stored value.
+					if (s->data.disable_scale) {
+						test.basis.orthonormalize();
+					}
+					if (s->data.global_transform_interpolated != test) {
+						debug_verify_failed(s, test);
+						DEV_ASSERT(s->data.global_transform_interpolated == test);
+					}
+				} else {
+					s->data.global_transform_interpolated = s->data.fti_is_identity_xform ? (*p_parent_global_xform) : (*p_parent_global_xform) * local_interp; // Identity local xform: reuse the parent xform without multiplying.
+				}
+			} else { // No parent xform was passed down; look it up from the parent Node3D, if any.
+				const Node3D *parent = s->get_parent_node_3d();
+
+				if (parent) {
+					const Transform3D &parent_glob = parent->data.fti_global_xform_interp_set ? parent->data.global_transform_interpolated : parent->get_global_transform();
+
+					if (should_verify) {
+						Transform3D test = parent_glob * local_interp;
+						if (s->data.disable_scale) {
+							test.basis.orthonormalize();
+						}
+						if (s->data.global_transform_interpolated != test) {
+							debug_verify_failed(s, test);
+							DEV_ASSERT(s->data.global_transform_interpolated == test);
+						}
+
+					} else {
+						s->data.global_transform_interpolated = s->data.fti_is_identity_xform ? parent_glob : parent_glob * local_interp;
+					}
+				} else { // No Node3D parent: the local xform is stored as the global one (not verified).
+					if (set_state) {
+						s->data.global_transform_interpolated = local_interp;
+					}
+				}
+			}
+		} else { // Top-level node: the parent xform is ignored (not verified).
+			if (set_state) {
+				s->data.global_transform_interpolated = local_interp;
+			}
+		}
+		// The writes below are skipped on the verify-only run.
+		if (set_state) {
+			if (s->data.disable_scale) {
+				s->data.global_transform_interpolated.basis.orthonormalize();
+			}
+
+			s->fti_update_servers_xform();
+
+			s->data.fti_frame_xform_force_update = false;
+		}
+		// Set on both runs for every active node.
+		s->data.fti_processed = true;
+	} // if active.
+
+	if (set_state) {
+		s->_clear_dirty_bits(Node3D::DIRTY_GLOBAL_INTERPOLATED_TRANSFORM);
+	}
+	// Children receive this node's interpolated xform when it is flagged as set, otherwise its global_transform.
+	for (int n = 0; n < p_node->get_child_count(); n++) {
+		update_dirty_nodes(p_node->get_child(n), p_current_half_frame, p_interpolation_fraction, p_active, s->data.fti_global_xform_interp_set ? &s->data.global_transform_interpolated : &s->data.global_transform, p_depth + 1);
+	}
+}
+
+void SceneTreeFTITests::frame_update(Node *p_root, uint32_t p_half_frame, float p_interpolation_fraction) { // Runs update_dirty_nodes() twice: over the whole tree from p_root, then over the dirty depth lists.
+	SceneTreeFTI::Data &data = _fti.data;
+
+	// For testing, use both methods.
+	// FIRST the entire tree, writing out state.
+	{
+		data.use_optimized_traversal_method = false;
+		update_dirty_nodes(p_root, p_half_frame, p_interpolation_fraction, false); // p_active starts false; nodes activate themselves.
+	}
+
+	// SECOND the optimized depth lists only,
+	// no writing of state, and verifying results.
+	{
+		data.use_optimized_traversal_method = true;
+
+		_fti._create_depth_lists();
+		// Lists are visited in increasing depth index.
+		for (uint32_t d = 0; d < data.scene_tree_depth_limit; d++) {
+			const LocalVector<Node3D *> &list = data.dirty_node_depth_lists[d];
+
+			for (uint32_t n = 0; n < list.size(); n++) {
+				Node3D *s = list[n];
+				// Skip entries whose fti_processed flag is already set.
+				if (s->data.fti_processed) {
+					continue;
+				}
+				// Visibility check depends on type: _is_vi_visible() for VisualInstance3D, is_visible_in_tree() otherwise.
+				if (Object::cast_to<VisualInstance3D>(s)) {
+					if (!s->_is_vi_visible()) {
+						continue;
+					}
+				} else if (!s->is_visible_in_tree()) {
+					continue;
+				}
+				// Started as active, with the default (null) parent xform and depth arguments.
+				update_dirty_nodes(s, p_half_frame, p_interpolation_fraction, true);
+			}
+		}
+
+		_fti._clear_depth_lists();
+	}
+}
+
+SceneTreeFTITests::SceneTreeFTITests(SceneTreeFTI &p_fti) : // Binds this helper to the given SceneTreeFTI instance.
+		_fti(p_fti) {
+	print_line("SceneTreeFTI : GODOT_SCENE_TREE_FTI_VERIFY defined"); // Logged once per constructed helper.
+}
+
+#endif // def GODOT_SCENE_TREE_FTI_VERIFY
+
+#endif // ndef _3D_DISABLED

+ 50 - 0
scene/main/scene_tree_fti_tests.h

@@ -0,0 +1,50 @@
+/**************************************************************************/
+/*  scene_tree_fti_tests.h                                                */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include <stdint.h>
+
+class Node3D;
+class Node;
+struct Transform3D;
+class SceneTreeFTI;
+
+class SceneTreeFTITests { // Helper bound to one SceneTreeFTI; declares the traversal and frame-update entry points below.
+	SceneTreeFTI &_fti; // Held by reference, so the SceneTreeFTI must outlive this object.
+	// Failure-report hook taking the node and a transform to report.
+	void debug_verify_failed(const Node3D *p_node, const Transform3D &p_test);
+
+public:
+	void update_dirty_nodes(Node *p_node, uint32_t p_current_half_frame, float p_interpolation_fraction, bool p_active, const Transform3D *p_parent_global_xform = nullptr, int p_depth = 0); // Parent xform and depth are optional (defaults: nullptr, 0).
+	void frame_update(Node *p_root, uint32_t p_half_frame, float p_interpolation_fraction); // Per-frame entry point taking the tree root.
+
+	SceneTreeFTITests(SceneTreeFTI &p_fti);
+};