Browse Source

Merge pull request #104851 from Ivorforce/tracy

Add `profiler` option to `SCons` builds, with support for `tracy` and `perfetto`.
Thaddeus Crews 1 month ago
parent
commit
04bc282ae7

+ 8 - 0
SConstruct

@@ -202,6 +202,14 @@ opts.Add(BoolVariable("use_volk", "Use the volk library to load the Vulkan loade
 opts.Add(BoolVariable("accesskit", "Use AccessKit C SDK", True))
 opts.Add(("accesskit_sdk_path", "Path to the AccessKit C SDK", ""))
 opts.Add(BoolVariable("sdl", "Enable the SDL3 input driver", True))
+opts.Add(("profiler_path", "Path to the Profiler framework. Only tracy and perfetto are supported at the moment.", ""))
+opts.Add(
+    BoolVariable(
+        "profiler_sample_callstack",
+        "Profile random samples application-wide using a callstack based sampler.",
+        False,
+    )
+)
 
 # Advanced options
 opts.Add(

+ 1 - 0
core/SCsub

@@ -217,6 +217,7 @@ env.CommandNoCache(
 )
 
 # Chain load SCsubs
+SConscript("profiling/SCsub")
 SConscript("os/SCsub")
 SConscript("math/SCsub")
 SConscript("crypto/SCsub")

+ 69 - 0
core/profiling/SCsub

@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+from misc.utility.scons_hints import *
+
+import pathlib
+from typing import Tuple
+
+import profiling_builders
+
+Import("env")
+
+env.add_source_files(env.core_sources, "*.cpp")
+
+
+def get_profiler_and_path_from_path(path: pathlib.Path) -> Tuple[str, pathlib.Path]:
+    if not path.is_dir():
+        print("profiler_path must be empty or point to a directory.")
+        Exit(255)
+
+    if (path / "sdk" / "perfetto.cc").is_file():
+        # perfetto root directory.
+        return "perfetto", path / "sdk"
+    if (path / "perfetto.cc").is_file():
+        # perfetto sdk directory.
+        return "perfetto", path
+
+    if (path / "public" / "TracyClient.cpp").is_file():
+        # tracy root directory
+        return "tracy", path / "public"
+    if (path / "TracyClient.cpp").is_file():
+        # tracy public directory
+        return "tracy", path
+
+    print("Unrecognized profiler_path option. Please set a path to either tracy or perfetto.")
+    Exit(255)
+
+
+env["profiler"] = None
+if env["profiler_path"]:
+    profiler_name, profiler_path = get_profiler_and_path_from_path(pathlib.Path(env["profiler_path"]))
+    env["profiler"] = profiler_name
+
+    if profiler_name == "tracy":
+        env.Prepend(CPPPATH=[str(profiler_path.absolute())])
+
+        env_tracy = env.Clone()
+        env_tracy.Append(CPPDEFINES=["TRACY_ENABLE"])
+        if env["profiler_sample_callstack"]:
+            if env["platform"] not in ("windows", "linux", "android"):
+                # Reference the feature matrix in the tracy documentation.
+                print("Tracy does not support call stack sampling on this platform. Aborting.")
+                Exit(255)
+
+            # 62 is the maximum supported callstack depth reported by the tracy docs.
+            env_tracy.Append(CPPDEFINES=[("TRACY_CALLSTACK", 62)])
+        env_tracy.disable_warnings()
+        env_tracy.add_source_files(env.core_sources, str((profiler_path / "TracyClient.cpp").absolute()))
+    elif profiler_name == "perfetto":
+        env.Prepend(CPPPATH=[str(profiler_path.absolute())])
+
+        env_perfetto = env.Clone()
+        if env["profiler_sample_callstack"]:
+            print("Perfetto does not support call stack sampling. Aborting.")
+            Exit(255)
+        env_perfetto.disable_warnings()
+        env_perfetto.Prepend(CPPPATH=[str(profiler_path.absolute())])
+        env_perfetto.add_source_files(env.core_sources, str((profiler_path / "perfetto.cc").absolute()))
+
+
+env.CommandNoCache("profiling.gen.h", [env.Value(env["profiler"])], env.Run(profiling_builders.profiler_gen_builder))

+ 54 - 0
core/profiling/profiling.cpp

@@ -0,0 +1,54 @@
+/**************************************************************************/
+/*  profiling.cpp                                                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "profiling.h"
+
+#if defined(GODOT_USE_PERFETTO) && !defined(GODOT_USE_TRACY)
+// Only emitted when the perfetto branch of godot_init_profiler() below is the active one.
+PERFETTO_TRACK_EVENT_STATIC_STORAGE();
+#endif
+
+// Initializes the profiler selected at build time. Without a profiler this is an empty stub.
+void godot_init_profiler() {
+#if defined(GODOT_USE_TRACY)
+	// Send our first event to tracy; otherwise it doesn't start collecting data.
+	// FrameMark is kind of fitting because it communicates "this is where we started tracing".
+	FrameMark;
+#elif defined(GODOT_USE_PERFETTO)
+	perfetto::TracingInitArgs init_args;
+	init_args.backends |= perfetto::kSystemBackend;
+
+	perfetto::Tracing::Initialize(init_args);
+	perfetto::TrackEvent::Register();
+#endif
+}

+ 101 - 0
core/profiling/profiling.h

@@ -0,0 +1,101 @@
+/**************************************************************************/
+/*  profiling.h                                                           */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "core/typedefs.h"
+#include "profiling.gen.h"
+
+#if defined(GODOT_USE_TRACY)
+// Use the tracy profiler.
+
+#define TRACY_ENABLE
+#include <tracy/Tracy.hpp>
+
+#ifndef TRACY_CALLSTACK
+#define TRACY_CALLSTACK 0
+#endif
+
+// Define tracing macros. Each one is meant to be used as a statement terminated by `;` at the call site.
+
+// Expands to tracy's FrameMark.
+#define GodotProfileFrameMark FrameMark
+// Opens a tracy scoped zone named `m_zone_name`.
+#define GodotProfileZone(m_zone_name) ZoneScopedN(m_zone_name)
+// Declares the zone object of group `m_group_name` and opens the group's first zone, `m_zone_name`.
+#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name) ZoneNamedN(__godot_tracy_zone_##m_group_name, m_zone_name, true)
+// Ends the current zone of group `m_group_name` by destroying its zone object in place; `m_zone_name` is unused.
+// NOTE(review): unless a new zone is constructed in the same storage afterwards (as GodotProfileZoneGrouped
+// does), the object's destructor still runs again at scope exit - confirm before using this on its own.
+#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name) __godot_tracy_zone_##m_group_name.~ScopedZone()
+// Ends the current zone of group `m_group_name` and opens `m_zone_name` in its place, re-using the storage of the
+// zone object declared by GodotProfileZoneGroupedFirst. The static source location is named after the current
+// line, so this can be used at most once per line within a scope.
+#define GodotProfileZoneGrouped(m_group_name, m_zone_name)                                                                                                       \
+	GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name);                                                                                                  \
+	static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location, TracyLine){ m_zone_name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; \
+	new (&__godot_tracy_zone_##m_group_name) tracy::ScopedZone(&TracyConcat(__tracy_source_location, TracyLine), TRACY_CALLSTACK, true)
+
+void godot_init_profiler();
+
+#elif defined(GODOT_USE_PERFETTO)
+// Use the perfetto profiler.
+
+#include <perfetto.h>
+
+PERFETTO_DEFINE_CATEGORIES(
+		perfetto::Category("godot")
+				.SetDescription("All Godot Events"), );
+
+// See PERFETTO_INTERNAL_SCOPED_EVENT_FINALIZER
+// Ends the "godot" event opened for a zone group. Each opened event is ended at most once: either
+// explicitly through _end_now(), or by the destructor when the object goes out of scope.
+struct PerfettoGroupedEventEnder {
+	// True while an event begun for this group has not been ended yet.
+	bool _open = true;
+
+	// Ends the group's event if it is still open; does nothing otherwise.
+	_FORCE_INLINE_ void _end_now() {
+		if (_open) {
+			_open = false;
+			TRACE_EVENT_END("godot");
+		}
+	}
+
+	// Must be called right after a new event has been begun for this group.
+	_FORCE_INLINE_ void _reopen() {
+		_open = true;
+	}
+
+	_FORCE_INLINE_ ~PerfettoGroupedEventEnder() {
+		_end_now();
+	}
+};
+
+#define GodotProfileFrameMark // TODO
+// Opens a scoped "godot" event named `m_zone_name`.
+#define GodotProfileZone(m_zone_name) TRACE_EVENT("godot", m_zone_name);
+// Begins the first event of group `m_group_name` and declares the object that ends it.
+#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name) \
+	TRACE_EVENT_BEGIN("godot", m_zone_name);                    \
+	PerfettoGroupedEventEnder __godot_perfetto_zone_##m_group_name
+// Ends the current event of group `m_group_name` before the end of its scope; `m_zone_name` is unused.
+// This goes through _end_now() rather than an explicit destructor call, because the destructor of the
+// automatic object still runs at scope exit and must not end a second event.
+#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name) __godot_perfetto_zone_##m_group_name._end_now()
+// Ends the current event of group `m_group_name` (if still open) and begins `m_zone_name` in its place.
+#define GodotProfileZoneGrouped(m_group_name, m_zone_name) \
+	__godot_perfetto_zone_##m_group_name._end_now();       \
+	TRACE_EVENT_BEGIN("godot", m_zone_name);               \
+	__godot_perfetto_zone_##m_group_name._reopen()
+
+void godot_init_profiler();
+
+#else
+// No profiling; all macros are stubs.
+
+void godot_init_profiler();
+
+#define GodotProfileFrameMark
+#define GodotProfileZone(m_zone_name)
+#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name)
+#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name)
+#define GodotProfileZoneGrouped(m_group_name, m_zone_name)
+
+#endif

+ 13 - 0
core/profiling/profiling_builders.py

@@ -0,0 +1,13 @@
+"""Functions used to generate source files during build time"""
+
+import methods
+
+
+def profiler_gen_builder(target, source, env):
+    with methods.generated_wrapper(str(target[0])) as file:
+        if env["profiler"] == "tracy":
+            file.write("#define GODOT_USE_TRACY\n")
+            if env["profiler_sample_callstack"]:
+                file.write("#define TRACY_CALLSTACK 62\n")
+        if env["profiler"] == "perfetto":
+            file.write("#define GODOT_USE_PERFETTO\n")

+ 4 - 0
drivers/apple_embedded/os_apple_embedded.mm

@@ -41,6 +41,7 @@
 #include "core/io/dir_access.h"
 #include "core/io/file_access.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #import "drivers/apple/os_log_logger.h"
 #include "main/main.h"
 
@@ -205,6 +206,9 @@ bool OS_AppleEmbedded::iterate() {
 		return true;
 	}
 
+	GodotProfileFrameMark;
+	GodotProfileZone("OS_AppleEmbedded::iterate");
+
 	if (DisplayServer::get_singleton()) {
 		DisplayServer::get_singleton()->process_events();
 	}

+ 29 - 0
main/main.cpp

@@ -50,6 +50,7 @@
 #include "core/object/script_language.h"
 #include "core/os/os.h"
 #include "core/os/time.h"
+#include "core/profiling/profiling.h"
 #include "core/register_core_types.h"
 #include "core/string/translation_server.h"
 #include "core/version.h"
@@ -984,6 +985,7 @@ int Main::test_entrypoint(int argc, char *argv[], bool &tests_need_run) {
  */
 
 Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_phase) {
+	GodotProfileZone("setup");
 	Thread::make_main_thread();
 	set_current_thread_safe_for_nodes(true);
 
@@ -2908,6 +2910,7 @@ Error _parse_resource_dummy(void *p_data, VariantParser::Stream *p_stream, Ref<R
 }
 
 Error Main::setup2(bool p_show_boot_logo) {
+	GodotProfileZone("setup2");
 	OS::get_singleton()->benchmark_begin_measure("Startup", "Main::Setup2");
 
 	Thread::make_main_thread(); // Make whatever thread call this the main thread.
@@ -3743,6 +3746,7 @@ Error Main::setup2(bool p_show_boot_logo) {
 }
 
 void Main::setup_boot_logo() {
+	GodotProfileZone("setup_boot_logo");
 	MAIN_PRINT("Main: Load Boot Image");
 
 #if !defined(TOOLS_ENABLED) && defined(WEB_ENABLED)
@@ -3838,6 +3842,7 @@ static MainTimerSync main_timer_sync;
 // and should move on to `OS::run`, and EXIT_FAILURE otherwise for
 // an early exit with that error code.
 int Main::start() {
+	GodotProfileZone("start");
 	OS::get_singleton()->benchmark_begin_measure("Startup", "Main::Start");
 
 	ERR_FAIL_COND_V(!_start_success, EXIT_FAILURE);
@@ -4709,6 +4714,8 @@ static uint64_t navigation_process_max = 0;
 // will terminate the program. In case of failure, the OS exit code needs
 // to be set explicitly here (defaults to EXIT_SUCCESS).
 bool Main::iteration() {
+	GodotProfileZone("Main::iteration");
+	GodotProfileZoneGroupedFirst(_profile_zone, "prepare");
 	iterating++;
 
 	const uint64_t ticks = OS::get_singleton()->get_ticks_usec();
@@ -4754,6 +4761,8 @@ bool Main::iteration() {
 #endif // XR_DISABLED
 
 	for (int iters = 0; iters < advance.physics_steps; ++iters) {
+		GodotProfileZone("Physics Step");
+		GodotProfileZoneGroupedFirst(_physics_zone, "setup");
 		if (Input::get_singleton()->is_agile_input_event_flushing()) {
 			Input::get_singleton()->flush_buffered_events();
 		}
@@ -4766,18 +4775,22 @@ bool Main::iteration() {
 		// Prepare the fixed timestep interpolated nodes BEFORE they are updated
 		// by the physics server, otherwise the current and previous transforms
 		// may be the same, and no interpolation takes place.
+		GodotProfileZoneGrouped(_physics_zone, "main loop iteration prepare");
 		OS::get_singleton()->get_main_loop()->iteration_prepare();
 
 #ifndef PHYSICS_3D_DISABLED
+		GodotProfileZoneGrouped(_physics_zone, "PhysicsServer3D::sync");
 		PhysicsServer3D::get_singleton()->sync();
 		PhysicsServer3D::get_singleton()->flush_queries();
 #endif // PHYSICS_3D_DISABLED
 
 #ifndef PHYSICS_2D_DISABLED
+		GodotProfileZoneGrouped(_physics_zone, "PhysicsServer2D::sync");
 		PhysicsServer2D::get_singleton()->sync();
 		PhysicsServer2D::get_singleton()->flush_queries();
 #endif // PHYSICS_2D_DISABLED
 
+		GodotProfileZoneGrouped(_physics_zone, "physics_process");
 		if (OS::get_singleton()->get_main_loop()->physics_process(physics_step * time_scale)) {
 #ifndef PHYSICS_3D_DISABLED
 			PhysicsServer3D::get_singleton()->end_sync();
@@ -4795,9 +4808,11 @@ bool Main::iteration() {
 		uint64_t navigation_begin = OS::get_singleton()->get_ticks_usec();
 
 #ifndef NAVIGATION_2D_DISABLED
+		GodotProfileZoneGrouped(_profile_zone, "NavigationServer2D::physics_process");
 		NavigationServer2D::get_singleton()->physics_process(physics_step * time_scale);
 #endif // NAVIGATION_2D_DISABLED
 #ifndef NAVIGATION_3D_DISABLED
+		GodotProfileZoneGrouped(_profile_zone, "NavigationServer3D::physics_process");
 		NavigationServer3D::get_singleton()->physics_process(physics_step * time_scale);
 #endif // NAVIGATION_3D_DISABLED
 
@@ -4808,17 +4823,20 @@ bool Main::iteration() {
 #endif // !defined(NAVIGATION_2D_DISABLED) || !defined(NAVIGATION_3D_DISABLED)
 
 #ifndef PHYSICS_3D_DISABLED
+		GodotProfileZoneGrouped(_profile_zone, "3D physics");
 		PhysicsServer3D::get_singleton()->end_sync();
 		PhysicsServer3D::get_singleton()->step(physics_step * time_scale);
 #endif // PHYSICS_3D_DISABLED
 
 #ifndef PHYSICS_2D_DISABLED
+		GodotProfileZoneGrouped(_profile_zone, "2D physics");
 		PhysicsServer2D::get_singleton()->end_sync();
 		PhysicsServer2D::get_singleton()->step(physics_step * time_scale);
 #endif // PHYSICS_2D_DISABLED
 
 		message_queue->flush();
 
+		GodotProfileZoneGrouped(_profile_zone, "main loop iteration end");
 		OS::get_singleton()->get_main_loop()->iteration_end();
 
 		physics_process_ticks = MAX(physics_process_ticks, OS::get_singleton()->get_ticks_usec() - physics_begin); // keep the largest one for reference
@@ -4833,20 +4851,25 @@ bool Main::iteration() {
 
 	uint64_t process_begin = OS::get_singleton()->get_ticks_usec();
 
+	GodotProfileZoneGrouped(_profile_zone, "process");
 	if (OS::get_singleton()->get_main_loop()->process(process_step * time_scale)) {
 		exit = true;
 	}
 	message_queue->flush();
 
 #ifndef NAVIGATION_2D_DISABLED
+	GodotProfileZoneGrouped(_profile_zone, "process 2D navigation");
 	NavigationServer2D::get_singleton()->process(process_step * time_scale);
 #endif // NAVIGATION_2D_DISABLED
 #ifndef NAVIGATION_3D_DISABLED
+	GodotProfileZoneGrouped(_profile_zone, "process 3D navigation");
 	NavigationServer3D::get_singleton()->process(process_step * time_scale);
 #endif // NAVIGATION_3D_DISABLED
 
+	GodotProfileZoneGrouped(_profile_zone, "RenderingServer::sync");
 	RenderingServer::get_singleton()->sync(); //sync if still drawing from previous frames.
 
+	GodotProfileZoneGrouped(_profile_zone, "RenderingServer::draw");
 	const bool has_pending_resources_for_processing = RD::get_singleton() && RD::get_singleton()->has_pending_resources_for_processing();
 	bool wants_present = (DisplayServer::get_singleton()->can_any_window_draw() ||
 								 DisplayServer::get_singleton()->has_additional_outputs()) &&
@@ -4870,12 +4893,15 @@ bool Main::iteration() {
 	process_max = MAX(process_ticks, process_max);
 	uint64_t frame_time = OS::get_singleton()->get_ticks_usec() - ticks;
 
+	GodotProfileZoneGrouped(_profile_zone, "GDExtensionManager::frame");
 	GDExtensionManager::get_singleton()->frame();
 
+	GodotProfileZoneGrouped(_profile_zone, "ScriptServer::frame");
 	for (int i = 0; i < ScriptServer::get_language_count(); i++) {
 		ScriptServer::get_language(i)->frame();
 	}
 
+	GodotProfileZoneGrouped(_profile_zone, "AudioServer::update");
 	AudioServer::get_singleton()->update();
 
 	if (EngineDebugger::is_active()) {
@@ -4914,6 +4940,7 @@ bool Main::iteration() {
 	iterating--;
 
 	if (movie_writer) {
+		GodotProfileZoneGrouped(_profile_zone, "movie_writer->add_frame");
 		movie_writer->add_frame();
 	}
 
@@ -4940,6 +4967,7 @@ bool Main::iteration() {
 	SceneTree *scene_tree = SceneTree::get_singleton();
 	bool wake_for_events = scene_tree && scene_tree->is_accessibility_enabled();
 
+	GodotProfileZoneGrouped(_profile_zone, "OS::add_frame_delay");
 	OS::get_singleton()->add_frame_delay(DisplayServer::get_singleton()->window_can_draw(), wake_for_events);
 
 #ifdef TOOLS_ENABLED
@@ -4979,6 +5007,7 @@ void Main::force_redraw() {
  * The order matters as some of those steps are linked with each other.
  */
 void Main::cleanup(bool p_force) {
+	GodotProfileZone("cleanup");
 	OS::get_singleton()->benchmark_begin_measure("Shutdown", "Main::Cleanup");
 	if (!p_force) {
 		ERR_FAIL_COND(!_start_success);

+ 3 - 0
platform/android/java_godot_lib_jni.cpp

@@ -49,6 +49,7 @@
 #include "core/config/project_settings.h"
 #include "core/input/input.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 #include "servers/rendering/rendering_server.h"
 
@@ -151,6 +152,8 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setVirtualKeyboardHei
 }
 
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *env, jclass clazz, jobject p_godot_instance, jobject p_asset_manager, jobject p_godot_io, jobject p_net_utils, jobject p_directory_access_handler, jobject p_file_access_handler, jboolean p_use_apk_expansion) {
+	godot_init_profiler();
+
 	JavaVM *jvm;
 	env->GetJavaVM(&jvm);
 

+ 3 - 0
platform/android/os_android.cpp

@@ -42,6 +42,7 @@
 #include "core/extension/gdextension_manager.h"
 #include "core/io/xml_parser.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #include "drivers/unix/dir_access_unix.h"
 #include "drivers/unix/file_access_unix.h"
 #ifdef TOOLS_ENABLED
@@ -364,6 +365,8 @@ void OS_Android::main_loop_begin() {
 }
 
 bool OS_Android::main_loop_iterate(bool *r_should_swap_buffers) {
+	GodotProfileFrameMark;
+	GodotProfileZone("OS_Android::main_loop_iterate");
 	if (!main_loop) {
 		return false;
 	}

+ 3 - 0
platform/ios/main_ios.mm

@@ -30,6 +30,7 @@
 
 #import "os_ios.h"
 
+#include "core/profiling/profiling.h"
 #import "drivers/apple_embedded/godot_app_delegate.h"
 #import "drivers/apple_embedded/main_utilities.h"
 #include "main/main.h"
@@ -55,6 +56,8 @@ int apple_embedded_main(int argc, char **argv) {
 	char *fargv[64];
 	argc = process_args(argc, argv, fargv);
 
+	godot_init_profiler();
+
 	Error err = Main::setup(fargv[0], argc - 1, &fargv[1], false);
 
 	if (err != OK) {

+ 3 - 0
platform/linuxbsd/godot_linuxbsd.cpp

@@ -30,6 +30,7 @@
 
 #include "os_linuxbsd.h"
 
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 
 #include <unistd.h>
@@ -93,6 +94,8 @@ int main(int argc, char *argv[]) {
 	setrlimit(RLIMIT_STACK, &stack_lim);
 #endif
 
+	godot_init_profiler();
+
 	OS_LinuxBSD os;
 
 	setlocale(LC_CTYPE, "");

+ 3 - 0
platform/linuxbsd/os_linuxbsd.cpp

@@ -37,6 +37,7 @@
 #ifdef SDL_ENABLED
 #include "drivers/sdl/joypad_sdl.h"
 #endif
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 #include "servers/display/display_server.h"
 #include "servers/rendering/rendering_server.h"
@@ -970,6 +971,8 @@ String OS_LinuxBSD::get_system_dir(SystemDir p_dir, bool p_shared_storage) const
 }
 
 void OS_LinuxBSD::run() {
+	GodotProfileFrameMark;
+	GodotProfileZone("OS_LinuxBSD::run");
 	if (!main_loop) {
 		return;
 	}

+ 4 - 0
platform/macos/godot_content_view.mm

@@ -34,6 +34,7 @@
 #import "godot_window.h"
 #import "key_mapping_macos.h"
 
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 
 @implementation GodotContentLayerDelegate
@@ -56,6 +57,9 @@
 - (void)displayLayer:(CALayer *)layer {
 	DisplayServerMacOS *ds = (DisplayServerMacOS *)DisplayServer::get_singleton();
 	if (OS::get_singleton()->get_main_loop() && ds->get_is_resizing() && need_redraw) {
+		GodotProfileFrameMark;
+		GodotProfileZone("[GodotContentLayerDelegate displayLayer]");
+
 		Main::force_redraw();
 		if (!Main::is_iterating()) { // Avoid cyclic loop.
 			Main::iteration();

+ 3 - 0
platform/macos/godot_main_macos.mm

@@ -32,6 +32,7 @@
 
 #import "godot_application.h"
 
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 
 #if defined(SANITIZERS_ENABLED)
@@ -39,6 +40,8 @@
 #endif
 
 int main(int argc, char **argv) {
+	godot_init_profiler();
+
 #if defined(VULKAN_ENABLED)
 	setenv("MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE", "1", 1); // MoltenVK - enable full component swizzling support.
 	setenv("MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST", "0", 1); // MoltenVK - use linear surface scaling. TODO: remove when full DPI scaling is implemented.

+ 9 - 0
platform/macos/os_macos.mm

@@ -41,6 +41,7 @@
 #include "core/crypto/crypto_core.h"
 #include "core/io/file_access.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #include "core/version_generated.gen.h"
 #include "drivers/apple/os_log_logger.h"
 #include "main/main.h"
@@ -1080,6 +1081,8 @@ static void handle_interrupt(int sig) {
 }
 
 void OS_MacOS_NSApp::start_main() {
+	godot_init_profiler();
+
 	Error err;
 	@autoreleasepool {
 		err = Main::setup(execpath, argc, argv);
@@ -1103,6 +1106,9 @@ void OS_MacOS_NSApp::start_main() {
 				pre_wait_observer = CFRunLoopObserverCreateWithHandler(kCFAllocatorDefault, kCFRunLoopBeforeWaiting, true, 0, ^(CFRunLoopObserverRef observer, CFRunLoopActivity activity) {
 					@autoreleasepool {
 						@try {
+							GodotProfileFrameMark;
+							GodotProfileZone("macOS main loop");
+
 							if (ds_mac) {
 								ds_mac->_process_events(false);
 							} else if (ds) {
@@ -1276,6 +1282,9 @@ void OS_MacOS_Embedded::run() {
 		while (true) {
 			@autoreleasepool {
 				@try {
+					GodotProfileFrameMark;
+					GodotProfileZone("macOS embedded main loop");
+
 					ds->process_events();
 
 #ifdef SDL_ENABLED

+ 3 - 0
platform/visionos/main_visionos.mm

@@ -30,6 +30,7 @@
 
 #import "os_visionos.h"
 
+#include "core/profiling/profiling.h"
 #import "drivers/apple_embedded/godot_app_delegate.h"
 #import "drivers/apple_embedded/main_utilities.h"
 #include "main/main.h"
@@ -50,6 +51,8 @@ int apple_embedded_main(int argc, char **argv) {
 	char *fargv[64];
 	argc = process_args(argc, argv, fargv);
 
+	godot_init_profiler();
+
 	Error err = Main::setup(fargv[0], argc - 1, &fargv[1], false);
 
 	if (err != OK) {

+ 3 - 0
platform/web/os_web.cpp

@@ -40,6 +40,7 @@
 #include "core/debugger/engine_debugger.h"
 #include "core/io/file_access.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #include "drivers/unix/dir_access_unix.h"
 #include "drivers/unix/file_access_unix.h"
 #include "main/main.h"
@@ -79,6 +80,8 @@ void OS_Web::fs_sync_callback() {
 }
 
 bool OS_Web::main_loop_iterate() {
+	GodotProfileFrameMark;
+	GodotProfileZone("OS_Web::main_loop_iterate");
 	if (is_userfs_persistent() && idb_needs_sync && !idb_is_syncing) {
 		idb_is_syncing = true;
 		idb_needs_sync = false;

+ 3 - 0
platform/web/web_main.cpp

@@ -35,6 +35,7 @@
 #include "core/config/engine.h"
 #include "core/io/file_access.h"
 #include "core/io/resource_loader.h"
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 #include "scene/main/scene_tree.h"
 #include "scene/main/window.h" // SceneTree only forward declares it.
@@ -126,6 +127,8 @@ void print_web_header() {
 
 /// When calling main, it is assumed FS is setup and synced.
 extern EMSCRIPTEN_KEEPALIVE int godot_web_main(int argc, char *argv[]) {
+	godot_init_profiler();
+
 	os = new OS_Web();
 
 #ifdef TOOLS_ENABLED

+ 3 - 0
platform/windows/godot_windows.cpp

@@ -30,6 +30,7 @@
 
 #include "os_windows.h"
 
+#include "core/profiling/profiling.h"
 #include "main/main.h"
 
 #include <clocale>
@@ -66,6 +67,8 @@ char *wc_to_utf8(const wchar_t *wc) {
 }
 
 int widechar_main(int argc, wchar_t **argv) {
+	godot_init_profiler();
+
 	OS_Windows os(nullptr);
 
 	setlocale(LC_CTYPE, "");

+ 3 - 0
platform/windows/os_windows.cpp

@@ -39,6 +39,7 @@
 #include "core/debugger/script_debugger.h"
 #include "core/io/marshalls.h"
 #include "core/os/main_loop.h"
+#include "core/profiling/profiling.h"
 #include "core/version_generated.gen.h"
 #include "drivers/windows/dir_access_windows.h"
 #include "drivers/windows/file_access_windows.h"
@@ -2329,6 +2330,8 @@ void OS_Windows::run() {
 	main_loop->initialize();
 
 	while (true) {
+		GodotProfileFrameMark;
+		GodotProfileZone("OS_Windows::run");
 		DisplayServer::get_singleton()->process_events(); // get rid of pending events
 		if (Main::iteration()) {
 			break;

+ 2 - 0
scene/main/scene_tree.cpp

@@ -37,6 +37,7 @@
 #include "core/object/message_queue.h"
 #include "core/object/worker_thread_pool.h"
 #include "core/os/os.h"
+#include "core/profiling/profiling.h"
 #include "node.h"
 #include "scene/animation/tween.h"
 #include "scene/debugger/scene_debugger.h"
@@ -572,6 +573,7 @@ void SceneTree::set_group(const StringName &p_group, const String &p_name, const
 }
 
 void SceneTree::initialize() {
+	GodotProfileZone("SceneTree::initialize");
 	ERR_FAIL_NULL(root);
 	MainLoop::initialize();
 	root->_set_tree(this);

+ 10 - 1
servers/rendering/renderer_viewport.cpp

@@ -33,6 +33,7 @@
 #include "core/config/project_settings.h"
 #include "core/math/transform_interpolator.h"
 #include "core/object/worker_thread_pool.h"
+#include "core/profiling/profiling.h"
 #include "renderer_canvas_cull.h"
 #include "renderer_scene_cull.h"
 #include "rendering_server_globals.h"
@@ -733,6 +734,7 @@ void RendererViewport::_draw_viewport(Viewport *p_viewport) {
 }
 
 void RendererViewport::draw_viewports(bool p_swap_buffers) {
+	GodotProfileZoneGroupedFirst(_profile_zone, "prepare viewports");
 	timestamp_vp_map.clear();
 
 #ifndef XR_DISABLED
@@ -750,6 +752,7 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) {
 	}
 
 	if (sorted_active_viewports_dirty) {
+		GodotProfileZoneGrouped(_profile_zone, "_sort_active_viewports");
 		sorted_active_viewports = _sort_active_viewports();
 		sorted_active_viewports_dirty = false;
 	}
@@ -758,11 +761,12 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) {
 	//draw viewports
 	RENDER_TIMESTAMP("> Render Viewports");
 
+	GodotProfileZoneGrouped(_profile_zone, "render viewports");
+
 	//determine what is visible
 	draw_viewports_pass++;
 
 	for (int i = sorted_active_viewports.size() - 1; i >= 0; i--) { //to compute parent dependency, must go in reverse draw order
-
 		Viewport *vp = sorted_active_viewports[i];
 
 		if (vp->update_mode == RS::VIEWPORT_UPDATE_DISABLED) {
@@ -821,6 +825,9 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) {
 	int draw_calls_used = 0;
 
 	for (int i = 0; i < sorted_active_viewports.size(); i++) {
+		// TODO Somehow print the index
+		GodotProfileZone("render viewport");
+
 		Viewport *vp = sorted_active_viewports[i];
 
 		if (vp->last_pass != draw_viewports_pass) {
@@ -925,6 +932,7 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) {
 		vertices_drawn += vp->render_info.info[RS::VIEWPORT_RENDER_INFO_TYPE_CANVAS][RS::VIEWPORT_RENDER_INFO_PRIMITIVES_IN_FRAME];
 		draw_calls_used += vp->render_info.info[RS::VIEWPORT_RENDER_INFO_TYPE_CANVAS][RS::VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME];
 	}
+
 	RSG::scene->set_debug_draw_mode(RS::VIEWPORT_DEBUG_DRAW_DISABLED);
 
 	total_objects_drawn = objects_drawn;
@@ -933,6 +941,7 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) {
 
 	RENDER_TIMESTAMP("< Render Viewports");
 
+	GodotProfileZoneGrouped(_profile_zone, "rasterizer->blit_render_targets_to_screen");
 	if (p_swap_buffers && !blit_to_screen_list.is_empty()) {
 		for (const KeyValue<int, Vector<BlitToScreen>> &E : blit_to_screen_list) {
 			RSG::rasterizer->blit_render_targets_to_screen(E.key, E.value.ptr(), E.value.size());

+ 21 - 0
servers/rendering/rendering_device.cpp

@@ -37,6 +37,7 @@
 #include "core/config/project_settings.h"
 #include "core/io/dir_access.h"
 #include "core/io/file_access.h"
+#include "core/profiling/profiling.h"
 #include "modules/modules_enabled.gen.h"
 #include "servers/rendering/rendering_shader_container.h"
 
@@ -6256,12 +6257,16 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const {
 // Closes out the frame currently being recorded, executes it, then moves on
 // to the next entry of `frames` and starts recording into it.
 //
 // p_present is forwarded unchanged to _execute_frame(); it is not consulted
 // anywhere else in this function.
 //
 // NOTE(review): the GodotProfileZoneGroupedFirst/GodotProfileZoneGrouped
 // calls below are profiling annotations that share the `_profile_zone` group
 // id, each one placed directly before the call its label names. Presumably
 // they come from core/profiling/profiling.h - confirm their exact scoping
 // semantics there.
 void RenderingDevice::swap_buffers(bool p_present) {
 	// NOTE(review): presumably rejects calls made off the render thread - confirm against the macro definition.
 	ERR_RENDER_THREAD_GUARD();
 
+	GodotProfileZoneGroupedFirst(_profile_zone, "_end_frame");
 	_end_frame();
+
+	GodotProfileZoneGrouped(_profile_zone, "_execute_frame");
 	_execute_frame(p_present);
 
 	// Advance to the next frame and begin recording again.
 	// The modulo wraps `frame` back to 0 once it passes the last entry of `frames`.
 	frame = (frame + 1) % frames.size();
 
+	GodotProfileZoneGrouped(_profile_zone, "_begin_frame");
 	// `true` is passed unconditionally (independent of p_present), so _begin_frame()
 	// takes its `p_presented` branch: update_perf_report() and
 	// driver->linear_uniform_set_pools_reset(frame).
 	_begin_frame(true);
 }
 
@@ -6389,27 +6394,34 @@ uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const {
 }
 
 void RenderingDevice::_begin_frame(bool p_presented) {
+	GodotProfileZoneGroupedFirst(_profile_zone, "_stall_for_frame");
 	// Before writing to this frame, wait for it to be finished.
 	_stall_for_frame(frame);
 
 	if (command_pool_reset_enabled) {
+		GodotProfileZoneGrouped(_profile_zone, "driver->command_pool_reset");
 		bool reset = driver->command_pool_reset(frames[frame].command_pool);
 		ERR_FAIL_COND(!reset);
 	}
 
 	if (p_presented) {
+		GodotProfileZoneGrouped(_profile_zone, "update_perf_report");
 		update_perf_report();
 		driver->linear_uniform_set_pools_reset(frame);
 	}
 
 	// Begin recording on the frame's command buffers.
+	GodotProfileZoneGrouped(_profile_zone, "driver->begin_segment");
 	driver->begin_segment(frame, frames_drawn++);
+	GodotProfileZoneGrouped(_profile_zone, "driver->command_buffer_begin");
 	driver->command_buffer_begin(frames[frame].command_buffer);
 
 	// Reset the graph.
+	GodotProfileZoneGrouped(_profile_zone, "draw_graph.begin");
 	draw_graph.begin();
 
 	// Erase pending resources.
+	GodotProfileZoneGrouped(_profile_zone, "_free_pending_resources");
 	_free_pending_resources(frame);
 
 	// Advance staging buffers if used.
@@ -6446,11 +6458,16 @@ void RenderingDevice::_end_frame() {
 
 	// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
 	RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
+	GodotProfileZoneGroupedFirst(_profile_zone, "_submit_transfer_workers");
 	_submit_transfer_workers(command_buffer);
+	GodotProfileZoneGrouped(_profile_zone, "_submit_transfer_barriers");
 	_submit_transfer_barriers(command_buffer);
 
+	GodotProfileZoneGrouped(_profile_zone, "draw_graph.end");
 	draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
+	GodotProfileZoneGrouped(_profile_zone, "driver->command_buffer_end");
 	driver->command_buffer_end(command_buffer);
+	GodotProfileZoneGrouped(_profile_zone, "driver->end_segment");
 	driver->end_segment();
 }
 
@@ -6542,11 +6559,13 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) {
 	thread_local PackedByteArray packed_byte_array;
 
 	if (frames[p_frame].fence_signaled) {
+		GodotProfileZoneGroupedFirst(_profile_zone, "driver->fence_wait");
 		driver->fence_wait(frames[p_frame].fence);
 		frames[p_frame].fence_signaled = false;
 
 		// Flush any pending requests for asynchronous buffer downloads.
 		if (!frames[p_frame].download_buffer_get_data_requests.is_empty()) {
+			GodotProfileZoneGrouped(_profile_zone, "flush asynchronous buffer downloads");
 			for (uint32_t i = 0; i < frames[p_frame].download_buffer_get_data_requests.size(); i++) {
 				const BufferGetDataRequest &request = frames[p_frame].download_buffer_get_data_requests[i];
 				packed_byte_array.resize(request.size);
@@ -6571,6 +6590,7 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) {
 
 		// Flush any pending requests for asynchronous texture downloads.
 		if (!frames[p_frame].download_texture_get_data_requests.is_empty()) {
+			GodotProfileZoneGrouped(_profile_zone, "flush asynchronous texture downloads");
 			uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP);
 			for (uint32_t i = 0; i < frames[p_frame].download_texture_get_data_requests.size(); i++) {
 				const TextureGetDataRequest &request = frames[p_frame].download_texture_get_data_requests[i];
@@ -6618,6 +6638,7 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) {
 				request.callback.call(packed_byte_array);
 			}
 
+			GodotProfileZoneGrouped(_profile_zone, "clear buffers");
 			frames[p_frame].download_texture_staging_buffers.clear();
 			frames[p_frame].download_buffer_texture_copy_regions.clear();
 			frames[p_frame].download_texture_mipmap_offsets.clear();

+ 17 - 0
servers/rendering/rendering_server_default.cpp

@@ -31,6 +31,7 @@
 #include "rendering_server_default.h"
 
 #include "core/os/os.h"
+#include "core/profiling/profiling.h"
 #include "renderer_canvas_cull.h"
 #include "renderer_scene_cull.h"
 #include "rendering_server_globals.h"
@@ -66,6 +67,7 @@ void RenderingServerDefault::request_frame_drawn_callback(const Callable &p_call
 }
 
 void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
+	GodotProfileZoneGroupedFirst(_profile_zone, "rasterizer->begin_frame");
 	RSG::rasterizer->begin_frame(frame_step);
 
 	TIMESTAMP_BEGIN()
@@ -75,6 +77,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
 	RENDER_TIMESTAMP("Prepare Render Frame");
 
 #ifndef XR_DISABLED
+	GodotProfileZoneGrouped(_profile_zone, "xr_server->pre_render");
 	XRServer *xr_server = XRServer::get_singleton();
 	if (xr_server != nullptr) {
 		// Let XR server know we're about to render a frame.
@@ -82,30 +85,41 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
 	}
 #endif // XR_DISABLED
 
+	GodotProfileZoneGrouped(_profile_zone, "scene->update");
 	RSG::scene->update(); //update scenes stuff before updating instances
+	GodotProfileZoneGrouped(_profile_zone, "canvas->update");
 	RSG::canvas->update();
 
 	frame_setup_time = double(OS::get_singleton()->get_ticks_usec() - time_usec) / 1000.0;
 
+	GodotProfileZoneGrouped(_profile_zone, "particles_storage->update_particles");
 	RSG::particles_storage->update_particles(); //need to be done after instances are updated (colliders and particle transforms), and colliders are rendered
 
+	GodotProfileZoneGrouped(_profile_zone, "scene->render_probes");
 	RSG::scene->render_probes();
 
+	GodotProfileZoneGrouped(_profile_zone, "viewport->draw_viewports");
 	RSG::viewport->draw_viewports(p_swap_buffers);
+
+	GodotProfileZoneGrouped(_profile_zone, "canvas_render->update");
 	RSG::canvas_render->update();
 
+	GodotProfileZoneGrouped(_profile_zone, "rasterizer->end_frame");
 	RSG::rasterizer->end_frame(p_swap_buffers);
 
 #ifndef XR_DISABLED
 	if (xr_server != nullptr) {
+		GodotProfileZone("xr_server->end_frame");
 		// let our XR server know we're done so we can get our frame timing
 		xr_server->end_frame();
 	}
 #endif // XR_DISABLED
 
+	GodotProfileZoneGrouped(_profile_zone, "update_visibility_notifiers");
 	RSG::canvas->update_visibility_notifiers();
 	RSG::scene->update_visibility_notifiers();
 
+	GodotProfileZoneGrouped(_profile_zone, "post_draw_steps");
 	if (create_thread) {
 		callable_mp(this, &RenderingServerDefault::_run_post_draw_steps).call_deferred();
 	} else {
@@ -113,6 +127,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
 	}
 
 	if (RSG::utilities->get_captured_timestamps_count()) {
+		GodotProfileZoneGrouped(_profile_zone, "frame_profile");
 		Vector<FrameProfileArea> new_profile;
 		if (RSG::utilities->capturing_timestamps) {
 			new_profile.resize(RSG::utilities->get_captured_timestamps_count());
@@ -143,6 +158,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
 	frame_profile_frame = RSG::utilities->get_captured_timestamps_frame();
 
 	if (print_gpu_profile) {
+		GodotProfileZoneGrouped(_profile_zone, "gpu_profile");
 		if (print_frame_profile_ticks_from == 0) {
 			print_frame_profile_ticks_from = OS::get_singleton()->get_ticks_usec();
 		}
@@ -185,6 +201,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) {
 		}
 	}
 
+	GodotProfileZoneGrouped(_profile_zone, "memory_info");
 	RSG::utilities->update_memory_info();
 }