Browse Source

Merge pull request #100770 from hpvb/command-queue-mt

Core: Refactor CommandQueueMT to use vararg templates for performance and maintainability
Rémi Verschelde 9 months ago
parent
commit
dd7d36e803

+ 91 - 303
core/templates/command_queue_mt.h

@@ -33,294 +33,74 @@
 
 #include "core/object/worker_thread_pool.h"
 #include "core/os/condition_variable.h"
-#include "core/os/memory.h"
 #include "core/os/mutex.h"
 #include "core/templates/local_vector.h"
 #include "core/templates/simple_type.h"
+#include "core/templates/tuple.h"
 #include "core/typedefs.h"
 
-#define COMMA(N) _COMMA_##N
-#define _COMMA_0
-#define _COMMA_1 ,
-#define _COMMA_2 ,
-#define _COMMA_3 ,
-#define _COMMA_4 ,
-#define _COMMA_5 ,
-#define _COMMA_6 ,
-#define _COMMA_7 ,
-#define _COMMA_8 ,
-#define _COMMA_9 ,
-#define _COMMA_10 ,
-#define _COMMA_11 ,
-#define _COMMA_12 ,
-#define _COMMA_13 ,
-#define _COMMA_14 ,
-#define _COMMA_15 ,
-
-// 1-based comma separated list of ITEMs
-#define COMMA_SEP_LIST(ITEM, LENGTH) _COMMA_SEP_LIST_##LENGTH(ITEM)
-#define _COMMA_SEP_LIST_15(ITEM) \
-	_COMMA_SEP_LIST_14(ITEM)     \
-	, ITEM(15)
-#define _COMMA_SEP_LIST_14(ITEM) \
-	_COMMA_SEP_LIST_13(ITEM)     \
-	, ITEM(14)
-#define _COMMA_SEP_LIST_13(ITEM) \
-	_COMMA_SEP_LIST_12(ITEM)     \
-	, ITEM(13)
-#define _COMMA_SEP_LIST_12(ITEM) \
-	_COMMA_SEP_LIST_11(ITEM)     \
-	, ITEM(12)
-#define _COMMA_SEP_LIST_11(ITEM) \
-	_COMMA_SEP_LIST_10(ITEM)     \
-	, ITEM(11)
-#define _COMMA_SEP_LIST_10(ITEM) \
-	_COMMA_SEP_LIST_9(ITEM)      \
-	, ITEM(10)
-#define _COMMA_SEP_LIST_9(ITEM) \
-	_COMMA_SEP_LIST_8(ITEM)     \
-	, ITEM(9)
-#define _COMMA_SEP_LIST_8(ITEM) \
-	_COMMA_SEP_LIST_7(ITEM)     \
-	, ITEM(8)
-#define _COMMA_SEP_LIST_7(ITEM) \
-	_COMMA_SEP_LIST_6(ITEM)     \
-	, ITEM(7)
-#define _COMMA_SEP_LIST_6(ITEM) \
-	_COMMA_SEP_LIST_5(ITEM)     \
-	, ITEM(6)
-#define _COMMA_SEP_LIST_5(ITEM) \
-	_COMMA_SEP_LIST_4(ITEM)     \
-	, ITEM(5)
-#define _COMMA_SEP_LIST_4(ITEM) \
-	_COMMA_SEP_LIST_3(ITEM)     \
-	, ITEM(4)
-#define _COMMA_SEP_LIST_3(ITEM) \
-	_COMMA_SEP_LIST_2(ITEM)     \
-	, ITEM(3)
-#define _COMMA_SEP_LIST_2(ITEM) \
-	_COMMA_SEP_LIST_1(ITEM)     \
-	, ITEM(2)
-#define _COMMA_SEP_LIST_1(ITEM) \
-	_COMMA_SEP_LIST_0(ITEM)     \
-	ITEM(1)
-#define _COMMA_SEP_LIST_0(ITEM)
-
-// 1-based semicolon separated list of ITEMs
-#define SEMIC_SEP_LIST(ITEM, LENGTH) _SEMIC_SEP_LIST_##LENGTH(ITEM)
-#define _SEMIC_SEP_LIST_15(ITEM) \
-	_SEMIC_SEP_LIST_14(ITEM);    \
-	ITEM(15)
-#define _SEMIC_SEP_LIST_14(ITEM) \
-	_SEMIC_SEP_LIST_13(ITEM);    \
-	ITEM(14)
-#define _SEMIC_SEP_LIST_13(ITEM) \
-	_SEMIC_SEP_LIST_12(ITEM);    \
-	ITEM(13)
-#define _SEMIC_SEP_LIST_12(ITEM) \
-	_SEMIC_SEP_LIST_11(ITEM);    \
-	ITEM(12)
-#define _SEMIC_SEP_LIST_11(ITEM) \
-	_SEMIC_SEP_LIST_10(ITEM);    \
-	ITEM(11)
-#define _SEMIC_SEP_LIST_10(ITEM) \
-	_SEMIC_SEP_LIST_9(ITEM);     \
-	ITEM(10)
-#define _SEMIC_SEP_LIST_9(ITEM) \
-	_SEMIC_SEP_LIST_8(ITEM);    \
-	ITEM(9)
-#define _SEMIC_SEP_LIST_8(ITEM) \
-	_SEMIC_SEP_LIST_7(ITEM);    \
-	ITEM(8)
-#define _SEMIC_SEP_LIST_7(ITEM) \
-	_SEMIC_SEP_LIST_6(ITEM);    \
-	ITEM(7)
-#define _SEMIC_SEP_LIST_6(ITEM) \
-	_SEMIC_SEP_LIST_5(ITEM);    \
-	ITEM(6)
-#define _SEMIC_SEP_LIST_5(ITEM) \
-	_SEMIC_SEP_LIST_4(ITEM);    \
-	ITEM(5)
-#define _SEMIC_SEP_LIST_4(ITEM) \
-	_SEMIC_SEP_LIST_3(ITEM);    \
-	ITEM(4)
-#define _SEMIC_SEP_LIST_3(ITEM) \
-	_SEMIC_SEP_LIST_2(ITEM);    \
-	ITEM(3)
-#define _SEMIC_SEP_LIST_2(ITEM) \
-	_SEMIC_SEP_LIST_1(ITEM);    \
-	ITEM(2)
-#define _SEMIC_SEP_LIST_1(ITEM) \
-	_SEMIC_SEP_LIST_0(ITEM)     \
-	ITEM(1)
-#define _SEMIC_SEP_LIST_0(ITEM)
-
-// 1-based space separated list of ITEMs
-#define SPACE_SEP_LIST(ITEM, LENGTH) _SPACE_SEP_LIST_##LENGTH(ITEM)
-#define _SPACE_SEP_LIST_15(ITEM) \
-	_SPACE_SEP_LIST_14(ITEM)     \
-	ITEM(15)
-#define _SPACE_SEP_LIST_14(ITEM) \
-	_SPACE_SEP_LIST_13(ITEM)     \
-	ITEM(14)
-#define _SPACE_SEP_LIST_13(ITEM) \
-	_SPACE_SEP_LIST_12(ITEM)     \
-	ITEM(13)
-#define _SPACE_SEP_LIST_12(ITEM) \
-	_SPACE_SEP_LIST_11(ITEM)     \
-	ITEM(12)
-#define _SPACE_SEP_LIST_11(ITEM) \
-	_SPACE_SEP_LIST_10(ITEM)     \
-	ITEM(11)
-#define _SPACE_SEP_LIST_10(ITEM) \
-	_SPACE_SEP_LIST_9(ITEM)      \
-	ITEM(10)
-#define _SPACE_SEP_LIST_9(ITEM) \
-	_SPACE_SEP_LIST_8(ITEM)     \
-	ITEM(9)
-#define _SPACE_SEP_LIST_8(ITEM) \
-	_SPACE_SEP_LIST_7(ITEM)     \
-	ITEM(8)
-#define _SPACE_SEP_LIST_7(ITEM) \
-	_SPACE_SEP_LIST_6(ITEM)     \
-	ITEM(7)
-#define _SPACE_SEP_LIST_6(ITEM) \
-	_SPACE_SEP_LIST_5(ITEM)     \
-	ITEM(6)
-#define _SPACE_SEP_LIST_5(ITEM) \
-	_SPACE_SEP_LIST_4(ITEM)     \
-	ITEM(5)
-#define _SPACE_SEP_LIST_4(ITEM) \
-	_SPACE_SEP_LIST_3(ITEM)     \
-	ITEM(4)
-#define _SPACE_SEP_LIST_3(ITEM) \
-	_SPACE_SEP_LIST_2(ITEM)     \
-	ITEM(3)
-#define _SPACE_SEP_LIST_2(ITEM) \
-	_SPACE_SEP_LIST_1(ITEM)     \
-	ITEM(2)
-#define _SPACE_SEP_LIST_1(ITEM) \
-	_SPACE_SEP_LIST_0(ITEM)     \
-	ITEM(1)
-#define _SPACE_SEP_LIST_0(ITEM)
-
-#define ARG(N) p##N
-#define PARAM(N) P##N p##N
-#define TYPE_PARAM(N) typename P##N
-#define PARAM_DECL(N) GetSimpleTypeT<P##N> p##N
-
-#define DECL_CMD(N)                                                          \
-	template <typename T, typename M COMMA(N) COMMA_SEP_LIST(TYPE_PARAM, N)> \
-	struct Command##N : public CommandBase {                                 \
-		T *instance;                                                         \
-		M method;                                                            \
-		SEMIC_SEP_LIST(PARAM_DECL, N);                                       \
-		virtual void call() override {                                       \
-			(instance->*method)(COMMA_SEP_LIST(ARG, N));                     \
-		}                                                                    \
-	};
-
-#define DECL_CMD_RET(N)                                                                  \
-	template <typename T, typename M, COMMA_SEP_LIST(TYPE_PARAM, N) COMMA(N) typename R> \
-	struct CommandRet##N : public SyncCommand {                                          \
-		R *ret;                                                                          \
-		T *instance;                                                                     \
-		M method;                                                                        \
-		SEMIC_SEP_LIST(PARAM_DECL, N);                                                   \
-		virtual void call() override {                                                   \
-			*ret = (instance->*method)(COMMA_SEP_LIST(ARG, N));                          \
-		}                                                                                \
-	};
-
-#define DECL_CMD_SYNC(N)                                                     \
-	template <typename T, typename M COMMA(N) COMMA_SEP_LIST(TYPE_PARAM, N)> \
-	struct CommandSync##N : public SyncCommand {                             \
-		T *instance;                                                         \
-		M method;                                                            \
-		SEMIC_SEP_LIST(PARAM_DECL, N);                                       \
-		virtual void call() override {                                       \
-			(instance->*method)(COMMA_SEP_LIST(ARG, N));                     \
-		}                                                                    \
-	};
-
-#define TYPE_ARG(N) P##N
-#define CMD_TYPE(N) Command##N<T, M COMMA(N) COMMA_SEP_LIST(TYPE_ARG, N)>
-#define CMD_ASSIGN_PARAM(N) cmd->p##N = p##N
-
-#define DECL_PUSH(N)                                                            \
-	template <typename T, typename M COMMA(N) COMMA_SEP_LIST(TYPE_PARAM, N)>    \
-	void push(T *p_instance, M p_method COMMA(N) COMMA_SEP_LIST(PARAM, N)) {    \
-		MutexLock mlock(mutex);                                                 \
-		CMD_TYPE(N) *cmd = allocate<CMD_TYPE(N)>();                             \
-		cmd->instance = p_instance;                                             \
-		cmd->method = p_method;                                                 \
-		SEMIC_SEP_LIST(CMD_ASSIGN_PARAM, N);                                    \
-		if (pump_task_id != WorkerThreadPool::INVALID_TASK_ID) {                \
-			WorkerThreadPool::get_singleton()->notify_yield_over(pump_task_id); \
-		}                                                                       \
-	}
-
-#define CMD_RET_TYPE(N) CommandRet##N<T, M, COMMA_SEP_LIST(TYPE_ARG, N) COMMA(N) R>
-
-#define DECL_PUSH_AND_RET(N)                                                                   \
-	template <typename T, typename M, COMMA_SEP_LIST(TYPE_PARAM, N) COMMA(N) typename R>       \
-	void push_and_ret(T *p_instance, M p_method, COMMA_SEP_LIST(PARAM, N) COMMA(N) R *r_ret) { \
-		MutexLock mlock(mutex);                                                                \
-		CMD_RET_TYPE(N) *cmd = allocate<CMD_RET_TYPE(N)>();                                    \
-		cmd->instance = p_instance;                                                            \
-		cmd->method = p_method;                                                                \
-		SEMIC_SEP_LIST(CMD_ASSIGN_PARAM, N);                                                   \
-		cmd->ret = r_ret;                                                                      \
-		if (pump_task_id != WorkerThreadPool::INVALID_TASK_ID) {                               \
-			WorkerThreadPool::get_singleton()->notify_yield_over(pump_task_id);                \
-		}                                                                                      \
-		sync_tail++;                                                                           \
-		_wait_for_sync(mlock);                                                                 \
-	}
-
-#define CMD_SYNC_TYPE(N) CommandSync##N<T, M COMMA(N) COMMA_SEP_LIST(TYPE_ARG, N)>
-
-#define DECL_PUSH_AND_SYNC(N)                                                         \
-	template <typename T, typename M COMMA(N) COMMA_SEP_LIST(TYPE_PARAM, N)>          \
-	void push_and_sync(T *p_instance, M p_method COMMA(N) COMMA_SEP_LIST(PARAM, N)) { \
-		MutexLock mlock(mutex);                                                       \
-		CMD_SYNC_TYPE(N) *cmd = allocate<CMD_SYNC_TYPE(N)>();                         \
-		cmd->instance = p_instance;                                                   \
-		cmd->method = p_method;                                                       \
-		SEMIC_SEP_LIST(CMD_ASSIGN_PARAM, N);                                          \
-		if (pump_task_id != WorkerThreadPool::INVALID_TASK_ID) {                      \
-			WorkerThreadPool::get_singleton()->notify_yield_over(pump_task_id);       \
-		}                                                                             \
-		sync_tail++;                                                                  \
-		_wait_for_sync(mlock);                                                        \
-	}
-
-#define MAX_CMD_PARAMS 15
-
 class CommandQueueMT {
 	struct CommandBase {
 		bool sync = false;
 		virtual void call() = 0;
 		virtual ~CommandBase() = default;
+
+		CommandBase(bool p_sync) :
+				sync(p_sync) {}
 	};
 
-	struct SyncCommand : public CommandBase {
-		virtual void call() override {}
-		SyncCommand() {
-			sync = true;
+	template <typename T, typename M, bool NeedsSync, typename... Args>
+	struct Command : public CommandBase {
+		T *instance;
+		M method;
+		Tuple<GetSimpleTypeT<Args>...> args;
+
+		template <typename... FwdArgs>
+		_FORCE_INLINE_ Command(T *p_instance, M p_method, FwdArgs &&...p_args) :
+				CommandBase(NeedsSync), instance(p_instance), method(p_method), args(std::forward<FwdArgs>(p_args)...) {}
+
+		void call() {
+			call_impl(BuildIndexSequence<sizeof...(Args)>{});
+		}
+
+	private:
+		template <size_t... I>
+		_FORCE_INLINE_ void call_impl(IndexSequence<I...>) {
+			// Move out of the Tuple, this will be destroyed as soon as the call is complete.
+			(instance->*method)(std::move(get<I>())...);
 		}
+
+		// This method exists so we can call it in the parameter pack expansion in call_impl.
+		template <size_t I>
+		_FORCE_INLINE_ auto &get() { return ::tuple_get<I>(args); }
 	};
 
-	DECL_CMD(0)
-	SPACE_SEP_LIST(DECL_CMD, 15)
+	// Separate class from Command so we can save the space of the ret pointer for commands that don't return.
+	template <typename T, typename M, typename R, typename... Args>
+	struct CommandRet : public CommandBase {
+		T *instance;
+		M method;
+		R *ret;
+		Tuple<GetSimpleTypeT<Args>...> args;
 
-	// Commands that return.
-	DECL_CMD_RET(0)
-	SPACE_SEP_LIST(DECL_CMD_RET, 15)
+		_FORCE_INLINE_ CommandRet(T *p_instance, M p_method, R *p_ret, GetSimpleTypeT<Args>... p_args) :
+				CommandBase(true), instance(p_instance), method(p_method), ret(p_ret), args{ p_args... } {}
 
-	/* commands that don't return but sync */
-	DECL_CMD_SYNC(0)
-	SPACE_SEP_LIST(DECL_CMD_SYNC, 15)
+		void call() override {
+			*ret = call_impl(BuildIndexSequence<sizeof...(Args)>{});
+		}
+
+	private:
+		template <size_t... I>
+		_FORCE_INLINE_ R call_impl(IndexSequence<I...>) {
+			// Move out of the Tuple, this will be destroyed as soon as the call is complete.
+			return (instance->*method)(std::move(get<I>())...);
+		}
+
+		// This method exists so we can call it in the parameter pack expansion in call_impl.
+		template <size_t I>
+		_FORCE_INLINE_ auto &get() { return ::tuple_get<I>(args); }
+	};
 
 	/***** BASE *******/
 
@@ -335,17 +115,32 @@ class CommandQueueMT {
 	WorkerThreadPool::TaskID pump_task_id = WorkerThreadPool::INVALID_TASK_ID;
 	uint64_t flush_read_ptr = 0;
 
-	template <typename T>
-	T *allocate() {
+	template <typename T, typename... Args>
+	_FORCE_INLINE_ void create_command(Args &&...p_args) {
 		// alloc size is size+T+safeguard
-		static_assert(sizeof(T) < UINT32_MAX, "Type too large to fit in the command queue.");
+		constexpr uint64_t alloc_size = ((sizeof(T) + 8U - 1U) & ~(8U - 1U));
+		static_assert(alloc_size < UINT32_MAX, "Type too large to fit in the command queue.");
 
-		uint32_t alloc_size = ((sizeof(T) + 8U - 1U) & ~(8U - 1U));
 		uint64_t size = command_mem.size();
-		command_mem.resize(size + alloc_size + 8);
+		command_mem.resize(size + alloc_size + sizeof(uint64_t));
 		*(uint64_t *)&command_mem[size] = alloc_size;
-		T *cmd = memnew_placement(&command_mem[size + 8], T);
-		return cmd;
+		void *cmd = &command_mem[size + sizeof(uint64_t)];
+		new (cmd) T(std::forward<Args>(p_args)...);
+	}
+
+	template <typename T, bool NeedsSync, typename... Args>
+	_FORCE_INLINE_ void _push_internal(Args &&...args) {
+		MutexLock mlock(mutex);
+		create_command<T>(std::forward<Args>(args)...);
+
+		if (pump_task_id != WorkerThreadPool::INVALID_TASK_ID) {
+			WorkerThreadPool::get_singleton()->notify_yield_over(pump_task_id);
+		}
+
+		if constexpr (NeedsSync) {
+			sync_tail++;
+			_wait_for_sync(mlock);
+		}
 	}
 
 	_FORCE_INLINE_ void _prevent_sync_wraparound() {
@@ -409,17 +204,26 @@ class CommandQueueMT {
 	void _no_op() {}
 
 public:
-	/* NORMAL PUSH COMMANDS */
-	DECL_PUSH(0)
-	SPACE_SEP_LIST(DECL_PUSH, 15)
+	template <typename T, typename M, typename... Args>
+	void push(T *p_instance, M p_method, Args &&...p_args) {
+		// Standard command, no sync.
+		using CommandType = Command<T, M, false, Args...>;
+		_push_internal<CommandType, false>(p_instance, p_method, std::forward<Args>(p_args)...);
+	}
 
-	/* PUSH AND RET COMMANDS */
-	DECL_PUSH_AND_RET(0)
-	SPACE_SEP_LIST(DECL_PUSH_AND_RET, 15)
+	template <typename T, typename M, typename... Args>
+	void push_and_sync(T *p_instance, M p_method, Args... p_args) {
+		// Standard command, sync.
+		using CommandType = Command<T, M, true, Args...>;
+		_push_internal<CommandType, true>(p_instance, p_method, std::forward<Args>(p_args)...);
+	}
 
-	/* PUSH AND RET SYNC COMMANDS*/
-	DECL_PUSH_AND_SYNC(0)
-	SPACE_SEP_LIST(DECL_PUSH_AND_SYNC, 15)
+	template <typename T, typename M, typename R, typename... Args>
+	void push_and_ret(T *p_instance, M p_method, R *r_ret, Args... p_args) {
+		// Command with return value, sync.
+		using CommandType = CommandRet<T, M, R, Args...>;
+		_push_internal<CommandType, true>(p_instance, p_method, r_ret, std::forward<Args>(p_args)...);
+	}
 
 	_FORCE_INLINE_ void flush_if_pending() {
 		if (unlikely(command_mem.size() > 0)) {
@@ -450,20 +254,4 @@ public:
 	~CommandQueueMT();
 };
 
-#undef ARG
-#undef PARAM
-#undef TYPE_PARAM
-#undef PARAM_DECL
-#undef DECL_CMD
-#undef DECL_CMD_RET
-#undef DECL_CMD_SYNC
-#undef TYPE_ARG
-#undef CMD_TYPE
-#undef CMD_ASSIGN_PARAM
-#undef DECL_PUSH
-#undef CMD_RET_TYPE
-#undef DECL_PUSH_AND_RET
-#undef CMD_SYNC_TYPE
-#undef DECL_CMD_SYNC
-
 #endif // COMMAND_QUEUE_MT_H

+ 121 - 0
core/templates/tuple.h

@@ -0,0 +1,121 @@
+/**************************************************************************/
+/*  tuple.h                                                               */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef TUPLE_H
+#define TUPLE_H
+
+// Simple recursive Tuple type that has no runtime overhead.
+//
+// The compile-time recursion works as follows:
+// Assume the following: Tuple<int, float> my_tuple(42, 3.14f);
+// This expands to a class hierarchy that inherits from the previous step.
+// So in this case this leads to:
+//  - struct Tuple<int> : Tuple<float>   <--- This contains the int value.
+//  - struct Tuple<float>                <--- This contains the float value.
+// where each of the classes has a single field of the type for that step in the
+// recursion. So: float value;  int value; etc.
+//
+// This works by splitting up the parameter pack for each step in the recursion minus the first.
+// so the the first step creates the "T value" from the first template parameter.
+// any further template arguments end up in "Rest", which we then use to instantiate a new
+// tuple, but now minus the first argument. To write this all out:
+//
+// Tuple<int, float>
+// step 1: Tuple T = int, Rest = float. Results in a Tuple<int> : Tuple<float>
+// step 2: Tuple T = float, no Rest. Results in a Tuple<float>
+//
+// tuple_get<I> works through a similar recursion, using the inheritance chain to walk to the right node.
+// In order to tuple_get<1>(my_tuple), from the example tuple above:
+//
+// 1. We want tuple_get<1> to return the float, which is one level "up" from Tuple<int> : Tuple<float>,
+//    (the real type of the Tuple "root").
+// 2. Since index 1 > 0, it casts the tuple to its parent type (Tuple<float>). This works because
+//    we cast to Tuple<Rest...> which in this case is just float.
+// 3. Now we're looking for index 0 in Tuple<float>, which directly returns its value field. Note
+//    how get<0> is a template specialization.
+//
+// At compile time, this gets fully resolved. The compiler sees get<1>(my_tuple) and:
+// 1. Creates TupleGet<1, Tuple<int, float>>::tuple_get which contains the cast to Tuple<float>.
+// 2. Creates TupleGet<0, Tuple<float>>::tuple_get which directly returns the value.
+// 3. The compiler will then simply optimize all of this nonsense away and return the float directly.
+
+#include "core/typedefs.h"
+
+template <typename... Types>
+struct Tuple;
+
+template <>
+struct Tuple<> {};
+
+template <typename T, typename... Rest>
+struct Tuple<T, Rest...> : Tuple<Rest...> {
+	T value;
+
+	Tuple() = default;
+
+	template <typename F, typename... R>
+	_FORCE_INLINE_ Tuple(F &&f, R &&...rest) :
+			Tuple<Rest...>(std::forward<R>(rest)...),
+			value(std::forward<F>(f)) {}
+};
+
+template <size_t I, typename Tuple>
+struct TupleGet;
+
+template <typename First, typename... Rest>
+struct TupleGet<0, Tuple<First, Rest...>> {
+	_FORCE_INLINE_ static First &tuple_get(Tuple<First, Rest...> &t) {
+		return t.value;
+	}
+};
+
+// Rationale for using auto here is that the alternative is writing a
+// helper struct to create an otherwise useless type. we would have to write
+// a second recursive template chain like: TupleGetType<I, Tuple<First, Rest...>>::type
+// just to recover the type in the most baroque way possible.
+
+template <size_t I, typename First, typename... Rest>
+struct TupleGet<I, Tuple<First, Rest...>> {
+	_FORCE_INLINE_ static auto &tuple_get(Tuple<First, Rest...> &t) {
+		return TupleGet<I - 1, Tuple<Rest...>>::tuple_get(static_cast<Tuple<Rest...> &>(t));
+	}
+};
+
+template <size_t I, typename... Types>
+_FORCE_INLINE_ auto &tuple_get(Tuple<Types...> &t) {
+	return TupleGet<I, Tuple<Types...>>::tuple_get(t);
+}
+
+template <size_t I, typename... Types>
+_FORCE_INLINE_ const auto &tuple_get(const Tuple<Types...> &t) {
+	return TupleGet<I, Tuple<Types...>>::tuple_get(t);
+}
+
+#endif // TUPLE_H

+ 1 - 1
modules/betsy/image_compress_betsy.h

@@ -124,7 +124,7 @@ public:
 
 	Error compress(BetsyFormat p_format, Image *r_img) {
 		Error err;
-		command_queue.push_and_ret(this, &BetsyCompressor::_compress, p_format, r_img, &err);
+		command_queue.push_and_ret(this, &BetsyCompressor::_compress, &err, p_format, r_img);
 		return err;
 	}
 };

+ 16 - 16
servers/server_wrap_mt_common.h

@@ -139,7 +139,7 @@
 		WRITE_ACTION                                                                \
 		if (Thread::get_caller_id() != server_thread) {                             \
 			m_r ret;                                                                \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1); \
 			SYNC_DEBUG                                                              \
 			MAIN_THREAD_SYNC_CHECK                                                  \
 			return ret;                                                             \
@@ -153,7 +153,7 @@
 	virtual m_r m_type(m_arg1 p1) const override {                                  \
 		if (Thread::get_caller_id() != server_thread) {                             \
 			m_r ret;                                                                \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1); \
 			SYNC_DEBUG                                                              \
 			MAIN_THREAD_SYNC_CHECK                                                  \
 			return ret;                                                             \
@@ -214,7 +214,7 @@
 		WRITE_ACTION                                                                    \
 		if (Thread::get_caller_id() != server_thread) {                                 \
 			m_r ret;                                                                    \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2); \
 			SYNC_DEBUG                                                                  \
 			MAIN_THREAD_SYNC_CHECK                                                      \
 			return ret;                                                                 \
@@ -228,7 +228,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2) const override {                           \
 		if (Thread::get_caller_id() != server_thread) {                                 \
 			m_r ret;                                                                    \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2); \
 			SYNC_DEBUG                                                                  \
 			MAIN_THREAD_SYNC_CHECK                                                      \
 			return ret;                                                                 \
@@ -289,7 +289,7 @@
 		WRITE_ACTION                                                                        \
 		if (Thread::get_caller_id() != server_thread) {                                     \
 			m_r ret;                                                                        \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3); \
 			SYNC_DEBUG                                                                      \
 			MAIN_THREAD_SYNC_CHECK                                                          \
 			return ret;                                                                     \
@@ -303,7 +303,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3) const override {                    \
 		if (Thread::get_caller_id() != server_thread) {                                     \
 			m_r ret;                                                                        \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3); \
 			SYNC_DEBUG                                                                      \
 			MAIN_THREAD_SYNC_CHECK                                                          \
 			return ret;                                                                     \
@@ -364,7 +364,7 @@
 		WRITE_ACTION                                                                            \
 		if (Thread::get_caller_id() != server_thread) {                                         \
 			m_r ret;                                                                            \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4); \
 			SYNC_DEBUG                                                                          \
 			MAIN_THREAD_SYNC_CHECK                                                              \
 			return ret;                                                                         \
@@ -378,7 +378,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3, m_arg4 p4) const override {             \
 		if (Thread::get_caller_id() != server_thread) {                                         \
 			m_r ret;                                                                            \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4); \
 			SYNC_DEBUG                                                                          \
 			MAIN_THREAD_SYNC_CHECK                                                              \
 			return ret;                                                                         \
@@ -439,7 +439,7 @@
 		WRITE_ACTION                                                                                \
 		if (Thread::get_caller_id() != server_thread) {                                             \
 			m_r ret;                                                                                \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5); \
 			SYNC_DEBUG                                                                              \
 			MAIN_THREAD_SYNC_CHECK                                                                  \
 			return ret;                                                                             \
@@ -453,7 +453,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3, m_arg4 p4, m_arg5 p5) const override {      \
 		if (Thread::get_caller_id() != server_thread) {                                             \
 			m_r ret;                                                                                \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5); \
 			SYNC_DEBUG                                                                              \
 			MAIN_THREAD_SYNC_CHECK                                                                  \
 			return ret;                                                                             \
@@ -514,7 +514,7 @@
 		WRITE_ACTION                                                                                    \
 		if (Thread::get_caller_id() != server_thread) {                                                 \
 			m_r ret;                                                                                    \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, &ret); \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6); \
 			SYNC_DEBUG                                                                                  \
 			MAIN_THREAD_SYNC_CHECK                                                                      \
 			return ret;                                                                                 \
@@ -528,7 +528,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3, m_arg4 p4, m_arg5 p5, m_arg6 p6) const override { \
 		if (Thread::get_caller_id() != server_thread) {                                                   \
 			m_r ret;                                                                                      \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, &ret);   \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6);   \
 			SYNC_DEBUG                                                                                    \
 			MAIN_THREAD_SYNC_CHECK                                                                        \
 			return ret;                                                                                   \
@@ -589,7 +589,7 @@
 		WRITE_ACTION                                                                                           \
 		if (Thread::get_caller_id() != server_thread) {                                                        \
 			m_r ret;                                                                                           \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, p7, &ret);    \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6, p7);    \
 			SYNC_DEBUG                                                                                         \
 			MAIN_THREAD_SYNC_CHECK                                                                             \
 			return ret;                                                                                        \
@@ -603,7 +603,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3, m_arg4 p4, m_arg5 p5, m_arg6 p6, m_arg7 p7) const override { \
 		if (Thread::get_caller_id() != server_thread) {                                                              \
 			m_r ret;                                                                                                 \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, p7, &ret);          \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6, p7);          \
 			SYNC_DEBUG                                                                                               \
 			MAIN_THREAD_SYNC_CHECK                                                                                   \
 			return ret;                                                                                              \
@@ -664,7 +664,7 @@
 		WRITE_ACTION                                                                                                      \
 		if (Thread::get_caller_id() != server_thread) {                                                                   \
 			m_r ret;                                                                                                      \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, p7, p8, &ret);           \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6, p7, p8);           \
 			SYNC_DEBUG                                                                                                    \
 			MAIN_THREAD_SYNC_CHECK                                                                                        \
 			return ret;                                                                                                   \
@@ -678,7 +678,7 @@
 	virtual m_r m_type(m_arg1 p1, m_arg2 p2, m_arg3 p3, m_arg4 p4, m_arg5 p5, m_arg6 p6, m_arg7 p7, m_arg8 p8) const override { \
 		if (Thread::get_caller_id() != server_thread) {                                                                         \
 			m_r ret;                                                                                                            \
-			command_queue.push_and_ret(server_name, &ServerName::m_type, p1, p2, p3, p4, p5, p6, p7, p8, &ret);                 \
+			command_queue.push_and_ret(server_name, &ServerName::m_type, &ret, p1, p2, p3, p4, p5, p6, p7, p8);                 \
 			SYNC_DEBUG                                                                                                          \
 			MAIN_THREAD_SYNC_CHECK                                                                                              \
 			return ret;                                                                                                         \

+ 117 - 2
tests/core/templates/test_command_queue.h

@@ -201,10 +201,10 @@ public:
 						command_queue.push_and_sync(this, &SharedThreadState::func2, tr, f);
 						break;
 					case TEST_MSGRET_FUNC1_TRANSFORM:
-						command_queue.push_and_ret(this, &SharedThreadState::func1r, tr, &otr);
+						command_queue.push_and_ret(this, &SharedThreadState::func1r, &otr, tr);
 						break;
 					case TEST_MSGRET_FUNC2_TRANSFORM_FLOAT:
-						command_queue.push_and_ret(this, &SharedThreadState::func2r, tr, f, &otr);
+						command_queue.push_and_ret(this, &SharedThreadState::func2r, &otr, tr, f);
 						break;
 					default:
 						break;
@@ -244,6 +244,44 @@ public:
 		}
 		writer_thread.wait_to_finish();
 	}
+
+	struct CopyMoveTestType {
+		inline static int copy_count;
+		inline static int move_count;
+		int value = 0;
+
+		CopyMoveTestType(int p_value = 0) :
+				value(p_value) {}
+
+		CopyMoveTestType(const CopyMoveTestType &p_other) :
+				value(p_other.value) {
+			copy_count++;
+		}
+
+		CopyMoveTestType(CopyMoveTestType &&p_other) :
+				value(p_other.value) {
+			move_count++;
+		}
+
+		CopyMoveTestType &operator=(const CopyMoveTestType &p_other) {
+			value = p_other.value;
+			copy_count++;
+			return *this;
+		}
+
+		CopyMoveTestType &operator=(CopyMoveTestType &&p_other) {
+			value = p_other.value;
+			move_count++;
+			return *this;
+		}
+	};
+
+	void copy_move_test_copy(CopyMoveTestType p_test_type) {
+	}
+	void copy_move_test_ref(const CopyMoveTestType &p_test_type) {
+	}
+	void copy_move_test_move(CopyMoveTestType &&p_test_type) {
+	}
 };
 
 static void test_command_queue_basic(bool p_use_thread_pool_sync) {
@@ -446,6 +484,83 @@ TEST_CASE("[Stress][CommandQueue] Stress test command queue") {
 	ProjectSettings::get_singleton()->set_setting(COMMAND_QUEUE_SETTING,
 			ProjectSettings::get_singleton()->property_get_revert(COMMAND_QUEUE_SETTING));
 }
+
+TEST_CASE("[CommandQueue] Test Parameter Passing Semantics") {
+	SharedThreadState sts;
+	sts.init_threads();
+
+	SUBCASE("Testing with lvalue") {
+		SharedThreadState::CopyMoveTestType::copy_count = 0;
+		SharedThreadState::CopyMoveTestType::move_count = 0;
+
+		SharedThreadState::CopyMoveTestType lvalue(42);
+
+		SUBCASE("Pass by copy") {
+			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_copy, lvalue);
+
+			sts.message_count_to_read = -1;
+			sts.reader_threadwork.main_start_work();
+			sts.reader_threadwork.main_wait_for_done();
+
+			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 1);
+			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
+		}
+
+		SUBCASE("Pass by reference") {
+			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_ref, lvalue);
+
+			sts.message_count_to_read = -1;
+			sts.reader_threadwork.main_start_work();
+			sts.reader_threadwork.main_wait_for_done();
+
+			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 1);
+			CHECK(SharedThreadState::CopyMoveTestType::move_count == 0);
+		}
+	}
+
+	SUBCASE("Testing with rvalue") {
+		SharedThreadState::CopyMoveTestType::copy_count = 0;
+		SharedThreadState::CopyMoveTestType::move_count = 0;
+
+		SUBCASE("Pass by copy") {
+			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_copy,
+					SharedThreadState::CopyMoveTestType(43));
+
+			sts.message_count_to_read = -1;
+			sts.reader_threadwork.main_start_work();
+			sts.reader_threadwork.main_wait_for_done();
+
+			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
+			CHECK(SharedThreadState::CopyMoveTestType::move_count == 2);
+		}
+
+		SUBCASE("Pass by reference") {
+			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_ref,
+					SharedThreadState::CopyMoveTestType(43));
+
+			sts.message_count_to_read = -1;
+			sts.reader_threadwork.main_start_work();
+			sts.reader_threadwork.main_wait_for_done();
+
+			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
+			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
+		}
+
+		SUBCASE("Pass by rvalue reference") {
+			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_move,
+					SharedThreadState::CopyMoveTestType(43));
+
+			sts.message_count_to_read = -1;
+			sts.reader_threadwork.main_start_work();
+			sts.reader_threadwork.main_wait_for_done();
+
+			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
+			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
+		}
+	}
+
+	sts.destroy_threads();
+}
 } // namespace TestCommandQueue
 
 #endif // TEST_COMMAND_QUEUE_H