
Allow inlining of all parts of safe_refcount

The differences this makes aren't huge, but the effort is minimal; some
workloads gain a couple of percent in performance.
Hein-Pieter van Braam, 8 years ago
Commit 2085bcce3b
2 changed files with 268 additions and 289 deletions
  1. core/safe_refcount.cpp (+245 −12)
  2. core/safe_refcount.h (+23 −277)

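Before the diffs: a minimal, standalone sketch (not the engine code itself) of the CAS pattern this commit moves into `_ALWAYS_INLINE_` template helpers. It uses the same GCC/Clang `__sync` builtins as the new implementation; the function and variable names here are illustrative assumptions only.

```cpp
#include <cstdint>
#include <cstdio>

// Conditional increment: refuse to revive a counter that has already hit zero.
// Same CAS loop shape as _atomic_conditional_increment_impl in the diff below.
template <class T>
static inline T conditional_increment_sketch(T *pw) {
	while (true) {
		T tmp = static_cast<T const volatile &>(*pw);
		if (tmp == 0)
			return 0; // once zero, a ref() attempt must fail
		if (__sync_val_compare_and_swap(pw, tmp, tmp + 1) == tmp)
			return tmp + 1;
	}
}

int main() {
	uint32_t count = 1;
	printf("%u\n", conditional_increment_sketch(&count)); // prints 2
	count = 0;
	printf("%u\n", conditional_increment_sketch(&count)); // prints 0: counter stays dead
	return 0;
}
```

Because the per-compiler `_impl` helpers are marked `_ALWAYS_INLINE_`, each public `atomic_*` wrapper at the bottom of safe_refcount.cpp inlines its implementation instead of making a nested call, which is where the small gain comes from.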
core/safe_refcount.cpp (+245 −12)

@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md)    */
+/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md)    */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -29,26 +29,259 @@
 /*************************************************************************/
 #include "safe_refcount.h"
 
-#ifdef _MSC_VER
+// Atomic functions, these are used for multithread safe reference counters!
 
-// don't pollute my namespace!
-#include <windows.h>
-long atomic_conditional_increment(register long *pw) {
+#ifdef NO_THREADS
+
+/* Bogus implementation unaware of multiprocessing */
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {
+
+	if (*pw == 0)
+		return 0;
+
+	(*pw)++;
+
+	return *pw;
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {
+
+	(*pw)--;
+
+	return *pw;
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {
+
+	(*pw)++;
+
+	return *pw;
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {
+
+	(*pw) -= val;
+
+	return *pw;
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {
+
+	(*pw) += val;
+
+	return *pw;
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {
+
+	if (val > *pw)
+		*pw = val;
+
+	return *pw;
+}
+
+#elif defined(__GNUC__)
+
+/* Implementation for GCC & Clang */
 
-	/* try to increment until it actually works */
-	// taken from boost
+// GCC guarantees atomic intrinsics for sizes of 1, 2, 4 and 8 bytes.
+// Clang states it supports GCC atomic builtins.
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {
 
 	while (true) {
-		long tmp = static_cast<long const volatile &>(*pw);
+		T tmp = static_cast<T const volatile &>(*pw);
 		if (tmp == 0)
 			return 0; // if zero, can't add to it anymore
-		if (InterlockedCompareExchange(pw, tmp + 1, tmp) == tmp)
+		if (__sync_val_compare_and_swap(pw, tmp, tmp + 1) == tmp)
 			return tmp + 1;
 	}
 }
 
-long atomic_decrement(register long *pw) {
-	return InterlockedDecrement(pw);
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {
+
+	return __sync_sub_and_fetch(pw, 1);
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {
+
+	return __sync_add_and_fetch(pw, 1);
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {
+
+	return __sync_sub_and_fetch(pw, val);
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {
+
+	return __sync_add_and_fetch(pw, val);
+}
+
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {
+
+	while (true) {
+		T tmp = static_cast<T const volatile &>(*pw);
+		if (tmp >= val)
+			return tmp; // already greater, or equal
+		if (__sync_val_compare_and_swap(pw, tmp, val) == tmp)
+			return val;
+	}
+}
+
+#elif defined(_MSC_VER)
+
+/* Implementation for MSVC-Windows */
+
+// don't pollute my namespace!
+#include <windows.h>
+
+#define ATOMIC_CONDITIONAL_INCREMENT_BODY(m_pw, m_win_type, m_win_cmpxchg, m_cpp_type) \
+	/* try to increment until it actually works */                                     \
+	/* taken from boost */                                                             \
+	while (true) {                                                                     \
+		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw));            \
+		if (tmp == 0)                                                                  \
+			return 0; /* if zero, can't add to it anymore */                           \
+		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), tmp + 1, tmp) == tmp)         \
+			return tmp + 1;                                                            \
+	}
+
+#define ATOMIC_EXCHANGE_IF_GREATER_BODY(m_pw, m_val, m_win_type, m_win_cmpxchg, m_cpp_type) \
+	while (true) {                                                                          \
+		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw));                 \
+		if (tmp >= m_val)                                                                   \
+			return tmp; /* already greater, or equal */                                     \
+		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), m_val, tmp) == tmp)                \
+			return m_val;                                                                   \
+	}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_conditional_increment_impl(register uint32_t *pw) {
+
+	ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONG, InterlockedCompareExchange, uint32_t)
+}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_decrement_impl(register uint32_t *pw) {
+
+	return InterlockedDecrement((LONG volatile *)pw);
+}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_increment_impl(register uint32_t *pw) {
+
+	return InterlockedIncrement((LONG volatile *)pw);
+}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_sub_impl(register uint32_t *pw, register uint32_t val) {
+
+	return InterlockedExchangeAdd((LONG volatile *)pw, -(int32_t)val) - val;
+}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_add_impl(register uint32_t *pw, register uint32_t val) {
+
+	return InterlockedAdd((LONG volatile *)pw, val);
+}
+
+static _ALWAYS_INLINE_ uint32_t _atomic_exchange_if_greater_impl(register uint32_t *pw, register uint32_t val) {
+
+	ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONG, InterlockedCompareExchange, uint32_t)
+}
+
+static _ALWAYS_INLINE_ uint64_t _atomic_conditional_increment_impl(register uint64_t *pw) {
+
+	ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONGLONG, InterlockedCompareExchange64, uint64_t)
+}
+
+static _ALWAYS_INLINE_ uint64_t _atomic_decrement_impl(register uint64_t *pw) {
+
+	return InterlockedDecrement64((LONGLONG volatile *)pw);
+}
+
+static _ALWAYS_INLINE_ uint64_t _atomic_increment_impl(register uint64_t *pw) {
+
+	return InterlockedIncrement64((LONGLONG volatile *)pw);
 }
 
+static _ALWAYS_INLINE_ uint64_t _atomic_sub_impl(register uint64_t *pw, register uint64_t val) {
+
+	return InterlockedExchangeAdd64((LONGLONG volatile *)pw, -(int64_t)val) - val;
+}
+
+static _ALWAYS_INLINE_ uint64_t _atomic_add_impl(register uint64_t *pw, register uint64_t val) {
+
+	return InterlockedAdd64((LONGLONG volatile *)pw, val);
+}
+
+static _ALWAYS_INLINE_ uint64_t _atomic_exchange_if_greater_impl(register uint64_t *pw, register uint64_t val) {
+
+	ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONGLONG, InterlockedCompareExchange64, uint64_t)
+}
+
+#else
+
+//no threads supported?
+#error Must provide atomic functions for this platform or compiler!
+
 #endif
+
+// The actual advertised functions; they'll call the right implementation
+
+uint32_t atomic_conditional_increment(register uint32_t *counter) {
+	return _atomic_conditional_increment_impl(counter);
+}
+
+uint32_t atomic_decrement(register uint32_t *pw) {
+	return _atomic_decrement_impl(pw);
+}
+
+uint32_t atomic_increment(register uint32_t *pw) {
+	return _atomic_increment_impl(pw);
+}
+
+uint32_t atomic_sub(register uint32_t *pw, register uint32_t val) {
+	return _atomic_sub_impl(pw, val);
+}
+
+uint32_t atomic_add(register uint32_t *pw, register uint32_t val) {
+	return _atomic_add_impl(pw, val);
+}
+
+uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val) {
+	return _atomic_exchange_if_greater_impl(pw, val);
+}
+
+uint64_t atomic_conditional_increment(register uint64_t *counter) {
+	return _atomic_conditional_increment_impl(counter);
+}
+
+uint64_t atomic_decrement(register uint64_t *pw) {
+	return _atomic_decrement_impl(pw);
+}
+
+uint64_t atomic_increment(register uint64_t *pw) {
+	return _atomic_increment_impl(pw);
+}
+
+uint64_t atomic_sub(register uint64_t *pw, register uint64_t val) {
+	return _atomic_sub_impl(pw, val);
+}
+
+uint64_t atomic_add(register uint64_t *pw, register uint64_t val) {
+	return _atomic_add_impl(pw, val);
+}
+
+uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val) {
+	return _atomic_exchange_if_greater_impl(pw, val);
+}

core/safe_refcount.h (+23 −277)

@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md)    */
+/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md)    */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -34,277 +34,25 @@
 /* x86/x86_64 GCC */
 
 #include "platform_config.h"
-
-#ifdef NO_THREADS
-
-struct SafeRefCount {
-
-	int count;
-
-public:
-	// destroy() is called when weak_count_ drops to zero.
-
-	bool ref() { //true on success
-
-		if (count == 0)
-			return false;
-		count++;
-
-		return true;
-	}
-
-	int refval() { //true on success
-
-		if (count == 0)
-			return 0;
-		count++;
-		return count;
-	}
-
-	bool unref() { // true if must be disposed of
-
-		if (count > 0)
-			count--;
-
-		return count == 0;
-	}
-
-	long get() const { // nothrow
-
-		return static_cast<int const volatile &>(count);
-	}
-
-	void init(int p_value = 1) {
-
-		count = p_value;
-	};
-};
-
-#else
-
-#if defined(PLATFORM_REFCOUNT)
-
-#include "platform_refcount.h"
-
-#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-
-#define REFCOUNT_T volatile int
-#define REFCOUNT_GET_T int const volatile &
-
-static inline int atomic_conditional_increment(volatile int *pw) {
-	// int rv = *pw;
-	// if( rv != 0 ) ++*pw;
-	// return rv;
-
-	int rv, tmp;
-
-	__asm__(
-			"movl %0, %%eax\n\t"
-			"0:\n\t"
-			"test %%eax, %%eax\n\t"
-			"je 1f\n\t"
-			"movl %%eax, %2\n\t"
-			"incl %2\n\t"
-			"lock\n\t"
-			"cmpxchgl %2, %0\n\t"
-			"jne 0b\n\t"
-			"1:"
-			: "=m"(*pw), "=&a"(rv), "=&r"(tmp)
-			: // outputs (%0, %1, %2)
-			"m"(*pw)
-			: // input (%3)
-			"cc" // clobbers
-	);
-
-	return rv;
-}
-
-static inline int atomic_decrement(volatile int *pw) {
-
-	// return --(*pw);
-
-	unsigned char rv;
-
-	__asm__(
-			"lock\n\t"
-			"decl %0\n\t"
-			"setne %1"
-			: "=m"(*pw), "=qm"(rv)
-			: "m"(*pw)
-			: "memory");
-	return static_cast<int>(rv);
-}
-
-	/* PowerPC32/64 GCC */
-
-#elif (defined(__GNUC__)) && (defined(__powerpc__) || defined(__ppc__))
-
-#define REFCOUNT_T int
-#define REFCOUNT_GET_T int const volatile &
-
-inline int atomic_conditional_increment(int *pw) {
-	// if( *pw != 0 ) ++*pw;
-	// return *pw;
-
-	int rv;
-
-	__asm__(
-			"0:\n\t"
-			"lwarx %1, 0, %2\n\t"
-			"cmpwi %1, 0\n\t"
-			"beq 1f\n\t"
-			"addi %1, %1, 1\n\t"
-			"1:\n\t"
-			"stwcx. %1, 0, %2\n\t"
-			"bne- 0b"
-			:
-
-			"=m"(*pw), "=&b"(rv)
-			: "r"(pw), "m"(*pw)
-			: "cc");
-
-	return rv;
-}
-
-inline int atomic_decrement(int *pw) {
-	// return --*pw;
-
-	int rv;
-
-	__asm__ __volatile__(
-			"sync\n\t"
-			"0:\n\t"
-			"lwarx %1, 0, %2\n\t"
-			"addi %1, %1, -1\n\t"
-			"stwcx. %1, 0, %2\n\t"
-			"bne- 0b\n\t"
-			"isync"
-			:
-
-			"=m"(*pw), "=&b"(rv)
-			: "r"(pw), "m"(*pw)
-			: "memory", "cc");
-
-	return rv;
-}
-
-	/* CW ARM */
-
-#elif defined(__GNUC__) && (defined(__arm__))
-
-#define REFCOUNT_T int
-#define REFCOUNT_GET_T int const volatile &
-
-inline int atomic_conditional_increment(volatile int *v) {
-	int t;
-	int tmp;
-
-	__asm__ __volatile__(
-			"1:  ldrex   %0, [%2]        \n"
-			"    cmp     %0, #0      \n"
-			"    beq     2f          \n"
-			"    add     %0, %0, #1      \n"
-			"2: \n"
-			"    strex   %1, %0, [%2]    \n"
-			"    cmp     %1, #0          \n"
-			"    bne     1b              \n"
-
-			: "=&r"(t), "=&r"(tmp)
-			: "r"(v)
-			: "cc", "memory");
-
-	return t;
-}
-
-inline int atomic_decrement(volatile int *v) {
-	int t;
-	int tmp;
-
-	__asm__ __volatile__(
-			"1:  ldrex   %0, [%2]        \n"
-			"    add     %0, %0, #-1      \n"
-			"    strex   %1, %0, [%2]    \n"
-			"    cmp     %1, #0          \n"
-			"    bne     1b              \n"
-
-			: "=&r"(t), "=&r"(tmp)
-			: "r"(v)
-			: "cc", "memory");
-
-	return t;
-}
-
-	/* CW PPC */
-
-#elif (defined(__MWERKS__)) && defined(__POWERPC__)
-
-inline long atomic_conditional_increment(register long *pw) {
-	register int a;
-
-	asm {
-	loop:
-
-	lwarx   a, 0, pw
-	cmpwi   a, 0
-	beq     store
-
-	addi    a, a, 1
-
-	store:
-
-	stwcx.  a, 0, pw
-	bne-    loop
-	}
-
-	return a;
-}
-
-inline long atomic_decrement(register long *pw) {
-	register int a;
-
-	asm {
-
-	sync
-
-	loop:
-
-	lwarx   a, 0, pw
-	addi    a, a, -1
-	stwcx.  a, 0, pw
-	bne-    loop
-
-	isync
-	}
-
-	return a;
-}
-
-	/* Any Windows (MSVC) */
-
-#elif defined(_MSC_VER)
-
-// made functions to not pollute namespace..
-
-#define REFCOUNT_T long
-#define REFCOUNT_GET_T long const volatile &
-
-long atomic_conditional_increment(register long *pw);
-long atomic_decrement(register long *pw);
-
-#if 0
-#elif defined(__GNUC__) && defined(ARMV6_ENABLED)
-
-#endif
-
-#else
-
-#error This platform cannot use safe refcount, compile with NO_THREADS or implement it.
-
-#endif
+#include "typedefs.h"
+
+uint32_t atomic_conditional_increment(register uint32_t *counter);
+uint32_t atomic_decrement(register uint32_t *pw);
+uint32_t atomic_increment(register uint32_t *pw);
+uint32_t atomic_sub(register uint32_t *pw, register uint32_t val);
+uint32_t atomic_add(register uint32_t *pw, register uint32_t val);
+uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val);
+
+uint64_t atomic_conditional_increment(register uint64_t *counter);
+uint64_t atomic_decrement(register uint64_t *pw);
+uint64_t atomic_increment(register uint64_t *pw);
+uint64_t atomic_sub(register uint64_t *pw, register uint64_t val);
+uint64_t atomic_add(register uint64_t *pw, register uint64_t val);
+uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val);
 
 struct SafeRefCount {
 
-	REFCOUNT_T count;
+	uint32_t count;
 
 public:
 	// destroy() is called when weak_count_ drops to zero.
@@ -314,7 +62,7 @@ public:
 		return atomic_conditional_increment(&count) != 0;
 	}
 
-	int refval() { //true on success
+	uint32_t refval() { //true on success
 
 		return atomic_conditional_increment(&count);
 	}
@@ -328,17 +76,15 @@ public:
 		return false;
 	}
 
-	long get() const { // nothrow
+	uint32_t get() const { // nothrow
 
-		return static_cast<REFCOUNT_GET_T>(count);
+		return count;
 	}
 
-	void init(int p_value = 1) {
+	void init(uint32_t p_value = 1) {
 
 		count = p_value;
-	};
+	}
 };
 
-#endif // no thread safe
-
 #endif
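
For context, a hypothetical usage sketch of the SafeRefCount API as it reads after this change. The `Shared` type and the `retain`/`release` helpers are assumptions made up for illustration; only `init()`, `ref()` and `unref()` come from the header above, and the include path assumes a build inside the engine tree.

```cpp
#include "core/safe_refcount.h"

struct Shared {
	SafeRefCount refcount;
	Shared() { refcount.init(); } // count starts at 1
};

void retain(Shared *p) {
	// ref() only fails (returns false) once the count has already dropped to zero.
	p->refcount.ref();
}

void release(Shared *p) {
	// unref() returns true when the last reference is gone and the object must be disposed of.
	if (p->refcount.unref())
		delete p;
}

int main() {
	Shared *s = new Shared; // count == 1
	retain(s);              // count == 2
	release(s);             // count == 1
	release(s);             // count == 0, s is deleted here
	return 0;
}
```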