Merge pull request #85167 from RandomShaper/good_spinlock

Enhance SpinLock
Thaddeus Crews 10 months ago
commit 01ad56da38
3 changed files with 61 additions and 5 deletions
  1. SConstruct (+1 -0)
  2. core/os/spin_lock.h (+42 -5)
  3. core/os/thread.h (+18 -0)

+ 1 - 0
SConstruct

@@ -851,6 +851,7 @@ if env.msvc and not methods.using_clang(env):  # MSVC
                 "/wd4245",
                 "/wd4267",
                 "/wd4305",  # C4305 (truncation): double to float or real_t, too hard to avoid.
+                "/wd4324",  # C4324 (structure was padded due to alignment specifier)
                 "/wd4514",  # C4514 (unreferenced inline function has been removed)
                 "/wd4714",  # C4714 (function marked as __forceinline not inlined)
                 "/wd4820",  # C4820 (padding added after construct)

+ 42 - 5
core/os/spin_lock.h

@@ -33,6 +33,10 @@
 
 #include "core/typedefs.h"
 
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
 #if defined(__APPLE__)
 
 #include <os/lock.h>
@@ -52,19 +56,52 @@ public:
 
 #else
 
+#include "core/os/thread.h"
+
 #include <atomic>
 
-class SpinLock {
-	mutable std::atomic_flag locked = ATOMIC_FLAG_INIT;
+_ALWAYS_INLINE_ static void _cpu_pause() {
+#if defined(_MSC_VER)
+// ----- MSVC.
+#if defined(_M_ARM) || defined(_M_ARM64) // ARM.
+	__yield();
+#elif defined(_M_IX86) || defined(_M_X64) // x86.
+	_mm_pause();
+#endif
+#elif defined(__GNUC__) || defined(__clang__)
+// ----- GCC/Clang.
+#if defined(__i386__) || defined(__x86_64__) // x86.
+	__builtin_ia32_pause();
+#elif defined(__arm__) || defined(__aarch64__) // ARM.
+	asm volatile("yield");
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) // PowerPC.
+	asm volatile("or 27,27,27");
+#elif defined(__riscv) // RISC-V.
+	asm volatile(".insn i 0x0F, 0, x0, x0, 0x010");
+#endif
+#endif
+}
+
+static_assert(std::atomic_bool::is_always_lock_free);
+
+class alignas(Thread::CACHE_LINE_BYTES) SpinLock {
+	mutable std::atomic<bool> locked = ATOMIC_VAR_INIT(false);
 
 public:
 	_ALWAYS_INLINE_ void lock() const {
-		while (locked.test_and_set(std::memory_order_acquire)) {
-			// Continue.
+		while (true) {
+			bool expected = false;
+			if (locked.compare_exchange_weak(expected, true, std::memory_order_acquire, std::memory_order_relaxed)) {
+				break;
+			}
+			do {
+				_cpu_pause();
+			} while (locked.load(std::memory_order_relaxed));
 		}
 	}
+
 	_ALWAYS_INLINE_ void unlock() const {
-		locked.clear(std::memory_order_release);
+		locked.store(false, std::memory_order_release);
 	}
 };
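
A brief usage sketch (hypothetical names, not part of the diff): the reworked lock() above is a test-and-test-and-set loop. It attempts one compare_exchange_weak and, while contended, spins on a cheap relaxed load interleaved with _cpu_pause() until the flag looks free again, instead of retrying the read-modify-write on every iteration as the old test_and_set loop did.

#include "core/os/spin_lock.h"

#include <cstdint>

static SpinLock counter_lock; // Invented example state.
static uint64_t counter = 0;

void increment_counter() {
	counter_lock.lock(); // One CAS attempt, then relaxed reads + _cpu_pause() while contended.
	counter++; // Critical section.
	counter_lock.unlock(); // Single release store.
}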
 

+ 18 - 0
core/os/thread.h

@@ -42,6 +42,8 @@
 #include "core/templates/safe_refcount.h"
 #include "core/typedefs.h"
 
+#include <new>
+
 #ifdef MINGW_ENABLED
 #define MINGW_STDTHREAD_REDUNDANCY_WARNING
 #include "thirdparty/mingw-std-threads/mingw.thread.h"
@@ -85,6 +87,20 @@ public:
 		void (*term)() = nullptr;
 	};
 
+#if defined(__cpp_lib_hardware_interference_size) && !defined(ANDROID_ENABLED) // This would be OK with NDK >= 26.
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winterference-size"
+#endif
+	static constexpr size_t CACHE_LINE_BYTES = std::hardware_destructive_interference_size;
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+#else
+	// At a negligible memory cost, we use a conservatively high value.
+	static constexpr size_t CACHE_LINE_BYTES = 128;
+#endif
+
 private:
 	friend class Main;
 
@@ -135,6 +151,8 @@ public:
 
 	typedef uint64_t ID;
 
+	static constexpr size_t CACHE_LINE_BYTES = sizeof(void *);
+
 	enum : ID {
 		UNASSIGNED_ID = 0,
 		MAIN_ID = 1
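
To illustrate the intent of CACHE_LINE_BYTES (a hedged sketch, all names invented): aligning per-thread data to the destructive-interference size gives each slot its own cache line, so concurrent writers stop invalidating each other's lines (false sharing). This is the same reason SpinLock is now declared alignas(Thread::CACHE_LINE_BYTES) above.

#include "core/os/thread.h"

#include <cstdint>

struct alignas(Thread::CACHE_LINE_BYTES) PerThreadSlot { // Hypothetical type.
	uint64_t hits = 0; // Sole occupant of its cache line.
};

static PerThreadSlot slots[4]; // One independently updated slot per worker thread.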