Browse Source

dtoolbase: Add atomic wait and notify operations from C++20

Adds patomic_signed_lock_free, patomic_unsigned_lock_free, and patomic_flag with wait/notify methods modelled after C++20.  Implemented using futexes on Linux and WaitOnAddress on Windows 8 and later, falling back to a hash table of mutex + condition variable pairs where neither is supported.  (Currently the hash table has a fixed size of 64, which we could increase if necessary, but we really shouldn't have even a fraction of that number of simultaneously sleeping threads...)

Other atomic types are unaffected at the moment, in part because futexes are really restricted to 32-bit ints on Linux anyway
rdb 3 years ago
parent
commit
c3ce8164bc

+ 2 - 0
dtool/src/dtoolbase/CMakeLists.txt

@@ -46,6 +46,7 @@ set(P3DTOOLBASE_HEADERS
   typeRegistryNode.I typeRegistryNode.h
   typedObject.I typedObject.h
   pallocator.T pallocator.h
+  patomic.h patomic.I
   pdeque.h plist.h pmap.h pset.h
   pvector.h epvector.h
   lookup3.h
@@ -70,6 +71,7 @@ set(P3DTOOLBASE_SOURCES
   mutexWin32Impl.cxx
   mutexSpinlockImpl.cxx
   neverFreeMemory.cxx
+  patomic.cxx
   pdtoa.cxx
   pstrtod.cxx
   register_type.cxx

+ 1 - 0
dtool/src/dtoolbase/p3dtoolbase_composite2.cxx

@@ -1,3 +1,4 @@
+#include "patomic.cxx"
 #include "mutexPosixImpl.cxx"
 #include "mutexWin32Impl.cxx"
 #include "mutexSpinlockImpl.cxx"

+ 211 - 6
dtool/src/dtoolbase/patomic.I

@@ -11,6 +11,7 @@
  * @date 2022-01-28
  */
 
+#if defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
 /**
  * Value initializer.
  */
@@ -247,21 +248,225 @@ operator ^=(T arg) noexcept {
   return _value ^= arg;
 }
 
+#endif  // defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
+
+/**
+ * Default constructor.  Initializes the variable to zero (according to C++20
+ * semantics, NOT C++11 semantics!) by passing 0u to the patomic<uint32_t>
+ * base constructor.
+ */
+constexpr patomic_unsigned_lock_free::
+patomic_unsigned_lock_free() noexcept :
+  patomic<uint32_t>(0u) {
+}
+
+/**
+ * Initializes the variable to the given value by forwarding it to the
+ * patomic<uint32_t> base constructor.
+ */
+constexpr patomic_unsigned_lock_free::
+patomic_unsigned_lock_free(uint32_t desired) noexcept :
+  patomic<uint32_t>(desired) {
+}
+
+/**
+ * Blocks until the stored value is observed to differ from old.  The value is
+ * first loaded with the given memory order; only when it still equals old is
+ * patomic_wait() invoked on the address of this object.
+ */
+ALWAYS_INLINE void patomic_unsigned_lock_free::
+wait(uint32_t old, std::memory_order order) const noexcept {
+  // Fast path: the value has already changed, nothing to wait for.
+  if (load(order) != old) {
+    return;
+  }
+  const volatile uint32_t *word = reinterpret_cast<const volatile uint32_t *>(this);
+  patomic_wait(word, old);
+}
+
+/**
+ * Wakes up at least one thread that is blocked in wait() on this variable, by
+ * calling patomic_notify_one() on the address of this object.
+ */
+ALWAYS_INLINE void patomic_unsigned_lock_free::
+notify_one() noexcept {
+  volatile uint32_t *word = reinterpret_cast<volatile uint32_t *>(this);
+  patomic_notify_one(word);
+}
+
+/**
+ * Wakes up all threads that are blocked in wait() on this variable, by
+ * calling patomic_notify_all() on the address of this object.
+ */
+ALWAYS_INLINE void patomic_unsigned_lock_free::
+notify_all() noexcept {
+  volatile uint32_t *word = reinterpret_cast<volatile uint32_t *>(this);
+  patomic_notify_all(word);
+}
+
+/**
+ * Default constructor.  Initializes the variable to zero (according to C++20
+ * semantics, NOT C++11 semantics!) by passing 0 to the patomic<int32_t> base
+ * constructor.
+ */
+constexpr patomic_signed_lock_free::
+patomic_signed_lock_free() noexcept :
+  patomic<int32_t>(0) {
+}
+
+/**
+ * Initializes the variable to the given value by forwarding it to the
+ * patomic<int32_t> base constructor.
+ */
+constexpr patomic_signed_lock_free::
+patomic_signed_lock_free(int32_t desired) noexcept :
+  patomic<int32_t>(desired) {
+}
+
+/**
+ * Blocks until the stored value is observed to differ from old.  The value is
+ * first loaded with the given memory order; only when it still equals old is
+ * patomic_wait() invoked on the address of this object.
+ */
+ALWAYS_INLINE void patomic_signed_lock_free::
+wait(int32_t old, std::memory_order order) const noexcept {
+  // Fast path: the value has already changed, nothing to wait for.
+  if (load(order) != old) {
+    return;
+  }
+  const volatile int32_t *word = reinterpret_cast<const volatile int32_t *>(this);
+  patomic_wait(word, old);
+}
+
+/**
+ * Wakes up at least one thread that is blocked in wait() on this variable, by
+ * calling patomic_notify_one() on the address of this object.
+ */
+ALWAYS_INLINE void patomic_signed_lock_free::
+notify_one() noexcept {
+  volatile int32_t *word = reinterpret_cast<volatile int32_t *>(this);
+  patomic_notify_one(word);
+}
+
+/**
+ * Wakes up all threads that are blocked in wait() on this variable, by
+ * calling patomic_notify_all() on the address of this object.
+ */
+ALWAYS_INLINE void patomic_signed_lock_free::
+notify_all() noexcept {
+  volatile int32_t *word = reinterpret_cast<volatile int32_t *>(this);
+  patomic_notify_all(word);
+}
+
+/**
+ * Constructs the flag in the given state; the bool is converted to the value
+ * stored in the underlying _value member.  Allows assignment from
+ * ATOMIC_FLAG_INIT.
+ */
+constexpr patomic_flag::
+patomic_flag(bool desired) noexcept : _value(desired) {
+}
+
+/**
+ * Sets the flag to false by storing 0 with the given memory order.  This only
+ * performs the store; it does not call notify_one() or notify_all().
+ */
+ALWAYS_INLINE void patomic_flag::
+clear(std::memory_order order) noexcept {
+  _value.store(0u, order);
+}
 
 /**
  * Sets the flag to true and returns the previous value.
  */
 ALWAYS_INLINE bool patomic_flag::
 test_and_set(std::memory_order order) noexcept {
-  bool value = __internal_flag;
-  __internal_flag = true;
-  return value;
+  // exchange() stores 1 and hands back the prior word; a nonzero prior word
+  // means the flag was already set.
+  return (bool)_value.exchange(1u, order);
 }
 
 /**
- * Sets the flag to false.
+ * Reads the flag with the given memory order without modifying it.  Returns
+ * true if the flag is currently set.
+ */
+ALWAYS_INLINE bool patomic_flag::
+test(std::memory_order order) const noexcept {
+  const uint32_t word = _value.load(order);
+  return word != 0u;
+}
+
+/**
+ * Blocks until the flag is observed to differ from old, by waiting on the
+ * underlying _value with old converted to its 0/1 representation.
  */
 ALWAYS_INLINE void patomic_flag::
-clear(std::memory_order order) noexcept {
-  __internal_flag = false;
+wait(bool old, std::memory_order order) const noexcept {
+  const uint32_t expected = old ? 1u : 0u;
+  _value.wait(expected, order);
+}
+
+/**
+ * Wakes up at least one thread waiting for the flag to change, by forwarding
+ * to notify_one() on the underlying _value.
+ */
+ALWAYS_INLINE void patomic_flag::
+notify_one() noexcept {
+  _value.notify_one();
+}
+
+/**
+ * Wakes up all threads waiting for the flag to change, by forwarding to
+ * notify_all() on the underlying _value.
+ */
+ALWAYS_INLINE void patomic_flag::
+notify_all() noexcept {
+  _value.notify_all();
+}
+
+/**
+ * Signed overload of patomic_wait().  Reinterprets the address and the
+ * comparison value as unsigned 32-bit and forwards to the uint32_t overload.
+ */
+ALWAYS_INLINE void
+patomic_wait(const volatile int32_t *value, int32_t old) {
+  const volatile uint32_t *word = reinterpret_cast<const volatile uint32_t *>(value);
+  patomic_wait(word, static_cast<uint32_t>(old));
+}
+
+/**
+ * Signed overload of patomic_notify_one().  Reinterprets the address as a
+ * pointer to unsigned 32-bit and forwards to the uint32_t overload.
+ */
+ALWAYS_INLINE void
+patomic_notify_one(volatile int32_t *value) {
+  volatile uint32_t *word = reinterpret_cast<volatile uint32_t *>(value);
+  patomic_notify_one(word);
+}
+
+/**
+ * Signed overload of patomic_notify_all().  Reinterprets the address as a
+ * pointer to unsigned 32-bit and forwards to the uint32_t overload.
+ */
+ALWAYS_INLINE void
+patomic_notify_all(volatile int32_t *value) {
+  volatile uint32_t *word = reinterpret_cast<volatile uint32_t *>(value);
+  patomic_notify_all(word);
+}
+
+/**
+ * Blocks the calling thread for as long as the 32-bit word at the given
+ * address equals old.  Returns once a different value has been observed.
+ *
+ * On Linux this sleeps on a private futex; on Windows it calls through the
+ * _patomic_wait_func pointer; on other platforms with POSIX threads it
+ * defers to _patomic_wait(); otherwise it busy-waits on the value.
+ */
+ALWAYS_INLINE void
+patomic_wait(const volatile uint32_t *value, uint32_t old) {
+#ifdef __linux__
+  // FUTEX_WAIT takes the address of the futex word as its first argument and
+  // the expected value as its third.  The kernel only puts us to sleep if the
+  // word still equals old; the loop handles spurious returns.
+  while (__atomic_load_n(value, __ATOMIC_SEQ_CST) == old) {
+    syscall(SYS_futex, value, FUTEX_WAIT_PRIVATE, old, 0, 0, 0);
+  }
+//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
+//  while (*value == old) {
+//    WaitOnAddress((volatile void *)value, &old, sizeof(uint32_t), INFINITE);
+//  }
+#elif defined(_WIN32)
+  while (*value == old) {
+    _patomic_wait_func((volatile void *)value, &old, sizeof(uint32_t), INFINITE);
+  }
+#elif defined(HAVE_POSIX_THREADS)
+  _patomic_wait(value, old);
+#else
+  // No sleeping primitive available: spin until the value changes.
+  while (*value == old);
+#endif
+}
+
+/**
+ * Wakes a thread that is blocked in patomic_wait() on the given address.  On
+ * Linux this issues a FUTEX_WAKE_PRIVATE for one waiter; on Windows it calls
+ * through the _patomic_wake_one_func pointer; on other platforms with POSIX
+ * threads it calls _patomic_notify_all().  If none of these apply, it does
+ * nothing.
+ */
+ALWAYS_INLINE void
+patomic_notify_one(volatile uint32_t *value) {
+#ifdef __linux__
+  syscall(SYS_futex, value, FUTEX_WAKE_PRIVATE, 1, 0, 0, 0);
+//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
+//  WakeByAddressSingle((void *)value);
+#elif defined(_WIN32)
+  _patomic_wake_one_func((void *)value);
+#elif defined(HAVE_POSIX_THREADS)
+  // Only a notify-all entry point is declared for this fallback, so it is
+  // used for the single-waiter case as well.
+  _patomic_notify_all(value);
+#endif
+}
+
+/**
+ * Wakes all threads that are blocked in patomic_wait() on the given address.
+ * On Linux this issues a FUTEX_WAKE_PRIVATE for up to INT_MAX waiters; on
+ * Windows it calls through the _patomic_wake_all_func pointer; on other
+ * platforms with POSIX threads it calls _patomic_notify_all().  If none of
+ * these apply, it does nothing.
+ */
+ALWAYS_INLINE void
+patomic_notify_all(volatile uint32_t *value) {
+#ifdef __linux__
+  syscall(SYS_futex, value, FUTEX_WAKE_PRIVATE, INT_MAX, 0, 0, 0);
+//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
+//  WakeByAddressAll((void *)value);
+#elif defined(_WIN32)
+  _patomic_wake_all_func((void *)value);
+#elif defined(HAVE_POSIX_THREADS)
+  _patomic_notify_all(value);
+#endif
 }

+ 168 - 0
dtool/src/dtoolbase/patomic.cxx

@@ -0,0 +1,168 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file patomic.cxx
+ * @author rdb
+ * @date 2022-02-23
+ */
+
+#include "patomic.h"
+
+#include <functional>
+
+static_assert(sizeof(patomic_unsigned_lock_free) == sizeof(uint32_t),
+              "expected atomic uint32_t to have same size as uint32_t");
+static_assert(sizeof(patomic_signed_lock_free) == sizeof(int32_t),
+              "expected atomic int32_t to have same size as int32_t");
+static_assert(sizeof(uint32_t) == sizeof(int32_t),
+              "expected int32_t to have same size as uint32_t");
+
+#if !defined(CPPPARSER) && defined(_WIN32)
+
+// On Windows 7, we try to load the Windows 8 functions dynamically, and
+// fall back to a condition variable table if they aren't available.
+static BOOL initialize_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout);
+// No-op placeholder for the wake slots until initialize_wait() has run.
+static void dummy_wake(PVOID addr) {}
+
+// The wait slot starts out pointing at initialize_wait(), which replaces all
+// three pointers on its first call.  The wake slots start out as no-ops.
+// NOTE(review): presumably a wake before the first wait is harmless because
+// no thread can be sleeping yet -- confirm.
+BOOL (*_patomic_wait_func)(volatile VOID *, PVOID, SIZE_T, DWORD) = &initialize_wait;
+void (*_patomic_wake_one_func)(PVOID) = &dummy_wake;
+void (*_patomic_wake_all_func)(PVOID) = &dummy_wake;
+
+// Table of lock + condition variable pairs used by the emulated wait/wake
+// functions.  An entry is selected by masking the hash of the address.
+// It's possible to get hash collision, but that's not so bad, it just means
+// that the other thread will get a spurious wakeup.
+// NOTE(review): alignas(64) presumably keeps each entry on its own cache
+// line -- confirm.
+struct alignas(64) WaitTableEntry {
+  SRWLOCK _lock = SRWLOCK_INIT;
+  CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT;
+  // Number of threads currently inside emulated_wait() for this entry;
+  // modified only while _lock is held.
+  DWORD _waiters = 0;
+};
+static WaitTableEntry _wait_table[64] = {};
+// Must equal the table size minus one (the table size is a power of two).
+static const size_t _wait_hash_mask = 63;
+
+/**
+ * Emulates WakeByAddressSingle and WakeByAddressAll for Windows Vista and 7;
+ * initialize_wait() installs this function in both wake slots.  Wakes every
+ * thread sleeping on the wait table entry that the address hashes to, if
+ * that entry has any registered waiters.
+ */
+static void
+emulated_wake(PVOID addr) {
+  // Use the same mask as emulated_wait() so both sides always agree on the
+  // entry for a given address.
+  size_t i = std::hash<volatile void *>{}(addr) & _wait_hash_mask;
+  WaitTableEntry &entry = _wait_table[i];
+  AcquireSRWLockExclusive(&entry._lock);
+  DWORD num_waiters = entry._waiters;
+  ReleaseSRWLockExclusive(&entry._lock);
+  if (num_waiters > 0) {
+    // We have to wake up all the threads, even if only one of them is for this
+    // address.  Some of them will get a spurious wakeup, but that's OK.
+    WakeAllConditionVariable(&entry._cvar);
+  }
+}
+
+/**
+ * Emulates WaitOnAddress for Windows Vista and 7.  Only supports aligned
+ * 32-bit values.
+ *
+ * Sleeps on the wait table entry that addr hashes to until the value at addr
+ * no longer equals the value pointed to by cmp.  Returns TRUE once the value
+ * differs, or FALSE if the sleep failed, which includes the timeout elapsing.
+ */
+static BOOL
+emulated_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout) {
+  assert(size == sizeof(LONG));
+
+  LONG cmpval = *(LONG *)cmp;
+  if (*(volatile LONG *)addr != cmpval) {
+    return TRUE;
+  }
+
+  size_t i = std::hash<volatile void *>{}(addr) & _wait_hash_mask;
+  WaitTableEntry &entry = _wait_table[i];
+  AcquireSRWLockExclusive(&entry._lock);
+  ++entry._waiters;
+  while (*(volatile LONG *)addr == cmpval) {
+    // SleepConditionVariableSRW returns nonzero when woken and zero on
+    // failure (a timeout is reported as a failure).  After a successful
+    // wakeup, loop around to re-check the value, since the wakeup may have
+    // been meant for another address sharing this entry.
+    if (SleepConditionVariableSRW(&entry._cvar, &entry._lock, timeout, 0) == 0) {
+      // Timeout.
+      --entry._waiters;
+      ReleaseSRWLockExclusive(&entry._lock);
+      return FALSE;
+    }
+  }
+  --entry._waiters;
+  ReleaseSRWLockExclusive(&entry._lock);
+  return TRUE;
+}
+
+/**
+ * Initially assigned to the wait function slot to initialize the function
+ * pointers.
+ *
+ * Looks up WaitOnAddress, WakeByAddressSingle and WakeByAddressAll in
+ * api-ms-win-core-synch-l1-2-0.dll.  If all three are found they are
+ * installed in the function pointer slots; otherwise the emulated versions
+ * are installed.  Either way, the wait is then performed with the chosen
+ * wait function and its result is returned.
+ */
+static BOOL
+initialize_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout) {
+  // There's a chance of a race here, with two threads trying to initialize the
+  // functions at the same time.  That's OK, because they should all produce
+  // the same results, and the stores to the function pointers are atomic.
+  // NOTE(review): GetModuleHandleW only finds modules already loaded into the
+  // process -- confirm this DLL is always mapped when the functions exist.
+  HMODULE lib = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
+  if (lib) {
+    auto wait_func = (decltype(_patomic_wait_func))GetProcAddress(lib, "WaitOnAddress");
+    auto wake_one_func = (decltype(_patomic_wake_one_func))GetProcAddress(lib, "WakeByAddressSingle");
+    auto wake_all_func = (decltype(_patomic_wake_all_func))GetProcAddress(lib, "WakeByAddressAll");
+    if (wait_func && wake_one_func && wake_all_func) {
+      // Make sure that the wake function is guaranteed to be visible to other
+      // threads by the time we assign the wait function.
+      _patomic_wake_one_func = wake_one_func;
+      _patomic_wake_all_func = wake_all_func;
+      patomic_thread_fence(std::memory_order_release);
+      _patomic_wait_func = wait_func;
+      return wait_func(addr, cmp, size, timeout);
+    }
+  }
+
+  // We don't have Windows 8's functions, use the emulated wait and wake funcs.
+  // The same emulated wake function serves both the wake-one and wake-all
+  // slots.
+  _patomic_wake_one_func = &emulated_wake;
+  _patomic_wake_all_func = &emulated_wake;
+  patomic_thread_fence(std::memory_order_release);
+  _patomic_wait_func = &emulated_wait;
+
+  return emulated_wait(addr, cmp, size, timeout);
+}
+
+#elif !defined(CPPPARSER) && !defined(__linux__) && defined(HAVE_POSIX_THREADS)
+
+// Same as above, but using pthreads.  An entry is selected by masking the
+// hash of the waited-on address, so unrelated addresses may share an entry.
+struct alignas(64) WaitTableEntry {
+  pthread_mutex_t _lock = PTHREAD_MUTEX_INITIALIZER;
+  pthread_cond_t _cvar = PTHREAD_COND_INITIALIZER;
+  // Number of threads currently inside _patomic_wait() for this entry;
+  // modified only while _lock is held.
+  unsigned int _waiters = 0;
+};
+static WaitTableEntry _wait_table[64];
+// Must equal the table size minus one (the table size is a power of two).
+static const size_t _wait_hash_mask = 63;
+
+/**
+ * Condition-variable based wait.  Sleeps on the wait table entry that the
+ * address hashes to until the 32-bit word at that address no longer equals
+ * old.  The entry's waiter count is raised for the duration of the call.
+ */
+void
+_patomic_wait(const volatile uint32_t *value, uint32_t old) {
+  const size_t slot = std::hash<const volatile void *>{}(value) & _wait_hash_mask;
+  WaitTableEntry &bucket = _wait_table[slot];
+
+  pthread_mutex_lock(&bucket._lock);
+  ++bucket._waiters;
+  for (;;) {
+    // Re-check after every wakeup; it may have been for another address that
+    // shares this entry.
+    if (__atomic_load_n(value, __ATOMIC_SEQ_CST) != old) {
+      break;
+    }
+    pthread_cond_wait(&bucket._cvar, &bucket._lock);
+  }
+  --bucket._waiters;
+  pthread_mutex_unlock(&bucket._lock);
+}
+
+/**
+ * Condition-variable based wake.  Broadcasts on the wait table entry that the
+ * address hashes to if that entry has any registered waiters.  The waiter
+ * count is read under the entry's lock; the broadcast happens after the lock
+ * has been released.
+ */
+void
+_patomic_notify_all(volatile uint32_t *value) {
+  const size_t slot = std::hash<const volatile void *>{}(value) & _wait_hash_mask;
+  WaitTableEntry &bucket = _wait_table[slot];
+
+  pthread_mutex_lock(&bucket._lock);
+  const bool has_waiters = bucket._waiters > 0;
+  pthread_mutex_unlock(&bucket._lock);
+
+  if (!has_waiters) {
+    return;
+  }
+  pthread_cond_broadcast(&bucket._cvar);
+}
+
+#endif

+ 81 - 18
dtool/src/dtoolbase/patomic.h

@@ -19,6 +19,19 @@
 
 #include <atomic>
 
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+#include <windows.h>
+#endif
+
+#ifdef __linux__
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
 #if defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
 
 /**
@@ -73,36 +86,86 @@ private:
   T _value;
 };
 
+#define patomic_thread_fence(order) (std::atomic_signal_fence((order)))
+
+#else
+
+// We're using real threading, so use the real implementation.
+template<class T>
+using patomic = std::atomic<T>;
+
+#define patomic_thread_fence(order) (std::atomic_thread_fence((order)))
+
+#endif
+
+/**
+ * Implementation of atomic_unsigned_lock_free with C++20 semantics.
+ *
+ * Extends patomic<uint32_t> with wait() and notify_one()/notify_all().
+ */
+class EXPCL_DTOOL_DTOOLBASE patomic_unsigned_lock_free : public patomic<uint32_t> {
+public:
+  // The default constructor initializes the value to zero.
+  constexpr patomic_unsigned_lock_free() noexcept;
+  constexpr patomic_unsigned_lock_free(uint32_t desired) noexcept;
+
+  // Blocks while the value equals old.
+  INLINE void wait(uint32_t old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
+  // Wake one / all threads blocked in wait().
+  ALWAYS_INLINE void notify_one() noexcept;
+  ALWAYS_INLINE void notify_all() noexcept;
+};
+
 /**
- * Dummy implementation of std::atomic_flag that does not do any atomic
- * operations.
+ * Implementation of atomic_signed_lock_free with C++20 semantics.
+ *
+ * Extends patomic<int32_t> with wait() and notify_one()/notify_all().
  */
-struct EXPCL_DTOOL_DTOOLBASE patomic_flag {
+class EXPCL_DTOOL_DTOOLBASE patomic_signed_lock_free : public patomic<int32_t> {
+public:
+  // The default constructor initializes the value to zero.
+  constexpr patomic_signed_lock_free() noexcept;
+  constexpr patomic_signed_lock_free(int32_t desired) noexcept;
+
+  // Blocks while the value equals old.
+  INLINE void wait(int32_t old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
+  // Wake one / all threads blocked in wait().
+  ALWAYS_INLINE void notify_one() noexcept;
+  ALWAYS_INLINE void notify_all() noexcept;
+};
+
+/**
+ * Implementation of atomic_flag with C++20 semantics.
+ *
+ * The flag is stored in a patomic_unsigned_lock_free, which provides the
+ * wait() and notify_one()/notify_all() operations.
+ */
+class EXPCL_DTOOL_DTOOLBASE patomic_flag {
+public:
   constexpr patomic_flag() noexcept = default;
+  constexpr patomic_flag(bool desired) noexcept;
 
   patomic_flag(const patomic_flag &) = delete;
   patomic_flag &operator=(const patomic_flag &) = delete;
 
-  ALWAYS_INLINE bool test_and_set(std::memory_order order = std::memory_order_seq_cst) noexcept;
   ALWAYS_INLINE void clear(std::memory_order order = std::memory_order_seq_cst) noexcept;
+  ALWAYS_INLINE bool test_and_set(std::memory_order order = std::memory_order_seq_cst) noexcept;
+  ALWAYS_INLINE bool test(std::memory_order order = std::memory_order_seq_cst) const noexcept;
 
-  bool __internal_flag = false;
+  // Blocks while the flag equals old.
+  ALWAYS_INLINE void wait(bool old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
+  // Wake one / all threads blocked in wait().
+  ALWAYS_INLINE void notify_one() noexcept;
+  ALWAYS_INLINE void notify_all() noexcept;
+
+private:
+  // Initialized to 0, so a default-constructed flag starts out clear.
+  patomic_unsigned_lock_free _value { 0u };
 };
 
-#define patomic_thread_fence(order) (std::atomic_signal_fence((order)))
+#ifndef CPPPARSER
+ALWAYS_INLINE void patomic_wait(const volatile int32_t *value, int32_t old);
+ALWAYS_INLINE void patomic_notify_one(volatile int32_t *value);
+ALWAYS_INLINE void patomic_notify_all(volatile int32_t *value);
+
+ALWAYS_INLINE void patomic_wait(const volatile uint32_t *value, uint32_t old);
+ALWAYS_INLINE void patomic_notify_one(volatile uint32_t *value);
+ALWAYS_INLINE void patomic_notify_all(volatile uint32_t *value);
+
+#ifdef _WIN32
+EXPCL_DTOOL_DTOOLBASE extern BOOL (*_patomic_wait_func)(volatile VOID *, PVOID, SIZE_T, DWORD);
+EXPCL_DTOOL_DTOOLBASE extern void (*_patomic_wake_one_func)(PVOID);
+EXPCL_DTOOL_DTOOLBASE extern void (*_patomic_wake_all_func)(PVOID);
+#elif !defined(__linux__) && defined(HAVE_POSIX_THREADS)
+EXPCL_DTOOL_DTOOLBASE void _patomic_wait(const volatile uint32_t *value, uint32_t old);
+EXPCL_DTOOL_DTOOLBASE void _patomic_notify_all(volatile uint32_t *value);
+#endif
 
 #include "patomic.I"
-
-#else
-
-// We're using real threading, so use the real implementation.
-template<class T>
-using patomic = std::atomic<T>;
-
-typedef std::atomic_flag patomic_flag;
-
-#define patomic_thread_fence(order) (std::atomic_thread_fence((order)))
-
-#endif
+#endif  // CPPPARSER
 
 #endif