Merge pull request #3985 from flysand7/docs-sync

[sync]: Document all procedures
gingerBill 1 year ago
parent
commit
ac19bb3a8c
4 changed files with 1273 additions and 181 deletions
  1. core/sync/atomic.odin (+441, -33)
  2. core/sync/doc.odin (+21, -0)
  3. core/sync/extended.odin (+391, -92)
  4. core/sync/primitives.odin (+420, -56)

core/sync/atomic.odin (+441, -33)

@@ -2,44 +2,452 @@ package sync
 
 import "base:intrinsics"
 
+/*
+This procedure may lower CPU consumption or yield to a hyperthreaded twin
+processor. Its exact behavior is architecture-specific, but the intent is to
+signal that the current thread is busy-waiting and not doing much useful work
+on the CPU.
+*/
 cpu_relax :: intrinsics.cpu_relax
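
As a minimal illustrative sketch of the intended use (the helper name and the flag are hypothetical, not part of this package), a busy-wait loop would call `cpu_relax` on every iteration:

	// Busy-wait until another thread sets the flag, hinting the CPU each spin.
	wait_for_flag :: proc "contextless" (flag: ^bool) {
		for !atomic_load_explicit(flag, .Acquire) {
			cpu_relax()
		}
	}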
 
 /*
-Atomic_Memory_Order :: enum {
-	Relaxed = 0, // Unordered
-	Consume = 1, // Monotonic
-	Acquire = 2,
-	Release = 3,
-	Acq_Rel = 4,
-	Seq_Cst = 5,
-}
+Describes memory ordering for an atomic operation.
+
+Modern CPUs contain multiple cores with caches specific to those cores. When a
+core performs a write to memory, the value is written to its cache first. The
+issue is that a core doesn't typically see what's inside the caches of other
+cores. To make operations consistent, CPUs implement mechanisms that
+synchronize memory operations across cores, either by asking other cores or by
+pushing data about writes to other cores.
+
+Due to how these mechanisms are implemented, the stores and loads performed by
+one core may appear to happen in a different order to another core. It may
+also happen that a core reorders its own stores and loads (independently of
+how the compiler ordered them in the machine code). This can cause issues when
+trying to synchronize multiple memory locations between two cores, which is
+why CPUs allow for stronger memory ordering guarantees when certain
+instructions or instruction variants are used.
+
+In Odin there are six different memory ordering guarantees that can be provided
+to an atomic operation:
+
+- `Relaxed`: The memory access (load or store) is unordered with respect to
+  other memory accesses. This can be used to implement an atomic counter:
+  multiple threads increment a single variable, and it doesn't matter exactly
+  when each increment becomes visible, because the value is eventually
+  consistent.
+- `Consume`: No loads or stores dependent on the loaded value can be reordered
+  before a load with consume memory ordering. If other threads released the
+  same memory location, those writes become visible.
+- `Acquire`: No loads or stores on a memory location can be reordered before a
+  load of that memory location with acquire memory ordering. If other threads
+  release the same memory, it becomes visible.
+- `Release`: No loads or stores on a memory location can be reordered after a
+  store of that memory location with release memory ordering. All threads that
+  acquire the same memory location will see all writes done by the current
+  thread.
+- `Acq_Rel`: Acquire-release memory ordering: combines acquire and release
+  memory orderings in the same operation.
+- `Seq_Cst`: Sequential consistency. The strongest memory ordering. A load will
+  always be an acquire operation, a store will always be a release operation,
+  and in addition to that all threads observe the same order of writes.
+
+Non-explicit atomics will always be sequentially consistent.
+
+	Atomic_Memory_Order :: enum {
+		Relaxed = 0, // Unordered
+		Consume = 1, // Monotonic
+		Acquire = 2,
+		Release = 3,
+		Acq_Rel = 4,
+		Seq_Cst = 5,
+	}
+
+**Note(i386, x64)**: x86 has a very strong memory model by default. All stores
+are seen in a consistent order by all cores, and the only reordering allowed
+is that a store may be ordered after a later load. In a sense, all operations
+are at least acquire and release operations. If the `lock` prefix is used,
+operations are also sequentially consistent. If you use explicit atomics, make
+sure you specify the correct atomic memory order, because bugs will likely not
+show up on x86, but may show up on weaker architectures such as ARM. More on
+x86 memory ordering can be found
+[[here; https://www.cs.cmu.edu/~410-f10/doc/Intel_Reordering_318147.pdf]]
 */
 Atomic_Memory_Order :: intrinsics.Atomic_Memory_Order
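
As a sketch of the acquire/release pairing described above (the identifiers are illustrative, not part of this package):

	// `data` is published by the producer and safely observed by the consumer.
	data:  int
	ready: bool

	producer :: proc "contextless" () {
		data = 42                                     // plain write
		atomic_store_explicit(&ready, true, .Release) // publish: no prior write may move below this
	}

	consumer :: proc "contextless" () {
		for !atomic_load_explicit(&ready, .Acquire) { // no later read may move above this
			cpu_relax()
		}
		// here `data == 42` is guaranteed to be visible
	}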
 
+/*
+Establish memory ordering.
+
+This procedure establishes memory ordering, without an associated atomic
+operation.
+*/
+atomic_thread_fence :: intrinsics.atomic_thread_fence
+
+/*
+Establish memory ordering between the current thread and a signal handler.
+
+This procedure establishes memory ordering between a thread and a signal
+handler running on the same thread, without an associated atomic operation.
+It is equivalent to `atomic_thread_fence`, except that it doesn't issue any
+CPU instructions for memory ordering.
+*/
+atomic_signal_fence :: intrinsics.atomic_signal_fence
+
+/*
+Atomically store a value into memory.
+
+This procedure stores a value to a memory location in such a way that no other
+thread can observe a partially-written value. This operation is
+sequentially-consistent.
+*/
+atomic_store :: intrinsics.atomic_store
+
+/*
+Atomically store a value into memory with explicit memory ordering.
+
+This procedure stores a value to a memory location in such a way that no other
+thread can observe a partially-written value. The memory ordering of this
+operation is as specified by the `order` parameter.
+*/
+atomic_store_explicit :: intrinsics.atomic_store_explicit
+
+/*
+Atomically load a value from memory.
+
+This procedure loads a value from a memory location in such a way that the
+received value is never a partially-written value. The memory ordering of this
+operation is sequentially-consistent.
+*/
+atomic_load :: intrinsics.atomic_load
+
+/*
+Atomically load a value from memory with explicit memory ordering.
+
+This procedure loads a value from a memory location in such a way that the
+received value is never a partially-written value. The memory ordering of this
+operation is as specified by the `order` parameter.
+*/
+atomic_load_explicit :: intrinsics.atomic_load_explicit
+
+/*
+Atomically add a value to the value stored in memory.
+
+This procedure loads a value from memory, adds the specified value to it, and
+stores it back as an atomic operation. This operation is an atomic equivalent
+of the following:
+
+	dst^ += val
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_add :: intrinsics.atomic_add
+
+/*
+Atomically add a value to the value stored in memory.
+
+This procedure loads a value from memory, adds the specified value to it, and
+stores it back as an atomic operation. This operation is an atomic equivalent
+of the following:
+
+	dst^ += val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_add_explicit :: intrinsics.atomic_add_explicit
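
A sketch of the relaxed counter mentioned under `Relaxed` ordering (names are hypothetical):

	hits: int

	// Called concurrently from many threads; only atomicity is needed here,
	// not ordering, so `.Relaxed` suffices.
	record_hit :: proc "contextless" () {
		atomic_add_explicit(&hits, 1, .Relaxed)
	}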
+
+/*
+Atomically subtract a value from the value stored in memory.
+
+This procedure loads a value from memory, subtracts the specified value from it,
+and stores the result back as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ -= val
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_sub :: intrinsics.atomic_sub
+
+/*
+Atomically subtract a value from the value stored in memory.
+
+This procedure loads a value from memory, subtracts the specified value from it,
+and stores the result back as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ -= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_sub_explicit :: intrinsics.atomic_sub_explicit
+
+/*
+Atomically replace the memory location with the result of AND operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of AND operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ &= val
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_and :: intrinsics.atomic_and
+
+/*
+Atomically replace the memory location with the result of AND operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of AND operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ &= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_and_explicit :: intrinsics.atomic_and_explicit
+
+/*
+Atomically replace the memory location with the result of NAND operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of NAND operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ = ~(dst^ & val)
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_nand :: intrinsics.atomic_nand
 
-atomic_thread_fence                     :: intrinsics.atomic_thread_fence
-atomic_signal_fence                     :: intrinsics.atomic_signal_fence
-atomic_store                            :: intrinsics.atomic_store
-atomic_store_explicit                   :: intrinsics.atomic_store_explicit
-atomic_load                             :: intrinsics.atomic_load
-atomic_load_explicit                    :: intrinsics.atomic_load_explicit
-atomic_add                              :: intrinsics.atomic_add
-atomic_add_explicit                     :: intrinsics.atomic_add_explicit
-atomic_sub                              :: intrinsics.atomic_sub
-atomic_sub_explicit                     :: intrinsics.atomic_sub_explicit
-atomic_and                              :: intrinsics.atomic_and
-atomic_and_explicit                     :: intrinsics.atomic_and_explicit
-atomic_nand                             :: intrinsics.atomic_nand
-atomic_nand_explicit                    :: intrinsics.atomic_nand_explicit
-atomic_or                               :: intrinsics.atomic_or
-atomic_or_explicit                      :: intrinsics.atomic_or_explicit
-atomic_xor                              :: intrinsics.atomic_xor
-atomic_xor_explicit                     :: intrinsics.atomic_xor_explicit
-atomic_exchange                         :: intrinsics.atomic_exchange
-atomic_exchange_explicit                :: intrinsics.atomic_exchange_explicit
-
-// Returns value and optional ok boolean
-atomic_compare_exchange_strong          :: intrinsics.atomic_compare_exchange_strong
+/*
+Atomically replace the memory location with the result of NAND operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of NAND operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ = ~(dst^ & val)
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_nand_explicit :: intrinsics.atomic_nand_explicit
+
+/*
+Atomically replace the memory location with the result of OR operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of OR operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ |= val
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_or :: intrinsics.atomic_or
+
+/*
+Atomically replace the memory location with the result of OR operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of OR operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ |= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_or_explicit :: intrinsics.atomic_or_explicit
+
+/*
+Atomically replace the memory location with the result of XOR operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of XOR operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ ~= val
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_xor :: intrinsics.atomic_xor
+
+/*
+Atomically replace the memory location with the result of XOR operation with
+the specified value.
+
+This procedure loads a value from memory, calculates the result of XOR operation
+between the loaded value and the specified value, and stores it back into the
+same memory location as an atomic operation. This operation is an atomic
+equivalent of the following:
+
+	dst^ ~= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_xor_explicit :: intrinsics.atomic_xor_explicit
+
+/*
+Atomically exchange the value in a memory location, with the specified value.
+
+This procedure loads a value from the specified memory location, and stores the
+specified value into that memory location. Then the loaded value is returned,
+all done in a single atomic operation. This operation is an atomic equivalent
+of the following:
+
+	tmp := dst^
+	dst^ = val
+	return tmp
+
+The memory ordering of this operation is sequentially-consistent.
+*/
+atomic_exchange :: intrinsics.atomic_exchange
+
+/*
+Atomically exchange the value in a memory location, with the specified value.
+
+This procedure loads a value from the specified memory location, and stores the
+specified value into that memory location. Then the loaded value is returned,
+all done in a single atomic operation. This operation is an atomic equivalent
+of the following:
+
+	tmp := dst^
+	dst^ = val
+	return tmp
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_exchange_explicit :: intrinsics.atomic_exchange_explicit
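
A classic use of atomic exchange is a test-and-set spinlock; a minimal sketch (not a primitive provided by this package):

	Spinlock :: struct #no_copy {
		locked: bool,
	}

	spinlock_lock :: proc "contextless" (l: ^Spinlock) {
		// exchange returns the previous value; `true` means another thread holds the lock
		for atomic_exchange_explicit(&l.locked, true, .Acquire) {
			cpu_relax()
		}
	}

	spinlock_unlock :: proc "contextless" (l: ^Spinlock) {
		atomic_store_explicit(&l.locked, false, .Release)
	}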
+
+/*
+Atomically compare and exchange the value with a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if they are, it stores the value `new` into the memory location,
+all done in a single atomic operation. This procedure returns the old value
+stored in the memory location and a boolean value signifying whether the
+exchange took place, i.e. whether the stored value was equal to `old`.
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+Unlike the weak version, the strong version of compare exchange is guaranteed
+to return `true` whenever the value stored in the location pointed to by `dst`
+was equal to the `old` parameter.
+
+Atomic compare exchange has two memory orderings: One is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for both of
+these operations is sequentially-consistent.
+*/
+atomic_compare_exchange_strong :: intrinsics.atomic_compare_exchange_strong
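
A sketch of the usual CAS retry loop, here implementing a hypothetical lock-free "store the maximum" helper:

	atomic_update_max :: proc "contextless" (dst: ^int, val: int) {
		old := atomic_load_explicit(dst, .Relaxed)
		for old < val {
			ok: bool
			// on failure, `old` receives the current value and the loop retries
			old, ok = atomic_compare_exchange_strong(dst, old, val)
			if ok {
				return
			}
		}
	}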
+
+/*
+Atomically compare and exchange the value with a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if they are, it stores the value `new` into the memory location,
+all done in a single atomic operation. This procedure returns the old value
+stored in the memory location and a boolean value signifying whether the
+exchange took place, i.e. whether the stored value was equal to `old`.
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+Unlike the weak version, the strong version of compare exchange is guaranteed
+to return `true` whenever the value stored in the location pointed to by `dst`
+was equal to the `old` parameter.
+
+Atomic compare exchange has two memory orderings: One is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for these
+operations is as specified by the `success` and `failure` parameters
+respectively.
+*/
 atomic_compare_exchange_strong_explicit :: intrinsics.atomic_compare_exchange_strong_explicit
-atomic_compare_exchange_weak            :: intrinsics.atomic_compare_exchange_weak
-atomic_compare_exchange_weak_explicit   :: intrinsics.atomic_compare_exchange_weak_explicit
+
+/*
+Atomically compare and exchange the value with a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if they are, it stores the value `new` into the memory location,
+all done in a single atomic operation. This procedure returns the old value
+stored in the memory location and a boolean value signifying whether the
+exchange took place, i.e. whether the stored value was equal to `old`.
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		// may return false here
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The weak version of compare exchange may return false, even if `dst^ == old`.
+On some platforms, running weak compare exchange in a loop is faster than
+running the strong version.
+
+Atomic compare exchange has two memory orderings: One is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for both
+of these operations is sequentially-consistent.
+*/
+atomic_compare_exchange_weak :: intrinsics.atomic_compare_exchange_weak
+
+/*
+Atomically compare and exchange the value with a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if they are, it stores the value `new` into the memory location,
+all done in a single atomic operation. This procedure returns the old value
+stored in the memory location and a boolean value signifying whether the
+exchange took place, i.e. whether the stored value was equal to `old`.
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		// may return false here
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The weak version of compare exchange may return false, even if `dst^ == old`.
+On some platforms, running weak compare exchange in a loop is faster than
+running the strong version.
+
+Atomic compare exchange has two memory orderings: One is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for these
+operations is as specified by the `success` and `failure` parameters
+respectively.
+*/
+atomic_compare_exchange_weak_explicit :: intrinsics.atomic_compare_exchange_weak_explicit
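
A sketch of the weak-CAS retry loop mentioned above, incrementing a counter only while it is below a limit (the helper is hypothetical):

	try_increment_below :: proc "contextless" (dst: ^u32, limit: u32) -> bool {
		old := atomic_load_explicit(dst, .Relaxed)
		for {
			if old >= limit {
				return false
			}
			ok: bool
			// a spurious failure simply reloads `old` and retries
			old, ok = atomic_compare_exchange_weak_explicit(dst, old, old+1, .Acq_Rel, .Relaxed)
			if ok {
				return true
			}
		}
	}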

core/sync/doc.odin (+21, -0)

@@ -0,0 +1,21 @@
+/*
+Synchronization primitives
+
+This package implements various synchronization primitives that can be used to
+synchronize threads' access to shared memory.
+
+To limit or control threads' access to shared memory, typically one of the
+following approaches is used:
+
+* Locks
+* Lock-free
+
+When using locks, sections of the code that access shared memory (also known as
+**critical sections**) are guarded by locks, allowing access to a limited
+number of threads and blocking the execution of all others.
+
+In lock-free programming, the data itself is organized in such a way that
+threads don't interfere with each other much. This can be done by segmenting
+the data between threads and/or by using atomic operations.
+*/
+package sync

core/sync/extended.odin (+391, -92)

@@ -4,15 +4,41 @@ import "core:time"
 import vg "core:sys/valgrind"
 _ :: vg
 
-// A Wait_Group waits for a collection of threads to finish
-//
-// A Wait_Group must not be copied after first use
+/*
+Wait group.
+
+A wait group is a synchronization primitive used by a waiting thread to wait
+until all worker threads finish their work.
+
+The waiting thread first sets the number of worker threads it expects to wait
+for with a `wait_group_add` call, and starts waiting with a `wait_group_wait`
+call. When worker threads complete their work, each of them calls
+`wait_group_done`; after all worker threads have called this procedure, the
+waiting thread resumes execution.
+
+To keep track of whether all worker threads have finished their work, the wait
+group keeps an internal atomic counter. Initially, the waiting thread may set
+it to a non-zero amount. As each worker thread completes its work, the
+internal counter is atomically decremented until it reaches zero. When it
+reaches zero, the waiting thread is unblocked. The counter is not allowed to
+become negative.
+
+**Note**: Just like any synchronization primitives, a wait group cannot be
+copied after first use. See documentation for `Mutex` or `Cond`.
+*/
 Wait_Group :: struct #no_copy {
 	counter: int,
 	mutex:   Mutex,
 	cond:    Cond,
 }
 
+/*
+Increment the internal counter of a wait group.
+
+This procedure atomically adds the specified amount, which may be negative, to
+the wait group's internal counter. This operation can be done on any thread.
+*/
 wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 	if delta == 0 {
 		return
@@ -32,10 +58,23 @@ wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 	}
 }
 
+/*
+Signal work done by a thread in a wait group.
+
+This procedure decrements the internal counter of the specified wait group.
+Once the internal counter reaches zero, the waiting thread is woken up and
+resumes execution.
+*/
 wait_group_done :: proc "contextless" (wg: ^Wait_Group) {
 	wait_group_add(wg, -1)
 }
 
+/*
+Wait for all worker threads in the wait group.
+
+This procedure blocks the execution of the current thread, until the specified
+wait group's internal counter reaches zero.
+*/
 wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	guard(&wg.mutex)
 
@@ -47,6 +86,14 @@ wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	}
 }
 
+/*
+Wait for all worker threads in the wait group, or until timeout is reached.
+
+This procedure blocks the execution of the current thread, until the specified
+wait group's internal counter reaches zero, or until the timeout is reached.
+
+This procedure returns `false` if the timeout was reached, and `true` otherwise.
+*/
 wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
@@ -64,41 +111,43 @@ wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: t
 	return true
 }
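
A sketch of typical usage (assuming `core:thread`'s `create_and_start` taking a parameterless proc; the wait group is file-scope because Odin proc literals can't capture locals):

	wg: sync.Wait_Group

	main :: proc() {
		N :: 3
		sync.wait_group_add(&wg, N)
		for _ in 0..<N {
			thread.create_and_start(proc() {
				// ... do work ...
				sync.wait_group_done(&wg) // decrement the counter
			})
		}
		sync.wait_group_wait(&wg) // blocks until the counter reaches zero
	}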
 
-
-
 /*
-A barrier enabling multiple threads to synchronize the beginning of some computation
-
-Example:
-	package example
-
-	import "core:fmt"
-	import "core:sync"
-	import "core:thread"
-
-	barrier := &sync.Barrier{}
-
-	main :: proc "contextless" () {
-		fmt.println("Start")
-
-		THREAD_COUNT :: 4
-		threads: [THREAD_COUNT]^thread.Thread
-
-		sync.barrier_init(barrier, THREAD_COUNT)
-
-		for _, i in threads {
-			threads[i] = thread.create_and_start(proc(t: ^thread.Thread) {
-				// Same messages will be printed together but without any interleaving
-				fmt.println("Getting ready!")
-				sync.barrier_wait(barrier)
-				fmt.println("Off their marks they go!")
-			})
-		}
-
-		for t in threads {
-			thread.destroy(t) // join and free thread
-		}
-		fmt.println("Finished")
+Barrier.
+
+A barrier is a synchronization primitive enabling multiple threads to
+synchronize the beginning of some computation.
+
+When the `barrier_wait` procedure is called by any thread, that thread blocks
+until all threads associated with the barrier reach the same point of
+execution and also call `barrier_wait`.
+
+When the barrier is initialized, a `thread_count` parameter is passed,
+specifying the number of participating threads. The barrier also keeps an
+internal atomic counter. When a thread calls `barrier_wait`, the internal
+counter is incremented. When the internal counter reaches `thread_count`, it is
+reset and all threads waiting on the barrier are unblocked.
+
+This type of synchronization primitive can be used to synchronize "staged"
+workloads, where the workload is split into stages, and no thread is allowed
+to start work on the next stage until all threads have completed the previous
+one. In this case, a `barrier_wait` call shall be inserted after each stage in
+the thread procedure, as in the example below.
+
+**Example**:
+
+	barrier := &sync.Barrier{}  // file-scope: Odin proc literals can't capture locals
+
+	main :: proc() {
+		THREAD_COUNT :: 4
+		threads: [THREAD_COUNT]^thread.Thread
+		sync.barrier_init(barrier, THREAD_COUNT)
+		for _, i in threads {
+			threads[i] = thread.create_and_start(proc(t: ^thread.Thread) {
+				// Same messages will be printed together but without any interleaving
+				fmt.println("Getting ready!")
+				sync.barrier_wait(barrier)
+				fmt.println("Off their marks they go!")
+			})
+		}
+		for t in threads {
+			thread.destroy(t)  // join and free each thread
+		}
 	}
 */
 Barrier :: struct #no_copy {
@@ -109,6 +158,13 @@ Barrier :: struct #no_copy {
 	thread_count:  int,
 }
 
+/*
+Initialize a barrier.
+
+This procedure initializes the barrier for the specified number of participant
+threads.
+*/
 barrier_init :: proc "contextless" (b: ^Barrier, thread_count: int) {
 	when ODIN_VALGRIND_SUPPORT {
 		vg.helgrind_barrier_resize_pre(b, uint(thread_count))
@@ -118,8 +174,13 @@ barrier_init :: proc "contextless" (b: ^Barrier, thread_count: int) {
 	b.thread_count = thread_count
 }
 
-// Block the current thread until all threads have rendezvoused
-// Barrier can be reused after all threads rendezvoused once, and can be used continuously
+/*
+Block the current thread until all threads have rendezvoused.
+
+This procedure blocks the execution of the current thread, until all threads
+have reached the same point in the execution of the thread proc. Multiple calls
+to `barrier_wait` are allowed within the thread procedure.
+*/
 barrier_wait :: proc "contextless" (b: ^Barrier) -> (is_leader: bool) {
 	when ODIN_VALGRIND_SUPPORT {
 		vg.helgrind_barrier_wait_pre(b)
@@ -140,15 +201,31 @@ barrier_wait :: proc "contextless" (b: ^Barrier) -> (is_leader: bool) {
 	return true
 }
 
+/*
+Auto-reset event.
 
+Represents a thread synchronization primitive that, when signalled, releases one
+single waiting thread and then resets automatically to a state where it can be
+signalled again.
+
+When a thread calls `auto_reset_event_wait`, its execution will be blocked
+until the event is signalled by another thread. A call to
+`auto_reset_event_signal` wakes up exactly one thread waiting on the event.
+*/
 Auto_Reset_Event :: struct #no_copy {
 	// status ==  0: Event is reset and no threads are waiting
-	// status ==  1: Event is signaled
+	// status ==  1: Event is signalled
 	// status == -N: Event is reset and N threads are waiting
 	status: i32,
 	sema:   Sema,
 }
 
+/*
+Signal an auto-reset event.
+
+This procedure signals an auto-reset event, waking up exactly one waiting
+thread.
+*/
 auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_load_explicit(&e.status, .Relaxed)
 	for {
@@ -163,6 +240,12 @@ auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 	}
 }
 
+/*
+Wait on an auto-reset event.
+
+This procedure blocks the execution of the current thread, until the event is
+signalled by another thread.
+*/
 auto_reset_event_wait :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_sub_explicit(&e.status, 1, .Acquire)
 	if old_status < 1 {
@@ -170,13 +253,35 @@ auto_reset_event_wait :: proc "contextless" (e: ^Auto_Reset_Event) {
 	}
 }
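
A sketch of the signal/wait pairing (file-scope event; the procedure names are hypothetical):

	work_ready: sync.Auto_Reset_Event

	consumer :: proc() {
		for {
			sync.auto_reset_event_wait(&work_ready) // exactly one waiter wakes per signal
			// ... consume one unit of work ...
		}
	}

	producer :: proc() {
		// ... publish one unit of work ...
		sync.auto_reset_event_signal(&work_ready)
	}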
 
+/*
+Ticket lock.
+
+A ticket lock is a mutual exclusion lock that uses "tickets" to control which
+thread is allowed into a critical section.
 
+This synchronization primitive works just like a spinlock, except that it
+provides a fairness guarantee: threads are admitted into the critical section
+in the order in which they requested the lock.
 
+This type of synchronization primitive is applicable for short critical sections
+in low-contention systems, as it uses a spinlock under the hood.
+*/
 Ticket_Mutex :: struct #no_copy {
 	ticket:  uint,
 	serving: uint,
 }
 
+/*
+Acquire a lock on a ticket mutex.
+
+This procedure acquires a lock on a ticket mutex. If the ticket mutex is held
+by another thread, this procedure also blocks the execution until the lock
+can be acquired.
+
+Once the lock is acquired, any thread calling `ticket_mutex_lock` will be
+blocked from entering any critical sections associated with the same ticket
+mutex, until the lock is released.
+*/
 ticket_mutex_lock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	ticket := atomic_add_explicit(&m.ticket, 1, .Relaxed)
 	for ticket != atomic_load_explicit(&m.serving, .Acquire) {
@@ -184,44 +289,147 @@ ticket_mutex_lock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	}
 }
 
+/*
+Release a lock on a ticket mutex.
+
+This procedure releases the lock on a ticket mutex. If any of the threads are
+waiting to acquire the lock, exactly one of those threads is unblocked and
+allowed into the critical section.
+*/
 ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	atomic_add_explicit(&m.serving, 1, .Relaxed)
 }
+
+/*
+Guard the current scope with a lock on a ticket mutex.
+
+This procedure acquires a lock on a ticket mutex. The lock is automatically
+released at the end of the caller's scope. If the mutex was already locked,
+this procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the ticket mutex,
+until the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if ticket_mutex_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=ticket_mutex_unlock)
 ticket_mutex_guard :: proc "contextless" (m: ^Ticket_Mutex) -> bool {
 	ticket_mutex_lock(m)
 	return true
 }
 
+/*
+Benaphore.
+
+A benaphore is a combination of an atomic variable and a semaphore that can
+improve locking efficiency in an uncontended case: acquiring a benaphore lock
+doesn't call into the internal semaphore if no other thread is in the middle
+of a critical section.
+
+Once a lock on a benaphore is acquired by a thread, no other thread is allowed
+into any critical sections associated with the same benaphore, until the lock
+is released.
+*/
 Benaphore :: struct #no_copy {
 	counter: i32,
 	sema:    Sema,
 }
 
+/*
+Acquire a lock on a benaphore.
+
+This procedure acquires a lock on the specified benaphore. If the lock on the
+benaphore is already held, this procedure also blocks the execution of the
+current thread until the lock can be acquired.
+
+Once a lock is acquired, all threads attempting to take a lock will be blocked
+from entering any critical sections associated with the same benaphore, until
+the lock is released.
+*/
 benaphore_lock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
 		sema_wait(&b.sema)
 	}
 }
 
+/*
+Try to acquire a lock on a benaphore.
+
+This procedure tries to acquire a lock on the specified benaphore. If it was
+already locked, then the returned value is `false`, otherwise the lock is
+acquired and the procedure returns `true`.
+
+If the lock is acquired, all threads that attempt to acquire a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+*/
 benaphore_try_lock :: proc "contextless" (b: ^Benaphore) -> bool {
 	v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire)
 	return v == 0
 }
 
+/*
+Release a lock on a benaphore.
+
+This procedure releases a lock on the specified benaphore. If any threads are
+waiting on the lock, exactly one of them is allowed into a critical section
+associated with the same benaphore.
+*/
 benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
 		sema_post(&b.sema)
 	}
 }
 
+/*
+Guard the current scope with a lock on a benaphore.
+
+This procedure acquires a lock on a benaphore. The lock is automatically
+released at the end of the caller's scope. If the benaphore was already
+locked, this procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if benaphore_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=benaphore_unlock)
 benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool {
 	benaphore_lock(m)
 	return true
 }
 
+/*
+Recursive benaphore.
+
+A recursive benaphore is just like a plain benaphore, except it allows
+reentrancy into the critical section.
+
+When a lock is acquired on a recursive benaphore, all other threads attempting
+to acquire a lock on the same benaphore will be blocked from entering any
+critical sections associated with it.
+
+When a thread has acquired the lock on a recursive benaphore, that thread is
+allowed to acquire another lock on the same benaphore, and the benaphore will
+stay locked until the thread releases the lock as many times as it has locked
+it.
+*/
 Recursive_Benaphore :: struct #no_copy {
 	counter:   int,
 	owner:     int,
@@ -229,6 +437,16 @@ Recursive_Benaphore :: struct #no_copy {
 	sema:      Sema,
 }
 
+/*
+Acquire a lock on a recursive benaphore.
+
+This procedure acquires a lock on a recursive benaphore. If the benaphore is
+held by another thread, this procedure blocks until the lock can be acquired.
+
+Once a lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same
+recursive benaphore, until the lock is released.
+*/
 recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
@@ -241,6 +459,17 @@ recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	b.recursion += 1
 }
 
+/*
+Try to acquire a lock on a recursive benaphore.
+
+This procedure attempts to acquire a lock on a recursive benaphore. If the
+benaphore is already held by a different thread, this procedure returns `false`.
+Otherwise the lock is acquired and the procedure returns `true`.
+
+If the lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same
+recursive benaphore, until the lock is released.
+*/
 recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 	tid := current_thread_id()
 	if b.owner == tid {
@@ -256,6 +485,13 @@ recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) ->
 	return true
 }
 
+/*
+Release a lock on a recursive benaphore.
+
+This procedure releases a lock on the specified recursive benaphore. Once the
+lock has been released as many times as it was acquired, the critical sections
+associated with the same benaphore become open for other threads to enter.
+*/
 recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	_assert(tid == b.owner, "tid != b.owner")
@@ -272,24 +508,50 @@ recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	// outside the lock
 }
 
+/*
+Guard the current scope with a recursive benaphore.
+
+This procedure acquires a lock on the specified recursive benaphore and
+automatically releases it at the end of the caller's scope. If the recursive
+benaphore was already held by another thread, this procedure also blocks until
+the lock can be acquired.
+
+When the lock is acquired, all other threads attempting to take a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if recursive_benaphore_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=recursive_benaphore_unlock)
 recursive_benaphore_guard :: proc "contextless" (m: ^Recursive_Benaphore) -> bool {
 	recursive_benaphore_lock(m)
 	return true
 }
 
+/*
+Once action.
 
-
-
-// Once is a data value that will perform exactly on action.
-// 
-// A Once must not be copied after first use.
+`Once` is a synchronization primitive that ensures an action is performed
+exactly once, even if multiple threads attempt to perform it.
+*/
 Once :: struct #no_copy {
 	m:    Mutex,
 	done: bool,
 }
 
-// once_do calls the procedure fn if and only if once_do is being called for the first for this instance of Once.
+/*
+Call a function once.
+
+The `once_do` procedure group calls the specified function, if and only if no
+call has been made yet through the given `Once` instance.
+*/
 once_do :: proc{
 	once_do_without_data,
 	once_do_without_data_contextless,
@@ -297,7 +559,9 @@ once_do :: proc{
 	once_do_with_data_contextless,
 }
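
A sketch of typical usage (identifiers hypothetical):

	init_once: sync.Once

	ensure_initialized :: proc() {
		sync.once_do(&init_once, proc() {
			// runs exactly once, even if many threads call
			// ensure_initialized concurrently
		})
	}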
 
-// once_do_without_data calls the procedure fn if and only if once_do_without_data is being called for the first for this instance of Once.
+/*
+Call a function with no data once.
+*/
 once_do_without_data :: proc(o: ^Once, fn: proc()) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc()) {
@@ -313,7 +577,9 @@ once_do_without_data :: proc(o: ^Once, fn: proc()) {
 	}
 }
 
-// once_do_without_data calls the procedure fn if and only if once_do_without_data is being called for the first for this instance of Once.
+/*
+Call a contextless function with no data once.
+*/
 once_do_without_data_contextless :: proc(o: ^Once, fn: proc "contextless" ()) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc "contextless" ()) {
@@ -329,7 +595,9 @@ once_do_without_data_contextless :: proc(o: ^Once, fn: proc "contextless" ()) {
 	}
 }
 
-// once_do_with_data calls the procedure fn if and only if once_do_with_data is being called for the first for this instance of Once.
+/*
+Call a function with data once.
+*/
 once_do_with_data :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
@@ -345,7 +613,9 @@ once_do_with_data :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
 	}
 }
 
-// once_do_with_data_contextless calls the procedure fn if and only if once_do_with_data_contextless is being called for the first for this instance of Once.
+/*
+Call a contextless function with data once.
+*/
 once_do_with_data_contextless :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
 	@(cold)
 	do_slow :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
@@ -361,83 +631,112 @@ once_do_with_data_contextless :: proc "contextless" (o: ^Once, fn: proc "context
 	}
 }
 
-
-
-
-
-// A Parker is an associated token which is initially not present:
-//     * The `park` procedure blocks the current thread unless or until the token
-//       is available, at which point the token is consumed.
-//     * The `park_with_timeout` procedures works the same as `park` but only
-//       blocks for the specified duration.
-//     * The `unpark` procedure automatically makes the token available if it
-//       was not already.
+/*
+A Parker is an associated token which is initially not present:
+
+* The `park` procedure blocks the current thread unless or until the token
+  is available, at which point the token is consumed.
+* The `park_with_timeout` procedure works the same as `park` but only
+  blocks for the specified duration.
+* The `unpark` procedure atomically makes the token available if it
+  was not already.
+*/
 Parker :: struct #no_copy {
 	state: Futex,
 }
 
-// Blocks the current thread until the token is made available.
-//
-// Assumes this is only called by the thread that owns the Parker.
+@(private="file") PARKER_EMPTY    :: 0
+@(private="file") PARKER_NOTIFIED :: 1
+@(private="file") PARKER_PARKED   :: max(u32)
+
+/*
+Block until the token is available.
+
+This procedure blocks the execution of the current thread, until a token is
+made available.
+
+**Note**: This procedure assumes it is only called by the thread that owns
+the Parker.
+*/
 park :: proc "contextless" (p: ^Parker) {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(u32)
-	if atomic_sub_explicit(&p.state, 1, .Acquire) == NOTIFIED {
+	if atomic_sub_explicit(&p.state, 1, .Acquire) == PARKER_NOTIFIED {
 		return
 	}
 	for {
-		futex_wait(&p.state, PARKED)
-		if _, ok := atomic_compare_exchange_strong_explicit(&p.state, NOTIFIED, EMPTY, .Acquire, .Acquire); ok {
+		futex_wait(&p.state, PARKER_PARKED)
+		if _, ok := atomic_compare_exchange_strong_explicit(&p.state, PARKER_NOTIFIED, PARKER_EMPTY, .Acquire, .Acquire); ok {
 			return
 		}
 	}
 }
 
-// Blocks the current thread until the token is made available, but only
-// for a limited duration.
-//
-// Assumes this is only called by the thread that owns the Parker
+/*
+Block until the token is available, or until a timeout expires.
+
+This procedure blocks the execution of the current thread until a token is made
+available, or until the timeout has expired, whichever happens first.
+
+**Note**: This procedure assumes it is only called by the thread that owns
+the Parker.
+*/
 park_with_timeout :: proc "contextless" (p: ^Parker, duration: time.Duration) {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(u32)
-	if atomic_sub_explicit(&p.state, 1, .Acquire) == NOTIFIED {
+	start_tick := time.tick_now()
+	remaining_duration := duration
+	if atomic_sub_explicit(&p.state, 1, .Acquire) == PARKER_NOTIFIED {
 		return
 	}
-	futex_wait_with_timeout(&p.state, PARKED, duration)
-	atomic_exchange_explicit(&p.state, EMPTY, .Acquire)
+	for {
+		if !futex_wait_with_timeout(&p.state, PARKER_PARKED, remaining_duration) {
+			return
+		}
+		old, ok := atomic_compare_exchange_weak_explicit((^u32)(&p.state), PARKER_PARKED, PARKER_EMPTY, .Acquire, .Relaxed)
+		if ok || old == PARKER_PARKED {
+			return
+		}
+		end_tick := time.tick_now()
+		remaining_duration -= time.tick_diff(start_tick, end_tick)
+		start_tick = end_tick
+	}
 }
 
-// Automatically makes thee token available if it was not already.
+/*
+Make the token available.
+
+This procedure atomically makes the token available if it was not already,
+waking the owning thread if it is currently parked.
+*/
 unpark :: proc "contextless" (p: ^Parker)  {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(Futex)
-	if atomic_exchange_explicit(&p.state, NOTIFIED, .Release) == PARKED {
+	if atomic_exchange_explicit((^u32)(&p.state), PARKER_NOTIFIED, .Release) == PARKER_PARKED {
 		futex_signal(&p.state)
 	}
 }
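
A sketch of the park/unpark protocol (file-scope Parker; only the owning thread may park):

	parker: sync.Parker

	owner_thread :: proc() {
		sync.park(&parker) // blocks until the token is available, then consumes it
	}

	other_thread :: proc() {
		sync.unpark(&parker) // makes the token available, waking the owner if parked
	}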
 
+/*
+One-shot event.
 
+A one-shot event is an associated token which is initially not present:
 
-// A One_Shot_Event is an associated token which is initially not present:
-//     * The `one_shot_event_wait` blocks the current thread until the event
-//       is made available
-//     * The `one_shot_event_signal` procedure automatically makes the token
-//       available if its was not already.
+* The `one_shot_event_wait` procedure blocks the current thread until the
+  event is made available.
+* The `one_shot_event_signal` procedure atomically makes the token
+  available if it was not already.
+*/
 One_Shot_Event :: struct #no_copy {
 	state: Futex,
 }
 
-// Blocks the current thread until the event is made available with `one_shot_event_signal`.
+/*
+Block until the event is made available.
+
+This procedure blocks the execution of the current thread, until the event is
+made available.
+*/
 one_shot_event_wait :: proc "contextless" (e: ^One_Shot_Event) {
 	for atomic_load_explicit(&e.state, .Acquire) == 0 {
 		futex_wait(&e.state, 0)
 	}
 }
 
-// Releases any threads that are currently blocked by this event with `one_shot_event_wait`.
+/*
+Make the event available.
+
+This procedure makes the event available, releasing all threads that are
+currently blocked in `one_shot_event_wait` as well as any future waiters.
+*/
 one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) {
 	atomic_store_explicit(&e.state, 1, .Release)
 	futex_broadcast(&e.state)

core/sync/primitives.odin (+420, -56)

@@ -3,46 +3,108 @@ package sync
 import "base:runtime"
 import "core:time"
 
+/*
+Obtain the current thread ID.
+*/
 current_thread_id :: proc "contextless" () -> int {
 	return _current_thread_id()
 }
 
-// A Mutex is a [[mutual exclusion lock; https://en.wikipedia.org/wiki/Mutual_exclusion]]
-// It can be used to prevent more than one thread from executing the same piece of code,
-// and thus prevent access to same piece of memory by multiple threads, at the same time.
-//
-// A Mutex's zero value represents an initial, *unlocked* state.
-//
-// If another thread tries to take the lock while another thread holds it, it will pause
-// until the lock is released. Code or memory that is "surrounded" by a mutex lock is said
-// to be "guarded by a mutex".
-//
-// A Mutex must not be copied after first use (e.g., after locking it the first time).
-// This is because, in order to coordinate with other threads, all threads must watch
-// the same memory address to know when the lock has been released. Trying to use a
-// copy of the lock at a different memory address will result in broken and unsafe
-// behavior. For this reason, Mutexes are marked as `#no_copy`.
+/*
+Mutual exclusion lock.
+
+A Mutex is a [[mutual exclusion lock; https://en.wikipedia.org/wiki/Mutual_exclusion]].
+It can be used to prevent more than one thread from entering the critical
+section, and thus prevent access to the same piece of memory by multiple
+threads at the same time.
+
+A Mutex's zero-initialized value represents an initial, *unlocked* state.
+
+If a thread tries to acquire the lock while it's already held (typically by
+another thread), its execution will be blocked until the lock is released.
+Code or memory that is "surrounded" by mutex lock and unlock operations is
+said to be "guarded by a mutex".
+
+**Note**: A Mutex must not be copied after first use (e.g., after locking it the
+first time). This is because, in order to coordinate with other threads, all
+threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, Mutexes are marked as
+`#no_copy`.
+
+**Note**: If a thread attempts to lock a mutex it is already holding, this
+causes a trivial case of deadlock. Do not use `Mutex` in recursive procedures.
+In case multiple locks by the same thread are desired, use `Recursive_Mutex`.
+*/
 Mutex :: struct #no_copy {
 	impl: _Mutex,
 }
 
-// mutex_lock locks m
+/*
+Acquire a lock on a mutex.
+
+This procedure acquires a lock on the specified mutex. If the mutex is already
+locked by any thread, this procedure also blocks until the lock can be
+acquired.
+
+Once the lock is acquired, all other threads that attempt to acquire a lock will
+be blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+
+**Note**: If the mutex is already locked by the current thread, a call to this
+procedure will block indefinitely. Do not use this in recursive procedures.
+*/
 mutex_lock :: proc "contextless" (m: ^Mutex) {
 	_mutex_lock(m)
 }
 
-// mutex_unlock unlocks m
+/*
+Release a lock on a mutex.
+
+This procedure releases the lock associated with the specified mutex. If the
+mutex was not locked, this operation is a no-op.
+
+When the thread holding the lock calls `mutex_unlock`, one other thread waiting
+on the mutex is allowed to enter the critical sections associated with the
+mutex. If there are no threads waiting on the mutex, the critical sections
+simply remain open.
+*/
 mutex_unlock :: proc "contextless" (m: ^Mutex) {
 	_mutex_unlock(m)
 }
 
-// mutex_try_lock tries to lock m, will return true on success, and false on failure
+/*
+Try to acquire a lock on a mutex.
+
+This procedure tries to acquire a lock on the specified mutex. If it was already
+locked, then the returned value is `false`, otherwise the lock is acquired and
+the procedure returns `true`.
+
+If the lock is acquired, all threads that attempt to acquire a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 mutex_try_lock :: proc "contextless" (m: ^Mutex) -> bool {
 	return _mutex_try_lock(m)
 }
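
A sketch of the try-lock pattern (names hypothetical):

	m:      sync.Mutex
	shared: int

	// Increment `shared` only if the critical section is free right now.
	try_bump :: proc "contextless" () -> bool {
		if !sync.mutex_try_lock(&m) {
			return false // another thread holds the lock; skip instead of blocking
		}
		defer sync.mutex_unlock(&m)
		shared += 1
		return true
	}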
 
 /*
-Example:
+Guard the current scope with a lock on a mutex.
+
+This procedure acquires a lock on a mutex. The lock is automatically released
+at the end of the caller's scope. If the mutex was already locked, this
+procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the mutex, until
+the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
 	if mutex_guard(&m) {
 		...
 	}
@@ -53,47 +115,145 @@ mutex_guard :: proc "contextless" (m: ^Mutex) -> bool {
 	return true
 }
 
-// A RW_Mutex is a reader/writer mutual exclusion lock
-// The lock can be held by any arbitrary number of readers or a single writer
-// The zero value for a RW_Mutex is an unlocked mutex
-//
-// A RW_Mutex must not be copied after first use
+/*
+Read-write mutual exclusion lock.
+
+An `RW_Mutex` is a reader/writer mutual exclusion lock. The lock can be held by
+any number of readers or a single writer.
+
+This type of synchronization primitive supports two kinds of lock operations:
+
+- Exclusive lock (write lock)
+- Shared lock (read lock)
+
+When an exclusive lock is acquired by any thread, all other threads attempting
+to acquire either an exclusive or a shared lock will be blocked from entering
+the critical sections associated with the read-write mutex, until the
+exclusive owner releases the lock.
+
+When a shared lock is acquired by any thread, other threads acquiring a shared
+lock are still able to enter all the critical sections associated with the
+read-write mutex. However, threads attempting to acquire an exclusive lock
+will be blocked from entering those critical sections, until all shared locks
+are released.
+
+**Note**: A read-write mutex must not be copied after first use (e.g., after
+acquiring a lock). This is because, in order to coordinate with other threads,
+all threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, mutexes are marked as
+`#no_copy`.
+
+**Note**: A read-write mutex is not recursive. Do not attempt to acquire an
+exclusive lock more than once from the same thread, or both an exclusive and a
+shared lock on the same thread. Taking a shared lock multiple times is
+acceptable.
+*/
 RW_Mutex :: struct #no_copy {
 	impl: _RW_Mutex,
 }
 
-// rw_mutex_lock locks rw for writing (with a single writer)
-// If the mutex is already locked for reading or writing, the mutex blocks until the mutex is available.
+/*
+Acquire an exclusive lock.
+
+This procedure acquires an exclusive lock on the specified read-write mutex. If
+the lock is already held by any thread, this procedure also blocks until the
+lock can be acquired.
+
+After a lock has been acquired, any thread attempting to acquire any lock
+will be blocked from entering any critical sections associated with the same
+read-write mutex, until the exclusive lock is released.
+*/
 rw_mutex_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_lock(rw)
 }
 
-// rw_mutex_unlock unlocks rw for writing (with a single writer)
+/*
+Release an exclusive lock.
+
+This procedure releases an exclusive lock associated with the specified
+read-write mutex.
+
+When the exclusive lock is released, all critical sections associated with the
+same read-write mutex become open to other threads.
+*/
 rw_mutex_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_unlock(rw)
 }
 
-// rw_mutex_try_lock tries to lock rw for writing (with a single writer)
+/*
+Try to acquire an exclusive lock on a read-write mutex.
+
+This procedure tries to acquire an exclusive lock on the specified read-write
+mutex. If the mutex was already locked, the procedure returns `false`. Otherwise
+it acquires the exclusive lock and returns `true`.
+
+If the lock has been acquired, all threads attempting to acquire any lock
+will be blocked from entering any critical sections associated with the same
+read-write mutex, until the exclusive lock is released.
+*/
 rw_mutex_try_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_lock(rw)
 }
 
-// rw_mutex_shared_lock locks rw for reading (with arbitrary number of readers)
+/*
+Acquire a shared lock on a read-write mutex.
+
+This procedure acquires a shared lock on the specified read-write mutex. If the
+mutex already has an exclusive lock held, this procedure also blocks until the
+lock can be acquired.
+
+After the shared lock is obtained, all threads attempting to acquire an
+exclusive lock will be blocked from entering any critical sections associated
+with the same read-write mutex, until all shared locks associated with the
+specified read-write mutex are released.
+*/
 rw_mutex_shared_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_lock(rw)
 }
 
-// rw_mutex_shared_unlock unlocks rw for reading (with arbitrary number of readers)
+/*
+Release the shared lock on a read-write mutex.
+
+This procedure releases a shared lock on the specified read-write mutex. When all
+shared locks are released, all critical sections associated with the same
+read-write mutex become open to other threads.
+*/
 rw_mutex_shared_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_unlock(rw)
 }
 
-// rw_mutex_try_shared_lock tries to lock rw for reading (with arbitrary number of readers)
+/*
+Try to acquire a shared lock on a read-write mutex.
+
+This procedure attempts to acquire a shared lock on the specified read-write
+mutex. If the mutex already has an exclusive lock held, this procedure returns
+`false`. Otherwise, it acquires the shared lock and returns `true`.
+
+If the shared lock has been acquired, it causes all threads attempting to
+acquire the exclusive lock to be blocked from entering any critical sections
+associated with the same read-write mutex, until all shared locks are released.
+*/
 rw_mutex_try_shared_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_shared_lock(rw)
 }
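
A sketch of the reader/writer split (names hypothetical; `table` is assumed to be initialized with `make` during startup):

	rw:    sync.RW_Mutex
	table: map[string]int

	lookup :: proc(key: string) -> (int, bool) {
		sync.rw_mutex_shared_lock(&rw) // many readers may hold this simultaneously
		defer sync.rw_mutex_shared_unlock(&rw)
		v, ok := table[key]
		return v, ok
	}

	store :: proc(key: string, val: int) {
		sync.rw_mutex_lock(&rw) // exclusive: blocks both readers and writers
		defer sync.rw_mutex_unlock(&rw)
		table[key] = val
	}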
+
 /*
-Example:
+Guard the current scope with an exclusive lock on a read-write mutex.
+
+This procedure acquires an exclusive lock on the specified read-write mutex.
+The lock is automatically released at the end of the caller's scope. If the
+mutex was already locked by readers or a writer, this procedure blocks until
+the lock can be acquired.
+
+When an exclusive lock is acquired, all other threads attempting to acquire
+any lock will be blocked from entering any critical sections associated with
+the same read-write mutex, until the exclusive lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
 	if rw_mutex_guard(&m) {
 		...
 	}
@@ -105,8 +265,23 @@ rw_mutex_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 }
 
 /*
-Example:
-	if rw_mutex_shared_guard(&m) {
+Guard the current scope with a shared lock on a read-write mutex.
+
+This procedure acquires a shared lock on the specified read-write mutex. The
+lock is automatically released at the end of the caller's scope. If the mutex
+already has an exclusive lock held, this procedure blocks until a lock can be
+acquired.
+
+When a shared lock is obtained, all other threads attempting to obtain an
+exclusive lock will be blocked from any critical sections associated with the
+same read-write mutex, until all shared locks are released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if rw_mutex_shared_guard(&m) {
 		...
 	}
 */
@@ -116,30 +291,91 @@ rw_mutex_shared_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 	return true
 }
 
-
-
-// A Recursive_Mutex is a recursive mutual exclusion lock
-// The zero value for a Recursive_Mutex is an unlocked mutex
-//
-// A Recursive_Mutex must not be copied after first use
+/*
+Recursive mutual exclusion lock.
+
+A recursive mutex is just like a plain mutex, except that it allows reentrancy:
+the thread that holds the lock may acquire it again without deadlocking. In
+order to release the mutex for other threads, the mutex needs to be unlocked
+as many times as it was locked.
+
+When a lock is acquired on a recursive mutex, all other threads attempting to
+acquire a lock on the same mutex will be blocked from entering any critical
+sections associated with the same recursive mutex. The mutex stays locked
+until the owning thread releases the lock as many times as it has locked it.
+
+**Note**: A recursive mutex must not be copied after first use (e.g., after
+acquiring a lock). This is because, in order to coordinate with other threads,
+all threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, mutexes are marked as
+`#no_copy`.
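+
+**Example** (a minimal sketch of reentrancy; `m` is an assumed
+`Recursive_Mutex`):
+
+	recursive_mutex_lock(&m)   // First lock acquires the mutex.
+	recursive_mutex_lock(&m)   // Same thread locks again without deadlocking.
+	recursive_mutex_unlock(&m) // Mutex is still held after this unlock.
+	recursive_mutex_unlock(&m) // Mutex is released for other threads here.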
+*/
 Recursive_Mutex :: struct #no_copy {
 	impl: _Recursive_Mutex,
 }
 
+/*
+Acquire a lock on a recursive mutex.
+
+This procedure acquires a lock on the specified recursive mutex. If the lock is
+held by a different thread, this procedure blocks until the lock can be
+acquired. The same thread may lock the mutex multiple times without blocking.
+
+When the lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 recursive_mutex_lock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_lock(m)
 }
 
+/*
+Release a lock on a recursive mutex.
+
+This procedure releases a lock on the specified recursive mutex. Once the mutex
+has been unlocked as many times as it was locked, the critical sections
+associated with the same mutex become open for other threads to enter.
+*/
 recursive_mutex_unlock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_unlock(m)
 }
 
+/*
+Try to acquire a lock on a recursive mutex.
+
+This procedure attempts to acquire a lock on the specified recursive mutex. If
+the recursive mutex is held by another thread, this procedure returns `false`.
+Otherwise it locks the mutex and returns `true`.
+
+If the lock is acquired, all other threads attempting to obtain a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 recursive_mutex_try_lock :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	return _recursive_mutex_try_lock(m)
 }
 
 /*
-Example:
+Guard the scope with a recursive mutex lock.
+
+This procedure acquires a lock on the specified recursive mutex and
+automatically releases it at the end of the calling scope. If the recursive
+mutex is already held by another thread, this procedure blocks until the
+lock can be acquired.
+
+When the lock is acquired, all other threads attempting to take a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
 	if recursive_mutex_guard(&m) {
 		...
 	}
@@ -150,19 +386,69 @@ recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	return true
 }
 
+/*
+A condition variable.
+
+`Cond` implements a condition variable, a rendezvous point for threads waiting
+for signalling the occurence of an event. Condition variables are used on
+conjuction with mutexes to provide a shared access to one or more shared
+variable.
+
+A typical usage of condition variable is as follows. A thread that intends to
+modify a shared variable shall:
+
+1. Acquire a lock on a mutex.
+2. Modify the shared memory.
+3. Release the lock.
+3. Call `cond_signal` or `cond_broadcast`.
 
-// Cond implements a condition variable, a rendezvous point for threads
-// waiting for signalling the occurence of an event
-//
-// A Cond must not be copied after first use
+A thread that intends to wait on a shared variable shall:
+
+1. Acquire a lock on a mutex.
+2. Call `cond_wait` or `cond_wait_with_timeout` (the mutex is released while
+   waiting and reacquired before the call returns).
+3. Re-check the condition in a loop and keep waiting until it is satisfied
+   (see the example below).
+
+**Note**: A condition variable must not be copied after first use (e.g., after
+waiting on it the first time). This is because, in order to coordinate with
+other threads, all threads must watch the same memory address to know when the
+condition variable has been signalled. Trying to use a copy at a different
+memory address will result in broken and unsafe behavior. For this reason,
+condition variables are marked as `#no_copy`.
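+
+**Example** (a minimal sketch of both sides; `m`, `c`, and the shared flag
+`ready` are assumed declarations):
+
+	// Signalling thread:
+	mutex_lock(&m)
+	ready = true
+	mutex_unlock(&m)
+	cond_signal(&c)
+
+	// Waiting thread:
+	mutex_lock(&m)
+	for !ready {
+		cond_wait(&c, &m) // Releases m while waiting, reacquires on wakeup.
+	}
+	mutex_unlock(&m)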
+*/
 Cond :: struct #no_copy {
 	impl: _Cond,
 }
 
+/*
+Wait until the condition variable is signalled.
+
+This procedure atomically releases the lock on the specified mutex and blocks
+the current thread until the specified condition variable is signalled, or
+until a spurious wakeup occurs. The mutex is locked again before this
+procedure returns.
+
+The mutex must be held by the calling thread before calling this procedure.
+
+**Note**: This procedure can return on a spurious wake-up, even if the condition
+variable was not signalled by a thread.
+*/
 cond_wait :: proc "contextless" (c: ^Cond, m: ^Mutex) {
 	_cond_wait(c, m)
 }
 
+/*
+Wait until the condition variable is signalled or the timeout is reached.
+
+This procedure atomically releases the lock on the specified mutex and blocks
+the current thread until the specified condition variable is signalled, the
+timeout is reached, or a spurious wakeup occurs. The mutex is locked again
+before this procedure returns.
+
+If the timeout was reached, this procedure returns `false`. Otherwise it
+returns `true`.
+
+The mutex must be held by the calling thread before calling this procedure.
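+
+**Example** (a sketch of a timed wait loop; `m`, `c`, and `ready` are assumed
+declarations):
+
+	mutex_lock(&m)
+	for !ready {
+		if !cond_wait_with_timeout(&c, &m, time.Second) {
+			break // Timed out before the condition was signalled.
+		}
+	}
+	mutex_unlock(&m)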
+*/
 cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
@@ -170,51 +456,123 @@ cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: tim
 	return _cond_wait_with_timeout(c, m, duration)
 }
 
+/*
+Wake up one thread that waits on a condition variable.
+
+This procedure causes one of the threads waiting on the condition variable to
+wake up, if any threads are waiting.
+*/
 cond_signal :: proc "contextless" (c: ^Cond) {
 	_cond_signal(c)
 }
 
+/*
+Wake up all threads that wait on a condition variable.
+
+This procedure causes all threads waiting on the condition variable to wake up.
+*/
 cond_broadcast :: proc "contextless" (c: ^Cond) {
 	_cond_broadcast(c)
 }
 
-
-// When waited upon, blocks until the internal count is greater than zero, then subtracts one.
-// Posting to the semaphore increases the count by one, or the provided amount.
-//
-// A Sema must not be copied after first use
+/*
+Semaphore.
+
+When waited upon, a semaphore blocks until the internal count is greater than
+zero, then decrements the internal counter by one. Posting to the semaphore
+increases the count by one, or by the provided amount.
+
+This type of synchronization primitive can be useful for implementing queues.
+The internal counter of the semaphore can be thought of as the number of items
+in the queue. After an item has been pushed to the queue, the producing thread
+shall call `sema_post()`, increasing the counter. When a thread takes an item
+from the queue, it shall call `sema_wait()`, which waits for the semaphore
+counter to become non-zero and then decrements it (see the example below).
+
+**Note**: A semaphore must not be copied after first use (e.g., after posting
+to it). This is because, in order to coordinate with other threads, all threads
+must watch the same memory address to know when the semaphore has been posted.
+Trying to use a copy at a different memory address will result in broken and
+unsafe behavior. For this reason, semaphores are marked as `#no_copy`.
+*/
 Sema :: struct #no_copy {
 	impl: _Sema,
 }
 
+/*
+Increment the internal counter on a semaphore by the specified amount.
+
+This procedure increments the internal counter of the semaphore by `count`. If
+any threads were waiting on the semaphore, up to `count` of them will be woken
+up and continue execution.
+*/
 sema_post :: proc "contextless" (s: ^Sema, count := 1) {
 	_sema_post(s, count)
 }
 
+/*
+Wait on a semaphore until the internal counter is non-zero.
+
+This procedure blocks the current thread until the semaphore counter becomes
+non-zero, then atomically decrements the counter by one.
+*/
 sema_wait :: proc "contextless" (s: ^Sema) {
 	_sema_wait(s)
 }
 
+/*
+Wait on a semaphore until the internal counter is non-zero or a timeout is reached.
+
+This procedure blocks the current thread until the semaphore counter becomes
+non-zero, then atomically decrements the counter by one. If the specified
+timeout is reached before the counter becomes non-zero, this procedure returns
+`false`, otherwise it returns `true`.
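+
+**Example** (a minimal sketch; `s` is an assumed `Sema`):
+
+	if sema_wait_with_timeout(&s, time.Second) {
+		// The counter was decremented within one second.
+	} else {
+		// Timed out; the counter was not decremented.
+	}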
+*/
 sema_wait_with_timeout :: proc "contextless" (s: ^Sema, duration: time.Duration) -> bool {
 	return _sema_wait_with_timeout(s, duration)
 }
 
+/*
+Fast userspace mutual exclusion lock.
 
+Futex is a fast userspace mutual exclusion lock that uses the address of a
+32-bit value as an identifier of the queue of waiting threads. The 32-bit
+value itself can be used to store extra data.
 
-// Futex is a fast userspace mutual exclusion lock, using a 32-bit memory address as a hint
-// 
-// An Futex must not be copied after first use
+**IMPORTANT**: A futex must not be copied after first use (e.g., after waiting
+on it the first time, or signalling it). This is because, in order to coordinate
+with other threads, all threads must watch the same memory address. Trying to
+use a copy of the futex at a different memory address will result in broken and
+unsafe behavior.
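+
+**Example** (a minimal sketch of a one-shot event; `f` is an assumed `Futex`
+initialized to 0):
+
+	// Waiting thread: sleep until f becomes non-zero.
+	for atomic_load(&f) == 0 {
+		futex_wait(&f, 0) // May wake spuriously, hence the loop.
+	}
+
+	// Signalling thread: set the value, then wake all waiters.
+	atomic_store(&f, 1)
+	futex_broadcast(&f)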
+*/
 Futex :: distinct u32
 
+/*
+Sleep if the futex contains the expected value until it's signalled.
+
+If the value of the futex is `expected`, this procedure blocks the current
+thread until the futex is signalled, or until a spurious wakeup occurs.
+Otherwise this procedure returns immediately.
+*/
 futex_wait :: proc "contextless" (f: ^Futex, expected: u32) {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return
 	}
-	
-	_assert(_futex_wait(f, expected), "futex_wait failure")
+	ok := _futex_wait(f, expected)
+	_assert(ok, "futex_wait failure")
 }
 
-// returns true if the wait happened within the duration, false if it exceeded the time duration
+/*
+Sleep if the futex contains the expected value until it's signalled or the
+timeout is reached.
+
+If the value of the futex is `expected`, this procedure blocks the current
+thread until the futex is signalled, the timeout is reached, or a spurious
+wakeup occurs. Otherwise this procedure returns immediately.
+
+This procedure returns `false` if the timeout was reached, `true` otherwise.
+*/
 futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return true
@@ -226,10 +584,16 @@ futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duratio
 	return _futex_wait_with_timeout(f, expected, duration)
 }
 
+/*
+Wake up a single thread waiting on a futex.
+*/
 futex_signal :: proc "contextless" (f: ^Futex) {
 	_futex_signal(f)
 }
 
+/*
+Wake up all threads waiting on a futex.
+*/
 futex_broadcast :: proc "contextless" (f: ^Futex) {
 	_futex_broadcast(f)
 }