4 年之前 · 911c428dac
--- a/src/queue.cpp
+++ b/src/queue.cpp
@@ -10,24 +10,47 @@ struct MPMCQueueNodeNonAtomic {
 
				 	T   data;
			
 
				 };
			
 
				 
			
 
				+#define MPMC_CACHE_LINE_SIZE 64
			
 
				+
			
 
				 // Multiple Producer Multiple Consumer Queue
			
 
				 template <typename T>
			
 
				 struct MPMCQueue {
			
 
				-	static size_t const PAD0_OFFSET = (sizeof(i32) + sizeof(Array<MPMCQueueNode<T>>) + sizeof(BlockingMutex) + sizeof(i32));
			
 
				+	static size_t const PAD0_OFFSET = (sizeof(Array<MPMCQueueNode<T>>) + sizeof(BlockingMutex) + sizeof(i32) + sizeof(i32));
			
 
				 
			
 
				-	i32 mask;
			
 
				 	Array<MPMCQueueNode<T>> buffer;
			
 
				 	BlockingMutex mutex;
			
 
				 	std::atomic<i32> count;
			
 
				+	i32 mask;
			
 
				 
			
 
				-	char pad0[(128 - PAD0_OFFSET) % 64];
			
 
				+	char pad0[(MPMC_CACHE_LINE_SIZE*2 - PAD0_OFFSET) % MPMC_CACHE_LINE_SIZE];
			
 
				 	std::atomic<i32> head_idx;
			
 
				 
			
 
				-	char pad1[64 - sizeof(i32)];
			
 
				+	char pad1[MPMC_CACHE_LINE_SIZE - sizeof(i32)];
			
 
				 	std::atomic<i32> tail_idx;
			
 
				 };
			
 
				 
			
 
				 
			
 
				+template <typename T>
			
 
				+void mpmc_internal_init_buffer(Array<MPMCQueueNode<T>> *buffer, i32 offset) {
			
 
				+	i32 size = cast(i32)buffer->count;
			
 
				+	GB_ASSERT(offset % 8 == 0);
			
 
				+	GB_ASSERT(size % 8 == 0);
			
 
				+
			
 
				+	// NOTE(bill): pretend it's not atomic for performance
			
 
				+	auto *raw_data = cast(MPMCQueueNodeNonAtomic<T> *)buffer->data;
			
 
				+	for (i32 i = offset; i < size; i += 8) {
			
 
				+		raw_data[i+0].idx = i+0;
			
 
				+		raw_data[i+1].idx = i+1;
			
 
				+		raw_data[i+2].idx = i+2;
			
 
				+		raw_data[i+3].idx = i+3;
			
 
				+		raw_data[i+4].idx = i+4;
			
 
				+		raw_data[i+5].idx = i+5;
			
 
				+		raw_data[i+6].idx = i+6;
			
 
				+		raw_data[i+7].idx = i+7;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				 template <typename T>
			
 
				 void mpmc_init(MPMCQueue<T> *q, gbAllocator a, isize size_i) {
			
 
				 	if (size_i < 8) {
			
@@ -42,20 +65,11 @@ void mpmc_init(MPMCQueue<T> *q, gbAllocator a, isize size_i) {
 
				 	q->mask = size-1;
			
 
				 	array_init(&q->buffer, a, size);
			
 
				 
			
 
				-	// NOTE(bill): pretend it's not atomic for performance
			
 
				-	auto *raw_data = cast(MPMCQueueNodeNonAtomic<T> *)q->buffer.data;
			
 
				-	for (i32 i = 0; i < size; i += 8) {
			
 
				-		raw_data[i+0].idx = i+0;
			
 
				-		raw_data[i+1].idx = i+1;
			
 
				-		raw_data[i+2].idx = i+2;
			
 
				-		raw_data[i+3].idx = i+3;
			
 
				-		raw_data[i+4].idx = i+4;
			
 
				-		raw_data[i+5].idx = i+5;
			
 
				-		raw_data[i+6].idx = i+6;
			
 
				-		raw_data[i+7].idx = i+7;
			
 
				-	}
			
 
				+	mpmc_internal_init_buffer(&q->buffer, 0);
			
 
				 }
			
 
				 
			
 
				+
			
 
				+
			
 
				 template <typename T>
			
 
				 void mpmc_destroy(MPMCQueue<T> *q) {
			
 
				 	mutex_destroy(&q->mutex);
			
@@ -93,9 +107,7 @@ i32 mpmc_enqueue(MPMCQueue<T> *q, T const &data) {
 
				 			}
			
 
				 			// NOTE(bill): pretend it's not atomic for performance
			
 
				 			auto *raw_data = cast(MPMCQueueNodeNonAtomic<T> *)q->buffer.data;
			
 
				-			for (i32 i = old_size; i < new_size; i++) {
			
 
				-				raw_data[i].idx = i;
			
 
				-			}
			
 
				+			mpmc_internal_init_buffer(&q->buffer, old_size);
			
 
				 			q->mask = new_size-1;
			
 
				 			mutex_unlock(&q->mutex);
			
 
				 		} else {
			
@@ -133,70 +145,3 @@ bool mpmc_dequeue(MPMCQueue<T> *q, T *data_) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-
			
 
				-template <typename T>
			
 
				-struct MPSCQueueNode {
			
 
				-	std::atomic<MPSCQueueNode<T> *> next;
			
 
				-	T data;
			
 
				-};
			
 
				-
			
 
				-template <typename T>
			
 
				-struct MPSCQueue {
			
 
				-	gbAllocator allocator;
			
 
				-
			
 
				-	std::atomic<isize> count;
			
 
				-	std::atomic<MPSCQueueNode<T> *> head;
			
 
				-	std::atomic<MPSCQueueNode<T> *> tail;
			
 
				-};
			
 
				-
			
 
				-template <typename T>
			
 
				-void mpsc_init(MPSCQueue<T> *q, gbAllocator a) {
			
 
				-	using Node = MPSCQueueNode<T>;
			
 
				-
			
 
				-	q->allocator = a;
			
 
				-	Node *front = cast(Node *)gb_alloc_align(q->allocator, gb_size_of(Node), 64);
			
 
				-	front->next.store(nullptr, std::memory_order_relaxed);
			
 
				-	q->head.store(front, std::memory_order_relaxed);
			
 
				-	q->tail.store(front, std::memory_order_relaxed);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-template <typename T>
			
 
				-isize mpsc_enqueue(MPSCQueue<T> *q, T const &value) {
			
 
				-	using Node = MPSCQueueNode<T>;
			
 
				-
			
 
				-	Node *node = cast(Node *)gb_alloc_align(q->allocator, gb_size_of(Node), 64);
			
 
				-	node->data = value;
			
 
				-	node->next.store(nullptr, std::memory_order_relaxed);
			
 
				-
			
 
				-	auto *prev_head = q->head.exchange(node, std::memory_order_acq_rel);
			
 
				-	prev_head->next.store(node, std::memory_order_release);
			
 
				-	return q->count.fetch_add(1, std::memory_order_release);
			
 
				-}
			
 
				-
			
 
				-template <typename T>
			
 
				-bool mpsc_dequeue(MPSCQueue<T> *q, T *value_) {
			
 
				-	auto *tail = q->tail.load(std::memory_order_relaxed);
			
 
				-	auto *next = tail->next.load(std::memory_order_acquire);
			
 
				-	if (next == nullptr) {
			
 
				-		return false;
			
 
				-	}
			
 
				-
			
 
				-	if (value_) *value_ = next->data;
			
 
				-	q->tail.store(next, std::memory_order_release);
			
 
				-	q->count.fetch_sub(1, std::memory_order_release);
			
 
				-	gb_free(q->allocator, tail);
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-template <typename T>
			
 
				-void mpsc_destroy(MPSCQueue<T> *q) {
			
 
				-	T output = {};
			
 
				-	while (mpsc_dequeue(q, &output)) {
			
 
				-		// okay
			
 
				-	}
			
 
				-	auto *front = q->head.load(std::memory_order_relaxed);
			
 
				-	gb_free(q->allocator, front);
			
 
				-}
			
 
				-