
Improve and simplify the memory layout of `MPMCQueue`

gingerBill committed 4 years ago
parent commit 571170fd30
3 changed files with 76 additions and 51 deletions
  1. src/array.cpp (+10 -5)
  2. src/common.cpp (+22 -0)
  3. src/queue.cpp (+44 -46)

src/array.cpp (+10 -5)

@@ -326,12 +326,17 @@ void array_set_capacity(Array<T> *array, isize capacity) {
 		array_resize(array, capacity);
 	}
 
-	T *new_data = nullptr;
-	if (capacity > 0) {
-		new_data = gb_alloc_array(array->allocator, T, capacity);
-		gb_memmove(new_data, array->data, gb_size_of(T) * array->capacity);
+	// NOTE(bill): try gb_resize_align first, and then fallback to alloc+memmove+free
+	isize old_size = array->capacity * gb_size_of(T);
+	isize new_size = capacity * gb_size_of(T);
+	T *new_data = cast(T *)gb_resize_align(array->allocator, array->data, old_size, new_size, gb_align_of(T));
+	if (new_data == nullptr) {
+		if (capacity > 0) {
+			new_data = gb_alloc_array(array->allocator, T, capacity);
+			gb_memmove(new_data, array->data, gb_size_of(T) * array->capacity);
+		}
+		gb_free(array->allocator, array->data);
 	}
-	gb_free(array->allocator, array->data);
 	array->data = new_data;
 	array->capacity = capacity;
 }
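The new path asks the allocator to resize the existing block in place before falling back to the old allocate-copy-free sequence. Below is a minimal standalone sketch of that pattern using the C runtime, where realloc stands in for gb_resize_align; it is an analogy for illustration, not the gb.h implementation (and it assumes new_size > 0, since realloc(p, 0) semantics vary):

#include <cstdlib>
#include <cstring>

void *resize_or_fallback(void *old_data, size_t old_size, size_t new_size) {
	// Resize attempt first; the allocator may be able to grow the block in place.
	void *new_data = realloc(old_data, new_size);
	if (new_data == nullptr) {
		// Fallback: fresh allocation + copy + free, mirroring the diff above.
		new_data = malloc(new_size);
		if (new_data != nullptr) {
			memcpy(new_data, old_data, old_size < new_size ? old_size : new_size);
		}
		free(old_data);
	}
	return new_data;
}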

src/common.cpp (+22 -0)

@@ -228,6 +228,28 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
 	return ptr;
 }
 
+
+template <typename T>
+void resize_array_raw(T **array, gbAllocator const &a, isize old_count, isize new_count) {
+	GB_ASSERT(new_count >= 0);
+	if (new_count == 0) {
+		gb_free(a, *array);
+		*array = nullptr;
+		return;
+	}
+	if (new_count < old_count) {
+		return;
+	}
+	isize old_size = old_count * gb_size_of(T);
+	isize new_size = new_count * gb_size_of(T);
+	isize alignment = gb_align_of(T);
+	auto new_data = cast(T *)gb_resize_align(a, *array, old_size, new_size, alignment);
+	GB_ASSERT(new_data != nullptr);
+	*array = new_data;
+}
+
+
+
 #include "unicode.cpp"
 #include "array.cpp"
 #include "string.cpp"

src/queue.cpp (+44 -46)

@@ -1,26 +1,16 @@
-template <typename T>
-struct MPMCQueueNode {
-	std::atomic<i32> idx;
-	T                data;
-};
-
-template <typename T>
-struct MPMCQueueNodeNonAtomic {
-	i32 idx;
-	T   data;
-};
-
 #define MPMC_CACHE_LINE_SIZE 64
 
 // Multiple Producer Multiple Consumer Queue
 template <typename T>
 struct MPMCQueue {
-	static size_t const PAD0_OFFSET = (sizeof(Array<MPMCQueueNode<T>>) + sizeof(BlockingMutex) + sizeof(i32) + sizeof(i32));
+	static size_t const PAD0_OFFSET = (sizeof(T *) + sizeof(std::atomic<i32> *) + sizeof(gbAllocator) + sizeof(BlockingMutex) + sizeof(i32) + sizeof(i32));
 
-	Array<MPMCQueueNode<T>> buffer;
-	BlockingMutex mutex;
-	std::atomic<i32> count;
-	i32 mask;
+	T *               nodes;
+	std::atomic<i32> *indices;
+	gbAllocator       allocator;
+	BlockingMutex     mutex;
+	std::atomic<i32>  count;
+	i32               mask;
 
 	char pad0[(MPMC_CACHE_LINE_SIZE*2 - PAD0_OFFSET) % MPMC_CACHE_LINE_SIZE];
 	std::atomic<i32> head_idx;
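The pad0 sizing generalizes to any prefix: for a prefix of p bytes, p + ((2*64 - p) % 64) is always a multiple of 64, so head_idx starts on a fresh cache line however the leading fields change size (assuming the compiler inserts no padding of its own before pad0, which the PAD0_OFFSET sum already presumes). A standalone check of that arithmetic:

#include <cstdio>

int main(void) {
	// pad0 rule from the struct above: (2*64 - p) % 64 for a p-byte prefix.
	// The 2* keeps the subtraction non-negative for prefixes up to 128 bytes.
	for (int p = 1; p <= 128; p++) {
		int pad = (2*64 - p) % 64;
		if ((p + pad) % 64 != 0) {
			printf("prefix %d is not padded to a cache line\n", p);
		}
	}
	return 0; // prints nothing: every prefix lands on a 64-byte boundary
}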
@@ -30,23 +20,22 @@ struct MPMCQueue {
 };
 
 
-template <typename T>
-void mpmc_internal_init_buffer(Array<MPMCQueueNode<T>> *buffer, i32 offset) {
-	i32 size = cast(i32)buffer->count;
+
+void mpmc_internal_init_indices(std::atomic<i32> *indices, i32 offset, i32 size) {
 	GB_ASSERT(offset % 8 == 0);
 	GB_ASSERT(size % 8 == 0);
 
 	// NOTE(bill): pretend it's not atomic for performance
-	auto *raw_data = cast(MPMCQueueNodeNonAtomic<T> *)buffer->data;
+	auto *raw_data = cast(i32 *)indices;
 	for (i32 i = offset; i < size; i += 8) {
-		raw_data[i+0].idx = i+0;
-		raw_data[i+1].idx = i+1;
-		raw_data[i+2].idx = i+2;
-		raw_data[i+3].idx = i+3;
-		raw_data[i+4].idx = i+4;
-		raw_data[i+5].idx = i+5;
-		raw_data[i+6].idx = i+6;
-		raw_data[i+7].idx = i+7;
+		raw_data[i+0] = i+0;
+		raw_data[i+1] = i+1;
+		raw_data[i+2] = i+2;
+		raw_data[i+3] = i+3;
+		raw_data[i+4] = i+4;
+		raw_data[i+5] = i+5;
+		raw_data[i+6] = i+6;
+		raw_data[i+7] = i+7;
 	}
 }
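For reference, the unrolled loop is behaviourally equivalent to the simple form below. The plain-i32 stores are only legal because no other thread can observe these slots while they are written: at init time the queue is not yet shared, and the resize path is serialized by q->mutex. A sketch:

#include <atomic>
#include <cstdint>

static void init_indices_simple(std::atomic<int32_t> *indices, int32_t offset, int32_t size) {
	// Same effect as the unrolled body, one slot at a time. Casting away
	// the atomic wrapper is the same "pretend it's not atomic" trick the
	// diff uses to avoid per-store atomic overhead during initialization.
	int32_t *raw = reinterpret_cast<int32_t *>(indices);
	for (int32_t i = offset; i < size; i++) {
		raw[i] = i;
	}
}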
 
@@ -63,9 +52,11 @@ void mpmc_init(MPMCQueue<T> *q, gbAllocator a, isize size_i) {
 
 	mutex_init(&q->mutex);
 	q->mask = size-1;
-	array_init(&q->buffer, a, size);
+	q->allocator = a;
+	q->nodes   = gb_alloc_array(a, T, size);
+	q->indices = cast(std::atomic<i32> *)gb_alloc_array(a, i32, size);
 
-	mpmc_internal_init_buffer(&q->buffer, 0);
+	mpmc_internal_init_indices(q->indices, 0, q->mask+1);
 }
 
 
@@ -73,7 +64,8 @@ void mpmc_init(MPMCQueue<T> *q, gbAllocator a, isize size_i) {
 template <typename T>
 void mpmc_destroy(MPMCQueue<T> *q) {
 	mutex_destroy(&q->mutex);
-	gb_free(q->buffer.allocator, q->buffer.data);
+	gb_free(q->allocator, q->nodes);
+	gb_free(q->allocator, q->indices);
 }
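Hypothetical lifetime usage of the reworked queue; heap_allocator() and the capacity are illustrative:

MPMCQueue<int> q = {};
mpmc_init(&q, heap_allocator(), 64); // allocates q.nodes and q.indices as two flat arrays
// ... producers and consumers operate on &q ...
mpmc_destroy(&q);                    // frees both arrays through the stored q.allocator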
 
 
@@ -84,30 +76,35 @@ i32 mpmc_enqueue(MPMCQueue<T> *q, T const &data) {
 	i32 head_idx = q->head_idx.load(std::memory_order_relaxed);
 
 	for (;;) {
-		auto node = &q->buffer.data[head_idx & q->mask];
-		i32 node_idx = node->idx.load(std::memory_order_acquire);
+		auto node = &q->nodes[head_idx & q->mask];
+		auto node_idx_ptr = &q->indices[head_idx & q->mask];
+		i32 node_idx = node_idx_ptr->load(std::memory_order_acquire);
 		i32 diff = node_idx - head_idx;
 
 		if (diff == 0) {
 			i32 next_head_idx = head_idx+1;
 			if (q->head_idx.compare_exchange_weak(head_idx, next_head_idx)) {
-				node->data = data;
-				node->idx.store(next_head_idx, std::memory_order_release);
+				*node = data;
+				node_idx_ptr->store(next_head_idx, std::memory_order_release);
 				return q->count.fetch_add(1, std::memory_order_release);
 			}
 		} else if (diff < 0) {
 			mutex_lock(&q->mutex);
-			i32 old_size = cast(i32)q->buffer.count;
+			i32 old_size = q->mask+1;
 			i32 new_size = old_size*2;
-			array_resize(&q->buffer, new_size);
-			if (q->buffer.data == nullptr) {
+			resize_array_raw(&q->nodes, q->allocator, old_size, new_size);
+			if (q->nodes == nullptr) {
+				GB_PANIC("Unable to resize enqueue: %td -> %td", old_size, new_size);
+				mutex_unlock(&q->mutex);
+				return -1;
+			}
+			resize_array_raw(&q->indices, q->allocator, old_size, new_size);
+			if (q->indices == nullptr) {
 				GB_PANIC("Unable to resize enqueue: %td -> %td", old_size, new_size);
 				mutex_unlock(&q->mutex);
 				return -1;
 			}
-			// NOTE(bill): pretend it's not atomic for performance
-			auto *raw_data = cast(MPMCQueueNodeNonAtomic<T> *)q->buffer.data;
-			mpmc_internal_init_buffer(&q->buffer, old_size);
+			mpmc_internal_init_indices(q->indices, old_size, new_size);
 			q->mask = new_size-1;
 			mutex_unlock(&q->mutex);
 		} else {
@@ -125,15 +122,16 @@ bool mpmc_dequeue(MPMCQueue<T> *q, T *data_) {
 	i32 tail_idx = q->tail_idx.load(std::memory_order_relaxed);
 
 	for (;;) {
-		auto node = &q->buffer.data[tail_idx & q->mask];
-		i32 node_idx = node->idx.load(std::memory_order_acquire);
+		auto node_ptr = &q->nodes[tail_idx & q->mask];
+		auto node_idx_ptr = &q->indices[tail_idx & q->mask];
+		i32 node_idx = node_idx_ptr->load(std::memory_order_acquire);
 		i32 diff = node_idx - (tail_idx+1);
 
 		if (diff == 0) {
 			i32 next_tail_idx = tail_idx+1;
 			if (q->tail_idx.compare_exchange_weak(tail_idx, next_tail_idx)) {
-				if (data_) *data_ = node->data;
-				node->idx.store(tail_idx + q->mask + 1, std::memory_order_release);
+				if (data_) *data_ = *node_ptr;
+				node_idx_ptr->store(tail_idx + q->mask + 1, std::memory_order_release);
 				q->count.fetch_sub(1, std::memory_order_release);
 				return true;
 			}
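Finally, a hypothetical single-threaded round trip through the new accessors, showing the bookkeeping from the caller's side (the queue is of course intended for concurrent producers and consumers):

MPMCQueue<int> q = {};
mpmc_init(&q, heap_allocator(), 16);

mpmc_enqueue(&q, 42);               // claims a slot via head_idx, writes q.nodes[slot],
                                    // then publishes by storing head_idx+1 in q.indices[slot]
int value = 0;
bool ok = mpmc_dequeue(&q, &value); // the mirror-image dance on tail_idx
GB_ASSERT(ok && value == 42);

mpmc_destroy(&q);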