@@ -10,13 +10,18 @@ gb_internal void thread_pool_destroy(ThreadPool *pool);
 gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data);
 gb_internal void thread_pool_wait(ThreadPool *pool);
 
+enum GrabState {
+	GrabSuccess = 0,
+	GrabEmpty   = 1,
+	GrabFailed  = 2,
+};
+
 struct ThreadPool {
 	gbAllocator threads_allocator;
 	Slice<Thread> threads;
 	std::atomic<bool> running;
 
 	Futex tasks_available;
-
 	Futex tasks_left;
 };
 
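Note: the queue functions added below operate on a `thread->queue` whose type is declared elsewhere in this header and is not part of the diff. A minimal sketch of the layout implied by the usage below (names are lifted from the code; the actual declarations may differ):

    // Sketch only -- inferred from usage in this diff, not the real header.
    struct TaskRingBuffer {
        isize size;          // capacity; indices wrap via `% size`
        WorkerTask *buffer;  // storage for `size` tasks
    };

    struct TaskQueue {
        std::atomic<TaskRingBuffer *> ring; // current ring, replaced on grow
        std::atomic<isize> top;             // steal end: thieves advance this
        std::atomic<isize> bottom;          // owner end: push/take work here
    };
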
@@ -46,7 +51,7 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) {
 	for_array_off(i, 1, pool->threads) {
 		Thread *t = &pool->threads[i];
-		pool->tasks_available.fetch_add(1, std::memory_order_relaxed);
+		pool->tasks_available.fetch_add(1, std::memory_order_acquire);
 		futex_broadcast(&pool->tasks_available);
 		thread_join_and_destroy(t);
 	}
@@ -54,51 +59,86 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) {
 	gb_free(pool->threads_allocator, pool->threads.data);
 }
 
-void thread_pool_queue_push(Thread *thread, WorkerTask task) {
-	u64 capture;
-	u64 new_capture;
-	do {
-		capture = thread->head_and_tail.load();
-
-		u64 mask = thread->capacity - 1;
-		u64 head = (capture >> 32) & mask;
-		u64 tail = ((u32)capture) & mask;
+TaskRingBuffer *taskring_grow(TaskRingBuffer *ring, isize bottom, isize top) {
+	TaskRingBuffer *new_ring = taskring_init(ring->size * 2);
+	for (isize i = top; i < bottom; i++) {
+		new_ring->buffer[i % new_ring->size] = ring->buffer[i % ring->size];
+	}
+	return new_ring;
+}
 
-		u64 new_head = (head + 1) & mask;
-		GB_ASSERT_MSG(new_head != tail, "Thread Queue Full!");
+void thread_pool_queue_push(Thread *thread, WorkerTask task) {
+	isize bot = thread->queue.bottom.load(std::memory_order_relaxed);
+	isize top = thread->queue.top.load(std::memory_order_acquire);
+	TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+
+	isize size = bot - top;
+	if (size > (cur_ring->size - 1)) {
+		// Queue is full
+		thread->queue.ring = taskring_grow(thread->queue.ring, bot, top);
+		cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+	}
 
-		// This *must* be done in here, to avoid a potential race condition where we no longer own the slot by the time we're assigning
-		thread->queue[head] = task;
-		new_capture = (new_head << 32) | tail;
-	} while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture));
+	cur_ring->buffer[bot % cur_ring->size] = task;
+	std::atomic_thread_fence(std::memory_order_release);
+	thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
 
 	thread->pool->tasks_left.fetch_add(1, std::memory_order_release);
 	thread->pool->tasks_available.fetch_add(1, std::memory_order_relaxed);
 	futex_broadcast(&thread->pool->tasks_available);
 }
 
-bool thread_pool_queue_pop(Thread *thread, WorkerTask *task) {
-	u64 capture;
-	u64 new_capture;
-	do {
-		capture = thread->head_and_tail.load(std::memory_order_acquire);
-
-		u64 mask = thread->capacity - 1;
-		u64 head = (capture >> 32) & mask;
-		u64 tail = ((u32)capture) & mask;
+GrabState thread_pool_queue_take(Thread *thread, WorkerTask *task) {
+	isize bot = thread->queue.bottom.load(std::memory_order_relaxed) - 1;
+	TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+	thread->queue.bottom.store(bot, std::memory_order_relaxed);
+	std::atomic_thread_fence(std::memory_order_seq_cst);
+
+	isize top = thread->queue.top.load(std::memory_order_relaxed);
+	if (top <= bot) {
+		// Queue is not empty
+		*task = cur_ring->buffer[bot % cur_ring->size];
+		if (top == bot) {
+			// Only one entry left in the queue; race stealers for it
+			if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+				// Race failed; a stealer took the last task
+				thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+				return GrabEmpty;
+			}
-		u64 new_tail = (tail + 1) & mask;
-		if (tail == head) {
-			return false;
+			thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+			return GrabSuccess;
 		}
 
-		// Making a copy of the task before we increment the tail, avoiding the same potential race condition as above
-		*task = thread->queue[tail];
-
-		new_capture = (head << 32) | new_tail;
-	} while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture, std::memory_order_release));
+		// We got a task without hitting a race
+		return GrabSuccess;
+	} else {
+		// Queue is empty
+		thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+		return GrabEmpty;
+	}
+}
 
-	return true;
+GrabState thread_pool_queue_steal(Thread *thread, WorkerTask *task) {
+	isize top = thread->queue.top.load(std::memory_order_acquire);
+	std::atomic_thread_fence(std::memory_order_seq_cst);
+	isize bot = thread->queue.bottom.load(std::memory_order_acquire);
+
+	GrabState ret = GrabEmpty;
+	if (top < bot) {
+		// Queue is not empty
+		TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_consume);
+		*task = cur_ring->buffer[top % cur_ring->size];
+
+		if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+			// Race failed; the owner or another stealer got there first
+			ret = GrabFailed;
+		} else {
+			ret = GrabSuccess;
+		}
+	}
+	return ret;
 }
 
 gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data) {
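Note: `thread_pool_queue_push`, `thread_pool_queue_take`, and `thread_pool_queue_steal` above follow the Chase-Lev work-stealing deque (Chase & Lev, "Dynamic Circular Work-Stealing Deque", SPAA 2005), with fences in the style of Lê et al., "Correct and Efficient Work-Stealing for Weak Memory Models" (PPoPP 2013): the owner pushes and takes at `bottom`, thieves compete for `top`, and the single-element and steal cases are arbitrated by a seq_cst CAS on `top`. `taskring_grow` relies on a `taskring_init` helper that is not in the diff; a plausible sketch, assuming plain heap allocation (the real helper presumably goes through the codebase's gb allocators):

    // Hypothetical helper -- inferred from the call in taskring_grow.
    TaskRingBuffer *taskring_init(isize size) {
        TaskRingBuffer *ring = (TaskRingBuffer *)calloc(1, sizeof(TaskRingBuffer));
        ring->size   = size;
        ring->buffer = (WorkerTask *)calloc(size, sizeof(WorkerTask));
        return ring;
    }

Note also that the grow path in `thread_pool_queue_push` swaps in the new ring without freeing the old one. That appears to be the usual Chase-Lev compromise: a concurrent stealer may still be reading through the stale pointer, so reclamation has to be deferred rather than done inline.
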
@@ -115,12 +155,11 @@ gb_internal void thread_pool_wait(ThreadPool *pool) {
 	while (pool->tasks_left.load(std::memory_order_acquire)) {
 		// if we've got tasks on our queue, run them
-		while (thread_pool_queue_pop(current_thread, &task)) {
+		while (thread_pool_queue_take(current_thread, &task) == GrabSuccess) {
 			task.do_work(task.data);
 			pool->tasks_left.fetch_sub(1, std::memory_order_release);
 		}
 
-
 		// is this mem-barriered enough?
 		// This *must* be executed in this order, so the futex wakes immediately
 		// if rem_tasks has changed since we checked last, otherwise the program
@@ -145,7 +184,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
 	usize finished_tasks = 0;
 	i32 state;
 
-	while (thread_pool_queue_pop(current_thread, &task)) {
+	while (thread_pool_queue_take(current_thread, &task) == GrabSuccess) {
 		task.do_work(task.data);
 		pool->tasks_left.fetch_sub(1, std::memory_order_release);
 
@@ -167,7 +206,13 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
 			Thread *thread = &pool->threads.data[idx];
 			WorkerTask task;
-			if (thread_pool_queue_pop(thread, &task)) {
+
+			GrabState ret = thread_pool_queue_steal(thread, &task);
+			if (ret == GrabFailed) {
+				goto main_loop_continue;
+			} else if (ret == GrabEmpty) {
+				continue;
+			} else if (ret == GrabSuccess) {
 				task.do_work(task.data);
 				pool->tasks_left.fetch_sub(1, std::memory_order_release);
 
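Note: the three-way branch distinguishes the two ways a steal can come back empty-handed. `GrabFailed` means the CAS on `top` lost a race with the owner or another thief, so the victim may still have work; `goto main_loop_continue` skips the sleep below and restarts the scan. `GrabEmpty` means the victim's queue was observed empty, so the loop simply moves on to the next thread.
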
@@ -182,6 +227,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
 		// if we've done all our work, and there's nothing to steal, go to sleep
 		state = pool->tasks_available.load(std::memory_order_acquire);
 
+		if (!pool->running) { break; }
 		futex_wait(&pool->tasks_available, state);
 
 main_loop_continue:;
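
Note: the new `running` check closes a shutdown race: a worker that loaded `state` and then stalled could otherwise call `futex_wait` after the destroyer's broadcast and sleep forever. How the two sides pair up (condensed from this diff; the store to `running` is assumed to happen earlier in `thread_pool_destroy`, outside the hunks shown):

    // Destroyer, per worker thread (thread_pool_destroy):
    //   pool->running = false;                    // assumed, not in these hunks
    //   pool->tasks_available.fetch_add(1, ...);  // invalidate any captured `state`
    //   futex_broadcast(&pool->tasks_available);  // wake workers already asleep
    //
    // Worker sleep path (hunk above):
    //   state = pool->tasks_available.load(std::memory_order_acquire);
    //   if (!pool->running) { break; }            // new: exit instead of sleeping
    //   futex_wait(&pool->tasks_available, state); // returns immediately when the
    //                                              // counter no longer equals `state`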