
Merge branch 'master' of https://github.com/odin-lang/Odin into more_dwmapi_bindings

JooperGH 2 years ago
parent
commit
4a70265bfb

+ 1 - 1
core/math/linalg/extended.odin

@@ -531,7 +531,7 @@ not_equal          :: proc{not_equal_single, not_equal_array}
 
 any :: proc(x: $A/[$N]bool) -> (out: bool) {
 	for e in x {
-		if x {
+		if e {
 			return true
 		}
 	}
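
This one-line change fixes a shadowing bug: the loop tested the array `x` instead of the element `e`, so `any` never actually inspected the elements. A minimal sketch of the corrected behaviour (values are illustrative):

```odin
package main

import "core:fmt"
import "core:math/linalg"

main :: proc() {
	v := [3]bool{false, true, false}
	// With the fix, each element `e` is tested in turn.
	fmt.println(linalg.any(v))         // true: one element is set
	fmt.println(linalg.any([2]bool{})) // false: no element is set
}
```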

+ 0 - 13
core/mem/raw.odin

@@ -23,16 +23,3 @@ make_any :: proc "contextless" (data: rawptr, id: typeid) -> any {
 }
 
 raw_data :: builtin.raw_data
-
-
-Poly_Raw_Map_Entry :: struct($Key, $Value: typeid) {
-	hash:  uintptr,
-	next:  int,
-	key:   Key,
-	value: Value,	
-}
-
-Poly_Raw_Map :: struct($Key, $Value: typeid) {
-	hashes:  []int,
-	entries: [dynamic]Poly_Raw_Map_Entry(Key, Value),
-}

+ 1 - 1
core/os/os_darwin.odin

@@ -333,7 +333,7 @@ foreign dl {
 	@(link_name="dlerror") _unix_dlerror :: proc() -> cstring ---
 }
 
-get_last_error :: proc() -> int {
+get_last_error :: proc "contextless" () -> int {
 	return __error()^
 }
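
This and the matching changes below mark `get_last_error` as `"contextless"`: the procedure no longer receives Odin's implicit `context`, so it can be called from other contextless code, such as the `core:sync` primitives converted later in this commit. A hedged sketch of what the change enables, assuming a Unix target where `os.get_last_error` is defined as above:

```odin
package main

import "core:os"

read_errno :: proc "contextless" () -> int {
	// No implicit context exists here (no context.allocator etc.),
	// so only contextless procedures may be called. This is legal
	// once os.get_last_error is "contextless".
	return os.get_last_error()
}

main :: proc() {
	_ = read_errno()
}
```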
 

+ 1 - 1
core/os/os_freebsd.odin

@@ -303,7 +303,7 @@ is_path_separator :: proc(r: rune) -> bool {
 	return r == '/'
 }
 
-get_last_error :: proc() -> int {
+get_last_error :: proc "contextless" () -> int {
 	return __errno_location()^
 }
 

+ 1 - 1
core/os/os_linux.odin

@@ -441,7 +441,7 @@ _get_errno :: proc(res: int) -> Errno {
 }
 
 // get errno from libc
-get_last_error :: proc() -> int {
+get_last_error :: proc "contextless" () -> int {
 	return __errno_location()^
 }
 

+ 1 - 1
core/os/os_openbsd.odin

@@ -294,7 +294,7 @@ is_path_separator :: proc(r: rune) -> bool {
 	return r == '/'
 }
 
-get_last_error :: proc() -> int {
+get_last_error :: proc "contextless" () -> int {
 	return __errno()^
 }
 

+ 7 - 36
core/reflect/reflect.odin

@@ -123,46 +123,17 @@ backing_type_kind :: proc(T: typeid) -> Type_Kind {
 }
 
 
-type_info_base :: proc(info: ^Type_Info) -> ^Type_Info {
-	if info == nil { return nil }
-
-	base := info
-	loop: for {
-		#partial switch i in base.variant {
-		case Type_Info_Named: base = i.base
-		case: break loop
-		}
-	}
-	return base
-}
-
-
-type_info_core :: proc(info: ^Type_Info) -> ^Type_Info {
-	if info == nil { return nil }
-
-	base := info
-	loop: for {
-		#partial switch i in base.variant {
-		case Type_Info_Named:  base = i.base
-		case Type_Info_Enum:   base = i.base
-		case: break loop
-		}
-	}
-	return base
-}
+type_info_base :: runtime.type_info_base
+type_info_core :: runtime.type_info_core 
 type_info_base_without_enum :: type_info_core
 
 
-typeid_base :: proc(id: typeid) -> typeid {
-	ti := type_info_of(id)
-	ti = type_info_base(ti)
-	return ti.id
-}
-typeid_core :: proc(id: typeid) -> typeid {
-	ti := type_info_base_without_enum(type_info_of(id))
-	return ti.id
+when !ODIN_DISALLOW_RTTI {
+	typeid_base :: runtime.typeid_base
+	typeid_core :: runtime.typeid_core
+	typeid_base_without_enum :: typeid_core
 }
-typeid_base_without_enum :: typeid_core
+
 
 any_base :: proc(v: any) -> any {
 	v := v
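
`type_info_base` and `type_info_core` are now thin aliases for the `core:runtime` implementations, and the `typeid_*` variants are gated behind `ODIN_DISALLOW_RTTI` since they need runtime type information. Call sites are unchanged; a small sketch:

```odin
package main

import "core:fmt"
import "core:reflect"

Celsius :: distinct f64

main :: proc() {
	id := typeid_of(Celsius)
	// typeid_base strips the named/distinct layer; typeid_core would
	// additionally strip an enum's backing layer.
	fmt.println(reflect.typeid_base(id) == typeid_of(f64)) // true
}
```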

+ 1 - 1
core/runtime/core_builtin.odin

@@ -615,7 +615,7 @@ shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #call
 	old_size := a.cap * size_of(E)
 	new_size := new_cap * size_of(E)
 
-	new_data, err := mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc)
+	new_data, err := mem_resize(a.data, old_size, new_size, align_of(E), a.allocator, loc)
 	if err != nil {
 		return
 	}
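
The fix makes `shrink_dynamic_array` resize through the array's own stored allocator (`a.allocator`) rather than whatever `allocator` happened to be in scope, which matters for arrays created with a non-default allocator. A sketch calling the procedure directly:

```odin
package main

import "core:fmt"
import "core:mem"
import "core:runtime"

main :: proc() {
	backing: [4096]byte
	arena: mem.Arena
	mem.arena_init(&arena, backing[:])

	arr := make([dynamic]int, 0, 64, mem.arena_allocator(&arena))
	// After the fix, the resize goes through arr's own allocator
	// (the arena), not an unrelated allocator in scope.
	runtime.shrink_dynamic_array(&arr, 8)
	fmt.println(cap(arr)) // 8
}
```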

+ 1 - 1
core/slice/map.odin

@@ -37,7 +37,7 @@ Map_Entry_Info :: struct($Key, $Value: typeid) {
 }
 
 
-map_entries :: proc(m: $M/map[$K]$V, allocator := context.allocator) -> (entries: []Map_Entry(K, V), err: runtime.Allocator) {
+map_entries :: proc(m: $M/map[$K]$V, allocator := context.allocator) -> (entries: []Map_Entry(K, V), err: runtime.Allocator_Error) {
 	entries = make(type_of(entries), len(m), allocator) or_return
 	i := 0
 	for key, value in m {
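
The old signature named `runtime.Allocator` (the allocator struct itself) where the error union `runtime.Allocator_Error` was intended, which broke `err != nil` checks and `or_return`. A sketch of the now-correct call, assuming `Map_Entry` exposes `key`/`value` fields:

```odin
package main

import "core:fmt"
import "core:slice"

main :: proc() {
	m := map[string]int{"a" = 1, "b" = 2}
	defer delete(m)

	// err is a runtime.Allocator_Error after the fix.
	entries, err := slice.map_entries(m)
	if err != nil {
		return
	}
	defer delete(entries)

	for e in entries {
		fmt.println(e.key, e.value)
	}
}
```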

+ 87 - 30
core/sync/extended.odin

@@ -11,7 +11,7 @@ Wait_Group :: struct {
 	cond:    Cond,
 }
 
-wait_group_add :: proc(wg: ^Wait_Group, delta: int) {
+wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 	if delta == 0 {
 		return
 	}
@@ -20,32 +20,32 @@ wait_group_add :: proc(wg: ^Wait_Group, delta: int) {
 
 	atomic_add(&wg.counter, delta)
 	if wg.counter < 0 {
-		panic("sync.Wait_Group negative counter")
+		_panic("sync.Wait_Group negative counter")
 	}
 	if wg.counter == 0 {
 		cond_broadcast(&wg.cond)
 		if wg.counter != 0 {
-			panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 }
 
-wait_group_done :: proc(wg: ^Wait_Group) {
+wait_group_done :: proc "contextless" (wg: ^Wait_Group) {
 	wait_group_add(wg, -1)
 }
 
-wait_group_wait :: proc(wg: ^Wait_Group) {
+wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	guard(&wg.mutex)
 
 	if wg.counter != 0 {
 		cond_wait(&wg.cond, &wg.mutex)
 		if wg.counter != 0 {
-			panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 }
 
-wait_group_wait_with_timeout :: proc(wg: ^Wait_Group, duration: time.Duration) -> bool {
+wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}
@@ -56,7 +56,7 @@ wait_group_wait_with_timeout :: proc(wg: ^Wait_Group, duration: time.Duration) -
 			return false
 		}
 		if wg.counter != 0 {
-			panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 	return true
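
With the `Wait_Group` procedures now `contextless` (panicking through the private `_panic` added at the bottom of `core/sync/primitives.odin`), behaviour at call sites is unchanged. A usage sketch, assuming `core:thread`'s `create_and_start_with_data(data, fn)` helper:

```odin
package main

import "core:fmt"
import "core:sync"
import "core:thread"

main :: proc() {
	wg: sync.Wait_Group
	sync.wait_group_add(&wg, 3)

	for _ in 0..<3 {
		thread.create_and_start_with_data(&wg, proc(data: rawptr) {
			wg := (^sync.Wait_Group)(data)
			// ... do some work ...
			sync.wait_group_done(wg)
		})
	}

	sync.wait_group_wait(&wg) // blocks until all three call done
	fmt.println("all workers finished")
}
```

(Thread cleanup is omitted for brevity.)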
@@ -76,7 +76,7 @@ Example:
 
 	barrier := &sync.Barrier{}
 
-	main :: proc() {
+	main :: proc "contextless" () {
 		fmt.println("Start")
 
 		THREAD_COUNT :: 4
@@ -107,7 +107,7 @@ Barrier :: struct {
 	thread_count:  int,
 }
 
-barrier_init :: proc(b: ^Barrier, thread_count: int) {
+barrier_init :: proc "contextless" (b: ^Barrier, thread_count: int) {
 	b.index = 0
 	b.generation_id = 0
 	b.thread_count = thread_count
@@ -115,7 +115,7 @@ barrier_init :: proc(b: ^Barrier, thread_count: int) {
 
 // Block the current thread until all threads have rendezvoused
 // Barrier can be reused after all threads rendezvoused once, and can be used continuously
-barrier_wait :: proc(b: ^Barrier) -> (is_leader: bool) {
+barrier_wait :: proc "contextless" (b: ^Barrier) -> (is_leader: bool) {
 	guard(&b.mutex)
 	local_gen := b.generation_id
 	b.index += 1
@@ -141,7 +141,7 @@ Auto_Reset_Event :: struct {
 	sema:   Sema,
 }
 
-auto_reset_event_signal :: proc(e: ^Auto_Reset_Event) {
+auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_load_explicit(&e.status, .Relaxed)
 	for {
 		new_status := old_status + 1 if old_status < 1 else 1
@@ -155,7 +155,7 @@ auto_reset_event_signal :: proc(e: ^Auto_Reset_Event) {
 	}
 }
 
-auto_reset_event_wait :: proc(e: ^Auto_Reset_Event) {
+auto_reset_event_wait :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_sub_explicit(&e.status, 1, .Acquire)
 	if old_status < 1 {
 		sema_wait(&e.sema)
@@ -169,18 +169,18 @@ Ticket_Mutex :: struct {
 	serving: uint,
 }
 
-ticket_mutex_lock :: #force_inline proc(m: ^Ticket_Mutex) {
+ticket_mutex_lock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	ticket := atomic_add_explicit(&m.ticket, 1, .Relaxed)
 	for ticket != atomic_load_explicit(&m.serving, .Acquire) {
 		cpu_relax()
 	}
 }
 
-ticket_mutex_unlock :: #force_inline proc(m: ^Ticket_Mutex) {
+ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	atomic_add_explicit(&m.serving, 1, .Relaxed)
 }
 @(deferred_in=ticket_mutex_unlock)
-ticket_mutex_guard :: proc(m: ^Ticket_Mutex) -> bool {
+ticket_mutex_guard :: proc "contextless" (m: ^Ticket_Mutex) -> bool {
 	ticket_mutex_lock(m)
 	return true
 }
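
The `@(deferred_in=ticket_mutex_unlock)` attribute is what makes the guard pattern work: the matching unlock is scheduled automatically when the scope containing the guard call ends. A minimal sketch:

```odin
package main

import "core:fmt"
import "core:sync"

counter: int
m: sync.Ticket_Mutex

bump :: proc() {
	if sync.ticket_mutex_guard(&m) {
		// Critical section: ticket_mutex_unlock(&m) runs
		// automatically when this scope ends, via deferred_in.
		counter += 1
	}
}

main :: proc() {
	bump()
	fmt.println(counter) // 1
}
```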
@@ -191,25 +191,25 @@ Benaphore :: struct {
 	sema:    Sema,
 }
 
-benaphore_lock :: proc(b: ^Benaphore) {
+benaphore_lock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
 		sema_wait(&b.sema)
 	}
 }
 
-benaphore_try_lock :: proc(b: ^Benaphore) -> bool {
+benaphore_try_lock :: proc "contextless" (b: ^Benaphore) -> bool {
 	v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire)
 	return v == 0
 }
 
-benaphore_unlock :: proc(b: ^Benaphore) {
+benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
 		sema_post(&b.sema)
 	}
 }
 
 @(deferred_in=benaphore_unlock)
-benaphore_guard :: proc(m: ^Benaphore) -> bool {
+benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool {
 	benaphore_lock(m)
 	return true
 }
@@ -221,7 +221,7 @@ Recursive_Benaphore :: struct {
 	sema:      Sema,
 }
 
-recursive_benaphore_lock :: proc(b: ^Recursive_Benaphore) {
+recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
 		if tid != b.owner {
@@ -233,7 +233,7 @@ recursive_benaphore_lock :: proc(b: ^Recursive_Benaphore) {
 	b.recursion += 1
 }
 
-recursive_benaphore_try_lock :: proc(b: ^Recursive_Benaphore) -> bool {
+recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 	tid := current_thread_id()
 	if b.owner == tid {
 		atomic_add_explicit(&b.counter, 1, .Acquire)
@@ -248,9 +248,9 @@ recursive_benaphore_try_lock :: proc(b: ^Recursive_Benaphore) -> bool {
 	return true
 }
 
-recursive_benaphore_unlock :: proc(b: ^Recursive_Benaphore) {
+recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
-	assert(tid == b.owner)
+	_assert(tid == b.owner, "tid != b.owner")
 	b.recursion -= 1
 	recursion := b.recursion
 	if recursion == 0 {
@@ -265,7 +265,7 @@ recursive_benaphore_unlock :: proc(b: ^Recursive_Benaphore) {
 }
 
 @(deferred_in=recursive_benaphore_unlock)
-recursive_benaphore_guard :: proc(m: ^Recursive_Benaphore) -> bool {
+recursive_benaphore_guard :: proc "contextless" (m: ^Recursive_Benaphore) -> bool {
 	recursive_benaphore_lock(m)
 	return true
 }
@@ -282,7 +282,15 @@ Once :: struct {
 }
 
 // once_do calls the procedure fn if and only if once_do is being called for the first time for this instance of Once.
-once_do :: proc(o: ^Once, fn: proc()) {
+once_do :: proc{
+	once_do_without_data,
+	once_do_without_data_contextless,
+	once_do_with_data,
+	once_do_with_data_contextless,
+}
+
+// once_do_without_data calls the procedure fn if and only if once_do_without_data is being called for the first time for this instance of Once.
+once_do_without_data :: proc(o: ^Once, fn: proc()) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc()) {
 		guard(&o.m)
@@ -292,12 +300,61 @@ once_do :: proc(o: ^Once, fn: proc()) {
 		}
 	}
 
-	
 	if atomic_load_explicit(&o.done, .Acquire) == false {
 		do_slow(o, fn)
 	}
 }
 
+// once_do_without_data_contextless calls the procedure fn if and only if once_do_without_data_contextless is being called for the first time for this instance of Once.
+once_do_without_data_contextless :: proc(o: ^Once, fn: proc "contextless" ()) {
+	@(cold)
+	do_slow :: proc(o: ^Once, fn: proc "contextless" ()) {
+		guard(&o.m)
+		if !o.done {
+			fn()
+			atomic_store_explicit(&o.done, true, .Release)
+		}
+	}
+
+	if atomic_load_explicit(&o.done, .Acquire) == false {
+		do_slow(o, fn)
+	}
+}
+
+// once_do_with_data calls the procedure fn if and only if once_do_with_data is being called for the first time for this instance of Once.
+once_do_with_data :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
+	@(cold)
+	do_slow :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
+		guard(&o.m)
+		if !o.done {
+			fn(data)
+			atomic_store_explicit(&o.done, true, .Release)
+		}
+	}
+
+	if atomic_load_explicit(&o.done, .Acquire) == false {
+		do_slow(o, fn, data)
+	}
+}
+
+// once_do_with_data_contextless calls the procedure fn if and only if once_do_with_data_contextless is being called for the first time for this instance of Once.
+once_do_with_data_contextless :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
+	@(cold)
+	do_slow :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
+		guard(&o.m)
+		if !o.done {
+			fn(data)
+			atomic_store_explicit(&o.done, true, .Release)
+		}
+	}
+
+	if atomic_load_explicit(&o.done, .Acquire) == false {
+		do_slow(o, fn, data)
+	}
+}
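
`once_do` is now a proc group, so callers can pass a data pointer and/or a contextless callback and overload resolution picks the right variant. A sketch of the with-data form (the `Config` type is illustrative):

```odin
package main

import "core:fmt"
import "core:sync"

Config :: struct { loaded: bool }

init_once: sync.Once
config:    Config

get_config :: proc() -> ^Config {
	sync.once_do(&init_once, proc(data: rawptr) {
		cfg := (^Config)(data)
		cfg.loaded = true // runs exactly once, even under contention
	}, &config)
	return &config
}

main :: proc() {
	fmt.println(get_config().loaded) // true
}
```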
+
+
+
 
 
 // A Parker is an associated token which is initially not present:
@@ -314,7 +371,7 @@ Parker :: struct {
 // Blocks the current thread until the token is made available.
 //
 // Assumes this is only called by the thread that owns the Parker.
-park :: proc(p: ^Parker) {
+park :: proc "contextless" (p: ^Parker) {
 	EMPTY    :: 0
 	NOTIFIED :: 1
 	PARKED   :: max(u32)
@@ -333,7 +390,7 @@ park :: proc(p: ^Parker) {
 // for a limited duration.
 //
 // Assumes this is only called by the thread that owns the Parker
-park_with_timeout :: proc(p: ^Parker, duration: time.Duration) {
+park_with_timeout :: proc "contextless" (p: ^Parker, duration: time.Duration) {
 	EMPTY    :: 0
 	NOTIFIED :: 1
 	PARKED   :: max(u32)
@@ -345,7 +402,7 @@ park_with_timeout :: proc(p: ^Parker, duration: time.Duration) {
 }
 
 // Automatically makes the token available if it was not already.
-unpark :: proc(p: ^Parker)  {
+unpark :: proc "contextless" (p: ^Parker)  {
 	EMPTY    :: 0
 	NOTIFIED :: 1
 	PARKED   :: max(Futex)
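
`Parker` is a one-token park/unpark primitive (as in Rust's `std::thread`), and these procedures are now `contextless` as well. A sketch where another thread releases the parked owner, again assuming `core:thread`'s `create_and_start_with_data`:

```odin
package main

import "core:fmt"
import "core:sync"
import "core:thread"

parker: sync.Parker

main :: proc() {
	t := thread.create_and_start_with_data(&parker, proc(data: rawptr) {
		p := (^sync.Parker)(data)
		sync.unpark(p) // make the token available
	})

	sync.park(&parker) // returns once the token is available
	fmt.println("unparked")
	thread.destroy(t)
}
```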

+ 7 - 7
core/sync/futex_darwin.odin

@@ -24,11 +24,11 @@ EINTR     :: -4
 EFAULT    :: -14
 ETIMEDOUT :: -60
 
-_futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	return _futex_wait_with_timeout(f, expected, 0)
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	timeout_ns := u32(duration) * 1000
 	
 	s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns)
@@ -41,13 +41,13 @@ _futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Durati
 	case ETIMEDOUT:
 		return false
 	case:
-		panic("futex_wait failure")
+		_panic("futex_wait failure")
 	}
 	return true
 
 }
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	loop: for {
 		s := __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, 0)
 		if s >= 0 {
@@ -59,12 +59,12 @@ _futex_signal :: proc(f: ^Futex) {
 		case ENOENT:
 			return
 		case:
-			panic("futex_wake_single failure")
+			_panic("futex_wake_single failure")
 		}
 	}
 }
 
-_futex_broadcast :: proc(f: ^Futex) {
+_futex_broadcast :: proc "contextless" (f: ^Futex) {
 	loop: for {
 		s := __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, f, 0)
 		if s >= 0 {
@@ -76,7 +76,7 @@ _futex_broadcast :: proc(f: ^Futex) {
 		case ENOENT:
 			return
 		case:
-			panic("futex_wake_all failure")
+			_panic("futex_wake_all failure")
 		}
 	}
 }

+ 8 - 8
core/sync/futex_freebsd.odin

@@ -17,7 +17,7 @@ foreign libc {
 	__error :: proc "c" () -> ^c.int ---
 }
 
-_futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	timeout := [2]i64{14400, 0} // 4 hours
 	for {
 		res := _umtx_op(f, UMTX_OP_WAIT, c.ulong(expected), nil, &timeout)
@@ -30,12 +30,12 @@ _futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
 			continue
 		}
 
-		panic("_futex_wait failure")
+		_panic("_futex_wait failure")
 	}
 	unreachable()
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}
@@ -51,21 +51,21 @@ _futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Durati
 		return false
 	}
 
-	panic("_futex_wait_with_timeout failure")
+	_panic("_futex_wait_with_timeout failure")
 }
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	res := _umtx_op(f, UMTX_OP_WAKE, 1, nil, nil)
 
 	if res == -1 {
-		panic("_futex_signal failure")
+		_panic("_futex_signal failure")
 	}
 }
 
-_futex_broadcast :: proc(f: ^Futex)  {
+_futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	res := _umtx_op(f, UMTX_OP_WAKE, c.ulong(max(i32)), nil, nil)
 
 	if res == -1 {
-		panic("_futex_broadcast failure")
+		_panic("_futex_broadcast failure")
 	}
 }

+ 10 - 10
core/sync/futex_linux.odin

@@ -21,20 +21,20 @@ EFAULT    :: -14
 EINVAL    :: -22
 ETIMEDOUT :: -110
 
-get_errno :: proc(r: int) -> int {
+get_errno :: proc "contextless" (r: int) -> int {
 	if -4096 < r && r < 0 {
 		return r
 	}
 	return 0
 }
 
-internal_futex :: proc(f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> int {
+internal_futex :: proc "contextless" (f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> int {
 	code := int(intrinsics.syscall(unix.SYS_futex, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
 	return get_errno(code)
 }
 
 
-_futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	err := internal_futex(f, FUTEX_WAIT_PRIVATE | FUTEX_WAIT, expected, nil)
 	switch err {
 	case ESUCCESS, EINTR, EAGAIN, EINVAL:
@@ -44,12 +44,12 @@ _futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
 	case EFAULT: 
 		fallthrough
 	case:
-		panic("futex_wait failure")
+		_panic("futex_wait failure")
 	}
 	return true
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}
@@ -71,27 +71,27 @@ _futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Durati
 	case EFAULT: 
 		fallthrough
 	case:
-		panic("futex_wait_with_timeout failure")
+		_panic("futex_wait_with_timeout failure")
 	}
 	return true
 }
 
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	err := internal_futex(f, FUTEX_WAKE_PRIVATE | FUTEX_WAKE, 1, nil)
 	switch err {
 	case ESUCCESS, EINVAL, EFAULT:
 		// okay
 	case:
-		panic("futex_wake_single failure")
+		_panic("futex_wake_single failure")
 	}
 }
-_futex_broadcast :: proc(f: ^Futex)  {
+_futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	err := internal_futex(f, FUTEX_WAKE_PRIVATE | FUTEX_WAKE, u32(max(i32)), nil)
 	switch err {
 	case ESUCCESS, EINVAL, EFAULT:
 		// okay
 	case:
-		panic("_futex_wake_all failure")
+		_panic("_futex_wake_all failure")
 	}
 }

+ 8 - 8
core/sync/futex_openbsd.odin

@@ -21,7 +21,7 @@ foreign libc {
 	_unix_futex :: proc "c" (f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> c.int ---
 }
 
-_futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	res := _unix_futex(f, FUTEX_WAIT_PRIVATE, expected, nil)
 
 	if res != -1 {
@@ -32,10 +32,10 @@ _futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
 		return false
 	}
 
-	panic("futex_wait failure")
+	_panic("futex_wait failure")
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}
@@ -58,21 +58,21 @@ _futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Durati
 		return false
 	}
 
-	panic("futex_wait_with_timeout failure")
+	_panic("futex_wait_with_timeout failure")
 }
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, 1, nil)
 
 	if res == -1 {
-		panic("futex_wake_single failure")
+		_panic("futex_wake_single failure")
 	}
 }
 
-_futex_broadcast :: proc(f: ^Futex)  {
+_futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, u32(max(i32)), nil)
 
 	if res == -1 {
-		panic("_futex_wake_all failure")
+		_panic("_futex_wake_all failure")
 	}
 }

+ 4 - 4
core/sync/futex_wasm.odin

@@ -5,18 +5,18 @@ package sync
 import "core:intrinsics"
 import "core:time"
 
-_futex_wait :: proc(f: ^Futex, expected: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
 	return s != 0
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
 	return s != 0
 
 }
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	loop: for {
 		s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
 		if s >= 1 {
@@ -25,7 +25,7 @@ _futex_signal :: proc(f: ^Futex) {
 	}
 }
 
-_futex_broadcast :: proc(f: ^Futex) {
+_futex_broadcast :: proc "contextless" (f: ^Futex) {
 	loop: for {
 		s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))
 		if s >= 0 {

+ 4 - 4
core/sync/futex_windows.odin

@@ -39,22 +39,22 @@ CustomWaitOnAddress :: proc "stdcall" (Address: rawptr, CompareAddress: rawptr,
 }
 
 
-_futex_wait :: proc(f: ^Futex, expect: u32) -> bool {
+_futex_wait :: proc "contextless" (f: ^Futex, expect: u32) -> bool {
 	expect := expect
 	return CustomWaitOnAddress(f, &expect, size_of(expect), nil)
 }
 
-_futex_wait_with_timeout :: proc(f: ^Futex, expect: u32, duration: time.Duration) -> bool {
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expect: u32, duration: time.Duration) -> bool {
 	expect := expect
 	// NOTE(bill): for some bizarre reason, this has to be a negative number
 	timeout := -i64(duration / 100)
 	return CustomWaitOnAddress(f, &expect, size_of(expect), &timeout)
 }
 
-_futex_signal :: proc(f: ^Futex) {
+_futex_signal :: proc "contextless" (f: ^Futex) {
 	WakeByAddressSingle(f)
 }
 
-_futex_broadcast :: proc(f: ^Futex) {
+_futex_broadcast :: proc "contextless" (f: ^Futex) {
 	WakeByAddressAll(f)
 }

+ 44 - 28
core/sync/primitives.odin

@@ -1,5 +1,6 @@
 package sync
 
+import "core:runtime"
 import "core:time"
 
 current_thread_id :: proc "contextless" () -> int {
@@ -15,17 +16,17 @@ Mutex :: struct {
 }
 
 // mutex_lock locks m
-mutex_lock :: proc(m: ^Mutex) {
+mutex_lock :: proc "contextless" (m: ^Mutex) {
 	_mutex_lock(m)
 }
 
 // mutex_unlock unlocks m
-mutex_unlock :: proc(m: ^Mutex) {
+mutex_unlock :: proc "contextless" (m: ^Mutex) {
 	_mutex_unlock(m)
 }
 
 // mutex_try_lock tries to lock m, will return true on success, and false on failure
-mutex_try_lock :: proc(m: ^Mutex) -> bool {
+mutex_try_lock :: proc "contextless" (m: ^Mutex) -> bool {
 	return _mutex_try_lock(m)
 }
 
@@ -36,7 +37,7 @@ Example:
 	}
 */
 @(deferred_in=mutex_unlock)
-mutex_guard :: proc(m: ^Mutex) -> bool {
+mutex_guard :: proc "contextless" (m: ^Mutex) -> bool {
 	mutex_lock(m)
 	return true
 }
@@ -52,32 +53,32 @@ RW_Mutex :: struct {
 
 // rw_mutex_lock locks rw for writing (with a single writer)
 // If the mutex is already locked for reading or writing, the mutex blocks until the mutex is available.
-rw_mutex_lock :: proc(rw: ^RW_Mutex) {
+rw_mutex_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_lock(rw)
 }
 
 // rw_mutex_unlock unlocks rw for writing (with a single writer)
-rw_mutex_unlock :: proc(rw: ^RW_Mutex) {
+rw_mutex_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_unlock(rw)
 }
 
 // rw_mutex_try_lock tries to lock rw for writing (with a single writer)
-rw_mutex_try_lock :: proc(rw: ^RW_Mutex) -> bool {
+rw_mutex_try_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_lock(rw)
 }
 
 // rw_mutex_shared_lock locks rw for reading (with arbitrary number of readers)
-rw_mutex_shared_lock :: proc(rw: ^RW_Mutex) {
+rw_mutex_shared_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_lock(rw)
 }
 
 // rw_mutex_shared_unlock unlocks rw for reading (with arbitrary number of readers)
-rw_mutex_shared_unlock :: proc(rw: ^RW_Mutex) {
+rw_mutex_shared_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_unlock(rw)
 }
 
 // rw_mutex_try_shared_lock tries to lock rw for reading (with arbitrary number of readers)
-rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
+rw_mutex_try_shared_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_shared_lock(rw)
 }
 /*
@@ -87,7 +88,7 @@ Example:
 	}
 */
 @(deferred_in=rw_mutex_unlock)
-rw_mutex_guard :: proc(m: ^RW_Mutex) -> bool {
+rw_mutex_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 	rw_mutex_lock(m)
 	return true
 }
@@ -99,7 +100,7 @@ Example:
 	}
 */
 @(deferred_in=rw_mutex_shared_unlock)
-rw_mutex_shared_guard :: proc(m: ^RW_Mutex) -> bool {
+rw_mutex_shared_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 	rw_mutex_shared_lock(m)
 	return true
 }
@@ -114,15 +115,15 @@ Recursive_Mutex :: struct {
 	impl: _Recursive_Mutex,
 }
 
-recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
+recursive_mutex_lock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_lock(m)
 }
 
-recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
+recursive_mutex_unlock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_unlock(m)
 }
 
-recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
+recursive_mutex_try_lock :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	return _recursive_mutex_try_lock(m)
 }
 
@@ -133,7 +134,7 @@ Example:
 	}
 */
 @(deferred_in=recursive_mutex_unlock)
-recursive_mutex_guard :: proc(m: ^Recursive_Mutex) -> bool {
+recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	recursive_mutex_lock(m)
 	return true
 }
@@ -147,22 +148,22 @@ Cond :: struct {
 	impl: _Cond,
 }
 
-cond_wait :: proc(c: ^Cond, m: ^Mutex) {
+cond_wait :: proc "contextless" (c: ^Cond, m: ^Mutex) {
 	_cond_wait(c, m)
 }
 
-cond_wait_with_timeout :: proc(c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
+cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}
 	return _cond_wait_with_timeout(c, m, duration)
 }
 
-cond_signal :: proc(c: ^Cond) {
+cond_signal :: proc "contextless" (c: ^Cond) {
 	_cond_signal(c)
 }
 
-cond_broadcast :: proc(c: ^Cond) {
+cond_broadcast :: proc "contextless" (c: ^Cond) {
 	_cond_broadcast(c)
 }
 
@@ -175,15 +176,15 @@ Sema :: struct {
 	impl: _Sema,
 }
 
-sema_post :: proc(s: ^Sema, count := 1) {
+sema_post :: proc "contextless" (s: ^Sema, count := 1) {
 	_sema_post(s, count)
 }
 
-sema_wait :: proc(s: ^Sema) {
+sema_wait :: proc "contextless" (s: ^Sema) {
 	_sema_wait(s)
 }
 
-sema_wait_with_timeout :: proc(s: ^Sema, duration: time.Duration) -> bool {
+sema_wait_with_timeout :: proc "contextless" (s: ^Sema, duration: time.Duration) -> bool {
 	return _sema_wait_with_timeout(s, duration)
 }
 
@@ -194,16 +195,16 @@ sema_wait_with_timeout :: proc(s: ^Sema, duration: time.Duration) -> bool {
 // A Futex must not be copied after first use
 Futex :: distinct u32
 
-futex_wait :: proc(f: ^Futex, expected: u32) {
+futex_wait :: proc "contextless" (f: ^Futex, expected: u32) {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return
 	}
 	
-	assert(_futex_wait(f, expected), "futex_wait failure")
+	_assert(_futex_wait(f, expected), "futex_wait failure")
 }
 
 // returns true if the wait happened within the duration, false if it exceeded the time duration
-futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duration) -> bool {
+futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return true
 	}
@@ -214,10 +215,25 @@ futex_wait_with_timeout :: proc(f: ^Futex, expected: u32, duration: time.Duratio
 	return _futex_wait_with_timeout(f, expected, duration)
 }
 
-futex_signal :: proc(f: ^Futex) {
+futex_signal :: proc "contextless" (f: ^Futex) {
 	_futex_signal(f)
 }
 
-futex_broadcast :: proc(f: ^Futex) {
+futex_broadcast :: proc "contextless" (f: ^Futex) {
 	_futex_broadcast(f)
 }
+
+
+@(private)
+_assert :: proc "contextless" (cond: bool, msg: string) {
+	if !cond {
+		_panic(msg)
+	}
+}
+
+@(private)
+_panic :: proc "contextless" (msg: string) -> ! {
+	runtime.print_string(msg)
+	runtime.print_byte('\n')
+	runtime.trap()
+}
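
The built-in `panic` and `assert` require an implicit context, so the now-contextless sync procedures route failures through these private helpers, which print the message and trap instead. For reference, the futex API they guard is used like this (a hedged sketch; a real waiter pairs the wait with an atomic update from another thread):

```odin
package main

import "core:sync"

main :: proc() {
	f: sync.Futex // distinct u32, zero-initialised

	// A waiting thread would block while the value still matches:
	//   sync.futex_wait(&f, 0)
	// A waking thread publishes a change, then signals:
	sync.atomic_store(&f, 1)
	sync.futex_signal(&f)
}
```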

+ 26 - 26
core/sync/primitives_atomic.odin

@@ -18,9 +18,9 @@ Atomic_Mutex :: struct {
 }
 
 // atomic_mutex_lock locks m
-atomic_mutex_lock :: proc(m: ^Atomic_Mutex) {
+atomic_mutex_lock :: proc "contextless" (m: ^Atomic_Mutex) {
 	@(cold)
-	lock_slow :: proc(m: ^Atomic_Mutex, curr_state: Atomic_Mutex_State) {
+	lock_slow :: proc "contextless" (m: ^Atomic_Mutex, curr_state: Atomic_Mutex_State) {
 		new_state := curr_state // Make a copy of it
 
 		spin_lock: for spin in 0..<i32(100) {
@@ -58,9 +58,9 @@ atomic_mutex_lock :: proc(m: ^Atomic_Mutex) {
 }
 
 // atomic_mutex_unlock unlocks m
-atomic_mutex_unlock :: proc(m: ^Atomic_Mutex) {
+atomic_mutex_unlock :: proc "contextless" (m: ^Atomic_Mutex) {
 	@(cold)
-	unlock_slow :: proc(m: ^Atomic_Mutex) {
+	unlock_slow :: proc "contextless" (m: ^Atomic_Mutex) {
 		futex_signal((^Futex)(&m.state))
 	}
 
@@ -76,7 +76,7 @@ atomic_mutex_unlock :: proc(m: ^Atomic_Mutex) {
 }
 
 // atomic_mutex_try_lock tries to lock m, will return true on success, and false on failure
-atomic_mutex_try_lock :: proc(m: ^Atomic_Mutex) -> bool {
+atomic_mutex_try_lock :: proc "contextless" (m: ^Atomic_Mutex) -> bool {
 	_, ok := atomic_compare_exchange_strong_explicit(&m.state, .Unlocked, .Locked, .Acquire, .Consume)
 	return ok
 }
@@ -88,7 +88,7 @@ Example:
 	}
 */
 @(deferred_in=atomic_mutex_unlock)
-atomic_mutex_guard :: proc(m: ^Atomic_Mutex) -> bool {
+atomic_mutex_guard :: proc "contextless" (m: ^Atomic_Mutex) -> bool {
 	atomic_mutex_lock(m)
 	return true
 }
@@ -117,7 +117,7 @@ Atomic_RW_Mutex :: struct {
 
 // atomic_rw_mutex_lock locks rw for writing (with a single writer)
 // If the mutex is already locked for reading or writing, the mutex blocks until the mutex is available.
-atomic_rw_mutex_lock :: proc(rw: ^Atomic_RW_Mutex) {
+atomic_rw_mutex_lock :: proc "contextless" (rw: ^Atomic_RW_Mutex) {
 	_ = atomic_add(&rw.state, Atomic_RW_Mutex_State_Writer)
 	atomic_mutex_lock(&rw.mutex)
 
@@ -128,13 +128,13 @@ atomic_rw_mutex_lock :: proc(rw: ^Atomic_RW_Mutex) {
 }
 
 // atomic_rw_mutex_unlock unlocks rw for writing (with a single writer)
-atomic_rw_mutex_unlock :: proc(rw: ^Atomic_RW_Mutex) {
+atomic_rw_mutex_unlock :: proc "contextless" (rw: ^Atomic_RW_Mutex) {
 	_ = atomic_and(&rw.state, ~Atomic_RW_Mutex_State_Is_Writing)
 	atomic_mutex_unlock(&rw.mutex)
 }
 
 // atomic_rw_mutex_try_lock tries to lock rw for writing (with a single writer)
-atomic_rw_mutex_try_lock :: proc(rw: ^Atomic_RW_Mutex) -> bool {
+atomic_rw_mutex_try_lock :: proc "contextless" (rw: ^Atomic_RW_Mutex) -> bool {
 	if atomic_mutex_try_lock(&rw.mutex) {
 		state := atomic_load(&rw.state)
 		if state & Atomic_RW_Mutex_State_Reader_Mask == 0 {
@@ -148,7 +148,7 @@ atomic_rw_mutex_try_lock :: proc(rw: ^Atomic_RW_Mutex) -> bool {
 }
 
 // atomic_rw_mutex_shared_lock locks rw for reading (with arbitrary number of readers)
-atomic_rw_mutex_shared_lock :: proc(rw: ^Atomic_RW_Mutex) {
+atomic_rw_mutex_shared_lock :: proc "contextless" (rw: ^Atomic_RW_Mutex) {
 	state := atomic_load(&rw.state)
 	for state & (Atomic_RW_Mutex_State_Is_Writing|Atomic_RW_Mutex_State_Writer_Mask) == 0 {
 		ok: bool
@@ -164,7 +164,7 @@ atomic_rw_mutex_shared_lock :: proc(rw: ^Atomic_RW_Mutex) {
 }
 
 // atomic_rw_mutex_shared_unlock unlocks rw for reading (with arbitrary number of readers)
-atomic_rw_mutex_shared_unlock :: proc(rw: ^Atomic_RW_Mutex) {
+atomic_rw_mutex_shared_unlock :: proc "contextless" (rw: ^Atomic_RW_Mutex) {
 	state := atomic_sub(&rw.state, Atomic_RW_Mutex_State_Reader)
 
 	if (state & Atomic_RW_Mutex_State_Reader_Mask == Atomic_RW_Mutex_State_Reader) &&
@@ -174,7 +174,7 @@ atomic_rw_mutex_shared_unlock :: proc(rw: ^Atomic_RW_Mutex) {
 }
 
 // atomic_rw_mutex_try_shared_lock tries to lock rw for reading (with arbitrary number of readers)
-atomic_rw_mutex_try_shared_lock :: proc(rw: ^Atomic_RW_Mutex) -> bool {
+atomic_rw_mutex_try_shared_lock :: proc "contextless" (rw: ^Atomic_RW_Mutex) -> bool {
 	state := atomic_load(&rw.state)
 	if state & (Atomic_RW_Mutex_State_Is_Writing|Atomic_RW_Mutex_State_Writer_Mask) == 0 {
 		_, ok := atomic_compare_exchange_strong(&rw.state, state, state + Atomic_RW_Mutex_State_Reader)
@@ -198,7 +198,7 @@ Example:
 	}
 */
 @(deferred_in=atomic_rw_mutex_unlock)
-atomic_rw_mutex_guard :: proc(m: ^Atomic_RW_Mutex) -> bool {
+atomic_rw_mutex_guard :: proc "contextless" (m: ^Atomic_RW_Mutex) -> bool {
 	atomic_rw_mutex_lock(m)
 	return true
 }
@@ -210,7 +210,7 @@ Example:
 	}
 */
 @(deferred_in=atomic_rw_mutex_shared_unlock)
-atomic_rw_mutex_shared_guard :: proc(m: ^Atomic_RW_Mutex) -> bool {
+atomic_rw_mutex_shared_guard :: proc "contextless" (m: ^Atomic_RW_Mutex) -> bool {
 	atomic_rw_mutex_shared_lock(m)
 	return true
 }
@@ -228,7 +228,7 @@ Atomic_Recursive_Mutex :: struct {
 	mutex: Mutex,
 }
 
-atomic_recursive_mutex_lock :: proc(m: ^Atomic_Recursive_Mutex) {
+atomic_recursive_mutex_lock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 	tid := current_thread_id()
 	if tid != m.owner {
 		mutex_lock(&m.mutex)
@@ -238,9 +238,9 @@ atomic_recursive_mutex_lock :: proc(m: ^Atomic_Recursive_Mutex) {
 	m.recursion += 1
 }
 
-atomic_recursive_mutex_unlock :: proc(m: ^Atomic_Recursive_Mutex) {
+atomic_recursive_mutex_unlock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 	tid := current_thread_id()
-	assert(tid == m.owner)
+	_assert(tid == m.owner, "tid != m.owner")
 	m.recursion -= 1
 	recursion := m.recursion
 	if recursion == 0 {
@@ -253,7 +253,7 @@ atomic_recursive_mutex_unlock :: proc(m: ^Atomic_Recursive_Mutex) {
 
 }
 
-atomic_recursive_mutex_try_lock :: proc(m: ^Atomic_Recursive_Mutex) -> bool {
+atomic_recursive_mutex_try_lock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) -> bool {
 	tid := current_thread_id()
 	if m.owner == tid {
 		return mutex_try_lock(&m.mutex)
@@ -274,7 +274,7 @@ Example:
 	}
 */
 @(deferred_in=atomic_recursive_mutex_unlock)
-atomic_recursive_mutex_guard :: proc(m: ^Atomic_Recursive_Mutex) -> bool {
+atomic_recursive_mutex_guard :: proc "contextless" (m: ^Atomic_Recursive_Mutex) -> bool {
 	atomic_recursive_mutex_lock(m)
 	return true
 }
@@ -289,7 +289,7 @@ Atomic_Cond :: struct {
 	state: Futex,
 }
 
-atomic_cond_wait :: proc(c: ^Atomic_Cond, m: ^Atomic_Mutex) {
+atomic_cond_wait :: proc "contextless" (c: ^Atomic_Cond, m: ^Atomic_Mutex) {
 	state := u32(atomic_load_explicit(&c.state, .Relaxed))
 	unlock(m)
 	futex_wait(&c.state, state)
@@ -297,7 +297,7 @@ atomic_cond_wait :: proc(c: ^Atomic_Cond, m: ^Atomic_Mutex) {
 
 }
 
-atomic_cond_wait_with_timeout :: proc(c: ^Atomic_Cond, m: ^Atomic_Mutex, duration: time.Duration) -> (ok: bool) {
+atomic_cond_wait_with_timeout :: proc "contextless" (c: ^Atomic_Cond, m: ^Atomic_Mutex, duration: time.Duration) -> (ok: bool) {
 	state := u32(atomic_load_explicit(&c.state, .Relaxed))
 	unlock(m)
 	ok = futex_wait_with_timeout(&c.state, state, duration)
@@ -306,12 +306,12 @@ atomic_cond_wait_with_timeout :: proc(c: ^Atomic_Cond, m: ^Atomic_Mutex, duratio
 }
 
 
-atomic_cond_signal :: proc(c: ^Atomic_Cond) {
+atomic_cond_signal :: proc "contextless" (c: ^Atomic_Cond) {
 	atomic_add_explicit(&c.state, 1, .Release)
 	futex_signal(&c.state)
 }
 
-atomic_cond_broadcast :: proc(c: ^Atomic_Cond) {
+atomic_cond_broadcast :: proc "contextless" (c: ^Atomic_Cond) {
 	atomic_add_explicit(&c.state, 1, .Release)
 	futex_broadcast(&c.state)
 }
@@ -324,7 +324,7 @@ Atomic_Sema :: struct {
 	count: Futex,
 }
 
-atomic_sema_post :: proc(s: ^Atomic_Sema, count := 1) {
+atomic_sema_post :: proc "contextless" (s: ^Atomic_Sema, count := 1) {
 	atomic_add_explicit(&s.count, Futex(count), .Release)
 	if count == 1 {
 		futex_signal(&s.count)
@@ -333,7 +333,7 @@ atomic_sema_post :: proc(s: ^Atomic_Sema, count := 1) {
 	}
 }
 
-atomic_sema_wait :: proc(s: ^Atomic_Sema) {
+atomic_sema_wait :: proc "contextless" (s: ^Atomic_Sema) {
 	for {
 		original_count := atomic_load_explicit(&s.count, .Relaxed)
 		for original_count == 0 {
@@ -346,7 +346,7 @@ atomic_sema_wait :: proc(s: ^Atomic_Sema) {
 	}
 }
 
-atomic_sema_wait_with_timeout :: proc(s: ^Atomic_Sema, duration: time.Duration) -> bool {
+atomic_sema_wait_with_timeout :: proc "contextless" (s: ^Atomic_Sema, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
 	}

+ 19 - 19
core/sync/primitives_internal.odin

@@ -7,15 +7,15 @@ _Sema :: struct {
 	atomic: Atomic_Sema,
 }
 
-_sema_post :: proc(s: ^Sema, count := 1) {
+_sema_post :: proc "contextless" (s: ^Sema, count := 1) {
 	atomic_sema_post(&s.impl.atomic, count)
 }
 
-_sema_wait :: proc(s: ^Sema) {
+_sema_wait :: proc "contextless" (s: ^Sema) {
 	atomic_sema_wait(&s.impl.atomic)
 }
 
-_sema_wait_with_timeout :: proc(s: ^Sema, duration: time.Duration) -> bool {
+_sema_wait_with_timeout :: proc "contextless" (s: ^Sema, duration: time.Duration) -> bool {
 	return atomic_sema_wait_with_timeout(&s.impl.atomic, duration)
 }
 
@@ -25,7 +25,7 @@ _Recursive_Mutex :: struct {
 	recursion: i32,
 }
 
-_recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
+_recursive_mutex_lock :: proc "contextless" (m: ^Recursive_Mutex) {
 	tid := Futex(current_thread_id())
 	for {
 		prev_owner := atomic_compare_exchange_strong_explicit(&m.impl.owner, 0, tid, .Acquire, .Acquire)
@@ -40,7 +40,7 @@ _recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
 	}
 }
 
-_recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
+_recursive_mutex_unlock :: proc "contextless" (m: ^Recursive_Mutex) {
 	m.impl.recursion -= 1
 	if m.impl.recursion != 0 {
 		return
@@ -52,7 +52,7 @@ _recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
 
 }
 
-_recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
+_recursive_mutex_try_lock :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	tid := Futex(current_thread_id())
 	prev_owner := atomic_compare_exchange_strong_explicit(&m.impl.owner, 0, tid, .Acquire, .Acquire)
 	switch prev_owner {
@@ -70,15 +70,15 @@ when ODIN_OS != .Windows {
 		mutex: Atomic_Mutex,
 	}
 
-	_mutex_lock :: proc(m: ^Mutex) {
+	_mutex_lock :: proc "contextless" (m: ^Mutex) {
 		atomic_mutex_lock(&m.impl.mutex)
 	}
 
-	_mutex_unlock :: proc(m: ^Mutex) {
+	_mutex_unlock :: proc "contextless" (m: ^Mutex) {
 		atomic_mutex_unlock(&m.impl.mutex)
 	}
 
-	_mutex_try_lock :: proc(m: ^Mutex) -> bool {
+	_mutex_try_lock :: proc "contextless" (m: ^Mutex) -> bool {
 		return atomic_mutex_try_lock(&m.impl.mutex)
 	}
 
@@ -86,19 +86,19 @@ when ODIN_OS != .Windows {
 		cond: Atomic_Cond,
 	}
 
-	_cond_wait :: proc(c: ^Cond, m: ^Mutex) {
+	_cond_wait :: proc "contextless" (c: ^Cond, m: ^Mutex) {
 		atomic_cond_wait(&c.impl.cond, &m.impl.mutex)
 	}
 
-	_cond_wait_with_timeout :: proc(c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
+	_cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
 		return atomic_cond_wait_with_timeout(&c.impl.cond, &m.impl.mutex, duration)
 	}
 
-	_cond_signal :: proc(c: ^Cond) {
+	_cond_signal :: proc "contextless" (c: ^Cond) {
 		atomic_cond_signal(&c.impl.cond)
 	}
 
-	_cond_broadcast :: proc(c: ^Cond) {
+	_cond_broadcast :: proc "contextless" (c: ^Cond) {
 		atomic_cond_broadcast(&c.impl.cond)
 	}
 
@@ -107,27 +107,27 @@ when ODIN_OS != .Windows {
 		mutex: Atomic_RW_Mutex,
 	}
 
-	_rw_mutex_lock :: proc(rw: ^RW_Mutex) {
+	_rw_mutex_lock :: proc "contextless" (rw: ^RW_Mutex) {
 		atomic_rw_mutex_lock(&rw.impl.mutex)
 	}
 
-	_rw_mutex_unlock :: proc(rw: ^RW_Mutex) {
+	_rw_mutex_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 		atomic_rw_mutex_unlock(&rw.impl.mutex)
 	}
 
-	_rw_mutex_try_lock :: proc(rw: ^RW_Mutex) -> bool {
+	_rw_mutex_try_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 		return atomic_rw_mutex_try_lock(&rw.impl.mutex)
 	}
 
-	_rw_mutex_shared_lock :: proc(rw: ^RW_Mutex) {
+	_rw_mutex_shared_lock :: proc "contextless" (rw: ^RW_Mutex) {
 		atomic_rw_mutex_shared_lock(&rw.impl.mutex)
 	}
 
-	_rw_mutex_shared_unlock :: proc(rw: ^RW_Mutex) {
+	_rw_mutex_shared_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 		atomic_rw_mutex_shared_unlock(&rw.impl.mutex)
 	}
 
-	_rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
+	_rw_mutex_try_shared_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 		return atomic_rw_mutex_try_shared_lock(&rw.impl.mutex)
 	}
 }

+ 13 - 13
core/sync/primitives_windows.odin

@@ -13,15 +13,15 @@ _Mutex :: struct {
 	srwlock: win32.SRWLOCK,
 }
 
-_mutex_lock :: proc(m: ^Mutex) {
+_mutex_lock :: proc "contextless" (m: ^Mutex) {
 	win32.AcquireSRWLockExclusive(&m.impl.srwlock)
 }
 
-_mutex_unlock :: proc(m: ^Mutex) {
+_mutex_unlock :: proc "contextless" (m: ^Mutex) {
 	win32.ReleaseSRWLockExclusive(&m.impl.srwlock)
 }
 
-_mutex_try_lock :: proc(m: ^Mutex) -> bool {
+_mutex_try_lock :: proc "contextless" (m: ^Mutex) -> bool {
 	return bool(win32.TryAcquireSRWLockExclusive(&m.impl.srwlock))
 }
 
@@ -29,27 +29,27 @@ _RW_Mutex :: struct {
 	srwlock: win32.SRWLOCK,
 }
 
-_rw_mutex_lock :: proc(rw: ^RW_Mutex) {
+_rw_mutex_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	win32.AcquireSRWLockExclusive(&rw.impl.srwlock)
 }
 
-_rw_mutex_unlock :: proc(rw: ^RW_Mutex) {
+_rw_mutex_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	win32.ReleaseSRWLockExclusive(&rw.impl.srwlock)
 }
 
-_rw_mutex_try_lock :: proc(rw: ^RW_Mutex) -> bool {
+_rw_mutex_try_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return bool(win32.TryAcquireSRWLockExclusive(&rw.impl.srwlock))
 }
 
-_rw_mutex_shared_lock :: proc(rw: ^RW_Mutex) {
+_rw_mutex_shared_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	win32.AcquireSRWLockShared(&rw.impl.srwlock)
 }
 
-_rw_mutex_shared_unlock :: proc(rw: ^RW_Mutex) {
+_rw_mutex_shared_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	win32.ReleaseSRWLockShared(&rw.impl.srwlock)
 }
 
-_rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
+_rw_mutex_try_shared_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return bool(win32.TryAcquireSRWLockShared(&rw.impl.srwlock))
 }
 
@@ -58,22 +58,22 @@ _Cond :: struct {
 	cond: win32.CONDITION_VARIABLE,
 }
 
-_cond_wait :: proc(c: ^Cond, m: ^Mutex) {
+_cond_wait :: proc "contextless" (c: ^Cond, m: ^Mutex) {
 	_ = win32.SleepConditionVariableSRW(&c.impl.cond, &m.impl.srwlock, win32.INFINITE, 0)
 }
 
-_cond_wait_with_timeout :: proc(c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
+_cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
 	duration := u32(duration / time.Millisecond)
 	ok := win32.SleepConditionVariableSRW(&c.impl.cond, &m.impl.srwlock, duration, 0)
 	return bool(ok)
 }
 
 
-_cond_signal :: proc(c: ^Cond) {
+_cond_signal :: proc "contextless" (c: ^Cond) {
 	win32.WakeConditionVariable(&c.impl.cond)
 }
 
-_cond_broadcast :: proc(c: ^Cond) {
+_cond_broadcast :: proc "contextless" (c: ^Cond) {
 	win32.WakeAllConditionVariable(&c.impl.cond)
 }
 

+ 1 - 0
core/sys/windows/gdi32.odin

@@ -62,6 +62,7 @@ foreign gdi32 {
 
 	SetPixelFormat :: proc(hdc: HDC, format: c_int, ppfd: ^PIXELFORMATDESCRIPTOR) -> BOOL ---
 	ChoosePixelFormat :: proc(hdc: HDC, ppfd: ^PIXELFORMATDESCRIPTOR) -> c_int ---
+	DescribePixelFormat :: proc(hdc: HDC, iPixelFormat: c_int, nBytes: UINT, ppfd: ^PIXELFORMATDESCRIPTOR) -> c_int ---
 	SwapBuffers :: proc(HDC) -> BOOL ---
 
 	SetDCBrushColor :: proc(hdc: HDC, color: COLORREF) -> COLORREF ---

+ 19 - 0
core/sys/windows/types.odin

@@ -1554,6 +1554,25 @@ WA_INACTIVE    :: 0
 WA_ACTIVE      :: 1
 WA_CLICKACTIVE :: 2
 
+// Struct pointed to by WM_GETMINMAXINFO lParam
+MINMAXINFO :: struct {
+	ptReserved: POINT,
+	ptMaxSize: POINT,
+	ptMaxPosition: POINT,
+	ptMinTrackSize: POINT,
+	ptMaxTrackSize: POINT,
+}
+PMINMAXINFO  :: ^MINMAXINFO
+LPMINMAXINFO :: PMINMAXINFO
+
+MONITORINFO :: struct {
+	cbSize: DWORD,
+	rcMonitor: RECT,
+	rcWork: RECT,
+	dwFlags: DWORD,
+}
+LPMONITORINFO :: ^MONITORINFO
+
 // SetWindowsHook() codes
 WH_MIN             :: -1
 WH_MSGFILTER       :: -1

+ 1 - 0
core/sys/windows/user32.odin

@@ -100,6 +100,7 @@ foreign user32 {
 	AdjustWindowRectExForDpi :: proc(lpRect: LPRECT, dwStyle: DWORD, bMenu: BOOL, dwExStyle: DWORD, dpi: UINT) -> BOOL ---
 
 	SystemParametersInfoW :: proc(uiAction, uiParam: UINT, pvParam: PVOID, fWinIni: UINT) -> BOOL ---
+	GetMonitorInfoW :: proc(hMonitor: HMONITOR, lpmi: LPMONITORINFO) -> BOOL ---
 
 	GetWindowDC :: proc(hWnd: HWND) -> HDC ---
 	GetDC :: proc(hWnd: HWND) -> HDC ---
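
The new `MINMAXINFO`/`MONITORINFO` types and the `GetMonitorInfoW` binding combine in the usual Win32 pattern: handle `WM_GETMINMAXINFO` to constrain window sizing against the monitor's work area. A hedged sketch of such a handler, assuming `MonitorFromWindow` and `MONITOR_DEFAULTTONEAREST` are also bound (they are standard Win32; the wndproc wiring is omitted):

```odin
package main

import win32 "core:sys/windows"

// Called from a wndproc's WM_GETMINMAXINFO case; lparam points at a
// MINMAXINFO supplied by the system.
clamp_window_size :: proc(hwnd: win32.HWND, lparam: win32.LPARAM) {
	mmi := (^win32.MINMAXINFO)(rawptr(uintptr(lparam)))

	mi := win32.MONITORINFO{cbSize = size_of(win32.MONITORINFO)}
	mon := win32.MonitorFromWindow(hwnd, win32.MONITOR_DEFAULTTONEAREST)
	if win32.GetMonitorInfoW(mon, &mi) {
		// At least 400x300, at most the monitor's work area.
		mmi.ptMinTrackSize = {400, 300}
		mmi.ptMaxTrackSize = {
			mi.rcWork.right - mi.rcWork.left,
			mi.rcWork.bottom - mi.rcWork.top,
		}
	}
}

main :: proc() {
	// Window-class registration and the message loop are omitted.
}
```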

+ 5 - 0
src/check_decl.cpp

@@ -1544,8 +1544,12 @@ void check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *decl, Type *ty
 			// NOTE(bill): Don't err here
 		}
 
+		GB_ASSERT(decl->defer_use_checked == false);
+
 		check_stmt_list(ctx, bs->stmts, Stmt_CheckScopeDecls);
 
+		decl->defer_use_checked = true;
+
 		for_array(i, bs->stmts) {
 			Ast *stmt = bs->stmts[i];
 			if (stmt->kind == Ast_ValueDecl) {
@@ -1580,6 +1584,7 @@ void check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *decl, Type *ty
 				}
 			}
 		}
+
 	}
 	check_close_scope(ctx);
 

+ 3 - 0
src/check_expr.cpp

@@ -6763,6 +6763,9 @@ ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *call, Ast *pr
 	if (initial_entity != nullptr && initial_entity->kind == Entity_Procedure) {
 		if (initial_entity->Procedure.deferred_procedure.entity != nullptr) {
 			call->viral_state_flags |= ViralStateFlag_ContainsDeferredProcedure;
+			if (c->decl) {
+				c->decl->defer_used += 1;
+			}
 		}
 	}
 

+ 3 - 0
src/check_stmt.cpp

@@ -2018,6 +2018,9 @@ void check_stmt_internal(CheckerContext *ctx, Ast *node, u32 flags) {
 			ctx->in_defer = true;
 			check_stmt(ctx, ds->stmt, 0);
 			ctx->in_defer = out_in_defer;
+			if (ctx->decl) {
+				ctx->decl->defer_used += 1;
+			}
 		}
 	case_end;
 

+ 2 - 0
src/checker.hpp

@@ -158,6 +158,8 @@ struct DeclInfo {
 	bool          is_using;
 	bool          where_clauses_evaluated;
 	bool          proc_checked;
+	isize         defer_used;
+	bool          defer_use_checked;
 
 	CommentGroup *comment;
 	CommentGroup *docs;

+ 156 - 43
src/llvm_abi.cpp

@@ -1,3 +1,5 @@
+#define ALLOW_SPLIT_MULTI_RETURNS true
+
 enum lbArgKind {
 	lbArg_Direct,
 	lbArg_Indirect,
@@ -48,8 +50,16 @@ struct lbFunctionType {
 	ProcCallingConvention calling_convention;
 	Array<lbArgType> args;
 	lbArgType        ret;
+
+	LLVMTypeRef      multiple_return_original_type; // nullptr if not used
+	isize            original_arg_count;
 };
 
+gbAllocator lb_function_type_args_allocator(void) {
+	return heap_allocator();
+}
+
+
 i64 llvm_align_formula(i64 off, i64 a) {
 	return (off + a - 1) / a * a;
 }
@@ -100,7 +110,9 @@ LLVMTypeRef lb_function_type_to_llvm_raw(lbFunctionType *ft, bool is_var_arg) {
 			}
 			args[arg_index++] = arg_type;
 		} else if (arg->kind == lbArg_Indirect) {
-			GB_ASSERT(!lb_is_type_kind(arg->type, LLVMPointerTypeKind));
+			if (ft->multiple_return_original_type == nullptr || i < ft->original_arg_count) {
+				GB_ASSERT(!lb_is_type_kind(arg->type, LLVMPointerTypeKind));
+			}
 			args[arg_index++] = LLVMPointerType(arg->type, 0);
 		} else if (arg->kind == lbArg_Ignore) {
 			// ignore
@@ -147,6 +159,13 @@ void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType *ft, ProcCa
 			LLVMAddAttributeAtIndex(fn, arg_index+1, arg->align_attribute);
 		}
 
+		if (ft->multiple_return_original_type) {
+			if (ft->original_arg_count <= i) {
+				LLVMAddAttributeAtIndex(fn, arg_index+1, noalias_attr);
+				LLVMAddAttributeAtIndex(fn, arg_index+1, nonnull_attr);
+			}
+		}
+
 		arg_index++;
 	}
 
@@ -307,20 +326,58 @@ i64 lb_alignof(LLVMTypeRef type) {
 }
 
 
-#define LB_ABI_INFO(name) lbFunctionType *name(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, LLVMTypeRef return_type, bool return_is_defined, ProcCallingConvention calling_convention)
+#define LB_ABI_INFO(name) lbFunctionType *name(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple, ProcCallingConvention calling_convention)
 typedef LB_ABI_INFO(lbAbiInfoType);
 
+#define LB_ABI_COMPUTE_RETURN_TYPE(name) lbArgType name(lbFunctionType *ft, LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple)
+typedef LB_ABI_COMPUTE_RETURN_TYPE(lbAbiComputeReturnType);
+
+
+lbArgType lb_abi_modify_return_is_tuple(lbFunctionType *ft, LLVMContextRef c, LLVMTypeRef return_type, lbAbiComputeReturnType *compute_return_type) {
+	GB_ASSERT(return_type != nullptr);
+	GB_ASSERT(compute_return_type != nullptr);
+
+	lbArgType return_arg = {};
+	if (lb_is_type_kind(return_type, LLVMStructTypeKind)) {
+		unsigned field_count = LLVMCountStructElementTypes(return_type);
+		if (field_count > 1) {
+			ft->original_arg_count = ft->args.count;
+			ft->multiple_return_original_type = return_type;
+
+			for (unsigned i = 0; i < field_count-1; i++) {
+				LLVMTypeRef field_type = LLVMStructGetTypeAtIndex(return_type, i);
+				LLVMTypeRef field_pointer_type = LLVMPointerType(field_type, 0);
+				lbArgType ret_partial = lb_arg_type_direct(field_pointer_type);
+				array_add(&ft->args, ret_partial);
+			}
+
+			// override the return type for the last field
+			LLVMTypeRef new_return_type = LLVMStructGetTypeAtIndex(return_type, field_count-1);
+			return_arg = compute_return_type(ft, c, new_return_type, true, false);
+		}
+	}
+	return return_arg;
+}
+
+#define LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO() do {                                                                  \
+	if (return_is_tuple) {                                                                                      \
+		lbArgType new_return_type = lb_abi_modify_return_is_tuple(ft, c, return_type, compute_return_type); \
+		if (new_return_type.type != nullptr) {                                                              \
+			return new_return_type;                                                                     \
+		}                                                                                                   \
+	}                                                                                                           \
+} while (0)
 
 // NOTE(bill): I hate `namespace` in C++ but this is just because I don't want to prefix everything
 namespace lbAbi386 {
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count);
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined);
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type);
 
 	LB_ABI_INFO(abi_info) {
 		lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType);
 		ft->ctx = c;
 		ft->args = compute_arg_types(c, arg_types, arg_count);
-		ft->ret = compute_return_type(c, return_type, return_is_defined);
+		ft->ret = compute_return_type(ft, c, return_type, return_is_defined, return_is_tuple);
 		ft->calling_convention = calling_convention;
 		return ft;
 	}
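
This is the heart of the `llvm_abi.cpp` change: when a procedure returns a tuple that would otherwise be passed out through a single `sret` pointer, the ABI (behind `ALLOW_SPLIT_MULTI_RETURNS`) now appends one `noalias nonnull` out-pointer argument per leading tuple field and returns only the final field directly. In Odin terms, a procedure like the sketch below is what exercises this path:

```odin
package main

import "core:fmt"

// A multi-value return whose aggregate form is too large for registers:
// under the split-return ABI, `a` and `b` become hidden out-pointer
// parameters and only `ok` is returned directly.
make_pair :: proc() -> (a, b: [4]i64, ok: bool) {
	return {1, 2, 3, 4}, {5, 6, 7, 8}, true
}

main :: proc() {
	a, b, ok := make_pair()
	fmt.println(a, b, ok)
}
```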
@@ -353,7 +410,7 @@ namespace lbAbi386 {
 	}
 
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count) {
-		auto args = array_make<lbArgType>(heap_allocator(), arg_count);
+		auto args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 
 		for (unsigned i = 0; i < arg_count; i++) {
 			LLVMTypeRef t = arg_types[i];
@@ -372,7 +429,7 @@ namespace lbAbi386 {
 		return args;
 	}
 
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined) {
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type) {
 		if (!return_is_defined) {
 			return lb_arg_type_direct(LLVMVoidTypeInContext(c));
 		} else if (lb_is_type_kind(return_type, LLVMStructTypeKind) || lb_is_type_kind(return_type, LLVMArrayTypeKind)) {
@@ -383,6 +440,9 @@ namespace lbAbi386 {
 			case 4: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 32), nullptr, nullptr);
 			case 8: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 64), nullptr, nullptr);
 			}
+
+			LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
 			LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
 			return lb_arg_type_indirect(return_type, attr);
 		}
@@ -392,19 +452,19 @@ namespace lbAbi386 {
 
 namespace lbAbiAmd64Win64 {
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count);
-
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type);
 
 	LB_ABI_INFO(abi_info) {
 		lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType);
 		ft->ctx = c;
 		ft->args = compute_arg_types(c, arg_types, arg_count);
-		ft->ret = lbAbi386::compute_return_type(c, return_type, return_is_defined);
+		ft->ret = compute_return_type(ft, c, return_type, return_is_defined, return_is_tuple);
 		ft->calling_convention = calling_convention;
 		return ft;
 	}
 
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count) {
-		auto args = array_make<lbArgType>(heap_allocator(), arg_count);
+		auto args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 
 		for (unsigned i = 0; i < arg_count; i++) {
 			LLVMTypeRef t = arg_types[i];
@@ -428,6 +488,26 @@ namespace lbAbiAmd64Win64 {
 		}
 		return args;
 	}
+
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type) {
+		if (!return_is_defined) {
+			return lb_arg_type_direct(LLVMVoidTypeInContext(c));
+		} else if (lb_is_type_kind(return_type, LLVMStructTypeKind) || lb_is_type_kind(return_type, LLVMArrayTypeKind)) {
+			i64 sz = lb_sizeof(return_type);
+			switch (sz) {
+			case 1: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c,  8), nullptr, nullptr);
+			case 2: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 16), nullptr, nullptr);
+			case 4: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 32), nullptr, nullptr);
+			case 8: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 64), nullptr, nullptr);
+			}
+
+			LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
+			LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
+			return lb_arg_type_indirect(return_type, attr);
+		}
+		return lbAbi386::non_struct(c, return_type, true);
+	}
 };
 
 // NOTE(bill): I hate `namespace` in C++ but this is just because I don't want to prefix everything
@@ -478,7 +558,7 @@ namespace lbAbiAmd64SysV {
 		Amd64TypeAttribute_StructRect,
 	};
 
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined);
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type);
 	void classify_with(LLVMTypeRef t, Array<RegClass> *cls, i64 ix, i64 off);
 	void fixup(LLVMTypeRef t, Array<RegClass> *cls);
 	lbArgType amd64_type(LLVMContextRef c, LLVMTypeRef type, Amd64TypeAttributeKind attribute_kind, ProcCallingConvention calling_convention);
@@ -490,7 +570,7 @@ namespace lbAbiAmd64SysV {
 		ft->ctx = c;
 		ft->calling_convention = calling_convention;
 
-		ft->args = array_make<lbArgType>(heap_allocator(), arg_count);
+		ft->args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 		for (unsigned i = 0; i < arg_count; i++) {
 			ft->args[i] = amd64_type(c, arg_types[i], Amd64TypeAttribute_ByVal, calling_convention);
 		}
@@ -875,7 +955,7 @@ namespace lbAbiAmd64SysV {
 		}
 	}
 
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined) {
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type) {
 		if (!return_is_defined) {
 			return lb_arg_type_direct(LLVMVoidTypeInContext(c));
 		} else if (lb_is_type_kind(return_type, LLVMStructTypeKind)) {
@@ -886,6 +966,9 @@ namespace lbAbiAmd64SysV {
 			case 4: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 32), nullptr, nullptr);
 			case 8: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 64), nullptr, nullptr);
 			}
+
+			LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
 			LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
 			return lb_arg_type_indirect(return_type, attr);
 		} else if (build_context.metrics.os == TargetOs_windows && lb_is_type_kind(return_type, LLVMIntegerTypeKind) && lb_sizeof(return_type) == 16) {
@@ -898,13 +981,13 @@ namespace lbAbiAmd64SysV {
 
 namespace lbAbiArm64 {
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count);
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined);
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type);
 	bool is_homogenous_aggregate(LLVMContextRef c, LLVMTypeRef type, LLVMTypeRef *base_type_, unsigned *member_count_);
 
 	LB_ABI_INFO(abi_info) {
 		lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType);
 		ft->ctx = c;
-		ft->ret = compute_return_type(c, return_type, return_is_defined);
+		ft->ret = compute_return_type(ft, c, return_type, return_is_defined, return_is_tuple);
 		ft->args = compute_arg_types(c, arg_types, arg_count);
 		ft->calling_convention = calling_convention;
 		return ft;
@@ -1012,27 +1095,29 @@ namespace lbAbiArm64 {
 		return (member_count <= 4);
 	}
 
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef type, bool return_is_defined) {
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type) {
 		LLVMTypeRef homo_base_type = nullptr;
 		unsigned homo_member_count = 0;
 
 		if (!return_is_defined) {
 			return lb_arg_type_direct(LLVMVoidTypeInContext(c));
-		} else if (is_register(type)) {
-			return non_struct(c, type);
-		} else if (is_homogenous_aggregate(c, type, &homo_base_type, &homo_member_count)) {
+		} else if (is_register(return_type)) {
+			return non_struct(c, return_type);
+		} else if (is_homogenous_aggregate(c, return_type, &homo_base_type, &homo_member_count)) {
 			if (is_homogenous_aggregate_small_enough(homo_base_type, homo_member_count)) {
-				return lb_arg_type_direct(type, LLVMArrayType(homo_base_type, homo_member_count), nullptr, nullptr);
+				return lb_arg_type_direct(return_type, LLVMArrayType(homo_base_type, homo_member_count), nullptr, nullptr);
 			} else {
 				//TODO(Platin): do I need to create stuff that can handle the different return type?
 				//              otherwise this needs a fix in llvm_backend_proc, as we would need to cast it to the correct array type
 
+				LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
 				//LLVMTypeRef array_type = LLVMArrayType(homo_base_type, homo_member_count);
-				LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", type);
-				return lb_arg_type_indirect(type, attr);
+				LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
+				return lb_arg_type_indirect(return_type, attr);
 			}
 		} else {
-			i64 size = lb_sizeof(type);
+			i64 size = lb_sizeof(return_type);
 			if (size <= 16) {
 				LLVMTypeRef cast_type = nullptr;
 				if (size <= 1) {
@@ -1047,16 +1132,18 @@ namespace lbAbiArm64 {
 					unsigned count = cast(unsigned)((size+7)/8);
 					cast_type = LLVMArrayType(LLVMInt64TypeInContext(c), count);
 				}
-				return lb_arg_type_direct(type, cast_type, nullptr, nullptr);
+				return lb_arg_type_direct(return_type, cast_type, nullptr, nullptr);
 			} else {
-				LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", type);
-				return lb_arg_type_indirect(type, attr);
+				LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
+				LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
+				return lb_arg_type_indirect(return_type, attr);
 			}
 		}
 	}
     
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count) {
-		auto args = array_make<lbArgType>(heap_allocator(), arg_count);
+		auto args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 
 		for (unsigned i = 0; i < arg_count; i++) {
 			LLVMTypeRef type = arg_types[i];
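
On arm64 the same pattern applies with an extra case: a homogeneous aggregate of at most four floating-point members is returned directly as an LLVM array of its base type; other aggregates up to 16 bytes are coerced to integers, and everything else goes through `sret` (now preceded by the tuple-split macro). A small Odin sketch (names are illustrative):

	Quat :: struct {x, y, z, w: f32}  // homogeneous, 4 members: returned directly as [4 x float]
	Mat3 :: struct {m: [9]f32}        // 9 members: too many, returned indirectly via `sret`
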
@@ -1102,7 +1189,7 @@ namespace lbAbiWasm {
 		            registers/arguments if possible rather than by pointer.
 	*/
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count);
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined);
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type);
 
 	enum {MAX_DIRECT_STRUCT_SIZE = 32};
 
@@ -1110,7 +1197,7 @@ namespace lbAbiWasm {
 		lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType);
 		ft->ctx = c;
 		ft->args = compute_arg_types(c, arg_types, arg_count);
-		ft->ret = compute_return_type(c, return_type, return_is_defined);
+		ft->ret = compute_return_type(ft, c, return_type, return_is_defined, return_is_tuple);
 		ft->calling_convention = calling_convention;
 		return ft;
 	}
@@ -1188,7 +1275,7 @@ namespace lbAbiWasm {
 	
 
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count) {
-		auto args = array_make<lbArgType>(heap_allocator(), arg_count);
+		auto args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 
 		for (unsigned i = 0; i < arg_count; i++) {
 			LLVMTypeRef t = arg_types[i];
@@ -1202,7 +1289,7 @@ namespace lbAbiWasm {
 		return args;
 	}
 
-	lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined) {
+	LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type) {
 		if (!return_is_defined) {
 			return lb_arg_type_direct(LLVMVoidTypeInContext(c));
 		} else if (lb_is_type_kind(return_type, LLVMStructTypeKind) || lb_is_type_kind(return_type, LLVMArrayTypeKind)) {
@@ -1217,6 +1304,9 @@ namespace lbAbiWasm {
 			case 4: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 32), nullptr, nullptr);
 			case 8: return lb_arg_type_direct(return_type, LLVMIntTypeInContext(c, 64), nullptr, nullptr);
 			}
+
+			LB_ABI_MODIFY_RETURN_IF_TUPLE_MACRO();
+
 			LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", return_type);
 			return lb_arg_type_indirect(return_type, attr);
 		}
@@ -1266,7 +1356,7 @@ namespace lbAbiArm32 {
 	}
 
 	Array<lbArgType> compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, ProcCallingConvention calling_convention) {
-		auto args = array_make<lbArgType>(heap_allocator(), arg_count);
+		auto args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 
 		for (unsigned i = 0; i < arg_count; i++) {
 			LLVMTypeRef t = arg_types[i];
@@ -1307,14 +1397,14 @@ namespace lbAbiArm32 {
 };
 
 
-LB_ABI_INFO(lb_get_abi_info) {
+LB_ABI_INFO(lb_get_abi_info_internal) {
 	switch (calling_convention) {
 	case ProcCC_None:
 	case ProcCC_InlineAsm:
 		{
 			lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType);
 			ft->ctx = c;
-			ft->args = array_make<lbArgType>(heap_allocator(), arg_count);
+			ft->args = array_make<lbArgType>(lb_function_type_args_allocator(), arg_count);
 			for (unsigned i = 0; i < arg_count; i++) {
 				ft->args[i] = lb_arg_type_direct(arg_types[i]);
 			}
@@ -1328,32 +1418,55 @@ LB_ABI_INFO(lb_get_abi_info) {
 		}
 	case ProcCC_Win64:
 		GB_ASSERT(build_context.metrics.arch == TargetArch_amd64);
-		return lbAbiAmd64Win64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbiAmd64Win64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	case ProcCC_SysV:
 		GB_ASSERT(build_context.metrics.arch == TargetArch_amd64);
-		return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	}
 
 	switch (build_context.metrics.arch) {
 	case TargetArch_amd64:
-		if (build_context.metrics.os == TargetOs_windows || build_context.metrics.abi == TargetABI_Win64) {
-			return lbAbiAmd64Win64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		if (build_context.metrics.os == TargetOs_windows) {
+			return lbAbiAmd64Win64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
+		} else if (build_context.metrics.abi == TargetABI_Win64) {
+			return lbAbiAmd64Win64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 		} else if (build_context.metrics.abi == TargetABI_SysV) {
-			return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+			return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 		} else {
-			return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+			return lbAbiAmd64SysV::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 		}
 	case TargetArch_i386:
-		return lbAbi386::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbi386::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	case TargetArch_arm32:
-		return lbAbiArm32::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbiArm32::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	case TargetArch_arm64:
-		return lbAbiArm64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbiArm64::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	case TargetArch_wasm32:
 	case TargetArch_wasm64:
-		return lbAbiWasm::abi_info(c, arg_types, arg_count, return_type, return_is_defined, calling_convention);
+		return lbAbiWasm::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention);
 	}
 
 	GB_PANIC("Unsupported ABI");
 	return {};
 }
+
+
+LB_ABI_INFO(lb_get_abi_info) {
+	lbFunctionType *ft = lb_get_abi_info_internal(
+		c,
+		arg_types, arg_count,
+		return_type, return_is_defined,
+		ALLOW_SPLIT_MULTI_RETURNS && return_is_tuple && is_calling_convention_odin(calling_convention),
+		calling_convention);
+
+
+	// NOTE(bill): this is handled here rather than when developing the type in `lb_type_internal_for_procedures_raw`
+	// This is to make it consistent when and how it is handled
+	if (calling_convention == ProcCC_Odin) {
+		// append the `context` pointer
+		lbArgType context_param = lb_arg_type_direct(LLVMPointerType(LLVMInt8TypeInContext(c), 0));
+		array_add(&ft->args, context_param);
+	}
+
+	return ft;
+}
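
The wrapper gates tuple splitting on `ALLOW_SPLIT_MULTI_RETURNS`, a tuple return type, and an Odin calling convention, and it now appends the implicit `context` pointer in exactly one place. A small Odin sketch (names are illustrative) of the two cases:

	add   :: proc "contextless" (a, b: int) -> int { return a + b }  // no hidden parameter appended
	scale :: proc (x: int) -> int { return 2 * x }                   // default Odin CC: a trailing context pointer is appended
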

+ 3 - 3
src/llvm_backend.cpp

@@ -1165,7 +1165,7 @@ lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProcedure *start
 	
 	for (Entity *e : info->init_procedures) {
 		lbValue value = lb_find_procedure_value_from_entity(main_module, e);
-		lb_emit_call(p, value, {}, ProcInlining_none, false);
+		lb_emit_call(p, value, {}, ProcInlining_none);
 	}
 
 
@@ -1243,7 +1243,7 @@ lbProcedure *lb_create_main_procedure(lbModule *m, lbProcedure *startup_runtime)
 	}
 	
 	lbValue startup_runtime_value = {startup_runtime->value, startup_runtime->type};
-	lb_emit_call(p, startup_runtime_value, {}, ProcInlining_none, false);
+	lb_emit_call(p, startup_runtime_value, {}, ProcInlining_none);
 
 	if (build_context.command_kind == Command_test) {
 		Type *t_Internal_Test = find_type_in_pkg(m->info, str_lit("testing"), str_lit("Internal_Test"));
@@ -1304,7 +1304,7 @@ lbProcedure *lb_create_main_procedure(lbModule *m, lbProcedure *startup_runtime)
 	
 	if (call_cleanup) {
 		lbValue cleanup_runtime_value = lb_find_runtime_value(m, str_lit("_cleanup_runtime"));
-		lb_emit_call(p, cleanup_runtime_value, {}, ProcInlining_none, false);
+		lb_emit_call(p, cleanup_runtime_value, {}, ProcInlining_none);
 	}
 	
 

+ 8 - 14
src/llvm_backend.hpp

@@ -254,17 +254,15 @@ struct lbTargetList {
 };
 
 
+struct lbTupleFix {
+	Slice<lbValue> values;
+};
+
 enum lbProcedureFlag : u32 {
 	lbProcedureFlag_WithoutMemcpyPass = 1<<0,
 	lbProcedureFlag_DebugAllocaCopy = 1<<1,
 };
 
-struct lbCopyElisionHint {
-	lbValue ptr;
-	Ast *   ast;
-	bool    used;
-};
-
 struct lbProcedure {
 	u32 flags;
 	u16 state_flags;
@@ -310,10 +308,9 @@ struct lbProcedure {
 
 	LLVMMetadataRef debug_info;
 
-	lbCopyElisionHint copy_elision_hint;
-
 	PtrMap<Ast *, lbValue> selector_values;
 	PtrMap<Ast *, lbAddr>  selector_addr;
+	PtrMap<LLVMValueRef, lbTupleFix> tuple_fix_map;
 };
 
 
@@ -368,6 +365,7 @@ lbValue lb_emit_epi(lbModule *m, lbValue const &value, isize index);
 lbValue lb_emit_array_epi(lbModule *m, lbValue s, isize index);
 lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index);
 lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index);
+lbValue lb_emit_tuple_ev(lbProcedure *p, lbValue value, i32 index);
 lbValue lb_emit_array_epi(lbProcedure *p, lbValue value, isize index);
 lbValue lb_emit_array_ep(lbProcedure *p, lbValue s, lbValue index);
 lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection sel);
@@ -383,7 +381,7 @@ lbValue lb_emit_byte_swap(lbProcedure *p, lbValue value, Type *end_type);
 void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block);
 lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t);
 lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue right);
-lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args, ProcInlining inlining = ProcInlining_none, bool use_return_ptr_hint = false);
+lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args, ProcInlining inlining = ProcInlining_none);
 lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t);
 lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x);
 
@@ -400,7 +398,7 @@ lbContextData *lb_push_context_onto_stack_from_implicit_parameter(lbProcedure *p
 
 
 lbAddr lb_add_global_generated(lbModule *m, Type *type, lbValue value={}, Entity **entity_=nullptr);
-lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e=nullptr, bool zero_init=true, i32 param_index=0, bool force_no_init=false);
+lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e=nullptr, bool zero_init=true, bool force_no_init=false);
 
 void lb_add_foreign_library_path(lbModule *m, Entity *e);
 
@@ -497,10 +495,6 @@ void lb_mem_zero_ptr(lbProcedure *p, LLVMValueRef ptr, Type *type, unsigned alig
 
 void lb_emit_init_context(lbProcedure *p, lbAddr addr);
 
-lbCopyElisionHint lb_set_copy_elision_hint(lbProcedure *p, lbAddr const &addr, Ast *ast);
-void lb_reset_copy_elision_hint(lbProcedure *p, lbCopyElisionHint prev_hint);
-lbValue lb_consume_copy_elision_hint(lbProcedure *p);
-
 
 lbStructFieldRemapping lb_get_struct_remapping(lbModule *m, Type *t);
 LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align);

+ 1 - 1
src/llvm_backend_expr.cpp

@@ -134,7 +134,7 @@ lbValue lb_emit_unary_arith(lbProcedure *p, TokenKind op, lbValue x, Type *type)
 		Type *elem_type = base_array_type(type);
 
 		// NOTE(bill): Doesn't need to be zero because it will be initialized in the loops
-		lbAddr res_addr = lb_add_local(p, type, nullptr, false, 0, true);
+		lbAddr res_addr = lb_add_local(p, type, nullptr, false, true);
 		lbValue res = lb_addr_get_ptr(p, res_addr);
 
 		bool inline_array_arith = lb_can_try_to_inline_array_arith(type);

+ 17 - 22
src/llvm_backend_general.cpp

@@ -1499,9 +1499,6 @@ LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *type) {
 	}
 
 	unsigned param_count = 0;
-	if (type->Proc.calling_convention == ProcCC_Odin) {
-		param_count += 1;
-	}
 
 	if (type->Proc.param_count != 0) {
 		GB_ASSERT(type->Proc.params->kind == Type_Tuple);
@@ -1519,21 +1516,23 @@ LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *type) {
 	m->internal_type_level += 1;
 	defer (m->internal_type_level -= 1);
 
+	bool return_is_tuple = false;
 	LLVMTypeRef ret = nullptr;
 	LLVMTypeRef *params = gb_alloc_array(permanent_allocator(), LLVMTypeRef, param_count);
 	bool *params_by_ptr = gb_alloc_array(permanent_allocator(), bool, param_count);
 	if (type->Proc.result_count != 0) {
 		Type *single_ret = reduce_tuple_to_single_type(type->Proc.results);
-		 if (is_type_proc(single_ret)) {
+		if (is_type_proc(single_ret)) {
 			single_ret = t_rawptr;
 		}
 		ret = lb_type(m, single_ret);
-		if (ret != nullptr) {
-			if (is_type_boolean(single_ret) &&
-			    is_calling_convention_none(type->Proc.calling_convention) &&
-			    type_size_of(single_ret) <= 1) {
-				ret = LLVMInt1TypeInContext(m->ctx);
-			}
+		if (is_type_tuple(single_ret)) {
+			return_is_tuple = true;
+		}
+		if (is_type_boolean(single_ret) &&
+		    is_calling_convention_none(type->Proc.calling_convention) &&
+		    type_size_of(single_ret) <= 1) {
+			ret = LLVMInt1TypeInContext(m->ctx);
 		}
 	}
 
@@ -1571,12 +1570,8 @@ LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *type) {
 			params[param_index++] = param_type;
 		}
 	}
-	if (param_index < param_count) {
-		params[param_index++] = lb_type(m, t_rawptr);
-	}
 	GB_ASSERT(param_index == param_count);
-
-	lbFunctionType *ft = lb_get_abi_info(m->ctx, params, param_count, ret, ret != nullptr, type->Proc.calling_convention);
+	lbFunctionType *ft = lb_get_abi_info(m->ctx, params, param_count, ret, ret != nullptr, return_is_tuple, type->Proc.calling_convention);
 	{
 		for_array(j, ft->args) {
 			auto arg = ft->args[j];
@@ -1593,10 +1588,10 @@ LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *type) {
 		              LLVMPrintTypeToString(ft->ret.type),
 		              LLVMGetTypeContext(ft->ret.type), ft->ctx, LLVMGetGlobalContext());
 	}
-	for_array(j, ft->args) {
-		if (params_by_ptr[j]) {
+	for (unsigned i = 0; i < param_count; i++) {
+		if (params_by_ptr[i]) {
 			// NOTE(bill): The parameter needs to be passed "indirectly", override it
-			ft->args[j].kind = lbArg_Indirect;
+			ft->args[i].kind = lbArg_Indirect;
 		}
 	}
 
@@ -2169,11 +2164,11 @@ LLVMTypeRef lb_type(lbModule *m, Type *type) {
 	return llvm_type;
 }
 
-lbFunctionType *lb_get_function_type(lbModule *m, lbProcedure *p, Type *pt) {
+lbFunctionType *lb_get_function_type(lbModule *m, Type *pt) {
 	lbFunctionType **ft_found = nullptr;
 	ft_found = map_get(&m->function_type_map, pt);
 	if (!ft_found) {
-		LLVMTypeRef llvm_proc_type = lb_type(p->module, pt);
+		LLVMTypeRef llvm_proc_type = lb_type(m, pt);
 		gb_unused(llvm_proc_type);
 		ft_found = map_get(&m->function_type_map, pt);
 	}
@@ -2873,7 +2868,7 @@ lbValue lb_build_cond(lbProcedure *p, Ast *cond, lbBlock *true_block, lbBlock *f
 }
 
 
-lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e, bool zero_init, i32 param_index, bool force_no_init) {
+lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e, bool zero_init, bool force_no_init) {
 	GB_ASSERT(p->decl_block != p->curr_block);
 	LLVMPositionBuilderAtEnd(p->builder, p->decl_block->block);
 
@@ -2927,7 +2922,7 @@ lbAddr lb_add_local_generated(lbProcedure *p, Type *type, bool zero_init) {
 }
 
 lbAddr lb_add_local_generated_temp(lbProcedure *p, Type *type, i64 min_alignment) {
-	lbAddr res = lb_add_local(p, type, nullptr, false, 0, true);
+	lbAddr res = lb_add_local(p, type, nullptr, false, true);
 	lb_try_update_alignment(res.addr, cast(unsigned)min_alignment);
 	return res;
 }
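
With the context parameter moved into `lb_get_abi_info`, this function now only needs to note whether the reduced result type is still a tuple. A one-line Odin sketch (the name is illustrative) of a procedure type that sets `return_is_tuple`:

	Parse_Proc :: #type proc(s: string) -> (value: int, ok: bool)  // two results: the reduced return type is a tuple
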

+ 166 - 42
src/llvm_backend_proc.cpp

@@ -1,4 +1,3 @@
-
 LLVMValueRef lb_call_intrinsic(lbProcedure *p, const char *name, LLVMValueRef* args, unsigned arg_count, LLVMTypeRef* types, unsigned type_count)
 {
 	unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
@@ -123,6 +122,7 @@ lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool ignore_body)
 	p->scope_stack.allocator   = a;
 	map_init(&p->selector_values,  a, 0);
 	map_init(&p->selector_addr,    a, 0);
+	map_init(&p->tuple_fix_map,    a, 0);
 
 	if (p->is_foreign) {
 		lb_add_foreign_library_path(p->module, entity->Procedure.foreign_library);
@@ -346,6 +346,7 @@ lbProcedure *lb_create_dummy_procedure(lbModule *m, String link_name, Type *type
 	p->blocks.allocator        = a;
 	p->branch_blocks.allocator = a;
 	p->context_stack.allocator = a;
+	map_init(&p->tuple_fix_map, a, 0);
 
 
 	char *c_link_name = alloc_cstring(permanent_allocator(), p->name);
@@ -501,8 +502,23 @@ void lb_begin_procedure_body(lbProcedure *p) {
 			// NOTE(bill): this must be parameter 0
 
 			String name = str_lit("agg.result");
+			if (ft->multiple_return_original_type &&
+			    p->type->Proc.has_named_results) {
+				auto const &variables = p->type->Proc.results->Tuple.variables;
+				Entity *e = variables[variables.count-1];
+				if (!is_blank_ident(e->token)) {
+					name = e->token.string;
+				}
+			}
 
-			Type *ptr_type = alloc_type_pointer(reduce_tuple_to_single_type(p->type->Proc.results));
+			Type *return_ptr_type = reduce_tuple_to_single_type(p->type->Proc.results);
+			bool split_returns = ft->multiple_return_original_type != nullptr;
+			if (split_returns) {
+				GB_ASSERT(is_type_tuple(return_ptr_type));
+				auto const &variables = return_ptr_type->Tuple.variables;
+				return_ptr_type = variables[variables.count-1]->type;
+			}
+			Type *ptr_type = alloc_type_pointer(return_ptr_type);
 			Entity *e = alloc_entity_param(nullptr, make_token_ident(name), ptr_type, false, false);
 			e->flags |= EntityFlag_NoAlias;
 
@@ -580,14 +596,70 @@ void lb_begin_procedure_body(lbProcedure *p) {
 				if (e->token.string != "") {
 					GB_ASSERT(!is_blank_ident(e->token));
 
-					// NOTE(bill): Don't even bother trying to optimize this with the return ptr value
-					// This will violate the defer rules if you do:
-					//         foo :: proc() -> (x, y: T) {
-					//                 defer x = ... // defer is executed after the `defer`
-					//                 return // the values returned should be zeroed
-					//         }
-					// NOTE(bill): REALLY, don't even bother.
-					lbAddr res = lb_add_local(p, e->type, e);
+					lbAddr res = {};
+					if (p->entity && p->entity->decl_info &&
+					    p->entity->decl_info->defer_use_checked &&
+					    p->entity->decl_info->defer_used == 0) {
+
+						// NOTE(bill): this is a bodge to get around the problem described BELOW.
+						// We check whether a `defer` statement is ever used within the procedure;
+						// if it never is, we see if the return value pointer can be taken directly.
+						//
+						// NOTE(bill): this could be buggy if a case where `defer` is used has been missed
+						//
+						// TODO(bill): this could be optimized to check whether a `defer` only uses
+						// the variable in question
+
+						bool has_return_ptr = p->return_ptr.addr.value != nullptr;
+						lbValue ptr = {};
+
+						if (ft->multiple_return_original_type != nullptr) {
+							isize the_offset = -1;
+							if (i+1 < results->variables.count) {
+								the_offset = cast(isize)param_offset + ft->original_arg_count + i;
+							} else if (has_return_ptr) {
+								GB_ASSERT(i+1 == results->variables.count);
+								the_offset = 0;
+							}
+							if (the_offset >= 0) {
+								// NOTE: assign the outer `ptr`; redeclaring it here would shadow it,
+								// and the `ptr.value != nullptr` check below would never see the parameter
+								ptr.value = LLVMGetParam(p->value, cast(unsigned)the_offset);
+								ptr.type = alloc_type_pointer(e->type);
+							}
+						} else if (has_return_ptr) {
+							ptr = p->return_ptr.addr;
+
+							if (results->variables.count > 1) {
+								ptr = lb_emit_tuple_ep(p, ptr, cast(i32)i);
+							}
+							GB_ASSERT(is_type_pointer(ptr.type));
+							GB_ASSERT(are_types_identical(type_deref(ptr.type), e->type));
+						}
+
+						if (ptr.value != nullptr) {
+							lb_add_entity(p->module, e, ptr);
+							lb_add_debug_local_variable(p, ptr.value, e->type, e->token);
+							// NOTE(bill): no need to zero on the callee side as it is zeroed on the caller side
+
+							res = lb_addr(ptr);
+						}
+					}
+
+					if (res.addr.type == nullptr) {
+						// NOTE(bill): Don't even bother trying to optimize this with the return ptr value
+						// This will violate the defer rules if you do:
+						//         foo :: proc() -> (x, y: T) {
+						//                 defer x = ... // the defer is executed after the `return`
+						//                 return // the values returned should be zeroed
+						//         }
+						// NOTE(bill): REALLY, don't even bother.
+						//
+						// IMPORTANT NOTE(bill): REALLY, don't even bother!!!!!!
+						res = lb_add_local(p, e->type, e);
+					}
+
 					if (e->Variable.param_value.kind != ParameterValue_Invalid) {
 						lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, e->token.pos);
 						lb_addr_store(p, res, c);
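
A small Odin sketch (names are illustrative) of the case this bodge targets: a procedure with named results and no `defer` anywhere in its body, whose result variable can therefore alias the return slot directly instead of being a separate zero-initialized local.

	sum :: proc(xs: []int) -> (total: int) {
		for x in xs do total += x
		return  // `total` can live in the return slot: no defer can observe a stale value
	}
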
@@ -700,15 +772,8 @@ Array<lbValue> lb_value_to_array(lbProcedure *p, lbValue value) {
 	if (t == nullptr) {
 		// Do nothing
 	} else if (is_type_tuple(t)) {
-		GB_ASSERT(t->kind == Type_Tuple);
-		auto *rt = &t->Tuple;
-		if (rt->variables.count > 0) {
-			array = array_make<lbValue>(permanent_allocator(), rt->variables.count);
-			for_array(i, rt->variables) {
-				lbValue elem = lb_emit_struct_ev(p, value, cast(i32)i);
-				array[i] = elem;
-			}
-		}
+		array = array_make<lbValue>(permanent_allocator(), 0, t->Tuple.variables.count);
+		lb_append_tuple_values(p, &array, value);
 	} else {
 		array = array_make<lbValue>(permanent_allocator(), 1);
 		array[0] = value;
@@ -734,6 +799,7 @@ lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue return_ptr,
 	if (return_ptr.value != nullptr) {
 		args[arg_index++] = return_ptr.value;
 	}
+
 	for_array(i, processed_args) {
 		lbValue arg = processed_args[i];
 		if (is_type_proc(arg.type)) {
@@ -741,16 +807,23 @@ lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue return_ptr,
 		}
 		args[arg_index++] = arg.value;
 	}
+
 	if (context_ptr.addr.value != nullptr) {
 		LLVMValueRef cp = context_ptr.addr.value;
 		cp = LLVMBuildPointerCast(p->builder, cp, lb_type(p->module, t_rawptr), "");
 		args[arg_index++] = cp;
 	}
+
+	GB_ASSERT(arg_index == arg_count);
+
 	LLVMBasicBlockRef curr_block = LLVMGetInsertBlock(p->builder);
 	GB_ASSERT(curr_block != p->decl_block->block);
 
 	{
-		LLVMTypeRef fnp = lb_type_internal_for_procedures_raw(p->module, value.type);
+		Type *proc_type = base_type(value.type);
+		GB_ASSERT(proc_type->kind == Type_Proc);
+
+		LLVMTypeRef fnp = lb_type_internal_for_procedures_raw(p->module, proc_type);
 		LLVMTypeRef ftp = LLVMPointerType(fnp, 0);
 		LLVMValueRef fn = value.value;
 		if (!lb_is_type_kind(LLVMTypeOf(value.value), LLVMFunctionTypeKind)) {
@@ -775,10 +848,11 @@ lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue return_ptr,
 				// LLVMTypeKind arg_kind = LLVMGetTypeKind(arg_type);
 				GB_ASSERT_MSG(
 					arg_type == param_type,
-					"Parameter types do not match: %s != %s, argument: %s",
+					"Parameter types do not match: %s != %s, argument: %s\n\t%s",
 					LLVMPrintTypeToString(arg_type),
 					LLVMPrintTypeToString(param_type),
-					LLVMPrintValueToString(args[i])
+					LLVMPrintValueToString(args[i]),
+					LLVMPrintTypeToString(fnp)
 				);
 			}
 		}
@@ -882,7 +956,7 @@ lbValue lb_emit_conjugate(lbProcedure *p, lbValue val, Type *type) {
 	return lb_emit_load(p, res);
 }
 
-lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args, ProcInlining inlining, bool use_copy_elision_hint) {
+lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args, ProcInlining inlining) {
 	lbModule *m = p->module;
 
 	Type *pt = base_type(value.type);
@@ -915,8 +989,9 @@ lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args,
 
 		bool is_odin_cc = is_calling_convention_odin(pt->Proc.calling_convention);
 
-		lbFunctionType *ft = lb_get_function_type(m, p, pt);
+		lbFunctionType *ft = lb_get_function_type(m, pt);
 		bool return_by_pointer = ft->ret.kind == lbArg_Indirect;
+		bool split_returns = ft->multiple_return_original_type != nullptr;
 
 		unsigned param_index = 0;
 		for (isize i = 0; i < param_count; i++) {
@@ -979,18 +1054,19 @@ lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args,
 		}
 
 		Type *rt = reduce_tuple_to_single_type(results);
-		if (return_by_pointer) {
-			lbValue return_ptr = {};
-			if (use_copy_elision_hint && p->copy_elision_hint.ptr.value != nullptr) {
-				if (are_types_identical(type_deref(p->copy_elision_hint.ptr.type), rt)) {
-					return_ptr = lb_consume_copy_elision_hint(p);
-				}
-			}
-			if (return_ptr.value == nullptr) {
-				lbAddr r = lb_add_local_generated(p, rt, true);
-				return_ptr = r.addr;
+		Type *original_rt = rt;
+		if (split_returns) {
+			GB_ASSERT(rt->kind == Type_Tuple);
+			for (isize j = 0; j < rt->Tuple.variables.count-1; j++) {
+				Type *partial_return_type = rt->Tuple.variables[j]->type;
+				lbValue partial_return_ptr = lb_add_local(p, partial_return_type, nullptr, true, false).addr;
+				array_add(&processed_args, partial_return_ptr);
 			}
-			GB_ASSERT(is_type_pointer(return_ptr.type));
+			rt = reduce_tuple_to_single_type(rt->Tuple.variables[rt->Tuple.variables.count-1]->type);
+		}
+
+		if (return_by_pointer) {
+			lbValue return_ptr = lb_add_local_generated(p, rt, true).addr;
 			lb_emit_call_internal(p, value, return_ptr, processed_args, nullptr, context_ptr, inlining);
 			result = lb_emit_load(p, return_ptr);
 		} else if (rt != nullptr) {
@@ -1010,6 +1086,47 @@ lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args,
 			lb_emit_call_internal(p, value, {}, processed_args, nullptr, context_ptr, inlining);
 		}
 
+		if (original_rt != rt) {
+			GB_ASSERT(split_returns);
+			GB_ASSERT(is_type_tuple(original_rt));
+
+			// IMPORTANT NOTE(bill, 2022-11-24)
+			// result_ptr is a dummy value which is only used to reference a tuple
+			// value for the "tuple-fix"
+			//
+			// The reason for the fake stack allocation is to have a unique pointer
+			// for the value to be used as a key within the procedure itself
+
+			lbValue result_ptr = lb_add_local_generated(p, original_rt, false).addr;
+			isize ret_count = original_rt->Tuple.variables.count;
+
+			auto tuple_fix_values = slice_make<lbValue>(permanent_allocator(), ret_count);
+			auto tuple_geps = slice_make<lbValue>(permanent_allocator(), ret_count);
+
+			isize offset = ft->original_arg_count;
+			for (isize j = 0; j < ret_count-1; j++) {
+				lbValue ret_arg_ptr = processed_args[offset + j];
+				lbValue ret_arg = lb_emit_load(p, ret_arg_ptr);
+				tuple_fix_values[j] = ret_arg;
+			}
+			tuple_fix_values[ret_count-1] = result;
+
+		#if 0
+			for (isize j = 0; j < ret_count; j++) {
+				tuple_geps[j] = lb_emit_struct_ep(p, result_ptr, cast(i32)j);
+			}
+			for (isize j = 0; j < ret_count; j++) {
+				lb_emit_store(p, tuple_geps[j], tuple_fix_values[j]);
+			}
+		#endif
+
+			result = lb_emit_load(p, result_ptr);
+
+			lbTupleFix tf = {tuple_fix_values};
+			map_set(&p->tuple_fix_map, result_ptr.value, tf);
+			map_set(&p->tuple_fix_map, result.value, tf);
+		}
+
 	}
 
 	Entity **found = map_get(&p->module->procedure_values, value.value);
@@ -2300,7 +2417,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv,
 		);
 		LLVMSetWeak(value, weak);
 
-		if (tv.type->kind == Type_Tuple) {
+		if (is_type_tuple(tv.type)) {
 			Type *fix_typed = alloc_type_tuple();
 			slice_init(&fix_typed->Tuple.variables, permanent_allocator(), 2);
 			fix_typed->Tuple.variables[0] = tv.type->Tuple.variables[0];
@@ -3032,7 +3149,7 @@ lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 			}
 		}
 
-		return lb_emit_call(p, value, args, ce->inlining, p->copy_elision_hint.ast == expr);
+		return lb_emit_call(p, value, args, ce->inlining);
 	}
 
 	isize arg_index = 0;
@@ -3044,7 +3161,7 @@ lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 		GB_ASSERT_MSG(tav.mode != Addressing_Invalid, "%s %s %d", expr_to_string(arg), expr_to_string(expr), tav.mode);
 		GB_ASSERT_MSG(tav.mode != Addressing_ProcGroup, "%s", expr_to_string(arg));
 		Type *at = tav.type;
-		if (at->kind == Type_Tuple) {
+		if (is_type_tuple(at)) {
 			arg_count += at->Tuple.variables.count;
 		} else {
 			arg_count++;
@@ -3084,9 +3201,16 @@ lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 			lbValue a = lb_build_expr(p, arg);
 			Type *at = a.type;
 			if (at->kind == Type_Tuple) {
-				for_array(i, at->Tuple.variables) {
-					lbValue v = lb_emit_struct_ev(p, a, cast(i32)i);
-					args[arg_index++] = v;
+				lbTupleFix *tf = map_get(&p->tuple_fix_map, a.value);
+				if (tf) {
+					for_array(j, tf->values) {
+						args[arg_index++] = tf->values[j];
+					}
+				} else {
+					for_array(j, at->Tuple.variables) {
+						lbValue v = lb_emit_struct_ev(p, a, cast(i32)j);
+						args[arg_index++] = v;
+					}
 				}
 			} else {
 				args[arg_index++] = a;
@@ -3213,6 +3337,6 @@ lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 	}
 
 	auto call_args = array_slice(args, 0, final_count);
-	return lb_emit_call(p, value, call_args, ce->inlining, p->copy_elision_hint.ast == expr);
+	return lb_emit_call(p, value, call_args, ce->inlining);
 }
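
A minimal Odin sketch (names are illustrative) of how a split call is lowered on the caller side: the first n-1 results become caller-allocated out-pointers appended to the argument list, the final result is returned as before, and the pieces are recorded in `tuple_fix_map`.

	three :: proc() -> (a: int, b: int, ok: bool) { return 1, 2, true }

	caller :: proc() {
		a, b, ok := three()
		// roughly: ok = three(&a_slot, &b_slot), then a and b are loaded back
		_, _, _ = a, b, ok
	}
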
 

+ 88 - 83
src/llvm_backend_stmt.cpp

@@ -1,31 +1,3 @@
-lbCopyElisionHint lb_set_copy_elision_hint(lbProcedure *p, lbAddr const &addr, Ast *ast) {
-	lbCopyElisionHint prev = p->copy_elision_hint;
-	p->copy_elision_hint.used = false;
-	p->copy_elision_hint.ptr = {};
-	p->copy_elision_hint.ast = nullptr;
-	#if 0
-	if (addr.kind == lbAddr_Default && addr.addr.value != nullptr) {
-		p->copy_elision_hint.ptr = lb_addr_get_ptr(p, addr);
-		p->copy_elision_hint.ast = unparen_expr(ast);
-	}
-	#endif
-	return prev;
-}
-
-void lb_reset_copy_elision_hint(lbProcedure *p, lbCopyElisionHint prev_hint) {
-	p->copy_elision_hint = prev_hint;
-}
-
-
-lbValue lb_consume_copy_elision_hint(lbProcedure *p) {
-	lbValue return_ptr = p->copy_elision_hint.ptr;
-	p->copy_elision_hint.used = true;
-	p->copy_elision_hint.ptr = {};
-	p->copy_elision_hint.ast = nullptr;
-	return return_ptr;
-}
-
-
 void lb_build_constant_value_decl(lbProcedure *p, AstValueDecl *vd) {
 	if (vd == nullptr || vd->is_mutable) {
 		return;
@@ -726,13 +698,13 @@ void lb_build_range_tuple(lbProcedure *p, Ast *expr, Type *val0_type, Type *val1
 	i32 tuple_count = cast(i32)tuple->Tuple.variables.count;
 	i32 cond_index = tuple_count-1;
 
-	lbValue cond = lb_emit_struct_ev(p, tuple_value, cond_index);
+	lbValue cond = lb_emit_tuple_ev(p, tuple_value, cond_index);
 	lb_emit_if(p, cond, body, done);
 	lb_start_block(p, body);
 
 
-	if (val0_) *val0_ = lb_emit_struct_ev(p, tuple_value, 0);
-	if (val1_) *val1_ = lb_emit_struct_ev(p, tuple_value, 1);
+	if (val0_) *val0_ = lb_emit_tuple_ev(p, tuple_value, 0);
+	if (val1_) *val1_ = lb_emit_tuple_ev(p, tuple_value, 1);
 	if (loop_) *loop_ = loop;
 	if (done_) *done_ = done;
 }
@@ -1571,6 +1543,24 @@ void lb_build_static_variables(lbProcedure *p, AstValueDecl *vd) {
 		lb_add_member(p->module, mangled_name, global_val);
 	}
 }
+void lb_append_tuple_values(lbProcedure *p, Array<lbValue> *dst_values, lbValue src_value) {
+	Type *t = src_value.type;
+	if (t->kind == Type_Tuple) {
+		lbTupleFix *tf = map_get(&p->tuple_fix_map, src_value.value);
+		if (tf) {
+			for_array(j, tf->values) {
+				array_add(dst_values, tf->values[j]);
+			}
+		} else {
+			for_array(i, t->Tuple.variables) {
+				lbValue v = lb_emit_tuple_ev(p, src_value, cast(i32)i);
+				array_add(dst_values, v);
+			}
+		}
+	} else {
+		array_add(dst_values, src_value);
+	}
+}
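
`lb_append_tuple_values` is now the single path that flattens a tuple value into its components, preferring recorded tuple-fix values over `extractvalue`. A small Odin sketch (names are illustrative) of an assignment that goes through it:

	parse :: proc(s: string) -> (value: int, ok: bool) { return len(s), true }

	assign :: proc() {
		v, ok := parse("hi")  // the RHS tuple is flattened into two init values
		_, _ = v, ok
	}
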
 
 
 void lb_build_assignment(lbProcedure *p, Array<lbAddr> &lvals, Slice<Ast *> const &values) {
@@ -1582,23 +1572,8 @@ void lb_build_assignment(lbProcedure *p, Array<lbAddr> &lvals, Slice<Ast *> cons
 
 	for_array(i, values) {
 		Ast *rhs = values[i];
-		if (is_type_tuple(type_of_expr(rhs))) {
-			lbValue init = lb_build_expr(p, rhs);
-			Type *t = init.type;
-			GB_ASSERT(t->kind == Type_Tuple);
-			for_array(i, t->Tuple.variables) {
-				lbValue v = lb_emit_struct_ev(p, init, cast(i32)i);
-				array_add(&inits, v);
-			}
-		} else {
-			auto prev_hint = lb_set_copy_elision_hint(p, lvals[inits.count], rhs);
-			lbValue init = lb_build_expr(p, rhs);
-			if (p->copy_elision_hint.used) {
-				lvals[inits.count] = {}; // zero lval
-			}
-			lb_reset_copy_elision_hint(p, prev_hint);
-			array_add(&inits, init);
-		}
+		lbValue init = lb_build_expr(p, rhs);
+		lb_append_tuple_values(p, &inits, init);
 	}
 
 	GB_ASSERT(lvals.count == inits.count);
@@ -1609,9 +1584,14 @@ void lb_build_assignment(lbProcedure *p, Array<lbAddr> &lvals, Slice<Ast *> cons
 	}
 }
 
-void lb_build_return_stmt_internal(lbProcedure *p, lbValue const &res) {
-	lbFunctionType *ft = lb_get_function_type(p->module, p, p->type);
+void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) {
+	lbFunctionType *ft = lb_get_function_type(p->module, p->type);
 	bool return_by_pointer = ft->ret.kind == lbArg_Indirect;
+	bool split_returns = ft->multiple_return_original_type != nullptr;
+
+	if (split_returns) {
+		GB_ASSERT(res.value == nullptr || !is_type_tuple(res.type));
+	}
 
 	if (return_by_pointer) {
 		if (res.value != nullptr) {
@@ -1650,7 +1630,7 @@ void lb_build_return_stmt(lbProcedure *p, Slice<Ast *> const &return_results) {
 	isize return_count = p->type->Proc.result_count;
 	isize res_count = return_results.count;
 
-	lbFunctionType *ft = lb_get_function_type(p->module, p, p->type);
+	lbFunctionType *ft = lb_get_function_type(p->module, p->type);
 	bool return_by_pointer = ft->ret.kind == lbArg_Indirect;
 
 	if (return_count == 0) {
@@ -1683,15 +1663,7 @@ void lb_build_return_stmt(lbProcedure *p, Slice<Ast *> const &return_results) {
 		if (res_count != 0) {
 			for (isize res_index = 0; res_index < res_count; res_index++) {
 				lbValue res = lb_build_expr(p, return_results[res_index]);
-				Type *t = res.type;
-				if (t->kind == Type_Tuple) {
-					for_array(i, t->Tuple.variables) {
-						lbValue v = lb_emit_struct_ev(p, res, cast(i32)i);
-						array_add(&results, v);
-					}
-				} else {
-					array_add(&results, res);
-				}
+				lb_append_tuple_values(p, &results, res);
 			}
 		} else {
 			for (isize res_index = 0; res_index < return_count; res_index++) {
@@ -1727,35 +1699,68 @@ void lb_build_return_stmt(lbProcedure *p, Slice<Ast *> const &return_results) {
 			}
 		}
 
-		Type *ret_type = p->type->Proc.results;
+		bool split_returns = ft->multiple_return_original_type != nullptr;
+		if (split_returns) {
+			auto result_values = slice_make<lbValue>(temporary_allocator(), results.count);
+			auto result_eps = slice_make<lbValue>(temporary_allocator(), results.count-1);
+
+			for_array(i, results) {
+				result_values[i] = lb_emit_conv(p, results[i], tuple->variables[i]->type);
+			}
+
+			isize param_offset = return_by_pointer ? 1 : 0;
+			param_offset += ft->original_arg_count;
+			for_array(i, result_eps) {
+				lbValue result_ep = {};
+				result_ep.value = LLVMGetParam(p->value, cast(unsigned)(param_offset+i));
+				result_ep.type = alloc_type_pointer(tuple->variables[i]->type);
+				result_eps[i] = result_ep;
+			}
+			for_array(i, result_eps) {
+				lb_emit_store(p, result_eps[i], result_values[i]);
+			}
+			if (return_by_pointer) {
+				GB_ASSERT(result_values.count-1 == result_eps.count);
+				lb_addr_store(p, p->return_ptr, result_values[result_values.count-1]);
+
+				lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr);
+				LLVMBuildRetVoid(p->builder);
+				return;
+			} else {
+				return lb_build_return_stmt_internal(p, result_values[result_values.count-1]);
+			}
 
-		// NOTE(bill): Doesn't need to be zero because it will be initialized in the loops
-		if (return_by_pointer) {
-			res = p->return_ptr.addr;
 		} else {
-			res = lb_add_local_generated(p, ret_type, false).addr;
-		}
+			Type *ret_type = p->type->Proc.results;
+
+			// NOTE(bill): Doesn't need to be zero because it will be initialized in the loops
+			if (return_by_pointer) {
+				res = p->return_ptr.addr;
+			} else {
+				res = lb_add_local_generated(p, ret_type, false).addr;
+			}
 
-		auto result_values = slice_make<lbValue>(temporary_allocator(), results.count);
-		auto result_eps = slice_make<lbValue>(temporary_allocator(), results.count);
+			auto result_values = slice_make<lbValue>(temporary_allocator(), results.count);
+			auto result_eps = slice_make<lbValue>(temporary_allocator(), results.count);
 
-		for_array(i, results) {
-			result_values[i] = lb_emit_conv(p, results[i], tuple->variables[i]->type);
-		}
-		for_array(i, results) {
-			result_eps[i] = lb_emit_struct_ep(p, res, cast(i32)i);
-		}
-		for_array(i, result_values) {
-			lb_emit_store(p, result_eps[i], result_values[i]);
-		}
+			for_array(i, results) {
+				result_values[i] = lb_emit_conv(p, results[i], tuple->variables[i]->type);
+			}
+			for_array(i, results) {
+				result_eps[i] = lb_emit_struct_ep(p, res, cast(i32)i);
+			}
+			for_array(i, result_eps) {
+				lb_emit_store(p, result_eps[i], result_values[i]);
+			}
 
-		if (return_by_pointer) {
-			lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr);
-			LLVMBuildRetVoid(p->builder);
-			return;
-		}
+			if (return_by_pointer) {
+				lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr);
+				LLVMBuildRetVoid(p->builder);
+				return;
+			}
 
-		res = lb_emit_load(p, res);
+			res = lb_emit_load(p, res);
+		}
 	}
 	lb_build_return_stmt_internal(p, res);
 }
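
A small Odin sketch (names are illustrative) of the split return path above: each `return` stores all but the last converted result through the extra pointer parameters, then returns the last one directly (or stores it through the `sret` pointer and returns void).

	minmax :: proc(a, b: int) -> (lo: int, hi: int) {
		if a < b do return a, b
		return b, a  // stores `lo` through the extra result pointer, returns `hi` as the value
	}
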

+ 88 - 36
src/llvm_backend_utility.cpp

@@ -339,16 +339,16 @@ void lb_emit_try_lhs_rhs(lbProcedure *p, Ast *arg, TypeAndValue const &tv, lbVal
 	if (is_type_tuple(value.type)) {
 		i32 n = cast(i32)(value.type->Tuple.variables.count-1);
 		if (value.type->Tuple.variables.count == 2) {
-			lhs = lb_emit_struct_ev(p, value, 0);
+			lhs = lb_emit_tuple_ev(p, value, 0);
 		} else {
 			lbAddr lhs_addr = lb_add_local_generated(p, tv.type, false);
 			lbValue lhs_ptr = lb_addr_get_ptr(p, lhs_addr);
 			for (i32 i = 0; i < n; i++) {
-				lb_emit_store(p, lb_emit_struct_ep(p, lhs_ptr, i), lb_emit_struct_ev(p, value, i));
+				lb_emit_store(p, lb_emit_struct_ep(p, lhs_ptr, i), lb_emit_tuple_ev(p, value, i));
 			}
 			lhs = lb_addr_load(p, lhs_addr);
 		}
-		rhs = lb_emit_struct_ev(p, value, n);
+		rhs = lb_emit_tuple_ev(p, value, n);
 	} else {
 		rhs = value;
 	}
@@ -436,7 +436,7 @@ lbValue lb_emit_or_else(lbProcedure *p, Ast *arg, Ast *else_expr, TypeAndValue c
 }
 
 void lb_build_return_stmt(lbProcedure *p, Slice<Ast *> const &return_results);
-void lb_build_return_stmt_internal(lbProcedure *p, lbValue const &res);
+void lb_build_return_stmt_internal(lbProcedure *p, lbValue res);
 
 lbValue lb_emit_or_return(lbProcedure *p, Ast *arg, TypeAndValue const &tv) {
 	lbValue lhs = {};
@@ -943,6 +943,54 @@ char const *llvm_type_kinds[] = {
 	"LLVMBFloatTypeKind",
 };
 
+gb_internal lbValue lb_emit_struct_ep_internal(lbProcedure *p, lbValue s, i32 index, Type *result_type) {
+	Type *t = base_type(type_deref(s.type));
+
+	i32 original_index = index;
+	index = lb_convert_struct_index(p->module, t, index);
+
+	if (lb_is_const(s)) {
+		// NOTE(bill): this cannot be replaced with lb_emit_epi
+		lbModule *m = p->module;
+		lbValue res = {};
+		LLVMValueRef indices[2] = {llvm_zero(m), LLVMConstInt(lb_type(m, t_i32), index, false)};
+		res.value = LLVMConstGEP2(lb_type(m, type_deref(s.type)), s.value, indices, gb_count_of(indices));
+		res.type = alloc_type_pointer(result_type);
+		return res;
+	} else {
+		lbValue res = {};
+		LLVMTypeRef st = lb_type(p->module, type_deref(s.type));
+		// gb_printf_err("%s\n", type_to_string(s.type));
+		// gb_printf_err("%s\n", LLVMPrintTypeToString(LLVMTypeOf(s.value)));
+		// gb_printf_err("%d\n", index);
+		GB_ASSERT_MSG(LLVMGetTypeKind(st) == LLVMStructTypeKind, "%s", llvm_type_kinds[LLVMGetTypeKind(st)]);
+		unsigned count = LLVMCountStructElementTypes(st);
+		GB_ASSERT_MSG(count >= cast(unsigned)index, "%u %d %d", count, index, original_index);
+
+		res.value = LLVMBuildStructGEP2(p->builder, st, s.value, cast(unsigned)index, "");
+		res.type = alloc_type_pointer(result_type);
+		return res;
+	}
+}
+
+lbValue lb_emit_tuple_ep(lbProcedure *p, lbValue ptr, i32 index) {
+	Type *t = type_deref(ptr.type);
+	GB_ASSERT(is_type_tuple(t));
+	Type *result_type = t->Tuple.variables[index]->type;
+
+	lbValue res = {};
+	lbTupleFix *tf = map_get(&p->tuple_fix_map, ptr.value);
+	if (tf) {
+		res = tf->values[index];
+		GB_ASSERT(are_types_identical(res.type, result_type));
+		res = lb_address_from_load_or_generate_local(p, res);
+	} else {
+		res = lb_emit_struct_ep_internal(p, ptr, index, result_type);
+	}
+	return res;
+}
+
+
 lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
 	GB_ASSERT(is_type_pointer(s.type));
 	Type *t = base_type(type_deref(s.type));
@@ -958,8 +1006,7 @@ lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
 		GB_ASSERT(index == -1);
 		return lb_emit_union_tag_ptr(p, s);
 	} else if (is_type_tuple(t)) {
-		GB_ASSERT(t->Tuple.variables.count > 0);
-		result_type = t->Tuple.variables[index]->type;
+		return lb_emit_tuple_ep(p, s, index);
 	} else if (is_type_complex(t)) {
 		Type *ft = base_complex_elem_type(t);
 		switch (index) {
@@ -1024,34 +1071,45 @@ lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
 
 	GB_ASSERT_MSG(result_type != nullptr, "%s %d", type_to_string(t), index);
 	
-	i32 original_index = index;
-	index = lb_convert_struct_index(p->module, t, index);
-	
-	if (lb_is_const(s)) {
-		// NOTE(bill): this cannot be replaced with lb_emit_epi
-		lbModule *m = p->module;
-		lbValue res = {};
-		LLVMValueRef indices[2] = {llvm_zero(m), LLVMConstInt(lb_type(m, t_i32), index, false)};
-		res.value = LLVMConstGEP2(lb_type(m, type_deref(s.type)), s.value, indices, gb_count_of(indices));
-		res.type = alloc_type_pointer(result_type);
-		return res;
+	return lb_emit_struct_ep_internal(p, s, index, result_type);
+}
+
+lbValue lb_emit_tuple_ev(lbProcedure *p, lbValue value, i32 index) {
+	Type *t = value.type;
+	GB_ASSERT(is_type_tuple(t));
+	Type *result_type = t->Tuple.variables[index]->type;
+
+	lbValue res = {};
+	lbTupleFix *tf = map_get(&p->tuple_fix_map, value.value);
+	if (tf) {
+		res = tf->values[index];
+		GB_ASSERT(are_types_identical(res.type, result_type));
 	} else {
-		lbValue res = {};
-		LLVMTypeRef st = lb_type(p->module, type_deref(s.type));
-		// gb_printf_err("%s\n", type_to_string(s.type));
-		// gb_printf_err("%s\n", LLVMPrintTypeToString(LLVMTypeOf(s.value)));
-		// gb_printf_err("%d\n", index);
-		GB_ASSERT_MSG(LLVMGetTypeKind(st) == LLVMStructTypeKind, "%s", llvm_type_kinds[LLVMGetTypeKind(st)]);
-		unsigned count = LLVMCountStructElementTypes(st);
-		GB_ASSERT_MSG(count >= cast(unsigned)index, "%u %d %d", count, index, original_index);
-		
-		res.value = LLVMBuildStructGEP2(p->builder, st, s.value, cast(unsigned)index, "");
-		res.type = alloc_type_pointer(result_type);
-		return res;
+		if (t->Tuple.variables.count == 1) {
+			GB_ASSERT(index == 0);
+			// value.type = result_type;
+			return value;
+		}
+		if (LLVMIsALoadInst(value.value)) {
+			lbValue res = {};
+			res.value = LLVMGetOperand(value.value, 0);
+			res.type = alloc_type_pointer(value.type);
+			lbValue ptr = lb_emit_struct_ep(p, res, index);
+			return lb_emit_load(p, ptr);
+		}
+
+		res.value = LLVMBuildExtractValue(p->builder, value.value, cast(unsigned)index, "");
+		res.type = result_type;
 	}
+	return res;
 }
 
 lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
+	Type *t = base_type(s.type);
+	if (is_type_tuple(t)) {
+		return lb_emit_tuple_ev(p, s, index);
+	}
+
 	if (LLVMIsALoadInst(s.value)) {
 		lbValue res = {};
 		res.value = LLVMGetOperand(s.value, 0);
@@ -1060,7 +1118,6 @@ lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
 		return lb_emit_load(p, ptr);
 	}
 
-	Type *t = base_type(s.type);
 	Type *result_type = nullptr;
 
 	switch (t->kind) {
@@ -1113,12 +1170,7 @@ lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
 		GB_PANIC("lb_emit_union_tag_value");
 
 	case Type_Tuple:
-		GB_ASSERT(t->Tuple.variables.count > 0);
-		result_type = t->Tuple.variables[index]->type;
-		if (t->Tuple.variables.count == 1) {
-			return s;
-		}
-		break;
+		return lb_emit_tuple_ev(p, s, index);
 	case Type_Slice:
 		switch (index) {
 		case 0: result_type = alloc_type_pointer(t->Slice.elem); break;
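
`lb_emit_tuple_ev` now serves every tuple element read (comma-ok expressions, range tuples, `or_return` splitting) and consults `tuple_fix_map` first, so a split return never has to materialize an aggregate. A small Odin sketch (names are illustrative):

	lookup :: proc(m: map[string]int, key: string) -> (int, bool) {
		v, ok := m[key]  // the comma-ok tuple's elements are read via lb_emit_tuple_ev
		return v, ok
	}
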

+ 2 - 2
tools/odinfmt/main.odin

@@ -55,7 +55,7 @@ format_file :: proc(filepath: string) -> (string, bool) {
 
 files: [dynamic]string;
 
-walk_files :: proc(info: os.File_Info, in_err: os.Errno) -> (err: os.Errno, skip_dir: bool) {
+walk_files :: proc(info: os.File_Info, in_err: os.Errno, user_data: rawptr) -> (err: os.Errno, skip_dir: bool) {
 	if info.is_dir {
 		return 0, false;
 	}
@@ -111,7 +111,7 @@ main :: proc() {
 			}
 		}
 	} else if os.is_dir(path) {
-		filepath.walk(path, walk_files);
+		filepath.walk(path, walk_files, nil);
 
 		for file in files {