Browse Source

Merge branch 'master' into file-tags-without-comments

Karl Zylinski 11 months ago
parent
commit
093ade0504
97 changed files with 7030 additions and 1020 deletions
  1. 2 2
      base/runtime/core_builtin.odin
  2. 7 4
      base/runtime/internal.odin
  3. 2 2
      core/bytes/bytes.odin
  4. 837 63
      core/mem/alloc.odin
  5. 912 245
      core/mem/allocators.odin
  6. 103 23
      core/mem/doc.odin
  7. 436 49
      core/mem/mem.odin
  8. 23 4
      core/mem/mutex_allocator.odin
  9. 85 11
      core/mem/raw.odin
  10. 243 104
      core/mem/rollback_stack_allocator.odin
  11. 98 21
      core/mem/tracking_allocator.odin
  12. 10 0
      core/odin/parser/parser.odin
  13. 1 1
      core/os/os_freebsd.odin
  14. 19 70
      core/os/os_js.odin
  15. 1 1
      core/os/os_netbsd.odin
  16. 7 7
      core/strings/strings.odin
  17. 42 41
      core/sync/chan/chan.odin
  18. 38 37
      core/sync/extended.odin
  19. 17 8
      core/sync/futex_darwin.odin
  20. 4 4
      core/sync/futex_freebsd.odin
  21. 4 4
      core/sync/futex_linux.odin
  22. 4 4
      core/sync/futex_netbsd.odin
  23. 4 4
      core/sync/futex_openbsd.odin
  24. 4 4
      core/sync/futex_wasm.odin
  25. 2 18
      core/sync/primitives.odin
  26. 2 2
      core/sync/primitives_atomic.odin
  27. 11 0
      core/sys/darwin/sync.odin
  28. 4 0
      core/sys/info/platform_darwin.odin
  29. 4 0
      core/testing/runner.odin
  30. 22 0
      core/testing/runner_windows.odin
  31. 11 0
      core/testing/signal_handler_libc.odin
  32. 8 4
      core/testing/testing.odin
  33. 6 6
      core/thread/thread.odin
  34. 1 0
      core/thread/thread_pool.odin
  35. 8 24
      core/thread/thread_unix.odin
  36. 3 3
      core/thread/thread_windows.odin
  37. 2 0
      src/bug_report.cpp
  38. 4 2
      src/build_settings.cpp
  39. 16 0
      src/check_builtin.cpp
  40. 6 1
      src/check_expr.cpp
  41. 14 4
      src/check_stmt.cpp
  42. 32 0
      src/check_type.cpp
  43. 52 19
      src/checker.cpp
  44. 1 0
      src/entity.cpp
  45. 10 2
      src/gb/gb.h
  46. 1 1
      src/llvm_backend.hpp
  47. 44 36
      src/llvm_backend_debug.cpp
  48. 57 5
      src/llvm_backend_proc.cpp
  49. 49 2
      src/main.cpp
  50. 20 11
      src/parser.cpp
  51. 20 13
      tests/core/flags/test_core_flags.odin
  52. 2 2
      tests/core/mem/test_mem_dynamic_pool.odin
  53. 2 0
      tests/core/normal.odin
  54. 274 0
      tests/core/sync/chan/test_core_sync_chan.odin
  55. 714 0
      tests/core/sync/test_core_sync.odin
  56. 3 0
      tests/core/sys/posix/structs.odin
  57. 18 4
      vendor/box2d/box2d.odin
  58. 4 0
      vendor/box2d/box2d_wasm.odin
  59. 2 0
      vendor/box2d/build_box2d.sh
  60. BIN
      vendor/box2d/lib/box2d_wasm.o
  61. BIN
      vendor/box2d/lib/box2d_wasm_simd.o
  62. 32 0
      vendor/box2d/wasm.Makefile
  63. 5 0
      vendor/cgltf/cgltf.odin
  64. 4 0
      vendor/cgltf/cgltf_wasm.odin
  65. BIN
      vendor/cgltf/lib/cgltf_wasm.o
  66. 4 0
      vendor/cgltf/src/Makefile
  67. 12 0
      vendor/libc/README.md
  68. 15 0
      vendor/libc/assert.odin
  69. 16 0
      vendor/libc/include/assert.h
  70. 21 0
      vendor/libc/include/math.h
  71. 47 0
      vendor/libc/include/stdio.h
  72. 19 0
      vendor/libc/include/stdlib.h
  73. 21 0
      vendor/libc/include/string.h
  74. 25 0
      vendor/libc/libc.odin
  75. 100 0
      vendor/libc/math.odin
  76. 106 0
      vendor/libc/stdio.odin
  77. 119 0
      vendor/libc/stdlib.odin
  78. 111 0
      vendor/libc/string.odin
  79. 44 13
      vendor/stb/image/stb_image.odin
  80. 5 0
      vendor/stb/image/stb_image_resize.odin
  81. 4 0
      vendor/stb/image/stb_image_wasm.odin
  82. 16 6
      vendor/stb/image/stb_image_write.odin
  83. BIN
      vendor/stb/lib/stb_image_resize_wasm.o
  84. BIN
      vendor/stb/lib/stb_image_wasm.o
  85. BIN
      vendor/stb/lib/stb_image_write_wasm.o
  86. BIN
      vendor/stb/lib/stb_rect_pack_wasm.o
  87. BIN
      vendor/stb/lib/stb_sprintf_wasm.o
  88. BIN
      vendor/stb/lib/stb_truetype_wasm.o
  89. 5 0
      vendor/stb/rect_pack/stb_rect_pack.odin
  90. 4 0
      vendor/stb/rect_pack/stb_rect_pack_wasm.odin
  91. 37 0
      vendor/stb/sprintf/stb_sprintf.odin
  92. 12 2
      vendor/stb/src/Makefile
  93. 2 0
      vendor/stb/src/stb_sprintf.c
  94. 1906 0
      vendor/stb/src/stb_sprintf.h
  95. 0 46
      vendor/stb/src/stb_truetype_wasm.c
  96. 5 2
      vendor/stb/truetype/stb_truetype.odin
  97. 1 79
      vendor/stb/truetype/stb_truetype_wasm.odin

+ 2 - 2
base/runtime/core_builtin.odin

@@ -913,7 +913,7 @@ card :: proc "contextless" (s: $S/bit_set[$E; $U]) -> int {
 
 @builtin
 @(disabled=ODIN_DISABLE_ASSERT)
-assert :: proc(condition: bool, message := "", loc := #caller_location) {
+assert :: proc(condition: bool, message := #caller_expression(condition), loc := #caller_location) {
 	if !condition {
 		// NOTE(bill): This is wrapped in a procedure call
 		// to improve performance to make the CPU not
@@ -952,7 +952,7 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! {
 
 @builtin
 @(disabled=ODIN_DISABLE_ASSERT)
-assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) {
+assert_contextless :: proc "contextless" (condition: bool, message := #caller_expression(condition), loc := #caller_location) {
 	if !condition {
 		// NOTE(bill): This is wrapped in a procedure call
 		// to improve performance to make the CPU not

+ 7 - 4
base/runtime/internal.odin

@@ -118,16 +118,15 @@ mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> r
 DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
 
 mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
-	if size == 0 {
-		return nil, nil
-	}
-	if allocator.procedure == nil {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
+	if size == 0 || allocator.procedure == nil{
 		return nil, nil
 	}
 	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
 }
 
 mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
 	}
@@ -135,6 +134,7 @@ mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, a
 }
 
 mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
 	}
@@ -174,6 +174,7 @@ mem_free_all :: #force_inline proc(allocator := context.allocator, loc := #calle
 }
 
 _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if allocator.procedure == nil {
 		return nil, nil
 	}
@@ -215,9 +216,11 @@ _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignmen
 }
 
 mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
 }
 non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
 }
 

+ 2 - 2
core/bytes/bytes.odin

@@ -334,7 +334,7 @@ Inputs:
 Returns:
 - index: The index of the byte `c`, or -1 if it was not found.
 */
-index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check {
+index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds_check {
 	i, l := 0, len(s)
 
 	// Guard against small strings.  On modern systems, it is ALWAYS
@@ -469,7 +469,7 @@ Inputs:
 Returns:
 - index: The index of the byte `c`, or -1 if it was not found.
 */
-last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check {
+last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_check {
 	i := len(s)
 
 	// Guard against small strings.  On modern systems, it is ALWAYS

File diff suppressed because it is too large
+ 837 - 63
core/mem/alloc.odin


File diff suppressed because it is too large
+ 912 - 245
core/mem/allocators.odin


+ 103 - 23
core/mem/doc.odin

@@ -1,34 +1,114 @@
 /*
-package mem implements various types of allocators.
+The `mem` package implements various allocators and provides utility procedures
+for dealing with memory, pointers and slices.
 
+The documentation below describes basic concepts, applicable to the `mem`
+package.
 
-An example of how to use the `Tracking_Allocator` to track subsequent allocations
-in your program and report leaks and bad frees:
+## Pointers, multipointers, and slices
 
-Example:
-	package foo
+A *pointer* is an abstraction of an *address*, a numeric value representing the
+location of an object in memory. That object is said to be *pointed to* by the
+pointer. To obtain the address of a pointer, cast it to `uintptr`.
 
-	import "core:mem"
-	import "core:fmt"
+A multipointer is a pointer that points to multiple objects. Unlike a pointer,
+a multipointer can be indexed, but does not have a definite length. A slice is
+a pointer that points to multiple objects equipped with the length, specifying
+the amount of objects a slice points to.
 
-	_main :: proc() {
-		// do stuff
-	}
+When an object's value is read through a pointer, that operation is called a
+*load* operation. When memory is written through a pointer, that operation is
+called a *store* operation. Both of these operations can be called a *memory
+access operation*.
 
-	main :: proc() {
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		defer mem.tracking_allocator_destroy(&track)
-		context.allocator = mem.tracking_allocator(&track)
+## Allocators
 
-		_main()
+In C and C++ memory models, allocations of objects in memory are typically
+treated individually with a generic allocator (the `malloc` procedure), which in
+some scenarios can lead to poor cache utilization, slowdowns on individual
+objects' memory management and growing complexity of the code needing to keep
+track of the pointers and their lifetimes.
 
-		for _, leak in track.allocation_map {
-			fmt.printf("%v leaked %m\n", leak.location, leak.size)
-		}
-		for bad_free in track.bad_free_array {
-			fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
-		}
-	}
+Using different kinds of *allocators* for different purposes can solve these
+problems. The allocators are typically optimized for specific use-cases and
+can potentially simplify the memory management code.
+
+For example, in the context of making a game, having an Arena allocator could
+simplify allocations of any temporary memory, because the programmer doesn't
+have to keep track of which objects need to be freed every time they are
+allocated, because at the end of every frame the whole allocator is reset to
+its initial state and all objects are freed at once.
+
+The allocators have different kinds of restrictions on object lifetimes, sizes,
+alignment and can be a significant gain, if used properly. Odin supports
+allocators on a language level.
+
+Operations such as `new`, `free` and `delete` by default will use
+`context.allocator`, which can be overridden by the user. When an override
+happens all called procedures will inherit the new context and use the same
+allocator.
+
+We will define one concept to simplify the description of some allocator-related
+procedures, which is ownership. If the memory was allocated via a specific
+allocator, that allocator is said to be the *owner* of that memory region. To
+note, unlike Rust, in Odin the memory ownership model is not strict.
+
+## Alignment
+
+An address is said to be *aligned to `N` bytes*, if the address's numeric
+value is divisible by `N`. The number `N` in this case can be referred to as
+the *alignment boundary*. Typically an alignment is a power of two integer
+value.
+
+A *natural alignment* of an object is typically equal to its size. For example
+a 16 bit integer has a natural alignment of 2 bytes. When an object is not
+located on its natural alignment boundary, accesses to that object are
+considered *unaligned*.
+
+Some machines issue a hardware **exception**, or experience **slowdowns** when a
+memory access operation occurs from an unaligned address. Examples of such
+operations are:
+
+- SIMD instructions on x86. These instructions require all memory accesses to be
+  on an address that is aligned to 16 bytes.
+- On ARM unaligned loads have an extra cycle penalty.
+
+As such, many operations that allocate memory in this package allow the caller
+to explicitly specify the alignment of allocated pointers/slices. The default
+alignment for all operations is specified in a constant `mem.DEFAULT_ALIGNMENT`.
+
+## Zero by default
+
+Whenever new memory is allocated, via an allocator, or on the stack, by default
+Odin will zero-initialize that memory, even if it wasn't explicitly
+initialized. This allows for some convenience in certain scenarios and ease of
+debugging, which will not be described in detail here.
+
+However zero-initialization can be a cause of slowdowns, when allocating large
+buffers. For this reason, allocators have `*_non_zeroed` modes of allocation
+that allow the user to request for uninitialized memory and will avoid a
+relatively expensive zero-filling of the buffer.
+
+## Naming conventions
+
+The word `size` is used to denote the **size in bytes**. The word `length` is
+used to denote the count of objects.
+
+The allocation procedures use the following conventions:
+
+- If the name contains `alloc_bytes` or `resize_bytes`, then the procedure takes
+  in slice parameters and returns slices.
+- If the procedure name contains `alloc` or `resize`, then the procedure takes
+  in a raw pointer and returns raw pointers.
+- If the procedure name contains `free_bytes`, then the procedure takes in a
+  slice.
+- If the procedure name contains `free`, then the procedure takes in a pointer.
+
+Higher-level allocation procedures follow the following naming scheme:
+
+- `new`: Allocates a single object
+- `free`: Free a single object (opposite of `new`)
+- `make`: Allocate a group of objects
+- `delete`: Free a group of objects (opposite of `make`)
 */
 package mem

+ 436 - 49
core/mem/mem.odin

@@ -3,49 +3,185 @@ package mem
 import "base:runtime"
 import "base:intrinsics"
 
-Byte     :: runtime.Byte
+/*
+The size, in bytes, of a single byte.
+
+This constant is equal to the value of `1`.
+*/
+Byte :: runtime.Byte
+
+/*
+The size, in bytes, of one kilobyte.
+
+This constant is equal to the amount of bytes in one kilobyte (also known as
+kibibyte), which is equal to 1024 bytes.
+*/
 Kilobyte :: runtime.Kilobyte
+
+/*
+The size, in bytes, of one megabyte.
+
+This constant is equal to the amount of bytes in one megabyte (also known as
+mebibyte), which is equal to 1024 kilobytes.
+*/
 Megabyte :: runtime.Megabyte
+
+/*
+The size, in bytes, of one gigabyte.
+
+This constant is equal to the amount of bytes in one gigabyte (also known as
+gibibyte), which is equal to 1024 megabytes.
+*/
 Gigabyte :: runtime.Gigabyte
+
+/*
+The size, in bytes, of one terabyte.
+
+This constant is equal to the amount of bytes in one terabyte (also known as
+tebibyte), which is equal to 1024 gigabytes.
+*/
 Terabyte :: runtime.Terabyte
+
+/*
+The size, in bytes, of one petabyte.
+
+This constant is equal to the amount of bytes in one petabyte (also known as
+pebibyte), which is equal to 1024 terabytes.
+*/
 Petabyte :: runtime.Petabyte
-Exabyte  :: runtime.Exabyte
 
+/*
+The size, in bytes, of one exabyte.
+
+This constant is equal to the amount of bytes in one exabyte (also known as
+exbibyte), which is equal to 1024 petabytes.
+*/
+Exabyte :: runtime.Exabyte
+
+/*
+Set each byte of a memory range to a specific value.
+
+This procedure copies value specified by the `value` parameter into each of the
+`len` bytes of a memory range, located at address `data`.
+
+This procedure returns the pointer to `data`.
+*/
 set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr {
 	return runtime.memset(data, i32(value), len)
 }
+
+/*
+Set each byte of a memory range to zero.
+
+This procedure copies the value `0` into the `len` bytes of a memory range,
+starting at address `data`.
+
+This procedure returns the pointer to `data`.
+*/
 zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	intrinsics.mem_zero(data, len)
 	return data
 }
+
+/*
+Set each byte of a memory range to zero.
+
+This procedure copies the value `0` into the `len` bytes of a memory range,
+starting at address `data`.
+
+This procedure returns the pointer to `data`.
+
+Unlike the `zero()` procedure, which can be optimized away or reordered by the
+compiler under certain circumstances, `zero_explicit()` procedure can not be
+optimized away or reordered with other memory access operations, and the
+compiler assumes volatile semantics of the memory.
+*/
 zero_explicit :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	// This routine tries to avoid the compiler optimizing away the call,
-	// so that it is always executed.  It is intended to provided
+	// so that it is always executed.  It is intended to provide
 	// equivalent semantics to those provided by the C11 Annex K 3.7.4.1
 	// memset_s call.
 	intrinsics.mem_zero_volatile(data, len) // Use the volatile mem_zero
 	intrinsics.atomic_thread_fence(.Seq_Cst) // Prevent reordering
 	return data
 }
+
+/*
+Zero-fill the memory of an object.
+
+This procedure sets each byte of the object pointed to by the pointer `item`
+to zero, and returns the pointer to `item`.
+*/
 zero_item :: proc "contextless" (item: $P/^$T) -> P {
 	intrinsics.mem_zero(item, size_of(T))
 	return item
 }
+
+/*
+Zero-fill the memory of the slice.
+
+This procedure sets each byte of the slice pointed to by the slice `data`
+to zero, and returns the slice `data`.
+*/
 zero_slice :: proc "contextless" (data: $T/[]$E) -> T {
 	zero(raw_data(data), size_of(E)*len(data))
 	return data
 }
 
+/*
+Copy bytes from one memory range to another.
 
+This procedure copies `len` bytes of data, from the memory range pointed to by
+the `src` pointer into the memory range pointed to by the `dst` pointer, and
+returns the `dst` pointer.
+*/
 copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 	intrinsics.mem_copy(dst, src, len)
 	return dst
 }
+
+/*
+Copy bytes between two non-overlapping memory ranges.
+
+This procedure copies `len` bytes of data, from the memory range pointed to by
+the `src` pointer into the memory range pointed to by the `dst` pointer, and
+returns the `dst` pointer.
+
+This is a slightly more optimized version of the `copy` procedure that requires
+that memory ranges specified by the parameters to this procedure are not
+overlapping. If the memory ranges specified by `dst` and `src` pointers overlap,
+the behavior of this function may be unpredictable.
+*/
 copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 	intrinsics.mem_copy_non_overlapping(dst, src, len)
 	return dst
 }
 
+/*
+Compare two memory ranges defined by slices.
+
+This procedure performs a byte-by-byte comparison between memory ranges
+specified by slices `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `min(len(a), len(b))` bytes is compared between `a` and `b`.
+  - If the byte in slice `a` is smaller than a byte in slice `b`, then comparison
+  stops and this procedure returns `-1`.
+  - If the byte in slice `a` is bigger than a byte in slice `b`, then comparison
+  stops and this procedure returns `+1`.
+  - Otherwise the comparison continues until `min(len(a), len(b))` bytes are compared.
+2. If all the bytes in the range are equal, then the lengths of the slices are
+  compared.
+  - If the length of slice `a` is smaller than the length of slice `b`, then `-1` is returned.
+  - If the length of slice `b` is smaller than the length of slice `a`, then `+1` is returned.
+  - Otherwise `0` is returned.
+*/
 @(require_results)
 compare :: proc "contextless" (a, b: []byte) -> int {
 	res := compare_byte_ptrs(raw_data(a), raw_data(b), min(len(a), len(b)))
@@ -57,16 +193,89 @@ compare :: proc "contextless" (a, b: []byte) -> int {
 	return res
 }
 
+/*
+Compare two memory ranges defined by byte pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes is compared between `a` and `b`.
+  - If the byte in `a` is smaller than a byte in `b`, then comparison stops
+  and this procedure returns `-1`.
+  - If the byte in `a` is bigger than a byte in `b`, then comparison stops
+  and this procedure returns `+1`.
+  - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
 @(require_results)
 compare_byte_ptrs :: proc "contextless" (a, b: ^byte, n: int) -> int #no_bounds_check {
 	return runtime.memory_compare(a, b, n)
 }
 
+/*
+Compare two memory ranges defined by pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes is compared between `a` and `b`.
+  - If the byte in `a` is smaller than a byte in `b`, then comparison stops
+  and this procedure returns `-1`.
+  - If the byte in `a` is bigger than a byte in `b`, then comparison stops
+  and this procedure returns `+1`.
+  - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
+@(require_results)
+compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
+	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
+}
+
+/*
+Check whether two objects are equal on binary level.
+
+This procedure checks whether the memory ranges occupied by objects `a` and
+`b` are equal. See `compare_byte_ptrs()` for how this comparison is done.
+*/
+@(require_results)
+simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
+	a, b := a, b
+	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
+}
+
+/*
+Check if the memory range defined by a slice is zero-filled.
+
+This procedure checks whether every byte, pointed to by the slice, specified
+by the parameter `data`, is zero. If all bytes of the slice are zero, this
+procedure returns `true`. Otherwise this procedure returns `false`.
+*/
 @(require_results)
 check_zero :: proc(data: []byte) -> bool {
 	return check_zero_ptr(raw_data(data), len(data))
 }
 
+/*
+Check if the memory range defined by a pointer is zero-filled.
+
+This procedure checks whether each of the `len` bytes, starting at address
+`ptr` is zero. If all bytes of this range are zero, this procedure returns
+`true`. Otherwise this procedure returns `false`.
+*/
 @(require_results)
 check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	switch {
@@ -81,57 +290,99 @@ check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	case 4: return intrinsics.unaligned_load((^u32)(ptr)) == 0
 	case 8: return intrinsics.unaligned_load((^u64)(ptr)) == 0
 	}
-
 	start := uintptr(ptr)
 	start_aligned := align_forward_uintptr(start, align_of(uintptr))
 	end := start + uintptr(len)
 	end_aligned := align_backward_uintptr(end, align_of(uintptr))
-
 	for b in start..<start_aligned {
 		if (^byte)(b)^ != 0 {
 			return false
 		}
 	}
-
 	for b := start_aligned; b < end_aligned; b += size_of(uintptr) {
 		if (^uintptr)(b)^ != 0 {
 			return false
 		}
 	}
-
 	for b in end_aligned..<end {
 		if (^byte)(b)^ != 0 {
 			return false
 		}
 	}
-
 	return true
 }
 
-@(require_results)
-simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
-	a, b := a, b
-	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
-}
+/*
+Offset a given pointer by a given amount.
 
-@(require_results)
-compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
-	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
-}
+This procedure offsets the pointer `ptr` to an object of type `T`, by the amount
+of bytes specified by `offset*size_of(T)`, and returns the resulting pointer.
 
+**Note**: Prefer to use multipointer types, if possible.
+*/
 ptr_offset :: intrinsics.ptr_offset
+
+/*
+Offset a given pointer by a given amount backwards.
+
+This procedure offsets the pointer `ptr` to an object of type `T`, by the amount
+of bytes specified by `offset*size_of(T)` in the negative direction, and
+returns the resulting pointer.
+*/
 ptr_sub :: intrinsics.ptr_sub
 
+/*
+Construct a slice from pointer and length.
+
+This procedure creates a slice, that points to `len` amount of objects located
+at an address, specified by `ptr`.
+*/
 @(require_results)
 slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T {
 	return ([^]T)(ptr)[:len]
 }
 
+/*
+Construct a byte slice from raw pointer and length.
+
+This procedure creates a byte slice, that points to `len` amount of bytes
+located at an address specified by `data`.
+*/
 @(require_results)
 byte_slice :: #force_inline proc "contextless" (data: rawptr, #any_int len: int) -> []byte {
 	return ([^]u8)(data)[:max(len, 0)]
 }
 
+/*
+Create a byte slice from pointer and length.
+
+This procedure creates a byte slice, pointing to `len` objects, starting from
+the address specified by `ptr`.
+*/
+@(require_results)
+ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte {
+	return transmute([]byte)Raw_Slice{ptr, len*size_of(T)}
+}
+
+/*
+Obtain the slice, pointing to the contents of `any`.
+
+This procedure returns the slice, pointing to the contents of the specified
+value of the `any` type.
+*/
+@(require_results)
+any_to_bytes :: proc "contextless" (val: any) -> []byte {
+	ti := type_info_of(val.id)
+	size := ti != nil ? ti.size : 0
+	return transmute([]byte)Raw_Slice{val.data, size}
+}
+
+/*
+Obtain a byte slice from any slice.
+
+This procedure returns a slice, that points to the same bytes as the slice,
+specified by `slice` and returns the resulting byte slice.
+*/
 @(require_results)
 slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte {
 	s := transmute(Raw_Slice)slice
@@ -139,6 +390,15 @@ slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte {
 	return transmute([]byte)s
 }
 
+/*
+Transmute slice to a different type.
+
+This procedure performs an operation similar to transmute, returning a slice of
+type `T` that points to the same bytes as the slice specified by `slice`
+parameter. Unlike plain transmute operation, this procedure adjusts the length
+of the resulting slice, such that the resulting slice points to the correct
+amount of objects to cover the memory region pointed to by `slice`.
+*/
 @(require_results)
 slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T {
 	when size_of(A) == 0 || size_of(B) == 0 {
@@ -150,12 +410,25 @@ slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T {
 	}
 }
 
+/*
+Obtain data and length of a slice.
+
+This procedure returns the pointer to the start of the memory region pointed to
+by slice `slice` and the length of the slice.
+*/
 @(require_results)
 slice_to_components :: proc "contextless" (slice: $E/[]$T) -> (data: ^T, len: int) {
 	s := transmute(Raw_Slice)slice
 	return (^T)(s.data), s.len
 }
 
+/*
+Create a dynamic array from slice.
+
+This procedure creates a dynamic array, using slice `backing` as the backing
+buffer for the dynamic array. The resulting dynamic array can not grow beyond
+the size of the specified slice.
+*/
 @(require_results)
 buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E {
 	return transmute([dynamic]E)Raw_Dynamic_Array{
@@ -169,19 +442,12 @@ buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E {
 	}
 }
 
-@(require_results)
-ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte {
-	return transmute([]byte)Raw_Slice{ptr, len*size_of(T)}
-}
-
-@(require_results)
-any_to_bytes :: proc "contextless" (val: any) -> []byte {
-	ti := type_info_of(val.id)
-	size := ti != nil ? ti.size : 0
-	return transmute([]byte)Raw_Slice{val.data, size}
-}
-
+/*
+Check whether a number is a power of two.
 
+This procedure checks whether a given pointer-sized unsigned integer contains
+a power-of-two value.
+*/
 @(require_results)
 is_power_of_two :: proc "contextless" (x: uintptr) -> bool {
 	if x <= 0 {
@@ -190,66 +456,167 @@ is_power_of_two :: proc "contextless" (x: uintptr) -> bool {
 	return (x & (x-1)) == 0
 }
 
-@(require_results)
-align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
-	return rawptr(align_forward_uintptr(uintptr(ptr), align))
+/*
+Check if a pointer is aligned.
+
+This procedure checks whether a pointer `x` is aligned to a boundary specified
+by `align`, and returns `true` if the pointer is aligned, and `false` otherwise.
+
+The specified alignment is a byte count and must be a power of 2.
+*/
+is_aligned :: proc "contextless" (x: rawptr, align: int) -> bool {
+	p := uintptr(x)
+	// `align` is a byte boundary, not a shift amount: mask with `align-1`.
+	return (p & (uintptr(align) - 1)) == 0
 }
 
+/*
+Align uintptr forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 align_forward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
 	assert(is_power_of_two(align))
+	return (ptr + align-1) & ~(align-1)
+}
 
-	p := ptr
-	modulo := p & (align-1)
-	if modulo != 0 {
-		p += align - modulo
-	}
-	return p
+/*
+Align pointer forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
+@(require_results)
+align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
+	return rawptr(align_forward_uintptr(uintptr(ptr), align))
 }
 
+/*
+Align int forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 align_forward_int :: proc(ptr, align: int) -> int {
 	return int(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 }
+
+/*
+Align uint forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 align_forward_uint :: proc(ptr, align: uint) -> uint {
 	return uint(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 }
 
+/*
+Align uintptr backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
-align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
-	return rawptr(align_backward_uintptr(uintptr(ptr), align))
+align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
+	assert(is_power_of_two(align))
+	return ptr & ~(align-1)
 }
 
+/*
+Align rawptr backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
-align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
-	return align_forward_uintptr(ptr - align + 1, align)
+align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
+	return rawptr(align_backward_uintptr(uintptr(ptr), align))
 }
 
+/*
+Align int backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 align_backward_int :: proc(ptr, align: int) -> int {
 	return int(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 }
+
+/*
+Align uint backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 align_backward_uint :: proc(ptr, align: uint) -> uint {
 	return uint(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 }
 
+/*
+Create a context with a given allocator.
+
+This procedure returns a copy of the current context with the allocator replaced
+by the allocator `a`.
+*/
 @(require_results)
 context_from_allocator :: proc(a: Allocator) -> type_of(context) {
 	context.allocator = a
 	return context
 }
 
+/*
+Copy the value from a pointer into a value.
+
+This procedure copies the object of type `T` pointed to by the pointer `ptr`
+into a new stack-allocated value and returns that value.
+*/
 @(require_results)
 reinterpret_copy :: proc "contextless" ($T: typeid, ptr: rawptr) -> (value: T) {
 	copy(&value, ptr, size_of(T))
 	return
 }
 
+/*
+Dynamic array with a fixed capacity buffer.
 
+This type represents dynamic arrays with a fixed-size backing buffer. When an
+allocation would exceed the maximum capacity, allocations from fixed byte
+buffers return `nil` and no error.
+*/
 Fixed_Byte_Buffer :: distinct [dynamic]byte
 
+/*
+Create a fixed byte buffer from a slice.
+*/
 @(require_results)
 make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buffer {
 	s := transmute(Raw_Slice)backing
@@ -264,40 +631,60 @@ make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buf
 	return transmute(Fixed_Byte_Buffer)d
 }
 
+/*
+General-purpose align formula.
 
-
+This procedure is equivalent to `align_forward`, but it does not require the
+alignment to be a power of two.
+*/
 @(require_results)
 align_formula :: proc "contextless" (size, align: int) -> int {
 	result := size + align-1
 	return result - result%align
 }
 
+/*
+Calculate the padding for header preceding aligned data.
+
+This procedure returns the padding, following the specified pointer `ptr` that
+will be able to fit in a header of the size `header_size`, immediately
+preceding the memory region, aligned on a boundary specified by `align`. See
+the following diagram for a visual representation.
+
+	    header size
+	    |<------>|
+	+---+--------+------------- - - -
+	    | HEADER |  DATA...
+	+---+--------+------------- - - -
+	^            ^
+	|<---------->|
+	|  padding   |
+	ptr          aligned ptr
+
+The function takes in `ptr` and `header_size`, as well as the required
+alignment for `DATA`. The return value of the function is the padding between
+`ptr` and `aligned_ptr` that will be able to fit the header.
+*/
 @(require_results)
 calc_padding_with_header :: proc "contextless" (ptr: uintptr, align: uintptr, header_size: int) -> int {
 	p, a := ptr, align
 	modulo := p & (a-1)
-
 	padding := uintptr(0)
 	if modulo != 0 {
 		padding = a - modulo
 	}
-
 	needed_space := uintptr(header_size)
 	if padding < needed_space {
 		needed_space -= padding
-
 		if needed_space & (a-1) > 0 {
 			padding += align * (1+(needed_space/align))
 		} else {
 			padding += align * (needed_space/align)
 		}
 	}
-
 	return int(padding)
 }
 
-
-
 @(require_results, deprecated="prefer 'slice.clone'")
 clone_slice :: proc(slice: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> (new_slice: T) {
 	new_slice, _ = make(T, len(slice), allocator, loc)

+ 23 - 4
core/mem/mutex_allocator.odin

@@ -3,17 +3,31 @@ package mem
 
 import "core:sync"
 
+/*
+The data for mutex allocator.
+*/
 Mutex_Allocator :: struct {
 	backing: Allocator,
 	mutex:   sync.Mutex,
 }
 
+/*
+Initialize the mutex allocator.
+
+This procedure initializes the mutex allocator using `backing_allocator` as the
+allocator that will be used to pass all allocation requests through.
+*/
 mutex_allocator_init :: proc(m: ^Mutex_Allocator, backing_allocator: Allocator) {
 	m.backing = backing_allocator
 	m.mutex = {}
 }
 
+/*
+Mutex allocator.
 
+The mutex allocator is a wrapper for allocators that is used to serialize all
+allocator requests across multiple threads.
+*/
 @(require_results)
 mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator {
 	return Allocator{
@@ -22,11 +36,16 @@ mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator {
 	}
 }
 
-mutex_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                             size, alignment: int,
-                             old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) {
+mutex_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size: int,
+	alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
 	m := (^Mutex_Allocator)(allocator_data)
-
 	sync.mutex_guard(&m.mutex)
 	return m.backing.procedure(m.backing.data, mode, size, alignment, old_memory, old_size, loc)
 }

+ 85 - 11
core/mem/raw.odin

@@ -3,26 +3,100 @@ package mem
 import "base:builtin"
 import "base:runtime"
 
-Raw_Any           :: runtime.Raw_Any
-Raw_String        :: runtime.Raw_String
-Raw_Cstring       :: runtime.Raw_Cstring
-Raw_Slice         :: runtime.Raw_Slice
+/*
+Memory layout of the `any` type.
+*/
+Raw_Any :: runtime.Raw_Any
+
+/*
+Memory layout of the `string` type.
+*/
+Raw_String :: runtime.Raw_String
+
+/*
+Memory layout of the `cstring` type.
+*/
+Raw_Cstring :: runtime.Raw_Cstring
+
+/*
+Memory layout of `[]T` types.
+*/
+Raw_Slice :: runtime.Raw_Slice
+
+/*
+Memory layout of `[dynamic]T` types.
+*/
 Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array
-Raw_Map           :: runtime.Raw_Map
-Raw_Soa_Pointer   :: runtime.Raw_Soa_Pointer
 
-Raw_Complex32     :: runtime.Raw_Complex32
-Raw_Complex64     :: runtime.Raw_Complex64
-Raw_Complex128    :: runtime.Raw_Complex128
-Raw_Quaternion64  :: runtime.Raw_Quaternion64
+/*
+Memory layout of `map[K]V` types.
+*/
+Raw_Map :: runtime.Raw_Map
+
+/*
+Memory layout of `#soa` pointer types.
+*/
+Raw_Soa_Pointer :: runtime.Raw_Soa_Pointer
+
+/*
+Memory layout of the `complex32` type.
+*/
+Raw_Complex32 :: runtime.Raw_Complex32
+
+/*
+Memory layout of the `complex64` type.
+*/
+Raw_Complex64 :: runtime.Raw_Complex64
+
+/*
+Memory layout of the `complex128` type.
+*/
+Raw_Complex128 :: runtime.Raw_Complex128
+
+/*
+Memory layout of the `quaternion64` type.
+*/
+Raw_Quaternion64 :: runtime.Raw_Quaternion64
+
+/*
+Memory layout of the `quaternion128` type.
+*/
 Raw_Quaternion128 :: runtime.Raw_Quaternion128
+
+/*
+Memory layout of the `quaternion256` type.
+*/
 Raw_Quaternion256 :: runtime.Raw_Quaternion256
-Raw_Quaternion64_Vector_Scalar  :: runtime.Raw_Quaternion64_Vector_Scalar
+
+/*
+Memory layout of the `quaternion64` type, split into vector and scalar parts.
+*/
+Raw_Quaternion64_Vector_Scalar :: runtime.Raw_Quaternion64_Vector_Scalar
+
+/*
+Memory layout of the `quaternion128` type, split into vector and scalar parts.
+*/
 Raw_Quaternion128_Vector_Scalar :: runtime.Raw_Quaternion128_Vector_Scalar
+
+/*
+Memory layout of the `quaternion256` type, split into vector and scalar parts.
+*/
 Raw_Quaternion256_Vector_Scalar :: runtime.Raw_Quaternion256_Vector_Scalar
 
+/*
+Create a value of the any type.
+
+This procedure creates a value with type `any` that points to an object with
+typeid `id` located at an address specified by `data`.
+*/
 make_any :: proc "contextless" (data: rawptr, id: typeid) -> any {
 	return transmute(any)Raw_Any{data, id}
 }
 
+/*
+Obtain pointer to the data.
+
+This procedure returns the pointer to the data of a slice, string, or a dynamic
+array.
+*/
 raw_data :: builtin.raw_data

+ 243 - 104
core/mem/rollback_stack_allocator.odin

@@ -1,52 +1,36 @@
 package mem
 
-// The Rollback Stack Allocator was designed for the test runner to be fast,
-// able to grow, and respect the Tracking Allocator's requirement for
-// individual frees. It is not overly concerned with fragmentation, however.
-//
-// It has support for expansion when configured with a block allocator and
-// limited support for out-of-order frees.
-//
-// Allocation has constant-time best and usual case performance.
-// At worst, it is linear according to the number of memory blocks.
-//
-// Allocation follows a first-fit strategy when there are multiple memory
-// blocks.
-//
-// Freeing has constant-time best and usual case performance.
-// At worst, it is linear according to the number of memory blocks and number
-// of freed items preceding the last item in a block.
-//
-// Resizing has constant-time performance, if it's the last item in a block, or
-// the new size is smaller. Naturally, this becomes linear-time if there are
-// multiple blocks to search for the pointer's owning block. Otherwise, the
-// allocator defaults to a combined alloc & free operation internally.
-//
-// Out-of-order freeing is accomplished by collapsing a run of freed items
-// from the last allocation backwards.
-//
-// Each allocation has an overhead of 8 bytes and any extra bytes to satisfy
-// the requested alignment.
-
 import "base:runtime"
 
+/*
+Rollback stack default block size.
+*/
 ROLLBACK_STACK_DEFAULT_BLOCK_SIZE :: 4 * Megabyte
 
-// This limitation is due to the size of `prev_ptr`, but it is only for the
-// head block; any allocation in excess of the allocator's `block_size` is
-// valid, so long as the block allocator can handle it.
-//
-// This is because allocations over the block size are not split up if the item
-// within is freed; they are immediately returned to the block allocator.
-ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte
+/*
+Rollback stack max head block size.
 
+This limitation is due to the size of `prev_ptr`, but it is only for the
+head block; any allocation in excess of the allocator's `block_size` is
+valid, so long as the block allocator can handle it.
+
+This is because allocations over the block size are not split up if the item
+within is freed; they are immediately returned to the block allocator.
+*/
+ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte
 
+/*
+Allocation header of the rollback stack allocator.
+*/
 Rollback_Stack_Header :: bit_field u64 {
 	prev_offset:  uintptr | 32,
 	is_free:         bool |  1,
 	prev_ptr:     uintptr | 31,
 }
 
+/*
+Block header of the rollback stack allocator.
+*/
 Rollback_Stack_Block :: struct {
 	next_block: ^Rollback_Stack_Block,
 	last_alloc: rawptr,
@@ -54,13 +38,15 @@ Rollback_Stack_Block :: struct {
 	buffer: []byte,
 }
 
+/*
+Rollback stack allocator data.
+*/
 Rollback_Stack :: struct {
 	head: ^Rollback_Stack_Block,
 	block_size: int,
 	block_allocator: Allocator,
 }
 
-
 @(private="file", require_results)
 rb_ptr_in_bounds :: proc(block: ^Rollback_Stack_Block, ptr: rawptr) -> bool {
 	start := raw_data(block.buffer)
@@ -110,6 +96,9 @@ rb_rollback_block :: proc(block: ^Rollback_Stack_Block, header: ^Rollback_Stack_
 	}
 }
 
+/*
+Free memory to a rollback stack allocator.
+*/
 @(private="file", require_results)
 rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error {
 	parent, block, header := rb_find_ptr(stack, ptr) or_return
@@ -128,6 +117,9 @@ rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error {
 	return nil
 }
 
+/*
+Free all memory owned by the rollback stack allocator.
+*/
 @(private="file")
 rb_free_all :: proc(stack: ^Rollback_Stack) {
 	for block := stack.head.next_block; block != nil; /**/ {
@@ -141,45 +133,75 @@ rb_free_all :: proc(stack: ^Rollback_Stack) {
 	stack.head.offset = 0
 }
 
-@(private="file", require_results)
-rb_resize :: proc(stack: ^Rollback_Stack, ptr: rawptr, old_size, size, alignment: int) -> (result: []byte, err: Allocator_Error) {
-	if ptr != nil {
-		if block, _, ok := rb_find_last_alloc(stack, ptr); ok {
-			// `block.offset` should never underflow because it is contingent
-			// on `old_size` in the first place, assuming sane arguments.
-			assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.")
-
-			if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) {
-				// Prevent singleton allocations from fragmenting by forbidding
-				// them to shrink, removing the possibility of overflow bugs.
-				if len(block.buffer) <= stack.block_size {
-					block.offset += cast(uintptr)size - cast(uintptr)old_size
-				}
-				#no_bounds_check return (cast([^]byte)ptr)[:size], nil
-			}
-		}
+/*
+Allocate memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	if bytes != nil {
+		zero_slice(bytes)
 	}
+	return raw_data(bytes), err
+}
 
-	result = rb_alloc(stack, size, alignment) or_return
-	runtime.mem_copy_non_overlapping(raw_data(result), ptr, old_size)
-	err = rb_free(stack, ptr)
+/*
+Allocate memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_bytes :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	if bytes != nil {
+		zero_slice(bytes)
+	}
+	return bytes, err
+}
 
-	return
+/*
+Allocate non-initialized memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	return raw_data(bytes), err
 }
 
-@(private="file", require_results)
-rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byte, err: Allocator_Error) {
+/*
+Allocate non-initialized memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_bytes_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
+	assert(size >= 0, "Size must be positive or zero.", loc)
+	assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc)
 	parent: ^Rollback_Stack_Block
 	for block := stack.head; /**/; block = block.next_block {
 		when !ODIN_DISABLE_ASSERT {
 			allocated_new_block: bool
 		}
-
 		if block == nil {
 			if stack.block_allocator.procedure == nil {
 				return nil, .Out_Of_Memory
 			}
-
 			minimum_size_required := size_of(Rollback_Stack_Header) + size + alignment - 1
 			new_block_size := max(minimum_size_required, stack.block_size)
 			block = rb_make_block(new_block_size, stack.block_allocator) or_return
@@ -188,10 +210,8 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt
 				allocated_new_block = true
 			}
 		}
-
 		start := raw_data(block.buffer)[block.offset:]
 		padding := cast(uintptr)calc_padding_with_header(cast(uintptr)start, cast(uintptr)alignment, size_of(Rollback_Stack_Header))
-
 		if block.offset + padding + cast(uintptr)size > cast(uintptr)len(block.buffer) {
 			when !ODIN_DISABLE_ASSERT {
 				if allocated_new_block {
@@ -201,54 +221,150 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt
 			parent = block
 			continue
 		}
-
 		header := cast(^Rollback_Stack_Header)(start[padding - size_of(Rollback_Stack_Header):])
 		ptr := start[padding:]
-
 		header^ = {
 			prev_offset = block.offset,
 			prev_ptr = uintptr(0) if block.last_alloc == nil else cast(uintptr)block.last_alloc - cast(uintptr)raw_data(block.buffer),
 			is_free = false,
 		}
-
 		block.last_alloc = ptr
 		block.offset += padding + cast(uintptr)size
-
 		if len(block.buffer) > stack.block_size {
 			// This block exceeds the allocator's standard block size and is considered a singleton.
 			// Prevent any further allocations on it.
 			block.offset = cast(uintptr)len(block.buffer)
 		}
-		
 		#no_bounds_check return ptr[:size], nil
 	}
-
 	return nil, .Out_Of_Memory
 }
 
+/*
+Resize an allocation owned by rollback stack allocator.
+*/
+@(require_results)
+rb_resize :: proc(
+	stack: ^Rollback_Stack,
+	old_ptr: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc)
+	if bytes != nil {
+		if old_ptr == nil {
+			zero_slice(bytes)
+		} else if size > old_size {
+			zero_slice(bytes[old_size:])
+		}
+	}
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator.
+*/
+@(require_results)
+rb_resize_bytes :: proc(
+	stack: ^Rollback_Stack,
+	old_memory: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]u8, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, old_memory, size, alignment, loc)
+	if bytes != nil {
+		if old_memory == nil {
+			zero_slice(bytes)
+		} else if size > len(old_memory) {
+			zero_slice(bytes[len(old_memory):])
+		}
+	}
+	return bytes, err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator without explicit
+zero-initialization.
+*/
+@(require_results)
+rb_resize_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	old_ptr: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc)
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator without explicit
+zero-initialization.
+
+If `old_memory` is found as the last allocation of a block and the new size
+still fits in that block, the allocation is resized in place. Otherwise a new
+allocation of `size` bytes is made, the old contents are copied into it
+(truncated to `size` bytes when shrinking), and the old allocation is freed.
+*/
+@(require_results)
+rb_resize_bytes_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	old_memory: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
+	old_size := len(old_memory)
+	ptr := raw_data(old_memory)
+	assert(size >= 0, "Size must be positive or zero.", loc)
+	assert(old_size >= 0, "Old size must be positive or zero.", loc)
+	assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc)
+	if ptr != nil {
+		if block, _, ok := rb_find_last_alloc(stack, ptr); ok {
+			// `block.offset` should never underflow because it is contingent
+			// on `old_size` in the first place, assuming sane arguments.
+			assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.")
+			if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) {
+				// Prevent singleton allocations from fragmenting by forbidding
+				// them to shrink, removing the possibility of overflow bugs.
+				if len(block.buffer) <= stack.block_size {
+					block.offset += cast(uintptr)size - cast(uintptr)old_size
+				}
+				#no_bounds_check return (ptr)[:size], nil
+			}
+		}
+	}
+	result = rb_alloc_bytes_non_zeroed(stack, size, alignment, loc) or_return
+	// Copy only what fits: when shrinking an allocation that could not be
+	// resized in place, the new region is smaller than the old one.
+	runtime.mem_copy_non_overlapping(raw_data(result), ptr, min(old_size, size))
+	// NOTE(review): this is also reached when `old_memory` is nil; confirm
+	// that `rb_free` handles a nil pointer the way callers expect.
+	err = rb_free(stack, ptr)
+	return
+}
+
 @(private="file", require_results)
 rb_make_block :: proc(size: int, allocator: Allocator) -> (block: ^Rollback_Stack_Block, err: Allocator_Error) {
 	buffer := runtime.mem_alloc(size_of(Rollback_Stack_Block) + size, align_of(Rollback_Stack_Block), allocator) or_return
-
 	block = cast(^Rollback_Stack_Block)raw_data(buffer)
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
 	return
 }
 
-
+/*
+Initialize the rollback stack allocator using a fixed backing buffer.
+*/
 rollback_stack_init_buffered :: proc(stack: ^Rollback_Stack, buffer: []byte, location := #caller_location) {
 	MIN_SIZE :: size_of(Rollback_Stack_Block) + size_of(Rollback_Stack_Header) + size_of(rawptr)
 	assert(len(buffer) >= MIN_SIZE, "User-provided buffer to Rollback Stack Allocator is too small.", location)
-
 	block := cast(^Rollback_Stack_Block)raw_data(buffer)
 	block^ = {}
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
-
 	stack^ = {}
 	stack.head = block
 	stack.block_size = len(block.buffer)
 }
 
+/*
+Initialize the rollback stack allocator using a backing block allocator.
+*/
 rollback_stack_init_dynamic :: proc(
 	stack: ^Rollback_Stack,
 	block_size : int = ROLLBACK_STACK_DEFAULT_BLOCK_SIZE,
@@ -261,22 +377,25 @@ rollback_stack_init_dynamic :: proc(
 		// size is insufficient; check only on platforms with big enough ints.
 		assert(block_size <= ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE, "Rollback Stack Allocators cannot support head blocks larger than 2 gigabytes.", location)
 	}
-
 	block := rb_make_block(block_size, block_allocator) or_return
-
 	stack^ = {}
 	stack.head = block
 	stack.block_size = block_size
 	stack.block_allocator = block_allocator
-
 	return nil
 }
 
+/*
+Initialize the rollback stack.
+*/
 rollback_stack_init :: proc {
 	rollback_stack_init_buffered,
 	rollback_stack_init_dynamic,
 }
 
+/*
+Destroy a rollback stack.
+*/
 rollback_stack_destroy :: proc(stack: ^Rollback_Stack) {
 	if stack.block_allocator.procedure != nil {
 		rb_free_all(stack)
@@ -285,6 +404,37 @@ rollback_stack_destroy :: proc(stack: ^Rollback_Stack) {
 	stack^ = {}
 }
 
+/*
+Rollback stack allocator.
+
+The Rollback Stack Allocator was designed for the test runner to be fast,
+able to grow, and respect the Tracking Allocator's requirement for
+individual frees. It is not overly concerned with fragmentation, however.
+
+It has support for expansion when configured with a block allocator and
+limited support for out-of-order frees.
+
+Allocation has constant-time best and usual case performance.
+At worst, it is linear according to the number of memory blocks.
+
+Allocation follows a first-fit strategy when there are multiple memory
+blocks.
+
+Freeing has constant-time best and usual case performance.
+At worst, it is linear according to the number of memory blocks and number
+of freed items preceding the last item in a block.
+
+Resizing has constant-time performance, if it's the last item in a block, or
+the new size is smaller. Naturally, this becomes linear-time if there are
+multiple blocks to search for the pointer's owning block. Otherwise, the
+allocator defaults to a combined alloc & free operation internally.
+
+Out-of-order freeing is accomplished by collapsing a run of freed items
+from the last allocation backwards.
+
+Each allocation has an overhead of 8 bytes and any extra bytes to satisfy
+the requested alignment.
+*/
 @(require_results)
 rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator {
 	return Allocator {
@@ -294,48 +444,37 @@ rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator {
 }
 
 @(require_results)
-rollback_stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                                      size, alignment: int,
-                                      old_memory: rawptr, old_size: int, location := #caller_location,
+rollback_stack_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size, alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
 ) -> (result: []byte, err: Allocator_Error) {
 	stack := cast(^Rollback_Stack)allocator_data
-
 	switch mode {
-	case .Alloc, .Alloc_Non_Zeroed:
-		assert(size >= 0, "Size must be positive or zero.", location)
-		assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location)
-		result = rb_alloc(stack, size, alignment) or_return
-
-		if mode == .Alloc {
-			zero_slice(result)
-		}
-
+	case .Alloc:
+		return rb_alloc_bytes(stack, size, alignment, loc)
+	case .Alloc_Non_Zeroed:
+		return rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
 	case .Free:
-		err = rb_free(stack, old_memory)
-
+		return nil, rb_free(stack, old_memory)
 	case .Free_All:
 		rb_free_all(stack)
-
-	case .Resize, .Resize_Non_Zeroed:
-		assert(size >= 0, "Size must be positive or zero.", location)
-		assert(old_size >= 0, "Old size must be positive or zero.", location)
-		assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location)
-		result = rb_resize(stack, old_memory, old_size, size, alignment) or_return
-
-		#no_bounds_check if mode == .Resize && size > old_size {
-			zero_slice(result[old_size:])
-		}
-
+		return nil, nil
+	case .Resize:
+		return rb_resize_bytes(stack, byte_slice(old_memory, old_size), size, alignment, loc)
+	case .Resize_Non_Zeroed:
+		return rb_resize_bytes_non_zeroed(stack, byte_slice(old_memory, old_size), size, alignment, loc)
 	case .Query_Features:
 		set := (^Allocator_Mode_Set)(old_memory)
 		if set != nil {
 			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed}
 		}
 		return nil, nil
-
 	case .Query_Info:
 		return nil, .Mode_Not_Implemented
 	}
-
 	return
 }

+ 98 - 21
core/mem/tracking_allocator.odin

@@ -4,50 +4,85 @@ package mem
 import "base:runtime"
 import "core:sync"
 
+/*
+Allocation entry for the tracking allocator.
+
+This structure stores the data related to an allocation.
+*/
 Tracking_Allocator_Entry :: struct {
-	memory:    rawptr,
-	size:      int,
+	// Pointer to an allocated region.
+	memory: rawptr,
+	// Size of the allocated memory region.
+	size: int,
+	// Requested alignment.
 	alignment: int,
-	mode:      Allocator_Mode,
-	err:       Allocator_Error,
+	// Mode of the operation.
+	mode: Allocator_Mode,
+	// Error.
+	err: Allocator_Error,
+	// Location of the allocation.
 	location:  runtime.Source_Code_Location,
 }
+
+/*
+Bad free entry for a tracking allocator.
+*/
 Tracking_Allocator_Bad_Free_Entry :: struct {
-	memory:   rawptr,
+	// Pointer, on which free operation was called.
+	memory: rawptr,
+	// The source location of where the operation was called.
 	location: runtime.Source_Code_Location,
 }
+
+/*
+Tracking allocator data.
+*/
 Tracking_Allocator :: struct {
-	backing:           Allocator,
-	allocation_map:    map[rawptr]Tracking_Allocator_Entry,
-	bad_free_array:    [dynamic]Tracking_Allocator_Bad_Free_Entry,
-	mutex:             sync.Mutex,
+	backing: Allocator,
+	allocation_map: map[rawptr]Tracking_Allocator_Entry,
+	bad_free_array: [dynamic]Tracking_Allocator_Bad_Free_Entry,
+	mutex: sync.Mutex,
 	clear_on_free_all: bool,
-
-	total_memory_allocated:   i64,
-	total_allocation_count:   i64,
-	total_memory_freed:       i64,
-	total_free_count:         i64,
-	peak_memory_allocated:    i64,
+	total_memory_allocated: i64,
+	total_allocation_count: i64,
+	total_memory_freed: i64,
+	total_free_count: i64,
+	peak_memory_allocated: i64,
 	current_memory_allocated: i64,
 }
 
+/*
+Initialize the tracking allocator.
+
+This procedure initializes the tracking allocator `t` with a backing allocator
+specified with `backing_allocator`. The `internals_allocator` will be used to
+allocate the tracked data.
+*/
 tracking_allocator_init :: proc(t: ^Tracking_Allocator, backing_allocator: Allocator, internals_allocator := context.allocator) {
 	t.backing = backing_allocator
 	t.allocation_map.allocator = internals_allocator
 	t.bad_free_array.allocator = internals_allocator
-
 	if .Free_All in query_features(t.backing) {
 		t.clear_on_free_all = true
 	}
 }
 
+/*
+Destroy the tracking allocator.
+*/
 tracking_allocator_destroy :: proc(t: ^Tracking_Allocator) {
 	delete(t.allocation_map)
 	delete(t.bad_free_array)
 }
 
+/*
+Clear the tracking allocator.
+
+This procedure clears the tracked data from a tracking allocator.
 
-// Clear only the current allocation data while keeping the totals intact.
+**Note**: This procedure clears only the current allocation data while keeping
+the totals intact.
+*/
 tracking_allocator_clear :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_lock(&t.mutex)
 	clear(&t.allocation_map)
@@ -56,7 +91,11 @@ tracking_allocator_clear :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_unlock(&t.mutex)
 }
 
-// Reset all of a Tracking Allocator's allocation data back to zero.
+/*
+Reset the tracking allocator.
+
+Reset all of a Tracking Allocator's allocation data back to zero.
+*/
 tracking_allocator_reset :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_lock(&t.mutex)
 	clear(&t.allocation_map)
@@ -70,6 +109,39 @@ tracking_allocator_reset :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_unlock(&t.mutex)
 }
 
+/*
+Tracking allocator.
+
+The tracking allocator is an allocator wrapper that tracks memory allocations.
+This allocator stores all the allocations in a map. Whenever a pointer that's
+not inside of the map is freed, an entry is added to `bad_free_array`.
+
+An example of how to use the `Tracking_Allocator` to track subsequent allocations
+in your program and report leaks and bad frees:
+
+Example:
+
+	package foo
+
+	import "core:mem"
+	import "core:fmt"
+
+	main :: proc() {
+		track: mem.Tracking_Allocator
+		mem.tracking_allocator_init(&track, context.allocator)
+		defer mem.tracking_allocator_destroy(&track)
+		context.allocator = mem.tracking_allocator(&track)
+
+		do_stuff()
+
+		for _, leak in track.allocation_map {
+			fmt.printf("%v leaked %m\n", leak.location, leak.size)
+		}
+		for bad_free in track.bad_free_array {
+			fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
+		}
+	}
+*/
 @(require_results)
 tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator {
 	return Allocator{
@@ -78,9 +150,14 @@ tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator {
 	}
 }
 
-tracking_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                                size, alignment: int,
-                                old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) {
+tracking_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size, alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
 	track_alloc :: proc(data: ^Tracking_Allocator, entry: ^Tracking_Allocator_Entry) {
 		data.total_memory_allocated += i64(entry.size)
 		data.total_allocation_count += 1

+ 10 - 0
core/odin/parser/parser.odin

@@ -2302,6 +2302,16 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 			bd.name = name.text
 			return bd
 
+		case "caller_expression":
+			bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name))
+			bd.tok  = tok
+			bd.name = name.text
+
+			if peek_token_kind(p, .Open_Paren) {
+				return parse_call_expr(p, bd)
+			}
+			return bd
+
 		case "location", "exists", "load", "load_directory", "load_hash", "hash", "assert", "panic", "defined", "config":
 			bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name))
 			bd.tok  = tok

+ 1 - 1
core/os/os_freebsd.odin

@@ -920,7 +920,7 @@ get_page_size :: proc() -> int {
 _processor_core_count :: proc() -> int {
 	count : int = 0
 	count_size := size_of(count)
-	if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 {
+	if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 {
 		if count > 0 {
 			return count
 		}

+ 19 - 70
core/os/os_js.odin

@@ -3,33 +3,38 @@ package os
 
 import "base:runtime"
 
+foreign import "odin_env"
+
 @(require_results)
 is_path_separator :: proc(c: byte) -> bool {
 	return c == '/' || c == '\\'
 }
 
+Handle :: distinct u32
+
+stdout: Handle = 1
+stderr: Handle = 2
+
 @(require_results)
 open :: proc(path: string, mode: int = O_RDONLY, perm: int = 0) -> (Handle, Error) {
 	unimplemented("core:os procedure not supported on JS target")
 }
 
 close :: proc(fd: Handle) -> Error {
-	unimplemented("core:os procedure not supported on JS target")
+	return nil
 }
 
 flush :: proc(fd: Handle) -> (err: Error) {
-	unimplemented("core:os procedure not supported on JS target")
+	return nil
 }
 
-
-
 write :: proc(fd: Handle, data: []byte) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-@(private="file")
-read_console :: proc(handle: Handle, b: []byte) -> (n: int, err: Error) {
-	unimplemented("core:os procedure not supported on JS target")
+	foreign odin_env {
+		@(link_name="write")
+		_write :: proc "contextless" (fd: Handle, p: []byte) ---
+	}
+	_write(fd, data)
+	return len(data), nil
 }
 
 read :: proc(fd: Handle, data: []byte) -> (int, Error) {
@@ -45,19 +50,6 @@ file_size :: proc(fd: Handle) -> (i64, Error) {
 	unimplemented("core:os procedure not supported on JS target")
 }
 
-
-@(private)
-MAX_RW :: 1<<30
-
-@(private)
-pread :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-@(private)
-pwrite :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
 read_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error) {
 	unimplemented("core:os procedure not supported on JS target")
 }
@@ -65,16 +57,6 @@ write_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error)
 	unimplemented("core:os procedure not supported on JS target")
 }
 
-stdout: Handle = 1
-stderr: Handle = 2
-
-@(require_results)
-get_std_handle :: proc "contextless" (h: uint) -> Handle {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-
 @(require_results)
 exists :: proc(path: string) -> bool {
 	unimplemented("core:os procedure not supported on JS target")
@@ -90,9 +72,6 @@ is_dir :: proc(path: string) -> bool {
 	unimplemented("core:os procedure not supported on JS target")
 }
 
-// NOTE(tetra): GetCurrentDirectory is not thread safe with SetCurrentDirectory and GetFullPathName
-//@private cwd_lock := win32.SRWLOCK{} // zero is initialized
-
 @(require_results)
 get_current_directory :: proc(allocator := context.allocator) -> string {
 	unimplemented("core:os procedure not supported on JS target")
@@ -118,18 +97,6 @@ remove_directory :: proc(path: string) -> (err: Error) {
 }
 
 
-
-@(private, require_results)
-is_abs :: proc(path: string) -> bool {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-@(private, require_results)
-fix_long_path :: proc(path: string) -> string {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-
 link :: proc(old_name, new_name: string) -> (err: Error) {
 	unimplemented("core:os procedure not supported on JS target")
 }
@@ -169,7 +136,6 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F
 	unimplemented("core:os procedure not supported on JS target")
 }
 
-Handle    :: distinct uintptr
 File_Time :: distinct u64
 
 _Platform_Error :: enum i32 {
@@ -254,12 +220,7 @@ WSAECONNRESET             :: Platform_Error.WSAECONNRESET
 ERROR_FILE_IS_PIPE        :: General_Error.File_Is_Pipe
 ERROR_FILE_IS_NOT_DIR     :: General_Error.Not_Dir
 
-// "Argv" arguments converted to Odin strings
-args := _alloc_command_line_arguments()
-
-
-
-
+args: []string
 
 @(require_results)
 last_write_time :: proc(fd: Handle) -> (File_Time, Error) {
@@ -279,26 +240,14 @@ get_page_size :: proc() -> int {
 
 @(private, require_results)
 _processor_core_count :: proc() -> int {
-	unimplemented("core:os procedure not supported on JS target")
+	return 1
 }
 
 exit :: proc "contextless" (code: int) -> ! {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
+	unimplemented_contextless("core:os procedure not supported on JS target")
 }
 
-
-
 @(require_results)
 current_thread_id :: proc "contextless" () -> int {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
+	return 0
 }
-
-
-
-@(require_results)
-_alloc_command_line_arguments :: proc() -> []string {
-	return nil
-}
-

+ 1 - 1
core/os/os_netbsd.odin

@@ -978,7 +978,7 @@ get_page_size :: proc() -> int {
 _processor_core_count :: proc() -> int {
 	count : int = 0
 	count_size := size_of(count)
-	if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 {
+	if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 {
 		if count > 0 {
 			return count
 		}

+ 7 - 7
core/strings/strings.odin

@@ -93,7 +93,7 @@ Inputs:
 Returns:
 - res: A string created from the null-terminated byte pointer and length
 */
-string_from_null_terminated_ptr :: proc(ptr: [^]byte, len: int) -> (res: string) {
+string_from_null_terminated_ptr :: proc "contextless" (ptr: [^]byte, len: int) -> (res: string) {
 	s := string(ptr[:len])
 	s = truncate_to_byte(s, 0)
 	return s
@@ -139,7 +139,7 @@ NOTE: Failure to find the byte results in returning the entire string.
 Returns:
 - res: The truncated string
 */
-truncate_to_byte :: proc(str: string, b: byte) -> (res: string) {
+truncate_to_byte :: proc "contextless" (str: string, b: byte) -> (res: string) {
 	n := index_byte(str, b)
 	if n < 0 {
 		n = len(str)
@@ -261,7 +261,7 @@ Inputs:
 Returns:
 - result: `-1` if `lhs` comes first, `1` if `rhs` comes first, or `0` if they are equal
 */
-compare :: proc(lhs, rhs: string) -> (result: int) {
+compare :: proc "contextless" (lhs, rhs: string) -> (result: int) {
 	return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs)
 }
 /*
@@ -1447,7 +1447,7 @@ Output:
 	-1
 
 */
-index_byte :: proc(s: string, c: byte) -> (res: int) {
+index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) {
 	return #force_inline bytes.index_byte(transmute([]u8)s, c)
 }
 /*
@@ -1482,7 +1482,7 @@ Output:
 	-1
 
 */
-last_index_byte :: proc(s: string, c: byte) -> (res: int) {
+last_index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) {
 	return #force_inline bytes.last_index_byte(transmute([]u8)s, c)
 }
 /*
@@ -1576,8 +1576,8 @@ Output:
 	-1
 
 */
-index :: proc(s, substr: string) -> (res: int) {
-	hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
+index :: proc "contextless" (s, substr: string) -> (res: int) {
+	hash_str_rabin_karp :: proc "contextless" (s: string) -> (hash: u32 = 0, pow: u32 = 1) {
 		for i := 0; i < len(s); i += 1 {
 			hash = hash*PRIME_RABIN_KARP + u32(s[i])
 		}

+ 42 - 41
core/sync/chan/chan.odin

@@ -22,19 +22,17 @@ Raw_Chan :: struct {
 	allocator:       runtime.Allocator,
 	allocation_size: int,
 	msg_size:        u16,
-	closed:          b16, // atomic
+	closed:          b16, // guarded by `mutex`
 	mutex:           sync.Mutex,
 	r_cond:          sync.Cond,
 	w_cond:          sync.Cond,
-	r_waiting:       int,  // atomic
-	w_waiting:       int,  // atomic
+	r_waiting:       int,  // guarded by `mutex`
+	w_waiting:       int,  // guarded by `mutex`
 
 	// Buffered
 	queue: ^Raw_Queue,
 
 	// Unbuffered
-	r_mutex:         sync.Mutex,
-	w_mutex:         sync.Mutex,
 	unbuffered_data: rawptr,
 }
 
@@ -164,27 +162,30 @@ send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool) {
 	}
 	if c.queue != nil { // buffered
 		sync.guard(&c.mutex)
-		for c.queue.len == c.queue.cap {
-			sync.atomic_add(&c.w_waiting, 1)
+		for !c.closed && c.queue.len == c.queue.cap {
+			c.w_waiting += 1
 			sync.wait(&c.w_cond, &c.mutex)
-			sync.atomic_sub(&c.w_waiting, 1)
+			c.w_waiting -= 1
+		}
+
+		if c.closed {
+			return false
 		}
 
 		ok = raw_queue_push(c.queue, msg_in)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 		}
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.w_mutex)
 		sync.guard(&c.mutex)
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return false
 		}
 
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
-		sync.atomic_add(&c.w_waiting, 1)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		c.w_waiting += 1
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 		}
 		sync.wait(&c.w_cond, &c.mutex)
@@ -201,13 +202,13 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) {
 	if c.queue != nil { // buffered
 		sync.guard(&c.mutex)
 		for c.queue.len == 0 {
-			if sync.atomic_load(&c.closed) {
+			if c.closed {
 				return
 			}
 
-			sync.atomic_add(&c.r_waiting, 1)
+			c.r_waiting += 1
 			sync.wait(&c.r_cond, &c.mutex)
-			sync.atomic_sub(&c.r_waiting, 1)
+			c.r_waiting -= 1
 		}
 
 		msg := raw_queue_pop(c.queue)
@@ -215,27 +216,26 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) {
 			mem.copy(msg_out, msg, int(c.msg_size))
 		}
 
-		if sync.atomic_load(&c.w_waiting) > 0 {
+		if c.w_waiting > 0 {
 			sync.signal(&c.w_cond)
 		}
 		ok = true
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.r_mutex)
 		sync.guard(&c.mutex)
 
-		for !sync.atomic_load(&c.closed) &&
-		    sync.atomic_load(&c.w_waiting) == 0 {
-			sync.atomic_add(&c.r_waiting, 1)
+		for !c.closed &&
+			c.w_waiting == 0 {
+			c.r_waiting += 1
 			sync.wait(&c.r_cond, &c.mutex)
-			sync.atomic_sub(&c.r_waiting, 1)
+			c.r_waiting -= 1
 		}
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return
 		}
 
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
-		sync.atomic_sub(&c.w_waiting, 1)
+		c.w_waiting -= 1
 
 		sync.signal(&c.w_cond)
 		ok = true
@@ -255,21 +255,24 @@ try_send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool)
 			return false
 		}
 
+		if c.closed {
+			return false
+		}
+
 		ok = raw_queue_push(c.queue, msg_in)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 		}
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.w_mutex)
 		sync.guard(&c.mutex)
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return false
 		}
 
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
-		sync.atomic_add(&c.w_waiting, 1)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		c.w_waiting += 1
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 		}
 		sync.wait(&c.w_cond, &c.mutex)
@@ -294,21 +297,19 @@ try_recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> bool {
 			mem.copy(msg_out, msg, int(c.msg_size))
 		}
 
-		if sync.atomic_load(&c.w_waiting) > 0 {
+		if c.w_waiting > 0 {
 			sync.signal(&c.w_cond)
 		}
 		return true
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.r_mutex)
 		sync.guard(&c.mutex)
 
-		if sync.atomic_load(&c.closed) ||
-		   sync.atomic_load(&c.w_waiting) == 0 {
+		if c.closed || c.w_waiting == 0 {
 			return false
 		}
 
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
-		sync.atomic_sub(&c.w_waiting, 1)
+		c.w_waiting -= 1
 
 		sync.signal(&c.w_cond)
 		return true
@@ -351,10 +352,10 @@ close :: proc "contextless" (c: ^Raw_Chan) -> bool {
 		return false
 	}
 	sync.guard(&c.mutex)
-	if sync.atomic_load(&c.closed) {
+	if c.closed {
 		return false
 	}
-	sync.atomic_store(&c.closed, true)
+	c.closed = true
 	sync.broadcast(&c.r_cond)
 	sync.broadcast(&c.w_cond)
 	return true
@@ -366,7 +367,7 @@ is_closed :: proc "contextless" (c: ^Raw_Chan) -> bool {
 		return true
 	}
 	sync.guard(&c.mutex)
-	return bool(sync.atomic_load(&c.closed))
+	return bool(c.closed)
 }
 
 
@@ -423,9 +424,9 @@ raw_queue_pop :: proc "contextless" (q: ^Raw_Queue) -> (data: rawptr) {
 can_recv :: proc "contextless" (c: ^Raw_Chan) -> bool {
 	sync.guard(&c.mutex)
 	if is_buffered(c) {
-		return len(c) > 0
+		return c.queue.len > 0
 	}
-	return sync.atomic_load(&c.w_waiting) > 0
+	return c.w_waiting > 0
 }
 
 
@@ -435,7 +436,7 @@ can_send :: proc "contextless" (c: ^Raw_Chan) -> bool {
 	if is_buffered(c) {
 		return c.queue.len < c.queue.cap
 	}
-	return sync.atomic_load(&c.r_waiting) > 0
+	return c.w_waiting == 0
 }
 
 
@@ -484,4 +485,4 @@ select_raw :: proc "odin" (recvs: []^Raw_Chan, sends: []^Raw_Chan, send_msgs: []
 		ok = send_raw(sends[sel.idx], send_msgs[sel.idx])
 	}
 	return
-}
+}

+ 38 - 37
core/sync/extended.odin

@@ -8,7 +8,7 @@ _ :: vg
 Wait group.
 
 Wait group is a synchronization primitive used by the waiting thread to wait,
-until a all working threads finish work.
+until all working threads finish work.
 
 The waiting thread first sets the number of working threads it will expect to
 wait for using `wait_group_add` call, and start waiting using `wait_group_wait`
@@ -35,7 +35,7 @@ Wait_Group :: struct #no_copy {
 /*
 Increment an internal counter of a wait group.
 
-This procedure atomicaly increments a number to the specified wait group's
+This procedure atomically increments a number to the specified wait group's
 internal counter by a specified amount. This operation can be done on any
 thread.
 */
@@ -48,12 +48,12 @@ wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 
 	atomic_add(&wg.counter, delta)
 	if wg.counter < 0 {
-		_panic("sync.Wait_Group negative counter")
+		panic_contextless("sync.Wait_Group negative counter")
 	}
 	if wg.counter == 0 {
 		cond_broadcast(&wg.cond)
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 }
@@ -81,7 +81,7 @@ wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	if wg.counter != 0 {
 		cond_wait(&wg.cond, &wg.mutex)
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 }
@@ -105,7 +105,7 @@ wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: t
 			return false
 		}
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 	}
 	return true
@@ -121,7 +121,7 @@ When `barrier_wait` procedure is called by any thread, that thread will block
 the execution, until all threads associated with the barrier reach the same
 point of execution and also call `barrier_wait`.
 
-when barrier is initialized, a `thread_count` parameter is passed, signifying
+When a barrier is initialized, a `thread_count` parameter is passed, signifying
 the amount of participant threads of the barrier. The barrier also keeps track
 of an internal atomic counter. When a thread calls `barrier_wait`, the internal
 counter is incremented. When the internal counter reaches `thread_count`, it is
@@ -208,7 +208,7 @@ Represents a thread synchronization primitive that, when signalled, releases one
 single waiting thread and then resets automatically to a state where it can be
 signalled again.
 
-When a thread calls `auto_reset_event_wait`, it's execution will be blocked,
+When a thread calls `auto_reset_event_wait`, its execution will be blocked,
 until the event is signalled by another thread. The call to
 `auto_reset_event_signal` wakes up exactly one thread waiting for the event.
 */
@@ -228,15 +228,15 @@ thread.
 */
 auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_load_explicit(&e.status, .Relaxed)
+	new_status := old_status + 1 if old_status < 1 else 1
 	for {
-		new_status := old_status + 1 if old_status < 1 else 1
 		if _, ok := atomic_compare_exchange_weak_explicit(&e.status, old_status, new_status, .Release, .Relaxed); ok {
 			break
 		}
-
-		if old_status < 0 {
-			sema_post(&e.sema)
-		}
+		cpu_relax()
+	}
+	if old_status < 0 {
+		sema_post(&e.sema)
 	}
 }
 
@@ -297,7 +297,7 @@ waiting to acquire the lock, exactly one of those threads is unblocked and
 allowed into the critical section.
 */
 ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
-	atomic_add_explicit(&m.serving, 1, .Relaxed)
+	atomic_add_explicit(&m.serving, 1, .Release)
 }
 
 /*
@@ -331,8 +331,8 @@ Benaphore.
 
 A benaphore is a combination of an atomic variable and a semaphore that can
 improve locking efficiency in a no-contention system. Acquiring a benaphore
-lock doesn't call into an internal semaphore, if no other thread in a middle of
-a critical section.
+lock doesn't call into an internal semaphore, if no other thread is in the
+middle of a critical section.
 
 Once a lock on a benaphore is acquired by a thread, no other thread is allowed
 into any critical sections, associated with the same benaphore, until the lock
@@ -355,7 +355,7 @@ from entering any critical sections associated with the same benaphore, until
 until the lock is released.
 */
 benaphore_lock :: proc "contextless" (b: ^Benaphore) {
-	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
+	if atomic_add_explicit(&b.counter, 1, .Acquire) > 0 {
 		sema_wait(&b.sema)
 	}
 }
@@ -381,10 +381,10 @@ Release a lock on a benaphore.
 
 This procedure releases a lock on the specified benaphore. If any of the threads
 are waiting on the lock, exactly one thread is allowed into a critical section
-associated with the same banaphore.
+associated with the same benaphore.
 */
 benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
-	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
+	if atomic_sub_explicit(&b.counter, 1, .Release) > 1 {
 		sema_post(&b.sema)
 	}
 }
@@ -418,8 +418,8 @@ benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool {
 /*
 Recursive benaphore.
 
-Recurisve benaphore is just like a plain benaphore, except it allows reentrancy
-into the critical section.
+A recursive benaphore is just like a plain benaphore, except it allows
+reentrancy into the critical section.
 
 When a lock is acquired on a benaphore, all other threads attempting to
 acquire a lock on the same benaphore will be blocked from any critical sections,
@@ -449,13 +449,15 @@ recursive benaphore, until the lock is released.
 */
 recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
-	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
-		if tid != b.owner {
-			sema_wait(&b.sema)
+	check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) {
+		atomic_add_explicit(&b.counter, 1, .Relaxed)
+		if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok {
+			break check_owner
 		}
+		sema_wait(&b.sema)
+		atomic_store_explicit(&b.owner, tid, .Release)
 	}
 	// inside the lock
-	b.owner = tid
 	b.recursion += 1
 }
 
@@ -472,15 +474,14 @@ benaphore, until the lock is released.
 */
 recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 	tid := current_thread_id()
-	if b.owner == tid {
-		atomic_add_explicit(&b.counter, 1, .Acquire)
-	}
-
-	if v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire); v != 0 {
+	check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) {
+		if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok {
+			atomic_add_explicit(&b.counter, 1, .Relaxed)
+			break check_owner
+		}
 		return false
 	}
 	// inside the lock
-	b.owner = tid
 	b.recursion += 1
 	return true
 }
@@ -494,14 +495,14 @@ for other threads for entering.
 */
 recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
-	_assert(tid == b.owner, "tid != b.owner")
+	assert_contextless(tid == atomic_load_explicit(&b.owner, .Relaxed), "tid != b.owner")
 	b.recursion -= 1
 	recursion := b.recursion
+
 	if recursion == 0 {
-		b.owner = 0
-	}
-	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
-		if recursion == 0 {
+		if atomic_sub_explicit(&b.counter, 1, .Relaxed) == 1 {
+			atomic_store_explicit(&b.owner, 0, .Release)
+		} else {
 			sema_post(&b.sema)
 		}
 	}
@@ -740,4 +741,4 @@ Make event available.
 one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) {
 	atomic_store_explicit(&e.state, 1, .Release)
 	futex_broadcast(&e.state)
-}
+}

+ 17 - 8
core/sync/futex_darwin.odin

@@ -12,6 +12,8 @@ foreign System {
 	// __ulock_wait is not available on 10.15
 	// See https://github.com/odin-lang/Odin/issues/1959
 	__ulock_wait  :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_us: u32) -> c.int ---
+	// >= MacOS 11.
+	__ulock_wait2 :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_ns: u64, value2: u64) -> c.int ---
 	__ulock_wake  :: proc "c" (operation: u32, addr: rawptr, wake_value: u64) -> c.int ---
 }
 
@@ -48,22 +50,29 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		case -ETIMEDOUT:
 			return false
 		case:
-			_panic("darwin.os_sync_wait_on_address_with_timeout failure")
+			panic_contextless("darwin.os_sync_wait_on_address_with_timeout failure")
 		}
 	} else {
 
-	timeout_ns := u32(duration)
-	s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns)
+	when darwin.ULOCK_WAIT_2_AVAILABLE {
+		timeout_ns := u64(duration)
+		s := __ulock_wait2(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns, 0)
+	} else {
+		timeout_us := u32(duration / time.Microsecond)
+		s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_us)
+	}
+
 	if s >= 0 {
 		return true
 	}
+
 	switch s {
 	case EINTR, EFAULT:
 		return true
 	case ETIMEDOUT:
 		return false
 	case:
-		_panic("futex_wait failure")
+		panic_contextless("futex_wait failure")
 	}
 	return true
 
@@ -83,7 +92,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 			case -ENOENT:
 				return
 			case:
-				_panic("darwin.os_sync_wake_by_address_any failure")
+				panic_contextless("darwin.os_sync_wake_by_address_any failure")
 			}
 		}
 	} else {
@@ -99,7 +108,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 		case ENOENT:
 			return
 		case:
-			_panic("futex_wake_single failure")
+			panic_contextless("futex_wake_single failure")
 		}
 	}
 
@@ -119,7 +128,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) {
 			case -ENOENT:
 				return
 			case:
-				_panic("darwin.os_sync_wake_by_address_all failure")
+				panic_contextless("darwin.os_sync_wake_by_address_all failure")
 			}
 		}
 	} else {
@@ -135,7 +144,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) {
 		case ENOENT:
 			return
 		case:
-			_panic("futex_wake_all failure")
+			panic_contextless("futex_wake_all failure")
 		}
 	}
 

+ 4 - 4
core/sync/futex_freebsd.odin

@@ -21,7 +21,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 			continue
 		}
 
-		_panic("_futex_wait failure")
+		panic_contextless("_futex_wait failure")
 	}
 
 	unreachable()
@@ -44,14 +44,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		return false
 	}
 
-	_panic("_futex_wait_with_timeout failure")
+	panic_contextless("_futex_wait_with_timeout failure")
 }
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	errno := freebsd._umtx_op(f, .WAKE, 1, nil, nil)
 
 	if errno != nil {
-		_panic("_futex_signal failure")
+		panic_contextless("_futex_signal failure")
 	}
 }
 
@@ -59,6 +59,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	errno := freebsd._umtx_op(f, .WAKE, cast(c.ulong)max(i32), nil, nil)
 
 	if errno != nil {
-		_panic("_futex_broadcast failure")
+		panic_contextless("_futex_broadcast failure")
 	}
 }

+ 4 - 4
core/sync/futex_linux.odin

@@ -15,7 +15,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool {
 		return true
 	case:
 		// TODO(flysand): More descriptive panic messages based on the value of `errno`
-		_panic("futex_wait failure")
+		panic_contextless("futex_wait failure")
 	}
 }
 
@@ -34,7 +34,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 	case .NONE, .EINTR, .EAGAIN:
 		return true
 	case:
-		_panic("futex_wait_with_timeout failure")
+		panic_contextless("futex_wait_with_timeout failure")
 	}
 }
 
@@ -44,7 +44,7 @@ _futex_signal :: proc "contextless" (futex: ^Futex) {
 	case .NONE:
 		return
 	case:
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 }
 
@@ -57,6 +57,6 @@ _futex_broadcast :: proc "contextless" (futex: ^Futex)  {
 	case .NONE:
 		return
 	case:
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 }

+ 4 - 4
core/sync/futex_netbsd.odin

@@ -35,7 +35,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool {
 		case EINTR, EAGAIN:
 			return true
 		case:
-			_panic("futex_wait failure")
+			panic_contextless("futex_wait failure")
 		}	
 	}
 	return true
@@ -55,7 +55,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 		case ETIMEDOUT:
 			return false
 		case:
-			_panic("futex_wait_with_timeout failure")
+			panic_contextless("futex_wait_with_timeout failure")
 		}
 	}
 	return true
@@ -63,12 +63,12 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 
 _futex_signal :: proc "contextless" (futex: ^Futex) {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, 1, 0, 0, 0); !ok {
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 }
 
 _futex_broadcast :: proc "contextless" (futex: ^Futex)  {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, uintptr(max(i32)), 0, 0, 0); !ok {
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 }

+ 4 - 4
core/sync/futex_openbsd.odin

@@ -36,7 +36,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 		return false
 	}
 
-	_panic("futex_wait failure")
+	panic_contextless("futex_wait failure")
 }
 
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
@@ -62,14 +62,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		return false
 	}
 
-	_panic("futex_wait_with_timeout failure")
+	panic_contextless("futex_wait_with_timeout failure")
 }
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, 1, nil)
 
 	if res == -1 {
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 }
 
@@ -77,6 +77,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, u32(max(i32)), nil)
 
 	if res == -1 {
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 }

+ 4 - 4
core/sync/futex_wasm.odin

@@ -10,7 +10,7 @@ import "core:time"
 
 _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
 		return s != 0
@@ -19,7 +19,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
 		return s != 0
@@ -28,7 +28,7 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 		loop: for {
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
@@ -41,7 +41,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 
 _futex_broadcast :: proc "contextless" (f: ^Futex) {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 		loop: for {
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))

+ 2 - 18
core/sync/primitives.odin

@@ -1,6 +1,5 @@
 package sync
 
-import "base:runtime"
 import "core:time"
 
 /*
@@ -390,7 +389,7 @@ recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 A condition variable.
 
 `Cond` implements a condition variable, a rendezvous point for threads waiting
-for signalling the occurence of an event. Condition variables are used on
+for signalling the occurrence of an event. Condition variables are used in
 conjunction with mutexes to provide a shared access to one or more shared
 variable.
 
@@ -560,7 +559,7 @@ futex_wait :: proc "contextless" (f: ^Futex, expected: u32) {
 		return
 	}
 	ok := _futex_wait(f, expected)
-	_assert(ok, "futex_wait failure")
+	assert_contextless(ok, "futex_wait failure")
 }
 
 /*
@@ -597,18 +596,3 @@ Wake up multiple threads waiting on a futex.
 futex_broadcast :: proc "contextless" (f: ^Futex) {
 	_futex_broadcast(f)
 }
-
-
-@(private)
-_assert :: proc "contextless" (cond: bool, msg: string) {
-	if !cond {
-		_panic(msg)
-	}
-}
-
-@(private)
-_panic :: proc "contextless" (msg: string) -> ! {
-	runtime.print_string(msg)
-	runtime.print_byte('\n')
-	runtime.trap()
-}

+ 2 - 2
core/sync/primitives_atomic.odin

@@ -240,7 +240,7 @@ atomic_recursive_mutex_lock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 
 atomic_recursive_mutex_unlock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 	tid := current_thread_id()
-	_assert(tid == m.owner, "tid != m.owner")
+	assert_contextless(tid == m.owner, "tid != m.owner")
 	m.recursion -= 1
 	recursion := m.recursion
 	if recursion == 0 {
@@ -361,7 +361,7 @@ atomic_sema_wait_with_timeout :: proc "contextless" (s: ^Atomic_Sema, duration:
 			if !futex_wait_with_timeout(&s.count, u32(original_count), remaining) {
 				return false
 			}
-			original_count = s.count
+			original_count = atomic_load_explicit(&s.count, .Relaxed)
 		}
 		if original_count == atomic_compare_exchange_strong_explicit(&s.count, original_count, original_count-1, .Acquire, .Acquire) {
 			return true

+ 11 - 0
core/sys/darwin/sync.odin

@@ -5,6 +5,7 @@ foreign import system "system:System.framework"
 // #define OS_WAIT_ON_ADDR_AVAILABILITY \
 // 	__API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
 when ODIN_OS == .Darwin {
+
 	when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 17_04_00 {
 		WAIT_ON_ADDRESS_AVAILABLE :: true
 	} else when ODIN_MINIMUM_OS_VERSION >= 14_04_00 {
@@ -12,8 +13,18 @@ when ODIN_OS == .Darwin {
 	} else {
 		WAIT_ON_ADDRESS_AVAILABLE :: false
 	}
+
+	when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 14_00_00 {
+		ULOCK_WAIT_2_AVAILABLE :: true
+	} else when ODIN_MINIMUM_OS_VERSION >= 11_00_00 {
+		ULOCK_WAIT_2_AVAILABLE :: true
+	} else {
+		ULOCK_WAIT_2_AVAILABLE :: false
+	}
+
 } else {
 	WAIT_ON_ADDRESS_AVAILABLE :: false
+	ULOCK_WAIT_2_AVAILABLE    :: false
 }
 
 os_sync_wait_on_address_flag :: enum u32 {

+ 4 - 0
core/sys/info/platform_darwin.odin

@@ -530,6 +530,10 @@ macos_release_map: map[string]Darwin_To_Release = {
 	"23F79"      = {{23, 5, 0}, "macOS", {"Sonoma",         {14, 5, 0}}},
 	"23G80"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 0}}},
 	"23G93"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 1}}},
+	"23H124"     = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 7, 0}}},
+
+	// MacOS Sequoia
+	"24A335"     = {{24, 0, 0}, "macOS", {"Sequoia",        {15, 0, 0}}},
 }
 
 @(private)

+ 4 - 0
core/testing/runner.odin

@@ -204,6 +204,10 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 		}
 	}
 
+	when ODIN_OS == .Windows {
+		console_ansi_init()
+	}
+
 	stdout := io.to_writer(os.stream_from_handle(os.stdout))
 	stderr := io.to_writer(os.stream_from_handle(os.stderr))
 

+ 22 - 0
core/testing/runner_windows.odin

@@ -0,0 +1,22 @@
+//+private
+package testing
+
+import win32 "core:sys/windows"
+
+console_ansi_init :: proc() {
+	stdout := win32.GetStdHandle(win32.STD_OUTPUT_HANDLE)
+	if stdout != win32.INVALID_HANDLE && stdout != nil {
+		old_console_mode: u32
+		if win32.GetConsoleMode(stdout, &old_console_mode) {
+			win32.SetConsoleMode(stdout, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+		}
+	}
+
+	stderr := win32.GetStdHandle(win32.STD_ERROR_HANDLE)
+	if stderr != win32.INVALID_HANDLE && stderr != nil {
+		old_console_mode: u32
+		if win32.GetConsoleMode(stderr, &old_console_mode) {
+			win32.SetConsoleMode(stderr, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+		}
+	}
+}

+ 11 - 0
core/testing/signal_handler_libc.odin

@@ -26,6 +26,8 @@ import "core:os"
 
 @(private="file", thread_local)
 local_test_index: libc.sig_atomic_t
+@(private="file", thread_local)
+local_test_index_set: bool
 
 // Windows does not appear to have a SIGTRAP, so this is defined here, instead
 // of in the libc package, just so there's no confusion about it being
@@ -45,6 +47,13 @@ stop_runner_callback :: proc "c" (sig: libc.int) {
 
 @(private="file")
 stop_test_callback :: proc "c" (sig: libc.int) {
+	if !local_test_index_set {
+		// We're a thread created by a test thread.
+		//
+		// There's nothing we can do to inform the test runner about who
+		// signalled, so hopefully the test will handle its own sub-threads.
+		return
+	}
 	if local_test_index == -1 {
 		// We're the test runner, and we ourselves have caught a signal from
 		// which there is no recovery.
@@ -114,6 +123,7 @@ This is a dire bug and should be reported to the Odin developers.
 
 _setup_signal_handler :: proc() {
 	local_test_index = -1
+	local_test_index_set = true
 
 	// Catch user interrupt / CTRL-C.
 	libc.signal(libc.SIGINT, stop_runner_callback)
@@ -135,6 +145,7 @@ _setup_signal_handler :: proc() {
 
 _setup_task_signal_handler :: proc(test_index: int) {
 	local_test_index = cast(libc.sig_atomic_t)test_index
+	local_test_index_set = true
 }
 
 _should_stop_runner :: proc() -> bool {

+ 8 - 4
core/testing/testing.odin

@@ -105,9 +105,13 @@ cleanup :: proc(t: ^T, procedure: proc(rawptr), user_data: rawptr) {
 	append(&t.cleanups, Internal_Cleanup{procedure, user_data, context})
 }
 
-expect :: proc(t: ^T, ok: bool, msg: string = "", loc := #caller_location) -> bool {
+expect :: proc(t: ^T, ok: bool, msg := "", expr := #caller_expression(ok), loc := #caller_location) -> bool {
 	if !ok {
-		log.error(msg, location=loc)
+		if msg == "" {
+			log.errorf("expected %v to be true", expr, location=loc)
+		} else {
+			log.error(msg, location=loc)
+		}
 	}
 	return ok
 }
@@ -119,10 +123,10 @@ expectf :: proc(t: ^T, ok: bool, format: string, args: ..any, loc := #caller_loc
 	return ok
 }
 
-expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) {
+expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location, value_expr := #caller_expression(value)) -> bool where intrinsics.type_is_comparable(T) {
 	ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected)
 	if !ok {
-		log.errorf("expected %v, got %v", expected, value, location=loc)
+		log.errorf("expected %v to be %v, got %v", value_expr, expected, value, location=loc)
 	}
 	return ok
 }

+ 6 - 6
core/thread/thread.odin

@@ -272,7 +272,7 @@ create_and_start :: proc(fn: proc(), init_context: Maybe(runtime.Context) = nil,
 	t := create(thread_proc, priority)
 	t.data = rawptr(fn)
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	t.init_context = init_context
 	start(t)
@@ -307,7 +307,7 @@ create_and_start_with_data :: proc(data: rawptr, fn: proc(data: rawptr), init_co
 	t.user_index = 1
 	t.user_args[0] = data
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	t.init_context = init_context
 	start(t)
@@ -347,7 +347,7 @@ create_and_start_with_poly_data :: proc(data: $T, fn: proc(data: T), init_contex
 	mem.copy(&t.user_args[0], &data, size_of(T))
 
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 
 	t.init_context = init_context
@@ -394,7 +394,7 @@ create_and_start_with_poly_data2 :: proc(arg1: $T1, arg2: $T2, fn: proc(T1, T2),
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg2))
 
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 
 	t.init_context = init_context
@@ -443,7 +443,7 @@ create_and_start_with_poly_data3 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, fn: pr
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg3))
 
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 
 	t.init_context = init_context
@@ -494,7 +494,7 @@ create_and_start_with_poly_data4 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, arg4:
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg4))
 
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 
 	t.init_context = init_context

+ 1 - 0
core/thread/thread_pool.odin

@@ -60,6 +60,7 @@ pool_thread_runner :: proc(t: ^Thread) {
 		if task, ok := pool_pop_waiting(pool); ok {
 			data.task = task
 			pool_do_work(pool, task)
+			sync.guard(&pool.mutex)
 			data.task = {}
 		}
 	}

+ 8 - 24
core/thread/thread_unix.odin

@@ -5,18 +5,14 @@ package thread
 import "base:runtime"
 import "core:sync"
 import "core:sys/unix"
-import "core:time"
 
 _IS_SUPPORTED :: true
 
-CAS :: sync.atomic_compare_exchange_strong
-
 // NOTE(tetra): Aligned here because of core/unix/pthread_linux.odin/pthread_t.
 // Also see core/sys/darwin/mach_darwin.odin/semaphore_t.
 Thread_Os_Specific :: struct #align(16) {
 	unix_thread: unix.pthread_t, // NOTE: very large on Darwin, small on Linux.
-	cond:        sync.Cond,
-	mutex:       sync.Mutex,
+	start_ok:    sync.Sema,
 }
 //
 // Creates a thread which will run the given procedure.
@@ -29,14 +25,10 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 		// We need to give the thread a moment to start up before we enable cancellation.
 		can_set_thread_cancel_state := unix.pthread_setcancelstate(unix.PTHREAD_CANCEL_ENABLE, nil) == 0
 
-		sync.lock(&t.mutex)
-
 		t.id = sync.current_thread_id()
 
-		for (.Started not_in sync.atomic_load(&t.flags)) {
-			// HACK: use a timeout so in the event that the condition is signalled at THIS comment's exact point
-			// (after checking flags, before starting the wait) it gets itself out of that deadlock after a ms.
-			sync.wait_with_timeout(&t.cond, &t.mutex, time.Millisecond)
+		if .Started not_in sync.atomic_load(&t.flags) {
+			sync.wait(&t.start_ok)
 		}
 
 		if .Joined in sync.atomic_load(&t.flags) {
@@ -66,8 +58,6 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 
 		sync.atomic_or(&t.flags, { .Done })
 
-		sync.unlock(&t.mutex)
-
 		if .Self_Cleanup in sync.atomic_load(&t.flags) {
 			res := unix.pthread_detach(t.unix_thread)
 			assert_contextless(res == 0)
@@ -132,7 +122,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 
 _start :: proc(t: ^Thread) {
 	sync.atomic_or(&t.flags, { .Started })
-	sync.signal(&t.cond)
+	sync.post(&t.start_ok)
 }
 
 _is_done :: proc(t: ^Thread) -> bool {
@@ -140,24 +130,18 @@ _is_done :: proc(t: ^Thread) -> bool {
 }
 
 _join :: proc(t: ^Thread) {
-	// sync.guard(&t.mutex)
-
 	if unix.pthread_equal(unix.pthread_self(), t.unix_thread) {
 		return
 	}
 
-	// Preserve other flags besides `.Joined`, like `.Started`.
-	unjoined := sync.atomic_load(&t.flags) - {.Joined}
-	joined   := unjoined + {.Joined}
-
-	// Try to set `t.flags` from unjoined to joined. If it returns joined,
-	// it means the previous value had that flag set and we can return.
-	if res, ok := CAS(&t.flags, unjoined, joined); res == joined && !ok {
+	// If the previous value was already `Joined`, then we can return.
+	if .Joined in sync.atomic_or(&t.flags, {.Joined}) {
 		return
 	}
+
 	// Prevent non-started threads from blocking main thread with initial wait
 	// condition.
-	if .Started not_in unjoined {
+	if .Started not_in sync.atomic_load(&t.flags) {
 		_start(t)
 	}
 	unix.pthread_join(t.unix_thread, nil)

+ 3 - 3
core/thread/thread_windows.odin

@@ -27,7 +27,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 	__windows_thread_entry_proc :: proc "system" (t_: rawptr) -> win32.DWORD {
 		t := (^Thread)(t_)
 
-		if .Joined in t.flags {
+		if .Joined in sync.atomic_load(&t.flags) {
 			return 0
 		}
 
@@ -48,9 +48,9 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 			t.procedure(t)
 		}
 
-		intrinsics.atomic_store(&t.flags, t.flags + {.Done})
+		intrinsics.atomic_or(&t.flags, {.Done})
 
-		if .Self_Cleanup in t.flags {
+		if .Self_Cleanup in sync.atomic_load(&t.flags) {
 			win32.CloseHandle(t.win32_thread)
 			t.win32_thread = win32.INVALID_HANDLE
 			// NOTE(ftphikari): It doesn't matter which context 'free' received, right?

+ 2 - 0
src/bug_report.cpp

@@ -919,6 +919,8 @@ gb_internal void report_os_info() {
 			{"23F79",    {23,  5,  0}, "macOS", {"Sonoma",        {14,  5,  0}}},
 			{"23G80",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  0}}},
 			{"23G93",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  1}}},
+			{"23H124",   {23,  6,  0}, "macOS", {"Sonoma",        {14,  7,  0}}},
+			{"24A335",   {24,  0,  0}, "macOS", {"Sequoia",       {15,  0,  0}}},
 		};
 
 

+ 4 - 2
src/build_settings.cpp

@@ -285,6 +285,7 @@ enum VetFlags : u64 {
 	VetFlag_Deprecated      = 1u<<7,
 	VetFlag_Cast            = 1u<<8,
 	VetFlag_Tabs            = 1u<<9,
+	VetFlag_UnusedProcedures = 1u<<10,
 
 	VetFlag_Unused = VetFlag_UnusedVariables|VetFlag_UnusedImports,
 
@@ -316,6 +317,8 @@ u64 get_vet_flag_from_name(String const &name) {
 		return VetFlag_Cast;
 	} else if (name == "tabs") {
 		return VetFlag_Tabs;
+	} else if (name == "unused-procedures") {
+		return VetFlag_UnusedProcedures;
 	}
 	return VetFlag_NONE;
 }
@@ -383,6 +386,7 @@ struct BuildContext {
 
 	u64 vet_flags;
 	u32 sanitizer_flags;
+	StringSet vet_packages;
 
 	bool   has_resource;
 	String link_flags;
@@ -1462,8 +1466,6 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta
 		bc->thread_count = gb_max(bc->affinity.thread_count, 1);
 	}
 
-	string_set_init(&bc->custom_attributes);
-
 	bc->ODIN_VENDOR  = str_lit("odin");
 	bc->ODIN_VERSION = ODIN_VERSION;
 	bc->ODIN_ROOT    = odin_root_dir();

+ 16 - 0
src/check_builtin.cpp

@@ -1632,6 +1632,22 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o
 
 		operand->type = t_source_code_location;
 		operand->mode = Addressing_Value;
+	} else if (name == "caller_expression") {
+		if (ce->args.count > 1) {
+			error(ce->args[0], "'#caller_expression' expects either 0 or 1 arguments, got %td", ce->args.count);
+		}
+		if (ce->args.count > 0) {
+			Ast *arg = ce->args[0];
+			Operand o = {};
+			Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true);
+			if (e == nullptr || (e->flags & EntityFlag_Param) == 0) {
+				error(ce->args[0], "'#caller_expression' expected a valid earlier parameter name");
+			}
+			arg->Ident.entity = e;
+		}
+
+		operand->type = t_string;
+		operand->mode = Addressing_Value;
 	} else if (name == "exists") {
 		if (ce->args.count != 1) {
 			error(ce->close, "'#exists' expects 1 argument, got %td", ce->args.count);

+ 6 - 1
src/check_expr.cpp

@@ -7807,7 +7807,8 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c
 		    name == "load" ||
 		    name == "load_directory" ||
 		    name == "load_hash" ||
-		    name == "hash"
+		    name == "hash" ||
+		    name == "caller_expression"
 		) {
 			operand->mode = Addressing_Builtin;
 			operand->builtin_id = BuiltinProc_DIRECTIVE;
@@ -8725,6 +8726,10 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A
 		error(node, "#caller_location may only be used as a default argument parameter");
 		o->type = t_source_code_location;
 		o->mode = Addressing_Value;
+	} else if (name == "caller_expression") {
+		error(node, "#caller_expression may only be used as a default argument parameter");
+		o->type = t_string;
+		o->mode = Addressing_Value;
 	} else {
 		if (name == "location") {
 			init_core_source_code_location(c->checker);

+ 14 - 4
src/check_stmt.cpp

@@ -1641,6 +1641,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 	Ast *expr = unparen_expr(rs->expr);
 
+	Operand rhs_operand = {};
+
 	bool is_range = false;
 	bool is_possibly_addressable = true;
 	isize max_val_count = 2;
@@ -1698,7 +1700,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 					}
 				}
 			}
-			bool is_ptr = is_type_pointer(type_deref(operand.type));
+			bool is_ptr = is_type_pointer(operand.type);
 			Type *t = base_type(type_deref(operand.type));
 
 			switch (t->kind) {
@@ -1750,16 +1752,19 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 				break;
 
 			case Type_DynamicArray:
+				is_possibly_addressable = true;
 				array_add(&vals, t->DynamicArray.elem);
 				array_add(&vals, t_int);
 				break;
 
 			case Type_Slice:
+				is_possibly_addressable = true;
 				array_add(&vals, t->Slice.elem);
 				array_add(&vals, t_int);
 				break;
 
 			case Type_Map:
+				is_possibly_addressable = true;
 				is_map = true;
 				array_add(&vals, t->Map.key);
 				array_add(&vals, t->Map.value);
@@ -1781,6 +1786,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 			case Type_Tuple:
 				{
+					is_possibly_addressable = false;
+
 					isize count = t->Tuple.variables.count;
 					if (count < 1) {
 						ERROR_BLOCK();
@@ -1810,8 +1817,6 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 						array_add(&vals, e->type);
 					}
 
-					is_possibly_addressable = false;
-
 					bool do_break = false;
 					for (isize i = rs->vals.count-1; i >= 0; i--) {
 						if (rs->vals[i] != nullptr && count < i+2) {
@@ -1831,6 +1836,11 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 			case Type_Struct:
 				if (t->Struct.soa_kind != StructSoa_None) {
+					if (t->Struct.soa_kind == StructSoa_Fixed) {
+						is_possibly_addressable = operand.mode == Addressing_Variable || is_ptr;
+					} else {
+						is_possibly_addressable = true;
+					}
 					is_soa = true;
 					array_add(&vals, t->Struct.soa_elem);
 					array_add(&vals, t_int);
@@ -1907,7 +1917,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 					if (is_possibly_addressable && i == addressable_index) {
 						entity->flags &= ~EntityFlag_Value;
 					} else {
-						char const *idx_name = is_map ? "key" : is_bit_set ? "element" : "index";
+						char const *idx_name = is_map ? "key" : (is_bit_set || i == 0) ? "element" : "index";
 						error(token, "The %s variable '%.*s' cannot be made addressable", idx_name, LIT(str));
 					}
 				}

+ 32 - 0
src/check_type.cpp

@@ -1605,6 +1605,25 @@ gb_internal bool is_expr_from_a_parameter(CheckerContext *ctx, Ast *expr) {
 	return false;
 }
 
+gb_internal bool is_caller_expression(Ast *expr) {
+	if (expr->kind == Ast_BasicDirective && expr->BasicDirective.name.string == "caller_expression") {
+		return true;
+	}
+
+	Ast *call = unparen_expr(expr);
+	if (call->kind != Ast_CallExpr) {
+		return false;
+	}
+
+	ast_node(ce, CallExpr, call);
+	if (ce->proc->kind != Ast_BasicDirective) {
+		return false;
+	}
+
+	ast_node(bd, BasicDirective, ce->proc);
+	String name = bd->name.string;
+	return name == "caller_expression";
+}
 
 gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_type, Type **out_type_, Ast *expr, bool allow_caller_location) {
 	ParameterValue param_value = {};
@@ -1626,7 +1645,19 @@ gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_
 		if (in_type) {
 			check_assignment(ctx, &o, in_type, str_lit("parameter value"));
 		}
+	} else if (is_caller_expression(expr)) {
+		if (expr->kind != Ast_BasicDirective) {
+			check_builtin_procedure_directive(ctx, &o, expr, t_string);
+		}
+
+		param_value.kind = ParameterValue_Expression;
+		o.type = t_string;
+		o.mode = Addressing_Value;
+		o.expr = expr;
 
+		if (in_type) {
+			check_assignment(ctx, &o, in_type, str_lit("parameter value"));
+		}
 	} else {
 		if (in_type) {
 			check_expr_with_type_hint(ctx, &o, expr, in_type);
@@ -1858,6 +1889,7 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
 			case ParameterValue_Nil:
 				break;
 			case ParameterValue_Location:
+			case ParameterValue_Expression:
 			case ParameterValue_Value:
 				gbString str = type_to_string(type);
 				error(params[i], "A default value for a parameter must not be a polymorphic constant type, got %s", str);

+ 52 - 19
src/checker.cpp

@@ -533,18 +533,13 @@ gb_internal u64 check_vet_flags(CheckerContext *c) {
 	    c->curr_proc_decl->proc_lit) {
 		file = c->curr_proc_decl->proc_lit->file();
 	}
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
+
+	return ast_file_vet_flags(file);
 }
 
 gb_internal u64 check_vet_flags(Ast *node) {
 	AstFile *file = node->file();
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
+	return ast_file_vet_flags(file);
 }
 
 enum VettedEntityKind {
@@ -681,20 +676,45 @@ gb_internal bool check_vet_unused(Checker *c, Entity *e, VettedEntity *ve) {
 	return false;
 }
 
-gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
-	bool vet_unused = (vet_flags & VetFlag_Unused) != 0;
-	bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0;
-
+gb_internal void check_scope_usage_internal(Checker *c, Scope *scope, u64 vet_flags, bool per_entity) {
+	u64 original_vet_flags = vet_flags;
 	Array<VettedEntity> vetted_entities = {};
 	array_init(&vetted_entities, heap_allocator());
+	defer (array_free(&vetted_entities));
 
 	rw_mutex_shared_lock(&scope->mutex);
 	for (auto const &entry : scope->elements) {
 		Entity *e = entry.value;
 		if (e == nullptr) continue;
+
+		vet_flags = original_vet_flags;
+		if (per_entity) {
+			vet_flags = ast_file_vet_flags(e->file);
+		}
+
+		bool vet_unused = (vet_flags & VetFlag_Unused) != 0;
+		bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0;
+		bool vet_unused_procedures = (vet_flags & VetFlag_UnusedProcedures) != 0;
+
 		VettedEntity ve_unused = {};
 		VettedEntity ve_shadowed = {};
-		bool is_unused = vet_unused && check_vet_unused(c, e, &ve_unused);
+		bool is_unused = false;
+		if (vet_unused && check_vet_unused(c, e, &ve_unused)) {
+			is_unused = true;
+		} else if (vet_unused_procedures &&
+		           e->kind == Entity_Procedure) {
+			if (e->flags&EntityFlag_Used) {
+				is_unused = false;
+			} else if (e->flags & EntityFlag_Require) {
+				is_unused = false;
+			} else if (e->pkg && e->pkg->kind == Package_Init && e->token.string == "main") {
+				is_unused = false;
+			} else {
+				is_unused = true;
+				ve_unused.kind = VettedEntity_Unused;
+				ve_unused.entity = e;
+			}
+		}
 		bool is_shadowed = vet_shadowing && check_vet_shadowing(c, e, &ve_shadowed);
 		if (is_unused && is_shadowed) {
 			VettedEntity ve_both = ve_shadowed;
@@ -717,13 +737,18 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 	}
 	rw_mutex_shared_unlock(&scope->mutex);
 
-	gb_sort(vetted_entities.data, vetted_entities.count, gb_size_of(VettedEntity), vetted_entity_variable_pos_cmp);
+	array_sort(vetted_entities, vetted_entity_variable_pos_cmp);
 
 	for (auto const &ve : vetted_entities) {
 		Entity *e = ve.entity;
 		Entity *other = ve.other;
 		String name = e->token.string;
 
+		vet_flags = original_vet_flags;
+		if (per_entity) {
+			vet_flags = ast_file_vet_flags(e->file);
+		}
+
 		if (ve.kind == VettedEntity_Shadowed_And_Unused) {
 			error(e->token, "'%.*s' declared but not used, possibly shadows declaration at line %d", LIT(name), other->token.pos.line);
 		} else if (vet_flags) {
@@ -732,6 +757,9 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 				if (e->kind == Entity_Variable && (vet_flags & VetFlag_UnusedVariables) != 0) {
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 				}
+				if (e->kind == Entity_Procedure && (vet_flags & VetFlag_UnusedProcedures) != 0) {
+					error(e->token, "'%.*s' declared but not used", LIT(name));
+				}
 				if ((e->kind == Entity_ImportName || e->kind == Entity_LibraryName) && (vet_flags & VetFlag_UnusedImports) != 0) {
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 				}
@@ -749,7 +777,11 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 		}
 	}
 
-	array_free(&vetted_entities);
+}
+
+
+gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
+	check_scope_usage_internal(c, scope, vet_flags, false);
 
 	for (Scope *child = scope->head_child; child != nullptr; child = child->next) {
 		if (child->flags & (ScopeFlag_Proc|ScopeFlag_Type|ScopeFlag_File)) {
@@ -6497,12 +6529,13 @@ gb_internal void check_parsed_files(Checker *c) {
 	TIME_SECTION("check scope usage");
 	for (auto const &entry : c->info.files) {
 		AstFile *f = entry.value;
-		u64 vet_flags = build_context.vet_flags;
-		if (f->vet_flags_set) {
-			vet_flags = f->vet_flags;
-		}
+		u64 vet_flags = ast_file_vet_flags(f);
 		check_scope_usage(c, f->scope, vet_flags);
 	}
+	for (auto const &entry : c->info.packages) {
+		AstPackage *pkg = entry.value;
+		check_scope_usage_internal(c, pkg->scope, 0, true);
+	}
 
 	TIME_SECTION("add basic type information");
 	// Add "Basic" type information

+ 1 - 0
src/entity.cpp

@@ -104,6 +104,7 @@ enum ParameterValueKind {
 	ParameterValue_Constant,
 	ParameterValue_Nil,
 	ParameterValue_Location,
+	ParameterValue_Expression,
 	ParameterValue_Value,
 };
 

+ 10 - 2
src/gb/gb.h

@@ -3195,11 +3195,11 @@ void gb_affinity_init(gbAffinity *a) {
 	a->core_count       = 1;
 	a->threads_per_core = 1;
 
-	if (sysctlbyname("hw.logicalcpu", &count, &count_size, NULL, 0) == 0) {
+	if (sysctlbyname("kern.smp.cpus", &count, &count_size, NULL, 0) == 0) {
 		if (count > 0) {
 			a->thread_count = count;
 			// Get # of physical cores
-			if (sysctlbyname("hw.physicalcpu", &count, &count_size, NULL, 0) == 0) {
+			if (sysctlbyname("kern.smp.cores", &count, &count_size, NULL, 0) == 0) {
 				if (count > 0) {
 					a->core_count = count;
 					a->threads_per_core = a->thread_count / count;
@@ -3210,6 +3210,14 @@ void gb_affinity_init(gbAffinity *a) {
 				}
 			}
 		}
+	} else if (sysctlbyname("hw.ncpu", &count, &count_size, NULL, 0) == 0) {
+		// SMP disabled or unavailable.
+		if (count > 0) {
+			a->is_accurate      = true;
+			a->thread_count     = count;
+			a->core_count       = count;
+			a->threads_per_core = 1;
+		}
 	}
 
 }

+ 1 - 1
src/llvm_backend.hpp

@@ -528,7 +528,7 @@ gb_internal lbAddr lb_store_range_stmt_val(lbProcedure *p, Ast *stmt_val, lbValu
 gb_internal lbValue lb_emit_source_code_location_const(lbProcedure *p, String const &procedure, TokenPos const &pos);
 gb_internal lbValue lb_const_source_code_location_const(lbModule *m, String const &procedure, TokenPos const &pos);
 
-gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos);
+gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast *call_expression);
 
 gb_internal lbValue lb_equal_proc_for_type(lbModule *m, Type *type);
 gb_internal lbValue lb_hasher_proc_for_type(lbModule *m, Type *type);

+ 44 - 36
src/llvm_backend_debug.cpp

@@ -552,6 +552,48 @@ gb_internal LLVMMetadataRef lb_debug_bitset(lbModule *m, Type *type, String name
 	return final_decl;
 }
 
+gb_internal LLVMMetadataRef lb_debug_bitfield(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) {
+	Type *bt = base_type(type);
+	GB_ASSERT(bt->kind == Type_BitField);
+
+	lb_debug_file_line(m, bt->BitField.node, &file, &line);
+
+	u64 size_in_bits = 8*type_size_of(bt);
+	u32 align_in_bits = 8*cast(u32)type_align_of(bt);
+
+    unsigned element_count = cast(unsigned)bt->BitField.fields.count;
+    LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count);
+
+    u64 offset_in_bits = 0;
+    for (unsigned i = 0; i < element_count; i++) {
+        Entity *f = bt->BitField.fields[i];
+        u8 bit_size = bt->BitField.bit_sizes[i];
+        GB_ASSERT(f->kind == Entity_Variable);
+        String name = f->token.string;
+        elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, line,
+            bit_size, offset_in_bits, 0,
+            LLVMDIFlagZero, lb_debug_type(m, f->type)
+        );
+
+        offset_in_bits += bit_size;
+    }
+
+	LLVMMetadataRef final_decl = LLVMDIBuilderCreateStructType(
+		m->debug_builder, scope,
+		cast(char const *)name.text, cast(size_t)name.len,
+		file, line,
+		size_in_bits, align_in_bits,
+		LLVMDIFlagZero,
+		nullptr,
+		elements, element_count,
+		0,
+		nullptr,
+		"", 0
+	);
+	lb_set_llvm_metadata(m, type, final_decl);
+	return final_decl;
+}
+
 gb_internal LLVMMetadataRef lb_debug_enum(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) {
 	Type *bt = base_type(type);
 	GB_ASSERT(bt->kind == Type_Enum);
@@ -816,6 +858,7 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
 	case Type_Union:        return lb_debug_union(        m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_BitSet:       return lb_debug_bitset(       m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_Enum:         return lb_debug_enum(         m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
+	case Type_BitField:     return lb_debug_bitfield(     m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 
 	case Type_Tuple:
 		if (type->Tuple.variables.count == 1) {
@@ -901,42 +944,6 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
 			lb_debug_type(m, type->Matrix.elem),
 			subscripts, gb_count_of(subscripts));
 	}
-
-	case Type_BitField: {
-		LLVMMetadataRef parent_scope = nullptr;
-		LLVMMetadataRef scope = nullptr;
-		LLVMMetadataRef file = nullptr;
-		unsigned line = 0;
-		u64 size_in_bits = 8*cast(u64)type_size_of(type);
-		u32 align_in_bits = 8*cast(u32)type_align_of(type);
-		LLVMDIFlags flags = LLVMDIFlagZero;
-
-		unsigned element_count = cast(unsigned)type->BitField.fields.count;
-		LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count);
-
-		u64 offset_in_bits = 0;
-		for (unsigned i = 0; i < element_count; i++) {
-			Entity *f = type->BitField.fields[i];
-			u8 bit_size = type->BitField.bit_sizes[i];
-			GB_ASSERT(f->kind == Entity_Variable);
-			String name = f->token.string;
-			unsigned field_line = 0;
-			LLVMDIFlags field_flags = LLVMDIFlagZero;
-			elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, field_line,
-				bit_size, offset_in_bits, offset_in_bits,
-				field_flags, lb_debug_type(m, f->type)
-			);
-
-			offset_in_bits += bit_size;
-		}
-
-
-		return LLVMDIBuilderCreateStructType(m->debug_builder, parent_scope, "", 0, file, line,
-			size_in_bits, align_in_bits, flags,
-			nullptr, elements, element_count, 0, nullptr,
-			"", 0
-		);
-	}
 	}
 
 	GB_PANIC("Invalid type %s", type_to_string(type));
@@ -1022,6 +1029,7 @@ gb_internal LLVMMetadataRef lb_debug_type(lbModule *m, Type *type) {
 		case Type_Union:        return lb_debug_union(m, type, name, scope, file, line);
 		case Type_BitSet:       return lb_debug_bitset(m, type, name, scope, file, line);
 		case Type_Enum:         return lb_debug_enum(m, type, name, scope, file, line);
+		case Type_BitField:     return lb_debug_bitfield(m, type, name, scope, file, line);
 		}
 	}
 

+ 57 - 5
src/llvm_backend_proc.cpp

@@ -699,7 +699,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) {
 					}
 
 					if (e->Variable.param_value.kind != ParameterValue_Invalid) {
-						lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, e->token.pos);
+						GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Location);
+						GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Expression);
+						lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, nullptr, nullptr);
 						lb_addr_store(p, res, c);
 					}
 
@@ -3420,7 +3422,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
 }
 
 
-gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos) {
+gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast* call_expression) {
 	switch (param_value.kind) {
 	case ParameterValue_Constant:
 		if (is_type_constant_type(parameter_type)) {
@@ -3446,8 +3448,60 @@ gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type,
 			if (p->entity != nullptr) {
 				proc_name = p->entity->token.string;
 			}
+
+			ast_node(ce, CallExpr, call_expression);
+			TokenPos pos = ast_token(ce->proc).pos;
+
 			return lb_emit_source_code_location_as_global(p, proc_name, pos);
 		}
+	case ParameterValue_Expression:
+		{
+			Ast *orig = param_value.original_ast_expr;
+			if (orig->kind == Ast_BasicDirective) {
+				gbString expr = expr_to_string(call_expression, temporary_allocator());
+				return lb_const_string(p->module, make_string_c(expr));
+			}
+
+			isize param_idx = -1;
+			String param_str = {0};
+			{
+				Ast *call = unparen_expr(orig);
+				GB_ASSERT(call->kind == Ast_CallExpr);
+				ast_node(ce, CallExpr, call);
+				GB_ASSERT(ce->proc->kind == Ast_BasicDirective);
+				GB_ASSERT(ce->args.count == 1);
+				Ast *target = ce->args[0];
+				GB_ASSERT(target->kind == Ast_Ident);
+				String target_str = target->Ident.token.string;
+
+				param_idx = lookup_procedure_parameter(procedure_type, target_str);
+				param_str = target_str;
+			}
+			GB_ASSERT(param_idx >= 0);
+
+
+			Ast *target_expr = nullptr;
+			ast_node(ce, CallExpr, call_expression);
+
+			if (ce->split_args->positional.count > param_idx) {
+				target_expr = ce->split_args->positional[param_idx];
+			}
+
+			for_array(i, ce->split_args->named) {
+				Ast *arg = ce->split_args->named[i];
+				ast_node(fv, FieldValue, arg);
+				GB_ASSERT(fv->field->kind == Ast_Ident);
+				String name = fv->field->Ident.token.string;
+				if (name == param_str) {
+					target_expr = fv->value;
+					break;
+				}
+			}
+
+			gbString expr = expr_to_string(target_expr, temporary_allocator());
+			return lb_const_string(p->module, make_string_c(expr));
+		}
+
 	case ParameterValue_Value:
 		return lb_build_expr(p, param_value.ast_value);
 	}
@@ -3739,8 +3793,6 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 		}
 	}
 
-	TokenPos pos = ast_token(ce->proc).pos;
-
 
 	if (pt->params != nullptr)  {
 		isize min_count = pt->params->Tuple.variables.count;
@@ -3764,7 +3816,7 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 					args[arg_index] = lb_const_nil(p->module, e->type);
 					break;
 				case Entity_Variable:
-					args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pos);
+					args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pt, expr);
 					break;
 
 				case Entity_Constant:

+ 49 - 2
src/main.cpp

@@ -340,12 +340,14 @@ enum BuildFlagKind {
 	BuildFlag_VetUnused,
 	BuildFlag_VetUnusedImports,
 	BuildFlag_VetUnusedVariables,
+	BuildFlag_VetUnusedProcedures,
 	BuildFlag_VetUsingStmt,
 	BuildFlag_VetUsingParam,
 	BuildFlag_VetStyle,
 	BuildFlag_VetSemicolon,
 	BuildFlag_VetCast,
 	BuildFlag_VetTabs,
+	BuildFlag_VetPackages,
 
 	BuildFlag_CustomAttribute,
 	BuildFlag_IgnoreUnknownAttributes,
@@ -547,6 +549,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 	add_flag(&build_flags, BuildFlag_Vet,                     str_lit("vet"),                       BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnused,               str_lit("vet-unused"),                BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedVariables,      str_lit("vet-unused-variables"),      BuildFlagParam_None,    Command__does_check);
+	add_flag(&build_flags, BuildFlag_VetUnusedProcedures,     str_lit("vet-unused-procedures"),     BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedImports,        str_lit("vet-unused-imports"),        BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetShadowing,            str_lit("vet-shadowing"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUsingStmt,            str_lit("vet-using-stmt"),            BuildFlagParam_None,    Command__does_check);
@@ -555,6 +558,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 	add_flag(&build_flags, BuildFlag_VetSemicolon,            str_lit("vet-semicolon"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetCast,                 str_lit("vet-cast"),                  BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetTabs,                 str_lit("vet-tabs"),                  BuildFlagParam_None,    Command__does_check);
+	add_flag(&build_flags, BuildFlag_VetPackages,             str_lit("vet-packages"),              BuildFlagParam_String,  Command__does_check);
 
 	add_flag(&build_flags, BuildFlag_CustomAttribute,         str_lit("custom-attribute"),          BuildFlagParam_String,  Command__does_check, true);
 	add_flag(&build_flags, BuildFlag_IgnoreUnknownAttributes, str_lit("ignore-unknown-attributes"), BuildFlagParam_None,    Command__does_check);
@@ -1220,6 +1224,36 @@ gb_internal bool parse_build_flags(Array<String> args) {
 						case BuildFlag_VetSemicolon:       build_context.vet_flags |= VetFlag_Semicolon;       break;
 						case BuildFlag_VetCast:            build_context.vet_flags |= VetFlag_Cast;            break;
 						case BuildFlag_VetTabs:            build_context.vet_flags |= VetFlag_Tabs;            break;
+						case BuildFlag_VetUnusedProcedures:
+							build_context.vet_flags |= VetFlag_UnusedProcedures;
+							if (!set_flags[BuildFlag_VetPackages]) {
+								gb_printf_err("-%.*s must be used with -vet-packages\n", LIT(name));
+								bad_flags = true;
+							}
+							break;
+
+						case BuildFlag_VetPackages:
+							{
+								GB_ASSERT(value.kind == ExactValue_String);
+								String val = value.value_string;
+								String_Iterator it = {val, 0};
+								for (;;) {
+									String pkg = string_split_iterator(&it, ',');
+									if (pkg.len == 0) {
+										break;
+									}
+
+									pkg = string_trim_whitespace(pkg);
+									if (!string_is_valid_identifier(pkg)) {
+										gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(pkg));
+										bad_flags = true;
+										continue;
+									}
+
+									string_set_add(&build_context.vet_packages, pkg);
+								}
+							}
+							break;
 
 						case BuildFlag_CustomAttribute:
 							{
@@ -1234,7 +1268,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 
 									attr = string_trim_whitespace(attr);
 									if (!string_is_valid_identifier(attr)) {
-										gb_printf_err("-custom-attribute '%.*s' must be a valid identifier\n", LIT(attr));
+										gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(attr));
 										bad_flags = true;
 										continue;
 									}
@@ -2364,7 +2398,7 @@ gb_internal void print_show_help(String const arg0, String const &command) {
 		print_usage_line(0, "");
 
 		print_usage_line(1, "-vet-unused");
-		print_usage_line(2, "Checks for unused declarations.");
+		print_usage_line(2, "Checks for unused declarations (variables and imports).");
 		print_usage_line(0, "");
 
 		print_usage_line(1, "-vet-unused-variables");
@@ -2406,6 +2440,16 @@ gb_internal void print_show_help(String const arg0, String const &command) {
 		print_usage_line(1, "-vet-tabs");
 		print_usage_line(2, "Errs when the use of tabs has not been used for indentation.");
 		print_usage_line(0, "");
+
+		print_usage_line(1, "-vet-packages:<comma-separated-strings>");
+		print_usage_line(2, "Sets which packages by name will be vetted.");
+		print_usage_line(2, "Files with specific +vet tags will not be ignored if they are not in the packages set.");
+		print_usage_line(0, "");
+
+		print_usage_line(1, "-vet-unused-procedures");
+		print_usage_line(2, "Checks for unused procedures.");
+		print_usage_line(2, "Must be used with -vet-packages or specified on a per file with +vet tags.");
+		print_usage_line(0, "");
 	}
 
 	if (check) {
@@ -3150,6 +3194,9 @@ int main(int arg_count, char const **arg_ptr) {
 
 	build_context.command = command;
 
+	string_set_init(&build_context.custom_attributes);
+	string_set_init(&build_context.vet_packages);
+
 	if (!parse_build_flags(args)) {
 		return 1;
 	}

+ 20 - 11
src/parser.cpp

@@ -1,10 +1,28 @@
 #include "parser_pos.cpp"
 
+// Reports whether `file` belongs to a package that should be vetted.
+//
+// Returns true when `file` is null, when `file->pkg` is null, or when
+// `build_context.vet_packages` has no entries (no restriction in effect).
+// Otherwise returns whether the package name is present in that set.
+gb_internal bool in_vet_packages(AstFile *file) {
+	if (file == nullptr) {
+		return true;
+	}
+	if (file->pkg == nullptr) {
+		return true;
+	}
+	if (build_context.vet_packages.entries.count == 0) {
+		return true;
+	}
+	return string_set_exists(&build_context.vet_packages, file->pkg->name);
+}
+
 gb_internal u64 ast_file_vet_flags(AstFile *f) {
 	if (f != nullptr && f->vet_flags_set) {
 		return f->vet_flags;
 	}
-	return build_context.vet_flags;
+
+	bool found = in_vet_packages(f);
+	if (found) {
+		return build_context.vet_flags;
+	}
+	return 0;
 }
 
 gb_internal bool ast_file_vet_style(AstFile *f) {
@@ -5378,18 +5396,9 @@ gb_internal Ast *parse_stmt(AstFile *f) {
 }
 
 
-
-gb_internal u64 check_vet_flags(AstFile *file) {
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
-}
-
-
 gb_internal void parse_enforce_tabs(AstFile *f) {
 	// Checks to see if tabs have been used for indentation
-	if ((check_vet_flags(f) & VetFlag_Tabs) == 0) {
+	if ((ast_file_vet_flags(f) & VetFlag_Tabs) == 0) {
 		return;
 	}
 

+ 20 - 13
tests/core/flags/test_core_flags.odin

@@ -12,6 +12,26 @@ import "core:strings"
 import "core:testing"
 import "core:time/datetime"
 
+// Package-level struct type recognised by the type setter registered in
+// `init_custom_type_setter`, which writes to its `a` field.
+Custom_Data :: struct {
+	a: int,
+}
+
+// Registers a type setter with `flags.register_type_setter` from an `@(init)`
+// procedure.
+//
+// The registered setter stores 32 into `Custom_Data.a` and returns
+// `"", true, nil` when `data_type` is `Custom_Data`; for every other type it
+// returns `"", false, nil`.
+// NOTE(review): the bool presumably signals "handled" — confirm against
+// `core:flags`.
+@(init)
+init_custom_type_setter :: proc() {
+	// NOTE: This is done here so it can be out of the flow of the
+	// multi-threaded test runner, to prevent any data races that could be
+	// reported by using `-sanitize:thread`.
+	//
+	// Do mind that this means every test here acknowledges the `Custom_Data` type.
+	flags.register_type_setter(proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) {
+		if data_type == Custom_Data {
+			(cast(^Custom_Data)data).a = 32
+			return "", true, nil
+		}
+		return "", false, nil
+	})
+}
+
 @(test)
 test_no_args :: proc(t: ^testing.T) {
 	S :: struct {
@@ -1230,9 +1250,6 @@ test_net :: proc(t: ^testing.T) {
 @(test)
 test_custom_type_setter :: proc(t: ^testing.T) {
 	Custom_Bool :: distinct bool
-	Custom_Data :: struct {
-		a: int,
-	}
 
 	S :: struct {
 		a: Custom_Data,
@@ -1240,16 +1257,6 @@ test_custom_type_setter :: proc(t: ^testing.T) {
 	}
 	s: S
 
-	// NOTE: Mind that this setter is global state, and the test runner is multi-threaded.
-	// It should be fine so long as all type setter tests are in this one test proc.
-	flags.register_type_setter(proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) {
-		if data_type == Custom_Data {
-			(cast(^Custom_Data)data).a = 32
-			return "", true, nil
-		}
-		return "", false, nil
-	})
-	defer flags.register_type_setter(nil)
 	args := [?]string { "-a:hellope", "-b:true" }
 	result := flags.parse(&s, args[:])
 	testing.expect_value(t, result, nil)

+ 2 - 2
tests/core/mem/test_mem_dynamic_pool.odin

@@ -6,7 +6,7 @@ import "core:mem"
 
 expect_pool_allocation :: proc(t: ^testing.T, expected_used_bytes, num_bytes, alignment: int) {
 	pool: mem.Dynamic_Pool
-	mem.dynamic_pool_init(pool = &pool, alignment = alignment)
+	mem.dynamic_pool_init(&pool, alignment = alignment)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 
 	element, err := mem.alloc(num_bytes, alignment, pool_allocator)
@@ -48,7 +48,7 @@ expect_pool_allocation_out_of_band :: proc(t: ^testing.T, num_bytes, out_band_si
 	testing.expect(t, num_bytes >= out_band_size, "Sanity check failed, your test call is flawed! Make sure that num_bytes >= out_band_size!")
 
 	pool: mem.Dynamic_Pool
-	mem.dynamic_pool_init(pool = &pool, out_band_size = out_band_size)
+	mem.dynamic_pool_init(&pool, out_band_size = out_band_size)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 
 	element, err := mem.alloc(num_bytes, allocator = pool_allocator)

+ 2 - 0
tests/core/normal.odin

@@ -39,6 +39,8 @@ download_assets :: proc() {
 @(require) import "slice"
 @(require) import "strconv"
 @(require) import "strings"
+@(require) import "sync"
+@(require) import "sync/chan"
 @(require) import "sys/posix"
 @(require) import "sys/windows"
 @(require) import "text/i18n"

+ 274 - 0
tests/core/sync/chan/test_core_sync_chan.odin

@@ -0,0 +1,274 @@
+package test_core_sync_chan
+
+import "base:runtime"
+import "base:intrinsics"
+import "core:log"
+import "core:math/rand"
+import "core:sync/chan"
+import "core:testing"
+import "core:thread"
+import "core:time"
+
+
+// Kinds of message exchanged between the test thread and `comm_client`.
+// `Add`/`Multiply`/`Subtract`/`Divide` are applied to the client's running
+// total, `End` stops the client's receive loop, and `Result` carries the
+// client's final total back.
+Message_Type :: enum i32 {
+	Result,
+	Add,
+	Multiply,
+	Subtract,
+	Divide,
+	End,
+}
+
+// A single channel message: an operation kind plus its integer operand
+// (or, for `.Result`, the computed total).
+Message :: struct {
+	type: Message_Type,
+	i: i64,
+}
+
+// Shared state handed to the `comm_client` thread through `th.data`.
+Comm :: struct {
+	// Channel the test thread sends operations on; `comm_client` receives from it.
+	host: chan.Chan(Message),
+	// Channel `comm_client` sends its `.Result` message on.
+	client: chan.Chan(Message),
+	// When true, `comm_client` polls `can_recv`/`can_send` and yields before
+	// each channel operation.
+	manual_buffering: bool,
+}
+
+// Capacity passed to `chan.create_buffered`; also scales the number of random
+// messages sent by `send_messages`.
+BUFFER_SIZE :: 8
+// Upper bound passed to `rand.int63_max` when picking each operand.
+MAX_RAND    :: 32
+// Passed to `testing.set_fail_timeout` at the start of every test.
+FAIL_TIME   :: 1 * time.Second
+// Duration the tests sleep to give the client thread time to act.
+SLEEP_TIME  :: 1 * time.Millisecond
+
+// Thread body used by the channel tests.
+//
+// Expects `th.data` to point at a `Comm`. Receives messages from `data.host`
+// and applies each arithmetic operation to a running total `n` (starting at
+// zero) until an `.End` message arrives or the receive loop ends. It then
+// sends the total on `data.client` as a `.Result` message and closes
+// `data.client`. Any message type other than the four operations and `.End`
+// panics.
+//
+// When `manual_buffering` is set, the thread yields until the channel it is
+// about to use reports that it can receive/send.
+comm_client :: proc(th: ^thread.Thread) {
+	data := cast(^Comm)th.data
+	manual_buffering := data.manual_buffering
+
+	n: i64
+
+	for manual_buffering && !chan.can_recv(data.host) {
+		thread.yield()
+	}
+
+	recv_loop: for msg in chan.recv(data.host) {
+		#partial switch msg.type {
+		case .Add:      n += msg.i
+		case .Multiply: n *= msg.i
+		case .Subtract: n -= msg.i
+		case .Divide:   n /= msg.i
+		case .End:
+			break recv_loop
+		case:
+			panic("Unknown message type for client.")
+		}
+
+		for manual_buffering && !chan.can_recv(data.host) {
+			thread.yield()
+		}
+	}
+
+	// Poll the channel we are about to send on (`client`), not `host`.
+	for manual_buffering && !chan.can_send(data.client) {
+		thread.yield()
+	}
+
+	chan.send(data.client, Message{.Result, n})
+	chan.close(data.client)
+}
+
+// Sends a scripted sequence of arithmetic messages on `host` and returns the
+// total the receiver should compute.
+//
+// The sequence is: `Add 1`, then `1 + 2*BUFFER_SIZE` messages with a random
+// operation and a random operand in `1..=MAX_RAND`, then `End`. `host` is
+// closed before returning. Each sent message is also logged at debug level.
+//
+// Inputs:
+// - t: test context, used only for the `chan.len(host) == 0` expectation
+//   made when `manual_buffering` is true.
+// - host: channel to send on.
+// - manual_buffering: when true, yields until `chan.can_send(host)` before
+//   every send.
+//
+// Returns:
+// - expected: the result of applying the same operations locally, starting
+//   from 1 (the value after the initial `Add 1` on a zero total).
+send_messages :: proc(t: ^testing.T, host: chan.Chan(Message), manual_buffering: bool = false) -> (expected: i64) {
+	expected = 1
+	for manual_buffering && !chan.can_send(host) {
+		thread.yield()
+	}
+	chan.send(host, Message{.Add, 1})
+	log.debug(Message{.Add, 1})
+
+	for _ in 0..<1+2*BUFFER_SIZE {
+		msg: Message
+		// Operand is always at least 1, so `.Divide` never divides by zero.
+		msg.i = 1 + rand.int63_max(MAX_RAND)
+		switch rand.int_max(4) {
+		case 0:
+			msg.type = .Add
+			expected += msg.i
+		case 1:
+			msg.type = .Multiply
+			expected *= msg.i
+		case 2:
+			msg.type = .Subtract
+			expected -= msg.i
+		case 3:
+			msg.type = .Divide
+			expected /= msg.i
+		}
+
+		for manual_buffering && !chan.can_send(host) {
+			thread.yield()
+		}
+		if manual_buffering {
+			testing.expect(t, chan.len(host) == 0)
+		}
+
+		chan.send(host, msg)
+		log.debug(msg)
+	}
+
+	for manual_buffering && !chan.can_send(host) {
+		thread.yield()
+	}
+	chan.send(host, Message{.End, 0})
+	log.debug(Message{.End, 0})
+	chan.close(host)
+
+	return
+}
+
+// End-to-end test of two buffered channels of capacity `BUFFER_SIZE`.
+//
+// Checks the initial buffered/len/cap queries, runs `comm_client` on a
+// separate thread, sends the scripted messages, and expects the client's
+// `.Result` to equal the locally computed total. Finally checks that
+// send/try_send/recv/try_recv all fail on both channels once they are closed.
+@test
+test_chan_buffered :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	comm: Comm
+	alloc_err: runtime.Allocator_Error
+	comm.host,   alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	comm.client, alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer {
+		chan.destroy(comm.host)
+		chan.destroy(comm.client)
+	}
+
+	testing.expect(t, chan.is_buffered(comm.host))
+	testing.expect(t, chan.is_buffered(comm.client))
+	testing.expect(t, !chan.is_unbuffered(comm.host))
+	testing.expect(t, !chan.is_unbuffered(comm.client))
+	testing.expect_value(t, chan.len(comm.host), 0)
+	testing.expect_value(t, chan.len(comm.client), 0)
+	testing.expect_value(t, chan.cap(comm.host), BUFFER_SIZE)
+	testing.expect_value(t, chan.cap(comm.client), BUFFER_SIZE)
+
+	// The client thread reads its `Comm` through `reckoner.data`.
+	reckoner := thread.create(comm_client)
+	defer thread.destroy(reckoner)
+	reckoner.data = &comm
+	thread.start(reckoner)
+
+	expected := send_messages(t, comm.host, manual_buffering = false)
+
+	// Sleep so we can give the other thread enough time to buffer its message.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.len(comm.client), 1)
+	result, ok := chan.try_recv(comm.client)
+
+	// One more sleep to ensure it has enough time to close.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.is_closed(comm.client), true)
+	testing.expect_value(t, ok, true)
+	testing.expect_value(t, result.i, expected)
+	log.debug(result, expected)
+
+	// Make sure sending to closed channels fails.
+	testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false)
+	_, ok = chan.recv(comm.host);       testing.expect_value(t, ok, false)
+	_, ok = chan.recv(comm.client);     testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.host);   testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false)
+}
+
+// End-to-end test of two unbuffered channels.
+//
+// Mirrors `test_chan_buffered`, but the channels report a capacity of 0 and
+// the client thread runs with `manual_buffering = true`, polling
+// `can_recv`/`can_send` before each operation.
+// NOTE(review): `send_messages` is called here with its default
+// `manual_buffering = false` even though `comm.manual_buffering` is true —
+// confirm this asymmetry is intended.
+@test
+test_chan_unbuffered :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	comm: Comm
+	comm.manual_buffering = true
+	alloc_err: runtime.Allocator_Error
+	comm.host,   alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	comm.client, alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer {
+		chan.destroy(comm.host)
+		chan.destroy(comm.client)
+	}
+
+	testing.expect(t, !chan.is_buffered(comm.host))
+	testing.expect(t, !chan.is_buffered(comm.client))
+	testing.expect(t, chan.is_unbuffered(comm.host))
+	testing.expect(t, chan.is_unbuffered(comm.client))
+	testing.expect_value(t, chan.len(comm.host), 0)
+	testing.expect_value(t, chan.len(comm.client), 0)
+	testing.expect_value(t, chan.cap(comm.host), 0)
+	testing.expect_value(t, chan.cap(comm.client), 0)
+
+	// The client thread reads its `Comm` through `reckoner.data`.
+	reckoner := thread.create(comm_client)
+	defer thread.destroy(reckoner)
+	reckoner.data = &comm
+	thread.start(reckoner)
+
+	for !chan.can_send(comm.client) {
+		thread.yield()
+	}
+
+	expected := send_messages(t, comm.host)
+	testing.expect_value(t, chan.is_closed(comm.host), true)
+
+	// Wait until the client's result is available before the non-blocking receive.
+	for !chan.can_recv(comm.client) {
+		thread.yield()
+	}
+
+	result, ok := chan.try_recv(comm.client)
+	testing.expect_value(t, ok, true)
+	testing.expect_value(t, result.i, expected)
+	log.debug(result, expected)
+
+	// Sleep so we can give the other thread enough time to close its side
+	// after we've received its message.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.is_closed(comm.client), true)
+
+	// Make sure sending and receiving on closed channels fails.
+	testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false)
+	_, ok = chan.recv(comm.host);       testing.expect_value(t, ok, false)
+	_, ok = chan.recv(comm.client);     testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.host);   testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false)
+}
+
+// Fills a capacity-1 buffered channel, closes it, and expects a further
+// `chan.send` to return false. The fail timeout guards against the send
+// blocking instead of returning.
+@test
+test_full_buffered_closed_chan_deadlock :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	ch, alloc_err := chan.create_buffered(chan.Chan(int), 1, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer chan.destroy(ch)
+
+	testing.expect(t, chan.can_send(ch))
+	testing.expect(t, chan.send(ch, 32))
+	testing.expect(t, chan.close(ch))
+	// The buffer is full and the channel is closed: this send must fail.
+	testing.expect(t, !chan.send(ch, 32))
+}
+
+// This test guarantees a buffered channel's messages can still be received
+// even after closing. This is currently how the API works. If that changes,
+// this test will need to change.
+//
+// Two values are buffered before the close; the first is read back with
+// `chan.recv` and the second with `chan.try_recv`, in the order they were sent.
+@test
+test_accept_message_from_closed_buffered_chan :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	ch, alloc_err := chan.create_buffered(chan.Chan(int), 2, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer chan.destroy(ch)
+
+	testing.expect(t, chan.can_send(ch))
+	testing.expect(t, chan.send(ch, 32))
+	testing.expect(t, chan.send(ch, 64))
+	testing.expect(t, chan.close(ch))
+	result, ok := chan.recv(ch)
+	testing.expect_value(t, result, 32)
+	testing.expect(t, ok)
+	result, ok = chan.try_recv(ch)
+	testing.expect_value(t, result, 64)
+	testing.expect(t, ok)
+}

+ 714 - 0
tests/core/sync/test_core_sync.odin

@@ -0,0 +1,714 @@
+// NOTE(Feoramund): These tests should be run a few hundred times, with and
+// without `-sanitize:thread` enabled, to ensure maximum safety.
+//
+// Keep in mind that running with the debug logs uncommented can result in
+// failures disappearing due to the delay of sending the log message causing
+// different synchronization patterns.
+
+package test_core_sync
+
+import "base:intrinsics"
+// import "core:log"
+import "core:sync"
+import "core:testing"
+import "core:thread"
+import "core:time"
+
+// Passed to `testing.set_fail_timeout` at the start of every test.
+FAIL_TIME        :: 1 * time.Second
+// Sleep / timeout duration used by the `*_with_timeout` tests and `test_cond`.
+SLEEP_TIME       :: 1 * time.Millisecond
+// Shorter sleep used before releasing waiters in the wait-group and barrier tests.
+SMALL_SLEEP_TIME :: 10 * time.Microsecond
+
+// This needs to be high enough to cause a data race if any of the
+// synchronization primitives fail.
+THREADS :: 8
+
+// Manually wait on all threads to finish.
+//
+// This reduces a dependency on a `Wait_Group` or similar primitives.
+//
+// It's also important that we wait for every thread to finish, as it's
+// possible for a thread to finish after the test if we don't check, despite
+// joining it to the test thread.
+//
+// Polls `thread.is_done` on every thread, yielding between passes, until all
+// report done; then joins and destroys each thread.
+wait_for :: proc(threads: []^thread.Thread) {
+	wait_loop: for {
+		// Count down from the total; zero means every thread reported done.
+		count := len(threads)
+		for v in threads {
+			if thread.is_done(v) {
+				count -= 1
+			}
+		}
+		if count == 0 {
+			break wait_loop
+		}
+		thread.yield()
+	}
+	for t in threads {
+		thread.join(t)
+		thread.destroy(t)
+	}
+}
+
+//
+// core:sync/primitives.odin
+//
+
+// Starts `THREADS` threads that each lock a shared `sync.Mutex`, increment a
+// plain (non-atomic) counter, and unlock. Expects the counter to equal
+// `THREADS` once every thread has finished.
+@test
+test_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Mutex,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("MUTEX-%v> locking", th.id)
+		sync.mutex_lock(&data.m)
+		data.number += 1
+		// log.debugf("MUTEX-%v> unlocking", th.id)
+		sync.mutex_unlock(&data.m)
+		// log.debugf("MUTEX-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Exercises `sync.RW_Mutex` in both shared and exclusive modes.
+//
+// The test thread holds `m1` exclusively while it starts the workers and
+// sets `number1 = 1`. Each worker reads `number1` under a shared lock on `m1`,
+// then adds it to `number2` under an exclusive lock on `m2`. Expects
+// `number2 == THREADS`, i.e. every worker observed the value 1.
+@test
+test_rw_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m1: sync.RW_Mutex,
+		m2: sync.RW_Mutex,
+		number1: int,
+		number2: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.rw_mutex_shared_lock(&data.m1)
+		n := data.number1
+		sync.rw_mutex_shared_unlock(&data.m1)
+
+		sync.rw_mutex_lock(&data.m2)
+		data.number2 += n
+		sync.rw_mutex_unlock(&data.m2)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	// Taken before any worker starts, so no shared lock on `m1` can be
+	// acquired until `number1` has been written below.
+	sync.rw_mutex_lock(&data.m1)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	data.number1 = 1
+	sync.rw_mutex_unlock(&data.m1)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number2, THREADS)
+}
+
+// Exercises `sync.Recursive_Mutex` with nested locking from several threads.
+//
+// Each worker attempts a `try_lock`, locks three more times, attempts another
+// `try_lock`, increments the shared counter, then unlocks three times plus
+// once for each `try_lock` that succeeded. Expects the counter to equal
+// `THREADS`.
+@test
+test_recursive_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Recursive_Mutex,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("REC_MUTEX-%v> locking", th.id)
+		tried1 := sync.recursive_mutex_try_lock(&data.m)
+		for _ in 0..<3 {
+			sync.recursive_mutex_lock(&data.m)
+		}
+		tried2 := sync.recursive_mutex_try_lock(&data.m)
+		// log.debugf("REC_MUTEX-%v> locked", th.id)
+		data.number += 1
+		// log.debugf("REC_MUTEX-%v> unlocking", th.id)
+		for _ in 0..<3 {
+			sync.recursive_mutex_unlock(&data.m)
+		}
+		// Balance only the try_locks that actually acquired the mutex.
+		if tried1 { sync.recursive_mutex_unlock(&data.m) }
+		if tried2 { sync.recursive_mutex_unlock(&data.m) }
+		// log.debugf("REC_MUTEX-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Exercises `sync.Cond` together with a `sync.Mutex`.
+//
+// Workers lock the mutex and wait on the condition until `data.i == 1`, then
+// add `data.i` to the shared counter. The test thread holds the mutex while
+// starting the workers, sleeps, sets `data.i = 1`, unlocks, and broadcasts.
+// Expects the counter to equal `THREADS`.
+@test
+test_cond :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		c: sync.Cond,
+		m: sync.Mutex,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.mutex_lock(&data.m)
+
+		// Re-check the predicate after every wake-up.
+		for intrinsics.atomic_load(&data.i) != 1 {
+			sync.cond_wait(&data.c, &data.m)
+		}
+
+		data.number += intrinsics.atomic_load(&data.i)
+
+		sync.mutex_unlock(&data.m)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+	data.i = -1
+
+	sync.mutex_lock(&data.m)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	time.sleep(SLEEP_TIME)
+	data.i = 1
+	sync.mutex_unlock(&data.m)
+	sync.cond_broadcast(&data.c)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Calls `sync.cond_wait_with_timeout` on a condition nobody signals, with a
+// timeout of `SLEEP_TIME`. The return value is not checked; presumably the
+// test passes as long as the call returns before the `FAIL_TIME` fail timeout.
+@test
+test_cond_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	c: sync.Cond
+	m: sync.Mutex
+	sync.mutex_lock(&m)
+	sync.cond_wait_with_timeout(&c, &m, SLEEP_TIME)
+}
+
+// Exercises `sync.Sema` as a hand-off token between threads.
+//
+// Each worker waits on the semaphore, increments the shared counter, and
+// posts the semaphore again. The test thread posts once after starting the
+// workers. Expects the counter to equal `THREADS`.
+@test
+test_semaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		s: sync.Sema,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("SEM-%v> waiting", th.id)
+		sync.sema_wait(&data.s)
+		data.number += 1
+		// log.debugf("SEM-%v> posting", th.id)
+		sync.sema_post(&data.s)
+		// log.debugf("SEM-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+	// Single initial post; each worker re-posts after its increment.
+	sync.sema_post(&data.s)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Calls `sync.sema_wait_with_timeout` on a semaphore that is never posted,
+// with a timeout of `SLEEP_TIME`. The return value is not checked; presumably
+// the test passes as long as the call returns before the fail timeout.
+@test
+test_semaphore_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	s: sync.Sema
+	sync.sema_wait_with_timeout(&s, SLEEP_TIME)
+}
+
+// Exercises `sync.futex_wait` / `sync.futex_broadcast`.
+//
+// The futex starts at 3 and workers call `futex_wait(&data.f, 3)`. The test
+// thread sets `data.i = 1`, changes the futex to 0, and broadcasts. Each
+// worker then atomically adds `data.i` to the shared counter. Expects the
+// counter to equal `THREADS`.
+@test
+test_futex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		f: sync.Futex,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("FUTEX-%v> waiting", th.id)
+		sync.futex_wait(&data.f, 3)
+		// log.debugf("FUTEX-%v> done", th.id)
+
+		n := data.i
+		intrinsics.atomic_add(&data.number, n)
+	}
+
+	data: Data
+	data.i = -1
+	data.f = 3
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	data.i = 1
+	// Change the futex variable to keep late-starters from stalling.
+	data.f = 0
+	sync.futex_broadcast(&data.f)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Calls `sync.futex_wait_with_timeout` on a futex whose value (1) matches the
+// expected value and is never changed, with a timeout of `SLEEP_TIME`. The
+// return value is not checked; presumably the test passes as long as the call
+// returns before the fail timeout.
+@test
+test_futex_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	f: sync.Futex = 1
+	sync.futex_wait_with_timeout(&f, 1, SLEEP_TIME)
+}
+
+//
+// core:sync/extended.odin
+//
+
+// Exercises `sync.Wait_Group` as both a start gate and a completion counter.
+//
+// `step1` (count 1) holds the workers back until the test thread has set
+// `data.i = 1`. `step2` (count `THREADS`) is decremented by each worker after
+// it atomically adds `data.i` to the shared counter. Expects both wait-group
+// counters to be 0 and the shared counter to equal `THREADS`.
+@test
+test_wait_group :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		step1: sync.Wait_Group,
+		step2: sync.Wait_Group,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.wait_group_wait(&data.step1)
+
+		n := data.i
+		intrinsics.atomic_add(&data.number, n)
+
+		sync.wait_group_done(&data.step2)
+	}
+
+	data: Data
+	data.i = -1
+	threads: [THREADS]^thread.Thread
+
+	sync.wait_group_add(&data.step1, 1)
+	sync.wait_group_add(&data.step2, THREADS)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	time.sleep(SMALL_SLEEP_TIME)
+	data.i = 1
+	// Release the workers waiting on `step1`.
+	sync.wait_group_done(&data.step1)
+
+	sync.wait_group_wait(&data.step2)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.step1.counter, 0)
+	testing.expect_value(t, data.step2.counter, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Calls `sync.wait_group_wait_with_timeout` on a zero-initialised wait group
+// with a timeout of `SLEEP_TIME`. The return value is not checked; presumably
+// the test passes as long as the call returns before the fail timeout.
+@test
+test_wait_group_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	wg: sync.Wait_Group
+	sync.wait_group_wait_with_timeout(&wg, SLEEP_TIME)
+}
+
+// Exercises `sync.Barrier` with `THREADS` workers plus the test thread.
+//
+// Workers wait on the barrier and then atomically add `data.i` to the shared
+// counter. The test thread sleeps briefly, sets `data.i = 1`, and only then
+// joins the barrier. Expects the barrier fields to read `index == 0`,
+// `generation_id == 1`, `thread_count == THREADS + 1`, and the counter to
+// equal `THREADS`.
+@test
+test_barrier :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Barrier,
+		i: int,
+		number: int,
+
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.barrier_wait(&data.b)
+
+		intrinsics.atomic_add(&data.number, data.i)
+	}
+
+	data: Data
+	data.i = -1
+	threads: [THREADS]^thread.Thread
+
+	sync.barrier_init(&data.b, THREADS + 1) // +1 for this thread, of course.
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+	time.sleep(SMALL_SLEEP_TIME)
+	data.i = 1
+	sync.barrier_wait(&data.b)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.b.index, 0)
+	testing.expect_value(t, data.b.generation_id, 1)
+	testing.expect_value(t, data.b.thread_count, THREADS + 1)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Exercises `sync.Auto_Reset_Event` with a cascade of waiting threads.
+//
+// Each worker waits on the event, increments the shared counter, and signals
+// the event again. The test thread spins until the event's `status` field
+// reads `-THREADS`, then sends the first signal. Expects `status == 1` and
+// the counter to equal `THREADS` once all workers have finished.
+@test
+test_auto_reset :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		a: sync.Auto_Reset_Event,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("AUR-%v> entering", th.id)
+		sync.auto_reset_event_wait(&data.a)
+		// log.debugf("AUR-%v> adding", th.id)
+		data.number += 1
+		// log.debugf("AUR-%v> signalling", th.id)
+		sync.auto_reset_event_signal(&data.a)
+		// log.debugf("AUR-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	// There is a chance that this test can stall if a signal is sent before
+	// all threads are queued, because it's possible for some number of threads
+	// to get to the waiting state, the signal to fire, all of the waited
+	// threads to pass successfully, then the other threads come in with no-one
+	// to run a signal.
+	//
+	// So we'll just test a fully-waited queue of cascading threads.
+	for {
+		status := intrinsics.atomic_load(&data.a.status)
+		if status == -THREADS {
+			// log.debug("All Auto_Reset_Event threads have queued.")
+			break
+		}
+		intrinsics.cpu_relax()
+	}
+
+	sync.auto_reset_event_signal(&data.a)
+
+	wait_for(threads[:])
+
+	// The last thread should leave this primitive in a signalled state.
+	testing.expect_value(t, data.a.status, 1)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Signals an Auto_Reset_Event before waiting on it from the same thread and
+// expects the status field to be 0 once the wait has returned.
+@test
+test_auto_reset_already_signalled :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	a: sync.Auto_Reset_Event
+	sync.auto_reset_event_signal(&a)
+	sync.auto_reset_event_wait(&a)
+	testing.expect_value(t, a.status, 0)
+}
+
+// Starts THREADS workers that each lock a shared Ticket_Mutex, increment a
+// counter, and unlock. Afterwards the mutex's `ticket` and `serving` fields
+// and the counter are all expected to equal THREADS.
+@test
+test_ticket_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Ticket_Mutex,
+		number: int,
+	}
+
+	// Worker body: the increment is a plain (non-atomic) write performed
+	// between lock and unlock.
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("TIC-%i> entering", th.id)
+		// intrinsics.debug_trap()
+		sync.ticket_mutex_lock(&data.m)
+		// log.debugf("TIC-%i> locked", th.id)
+		data.number += 1
+		// log.debugf("TIC-%i> unlocking", th.id)
+		sync.ticket_mutex_unlock(&data.m)
+		// log.debugf("TIC-%i> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.m.ticket, THREADS)
+	testing.expect_value(t, data.m.serving, THREADS)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Starts THREADS workers that each lock a shared Benaphore, increment a
+// counter, and unlock. Expects the benaphore's `counter` to be back at 0 and
+// the shared counter to equal THREADS.
+@test
+test_benaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Benaphore,
+		number: int,
+	}
+
+	// Worker body: plain increment performed between lock and unlock.
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+		sync.benaphore_lock(&data.b)
+		data.number += 1
+		sync.benaphore_unlock(&data.b)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.b.counter, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Starts THREADS workers that acquire a shared Recursive_Benaphore several
+// times (one try_lock, three locks, one more try_lock), increment a counter,
+// and then release once per successful acquisition. Expects the benaphore to
+// end with zeroed `counter`, `owner` and `recursion` fields.
+@test
+test_recursive_benaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Recursive_Benaphore,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("REC_BEP-%i> entering", th.id)
+		// The first try_lock may or may not succeed; its result is kept so
+		// the matching unlock is only issued if it did.
+		tried1 := sync.recursive_benaphore_try_lock(&data.b)
+		for _ in 0..<3 {
+			sync.recursive_benaphore_lock(&data.b)
+		}
+		// Second try_lock is attempted while this thread already holds the lock.
+		tried2 := sync.recursive_benaphore_try_lock(&data.b)
+		// log.debugf("REC_BEP-%i> locked", th.id)
+		data.number += 1
+		// Release the three unconditional locks first...
+		for _ in 0..<3 {
+			sync.recursive_benaphore_unlock(&data.b)
+		}
+		// ...then one unlock per try_lock that reported success.
+		if tried1 { sync.recursive_benaphore_unlock(&data.b) }
+		if tried2 { sync.recursive_benaphore_unlock(&data.b) }
+		// log.debugf("REC_BEP-%i> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	// The benaphore should be unowned at the end.
+	testing.expect_value(t, data.b.counter, 0)
+	testing.expect_value(t, data.b.owner, 0)
+	testing.expect_value(t, data.b.recursion, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Starts THREADS workers that all call `once_do_with_data_contextless` on the
+// same sync.Once with a callback that increments a counter. Expects the
+// Once to be marked done and the counter to be exactly 1.
+@test
+test_once :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		once: sync.Once,
+		number: int,
+	}
+
+	// Callback handed to the Once; receives the shared Data as a rawptr.
+	write :: proc "contextless" (data: rawptr) {
+		data := cast(^Data)data
+		data.number += 1
+	}
+
+	// Worker body: every thread races to run `write` through the Once.
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+		// log.debugf("ONCE-%v> entering", th.id)
+		sync.once_do_with_data_contextless(&data.once, write, data)
+		// log.debugf("ONCE-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.once.done, true)
+	testing.expect_value(t, data.number, 1)
+}
+
+// Parks the main thread on a sync.Parker while a helper thread sleeps for
+// SLEEP_TIME, unparks it, and increments a counter. After joining the helper,
+// expects the parker state to be 0 and the counter to be 1.
+@test
+test_park :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		car: sync.Parker,
+		number: int,
+	}
+
+	data: Data
+
+	// Helper thread: delay, wake the parked thread, then record that it ran.
+	th := thread.create_and_start_with_data(&data, proc(data: rawptr) {
+		data := cast(^Data)data
+		time.sleep(SLEEP_TIME)
+		sync.unpark(&data.car)
+		data.number += 1
+	})
+
+	sync.park(&data.car)
+
+	// Join the helper so its increment is complete before the checks below.
+	wait_for([]^thread.Thread{ th })
+
+	// Local name for the state value this test expects after park returns.
+	PARKER_EMPTY :: 0
+	testing.expect_value(t, data.car.state, PARKER_EMPTY)
+	testing.expect_value(t, data.number, 1)
+}
+
+// Smoke test: parks on a zero-initialized Parker with a timeout of SLEEP_TIME
+// and asserts nothing; only the fail timeout can make this test fail.
+@test
+test_park_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	car: sync.Parker
+	sync.park_with_timeout(&car, SLEEP_TIME)
+}
+
+// Waits on a sync.One_Shot_Event while a helper thread sleeps for SLEEP_TIME,
+// signals the event, and increments a counter. After joining the helper,
+// expects the event state to be 1 and the counter to be 1.
+@test
+test_one_shot_event :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		event: sync.One_Shot_Event,
+		number: int,
+	}
+
+	data: Data
+
+	// Helper thread: delay, signal the event, then record that it ran.
+	th := thread.create_and_start_with_data(&data, proc(data: rawptr) {
+		data := cast(^Data)data
+		time.sleep(SLEEP_TIME)
+		sync.one_shot_event_signal(&data.event)
+		data.number += 1
+	})
+
+	sync.one_shot_event_wait(&data.event)
+
+	// Join the helper so its increment is complete before the checks below.
+	wait_for([]^thread.Thread{ th })
+
+	testing.expect_value(t, data.event.state, 1)
+	testing.expect_value(t, data.number, 1)
+}

+ 3 - 0
tests/core/sys/posix/structs.odin

@@ -63,6 +63,9 @@ execute_struct_checks :: proc(t: ^testing.T) {
 		waiting: for {
 			status: i32
 			wpid := posix.waitpid(pid, &status, {})
+			if status == posix.EINTR {
+				continue
+			}
 			if !testing.expectf(t, wpid != -1, "waitpid() failure: %v", posix.strerror()) {
 				return false
 			}

+ 18 - 4
vendor/box2d/box2d.odin

@@ -3,7 +3,11 @@ package vendor_box2d
 import "base:intrinsics"
 import "core:c"
 
-@(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2"
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	@(private) VECTOR_EXT :: "_simd" when #config(VENDOR_BOX2D_ENABLE_SIMD128, intrinsics.has_target_feature("simd128")) else ""
+} else {
+	@(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2"
+}
 
 when ODIN_OS == .Windows {
 	@(private) LIB_PATH :: "lib/box2d_windows_amd64_" + VECTOR_EXT + ".lib"
@@ -13,6 +17,8 @@ when ODIN_OS == .Windows {
 	@(private) LIB_PATH :: "lib/box2d_darwin_amd64_" + VECTOR_EXT + ".a"
 } else when ODIN_ARCH == .amd64 {
 	@(private) LIB_PATH :: "lib/box2d_other_amd64_" + VECTOR_EXT + ".a"
+} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	@(private) LIB_PATH :: "lib/box2d_wasm" + VECTOR_EXT + ".o"
 } else {
 	@(private) LIB_PATH :: "lib/box2d_other.a"
 }
@@ -21,8 +27,16 @@ when !#exists(LIB_PATH) {
 	#panic("Could not find the compiled box2d libraries at \"" + LIB_PATH + "\", they can be compiled by running the `build.sh` script at `" + ODIN_ROOT + "vendor/box2d/build_box2d.sh\"`")
 }
 
-foreign import lib {
-	LIB_PATH,
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	when VECTOR_EXT == "_simd" {
+		foreign import lib "lib/box2d_wasm_simd.o"
+	} else {
+		foreign import lib "lib/box2d_wasm.o"
+	}
+} else {
+	foreign import lib {
+		LIB_PATH,
+	}
 }
 
 
@@ -1520,4 +1534,4 @@ IsValid :: proc{
 	Joint_IsValid,
 
 	IsValidRay,
-}
+}

+ 4 - 0
vendor/box2d/box2d_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package vendor_box2d
+
+@(require) import _ "vendor:libc"

+ 2 - 0
vendor/box2d/build_box2d.sh

@@ -68,5 +68,7 @@ esac
 
 cd ..
 
+make -f wasm.Makefile
+
 rm -rf v3.0.0.tar.gz
 rm -rf box2d-3.0.0

BIN
vendor/box2d/lib/box2d_wasm.o


BIN
vendor/box2d/lib/box2d_wasm_simd.o


+ 32 - 0
vendor/box2d/wasm.Makefile

@@ -0,0 +1,32 @@
+# Custom Makefile to build box2d for Odin's WASM targets.
+# I tried to make a cmake toolchain file for this / use cmake but this is far easier.
+# NOTE: We are pretending to be emscripten to box2d so it takes WASM code paths, but we don't actually use emscripten.
+
+# CC = $(shell brew --prefix llvm)/bin/clang
+# LD = $(shell brew --prefix llvm)/bin/wasm-ld
+
+# box2d release whose extracted source tree (box2d-$(VERSION)/) is compiled.
+VERSION   = 3.0.0
+SRCS      = $(wildcard box2d-$(VERSION)/src/*.c)
+# Each source file yields two objects: a plain one and a `_simd` one.
+OBJS_SIMD = $(SRCS:.c=_simd.o)
+OBJS      = $(SRCS:.c=.o)
+# C headers are taken from Odin's vendor/libc directory.
+SYSROOT   = $(shell odin root)/vendor/libc
+CFLAGS    = -Ibox2d-$(VERSION)/include -Ibox2d-$(VERSION)/Extern/simde --target=wasm32 -D__EMSCRIPTEN__ -DNDEBUG -O3 --sysroot=$(SYSROOT)
+
+# Default target: link both combined objects, then remove the per-file objects.
+all: lib/box2d_wasm.o lib/box2d_wasm_simd.o clean
+
+# Plain per-file object.
+%.o: %.c
+	$(CC) -c $(CFLAGS) $< -o $@
+
+# Per-file object compiled with -msimd128.
+%_simd.o: %.c
+	$(CC) -c $(CFLAGS) -msimd128 $< -o $@
+
+# `$(LD) -r` merges the per-file objects into one relocatable object.
+lib/box2d_wasm.o: $(OBJS)
+	$(LD) -r -o lib/box2d_wasm.o $(OBJS)
+
+lib/box2d_wasm_simd.o: $(OBJS_SIMD)
+	$(LD) -r -o lib/box2d_wasm_simd.o $(OBJS_SIMD)
+
+# Removes only the intermediate per-file objects, not the files in lib/.
+clean:
+	rm -rf $(OBJS) $(OBJS_SIMD)
+
+.PHONY: clean

+ 5 - 0
vendor/cgltf/cgltf.odin

@@ -5,6 +5,7 @@ LIB :: (
 	     "lib/cgltf.lib"      when ODIN_OS == .Windows
 	else "lib/cgltf.a"        when ODIN_OS == .Linux
 	else "lib/darwin/cgltf.a" when ODIN_OS == .Darwin
+	else "lib/cgltf_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -13,7 +14,11 @@ when LIB != "" {
 		// Windows library is shipped with the compiler, so a Windows specific message should not be needed.
 		#panic("Could not find the compiled cgltf library, it can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/cgltf/src\"`")
 	}
+}
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "lib/cgltf_wasm.o"
+} else when LIB != "" {
 	foreign import lib { LIB }
 } else {
 	foreign import lib "system:cgltf"

+ 4 - 0
vendor/cgltf/cgltf_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package cgltf
+
+@(require) import _ "vendor:libc"

BIN
vendor/cgltf/lib/cgltf_wasm.o


+ 4 - 0
vendor/cgltf/src/Makefile

@@ -6,6 +6,10 @@ else
 all: unix
 endif
 
+# Compile cgltf.c to a wasm32 object, using Odin's vendor/libc as the sysroot.
+wasm:
+	mkdir -p ../lib
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc cgltf.c -o ../lib/cgltf_wasm.o
+
 unix:
 	mkdir -p ../lib
 	$(CC) -c -O2 -Os -fPIC cgltf.c 	

+ 12 - 0
vendor/libc/README.md

@@ -0,0 +1,12 @@
+# vendor:libc
+
+A (very small) subset of a libc implementation over Odin libraries.
+This is mainly intended for use in Odin WASM builds to allow using libraries like box2d, cgltf etc. without emscripten hacks.
+
+You can use this with clang by doing `clang -c --target=wasm32 --sysroot=$(odin root)/vendor/libc` (+ all other flags and inputs).
+This will (if all the libc usage of the library is implemented) spit out a `.o` file you can use with the foreign import system.
+If you then also make sure this package is included in the Odin side of the project (`@(require) import "vendor:libc"`) you will be able
+to compile to WASM like Odin expects.
+
+This is currently used by `vendor:box2d`, `vendor:stb/image`, `vendor:stb/truetype`, `vendor:stb/rect_pack`, and `vendor:cgltf`.
+You can see how building works by looking at those.

+ 15 - 0
vendor/libc/assert.odin

@@ -0,0 +1,15 @@
+package odin_libc
+
+import "base:runtime"
+
+// Target of the C `assert` macro declared in include/assert.h. Converts the
+// C-side function name, file, line and expression text into an Odin source
+// location and forwards them to the assertion failure procedure of the
+// stored libc context. Never returns.
+@(require, linkage="strong", link_name="__odin_libc_assert_fail")
+__odin_libc_assert_fail :: proc "c" (func: cstring, file: cstring, line: i32, expr: cstring) -> ! {
+	context = g_ctx
+
+	// C gives us no column information, so it is reported as 0.
+	where_: runtime.Source_Code_Location
+	where_.file_path = string(file)
+	where_.line      = line
+	where_.column    = 0
+	where_.procedure = string(func)
+
+	message := string(expr)
+	context.assertion_failure_proc("runtime assertion", message, where_)
+}

+ 16 - 0
vendor/libc/include/assert.h

@@ -0,0 +1,16 @@
+/*
+ * Minimal assert.h for the Odin-backed libc.
+ * With NDEBUG defined, assert(e) expands to a no-op. Otherwise a failing
+ * expression calls __odin_libc_assert_fail with the enclosing function name,
+ * the file name, the line number and the stringified expression.
+ */
+#ifdef NDEBUG
+#define	assert(e)	((void)0)
+#else
+
+/* Use __FILE_NAME__ when the compiler defines it, otherwise fall back to __FILE__. */
+#ifdef __FILE_NAME__
+#define __ASSERT_FILE_NAME __FILE_NAME__
+#else /* __FILE_NAME__ */
+#define __ASSERT_FILE_NAME __FILE__
+#endif /* __FILE_NAME__ */
+
+/* Arguments, in order: function name, file name, line, expression text. */
+void __odin_libc_assert_fail(const char *, const char *, int, const char *);
+
+/* __builtin_expect marks the failing branch as the unlikely one. */
+#define	assert(e) \
+    (__builtin_expect(!(e), 0) ? __odin_libc_assert_fail(__func__, __ASSERT_FILE_NAME, __LINE__, #e) : (void)0)
+
+#endif /* NDEBUG */

+ 21 - 0
vendor/libc/include/math.h

@@ -0,0 +1,21 @@
+#include <stdbool.h>
+
+/* Single-precision routines. */
+float sqrtf(float);
+float cosf(float);
+float sinf(float);
+float atan2f(float, float);
+/* Classification helpers, declared here as functions taking a float. */
+bool isnan(float);
+bool isinf(float);
+/* Double-precision routines. */
+double floor(double x);
+double ceil(double x);
+double sqrt(double x);
+double pow(double x, double y);
+double fmod(double x, double y);
+double cos(double x);
+double acos(double x);
+double fabs(double x);
+int abs(int);
+double ldexp(double, int);
+double exp(double);
+/*
+ * NOTE(review): log and sin are declared with float parameters and results
+ * here, whereas ISO C declares both with double. Confirm this is intended
+ * and matches the implementing side before changing either.
+ */
+float log(float);
+float sin(float);

+ 47 - 0
vendor/libc/include/stdio.h

@@ -0,0 +1,47 @@
+#include <stddef.h>
+#include <stdarg.h>
+
+#pragma once
+
+/* Opaque stream type; code using this header only ever handles FILE pointers. */
+typedef struct {} FILE;
+
+/* `whence` values for fseek. */
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+/*
+ * The standard streams are small integer constants cast to FILE *, not
+ * pointers to real objects, so they must never be dereferenced.
+ */
+#define stdout ((FILE *)2)
+#define stderr ((FILE *)3)
+
+/* Stream operations. */
+FILE *fopen(const char *, char *);
+int fclose(FILE *);
+int fseek(FILE *, long, int);
+long ftell(FILE *);
+size_t fread(void *, size_t, size_t, FILE *);
+size_t fwrite(const void *, size_t, size_t, FILE *);
+
+/* va_list based formatters; the variadic wrappers in this header build on them. */
+int vfprintf(FILE *, const char *, va_list);
+int vsnprintf(char *, size_t, const char *, va_list);
+
+/* Variadic front end for vsnprintf: collects the arguments and forwards them. */
+static inline int snprintf(char *buf, size_t size, const char *fmt, ...) {
+	va_list ap;
+	int written;
+
+	va_start(ap, fmt);
+	written = vsnprintf(buf, size, fmt, ap);
+	va_end(ap);
+
+	return written;
+}
+
+/* Variadic front end for vfprintf on an explicit stream. */
+static inline int fprintf(FILE *f, const char *fmt, ...) {
+	va_list ap;
+	int written;
+
+	va_start(ap, fmt);
+	written = vfprintf(f, fmt, ap);
+	va_end(ap);
+
+	return written;
+}
+
+/* Variadic front end for vfprintf that always writes to stdout. */
+static inline int printf(const char *fmt, ...) {
+	va_list ap;
+	int written;
+
+	va_start(ap, fmt);
+	written = vfprintf(stdout, fmt, ap);
+	va_end(ap);
+
+	return written;
+}

+ 19 - 0
vendor/libc/include/stdlib.h

@@ -0,0 +1,19 @@
+#include <stddef.h>
+
+/* Memory management. */
+void *malloc(size_t size);
+
+void *aligned_alloc(size_t alignment, size_t size);
+
+void free(void *);
+
+void *realloc(void *, size_t);
+
+/* Sorts `num` elements of `size` bytes each, starting at `base`, using `compare`. */
+void qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*));
+
+/* String to integer conversions. */
+int atoi(const char *);
+long atol(const char *);
+long long atoll(const char *);
+
+/* String to floating-point conversion. */
+double atof(const char *);
+
+/* Arguments, in order: input string, out-pointer for the end of the parse, numeric base. */
+long strtol(const char *, char **, int);

+ 21 - 0
vendor/libc/include/string.h

@@ -0,0 +1,21 @@
+#include <stddef.h>
+
+/* Raw memory operations. */
+void *memcpy(void *, const void *, size_t);
+void *memset(void *, int, size_t);
+void *memmove(void *, void *, size_t);
+int memcmp(const void *, const void *, size_t);
+
+/* NOTE: declared here as returning unsigned long rather than size_t. */
+unsigned long strlen(const char *str);
+
+/* Character search, from the front and from the back. */
+char *strchr(const char *, int);
+char *strrchr(const char *, int);
+
+/* String copying. */
+char *strncpy(char *, const char *, size_t);
+char *strcpy(char *, const char *);
+
+size_t strcspn(const char *, const char *);
+
+/* String comparison. */
+int strcmp(const char *, const char *);
+int strncmp(const char *, const char *, size_t);
+
+/* Substring search. */
+char *strstr(const char *, const char *);

+ 25 - 0
vendor/libc/libc.odin

@@ -0,0 +1,25 @@
+package odin_libc
+
+import "base:runtime"
+
+import "core:mem"
+
+@(private)
+g_ctx: runtime.Context
+@(private)
+g_allocator: mem.Compat_Allocator
+
+// Runs at program initialization: snapshots the current context for use by
+// the "c" entry points of this package, with its allocator replaced by a
+// mem.Compat_Allocator wrapper around the original one.
+@(init)
+init_context :: proc() {
+	snapshot := context
+
+	// The compat wrapper is what lets resizes work without the caller
+	// knowing the old size, which is how C's realloc behaves.
+	mem.compat_allocator_init(&g_allocator, snapshot.allocator)
+	snapshot.allocator = mem.compat_allocator(&g_allocator)
+
+	g_ctx = snapshot
+}
+
+// Replaces the context used by the "c" entry points of this package.
+// Defaults to the caller's current context. Unlike `init_context`, the
+// allocator of `ctx` is stored as given and is not wrapped.
+// NOTE: the allocator must respect an `old_size` of `-1` on resizes!
+set_context :: proc(ctx := context) {
+	g_ctx = ctx
+}

+ 100 - 0
vendor/libc/math.odin

@@ -0,0 +1,100 @@
+package odin_libc
+
+import "base:builtin"
+
+import "core:math"
+
+// C `sqrtf`: forwards to `math.sqrt` on an f32.
+@(require, linkage="strong", link_name="sqrtf")
+sqrtf :: proc "c" (v: f32) -> f32 {
+	root := math.sqrt(v)
+	return root
+}
+
+// C `cosf`: forwards to `math.cos` on an f32.
+@(require, linkage="strong", link_name="cosf")
+cosf :: proc "c" (v: f32) -> f32 {
+	cosine := math.cos(v)
+	return cosine
+}
+
+// C `sinf`: forwards to `math.sin` on an f32.
+@(require, linkage="strong", link_name="sinf")
+sinf :: proc "c" (v: f32) -> f32 {
+	sine := math.sin(v)
+	return sine
+}
+
+// C `atan2f`: forwards both arguments, in order, to `math.atan2`.
+@(require, linkage="strong", link_name="atan2f")
+atan2f :: proc "c" (v: f32, v2: f32) -> f32 {
+	angle := math.atan2(v, v2)
+	return angle
+}
+
+// C `isnan` for f32 values: forwards to `math.is_nan`.
+@(require, linkage="strong", link_name="isnan")
+isnan :: proc "c" (v: f32) -> bool {
+	not_a_number := math.is_nan(v)
+	return not_a_number
+}
+
+// C `isinf` for f32 values: forwards to `math.is_inf`.
+@(require, linkage="strong", link_name="isinf")
+isinf :: proc "c" (v: f32) -> bool {
+	infinite := math.is_inf(v)
+	return infinite
+}
+
+// C `sqrt`: forwards to `math.sqrt` on an f64.
+@(require, linkage="strong", link_name="sqrt")
+sqrt :: proc "c" (x: f64) -> f64 {
+	root := math.sqrt(x)
+	return root
+}
+
+// C `floor`: forwards to `math.floor` on an f64.
+@(require, linkage="strong", link_name="floor")
+floor :: proc "c" (x: f64) -> f64 {
+	rounded_down := math.floor(x)
+	return rounded_down
+}
+
+// C `ceil`: forwards to `math.ceil` on an f64.
+@(require, linkage="strong", link_name="ceil")
+ceil :: proc "c" (x: f64) -> f64 {
+	rounded_up := math.ceil(x)
+	return rounded_up
+}
+
+// C `pow`: forwards base `x` and exponent `y` to `math.pow`.
+@(require, linkage="strong", link_name="pow")
+pow :: proc "c" (x, y: f64) -> f64 {
+	power := math.pow(x, y)
+	return power
+}
+
+// C `fmod`: forwards to `math.mod` on f64 values.
+@(require, linkage="strong", link_name="fmod")
+fmod :: proc "c" (x, y: f64) -> f64 {
+	remainder := math.mod(x, y)
+	return remainder
+}
+
+// C `cos`: forwards to `math.cos` on an f64.
+@(require, linkage="strong", link_name="cos")
+cos :: proc "c" (x: f64) -> f64 {
+	cosine := math.cos(x)
+	return cosine
+}
+
+// C `acos`: forwards to `math.acos` on an f64.
+@(require, linkage="strong", link_name="acos")
+acos :: proc "c" (x: f64) -> f64 {
+	angle := math.acos(x)
+	return angle
+}
+
+// C `fabs`: forwards to `math.abs` on an f64.
+@(require, linkage="strong", link_name="fabs")
+fabs :: proc "c" (x: f64) -> f64 {
+	magnitude := math.abs(x)
+	return magnitude
+}
+
+// C `abs` for 32-bit ints. This procedure shadows the builtin of the same
+// name inside the package, so the builtin is reached through `builtin.abs`.
+@(require, linkage="strong", link_name="abs")
+abs :: proc "c" (x: i32) -> i32 {
+	magnitude := builtin.abs(x)
+	return magnitude
+}
+
+// C `ldexp`: widens the exponent to `int` and forwards to `math.ldexp`.
+@(require, linkage="strong", link_name="ldexp")
+ldexp :: proc "c" (x: f64, y: i32) -> f64 {
+	exponent := int(y)
+	return math.ldexp(x, exponent)
+}
+
+// C `exp`: forwards to `math.exp` on an f64.
+@(require, linkage="strong", link_name="exp")
+exp :: proc "c" (x: f64) -> f64 {
+	result := math.exp(x)
+	return result
+}
+
+// Exported as C `log`: natural logarithm via `math.ln`. Note that this
+// version takes and returns f32, matching the declaration in include/math.h.
+@(require, linkage="strong", link_name="log")
+log :: proc "c" (x: f32) -> f32 {
+	natural_log := math.ln(x)
+	return natural_log
+}
+
+// Exported as C `sin`: forwards to `math.sin`. Note that this version takes
+// and returns f32, matching the declaration in include/math.h.
+@(require, linkage="strong", link_name="sin")
+sin :: proc "c" (x: f32) -> f32 {
+	sine := math.sin(x)
+	return sine
+}

+ 106 - 0
vendor/libc/stdio.odin

@@ -0,0 +1,106 @@
+package odin_libc
+
+import "core:c"
+import "core:io"
+import "core:os"
+
+import stb "vendor:stb/sprintf"
+
+FILE :: uintptr
+
+// C `fopen` placeholder: exported so that C code referencing it links, but
+// calling it always ends in `unimplemented`.
+@(require, linkage="strong", link_name="fopen")
+fopen :: proc "c" (path: cstring, mode: cstring) -> FILE {
+	context = g_ctx
+	unimplemented("odin_libc.fopen")
+}
+
+// C `fseek`: repositions the stream through `os.seek`.
+// Returns 0 on success and -1 if the seek reported an error.
+@(require, linkage="strong", link_name="fseek")
+fseek :: proc "c" (file: FILE, offset: c.long, whence: i32) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	fd := os.Handle(file-1)
+	if _, seek_err := os.seek(fd, i64(offset), int(whence)); seek_err != nil {
+		return -1
+	}
+	return 0
+}
+
+// C `ftell`: reports the current stream position by seeking zero bytes
+// relative to the current position. Returns -1 if the seek fails.
+@(require, linkage="strong", link_name="ftell")
+ftell :: proc "c" (file: FILE) -> c.long {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	fd := os.Handle(file-1)
+	position, seek_err := os.seek(fd, 0, os.SEEK_CUR)
+	if seek_err == nil {
+		return c.long(position)
+	}
+	return -1
+}
+
+// C `fclose`: closes the underlying OS handle.
+// Returns 0 on success and -1 if `os.close` reported an error.
+@(require, linkage="strong", link_name="fclose")
+fclose :: proc "c" (file: FILE) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	fd := os.Handle(file-1)
+	close_err := os.close(fd)
+	if close_err == nil {
+		return 0
+	}
+	return -1
+}
+
+// C `fread`: reads up to `count` items of `size` bytes each into `buffer`.
+//
+// Returns the number of complete items read, which is less than `count` on
+// a short read or error. Returns 0 without touching the stream when either
+// `size` or `count` is 0.
+@(require, linkage="strong", link_name="fread")
+fread :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint {
+	context = g_ctx
+	if size == 0 || count == 0 {
+		return 0
+	}
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+
+	// The request covers `size * count` bytes in total, not the smaller of
+	// the two arguments.
+	n, _ := os.read(handle, buffer[:size*count])
+
+	// Convert the byte count back into a count of whole items.
+	return uint(max(0, n)) / size
+}
+
+// C `fwrite`: writes up to `count` items of `size` bytes each from `buffer`.
+//
+// Returns the number of complete items written, which is less than `count`
+// on a short write or error. Returns 0 without touching the stream when
+// either `size` or `count` is 0.
+@(require, linkage="strong", link_name="fwrite")
+fwrite :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint {
+	context = g_ctx
+	if size == 0 || count == 0 {
+		return 0
+	}
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+
+	// The request covers `size * count` bytes in total, not the smaller of
+	// the two arguments.
+	n, _ := os.write(handle, buffer[:size*count])
+
+	// Convert the byte count back into a count of whole items.
+	return uint(max(0, n)) / size
+}
+
+// C `vsnprintf`: formats into `buf` through stb's vsnprintf.
+// The size is narrowed to i32 for stb; a size that does not fit trips the
+// assertion instead of being passed on as a negative count.
+@(require, linkage="strong", link_name="vsnprintf")
+vsnprintf :: proc "c" (buf: [^]byte, count: uint, fmt: cstring, args: ^c.va_list) -> i32 {
+	capacity := i32(count)
+	assert_contextless(capacity >= 0)
+
+	written := stb.vsnprintf(buf, capacity, fmt, args)
+	return written
+}
+
+// C `vfprintf`: formats with stb's vsnprintf and writes the result to the
+// stream's OS handle.
+//
+// Formatting is first attempted into a MAX_STACK-byte stack buffer; if the
+// reported length does not fit, a heap buffer of that length is used.
+// Returns the number of bytes written, the formatter's own result when that
+// is <= 0, or -1 if the write fails.
+//
+// NOTE(review): the heap path depends on stb.vsnprintf reporting a length of
+// at least MAX_STACK for oversized output, and it sizes the buffer without
+// room for a terminator. Confirm both against stb's documented behavior.
+@(require, linkage="strong", link_name="vfprintf")
+vfprintf :: proc "c" (file: FILE, fmt: cstring, args: ^c.va_list) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+
+	MAX_STACK :: 4096
+
+	buf: []byte
+	// Tracks ownership explicitly. Comparing len(buf) against MAX_STACK
+	// missed the case where exactly MAX_STACK bytes were heap allocated.
+	heap_allocated := false
+	stack_buf: [MAX_STACK]byte = ---
+	{
+		n := stb.vsnprintf(&stack_buf[0], MAX_STACK, fmt, args)
+		if n <= 0 {
+			return n
+		}
+
+		if n >= MAX_STACK {
+			buf = make([]byte, n)
+			heap_allocated = true
+			n2 := stb.vsnprintf(raw_data(buf), i32(len(buf)), fmt, args)
+			assert(n == n2)
+		} else {
+			buf = stack_buf[:n]
+		}
+	}
+	defer if heap_allocated {
+		delete(buf)
+	}
+
+	_, err := io.write_full(os.stream_from_handle(handle), buf)
+	if err != nil {
+		return -1
+	}
+
+	return i32(len(buf))
+}

+ 119 - 0
vendor/libc/stdlib.odin

@@ -0,0 +1,119 @@
+package odin_libc
+
+import "base:runtime"
+
+import "core:c"
+import "core:slice"
+import "core:sort"
+import "core:strconv"
+import "core:strings"
+
+// C `malloc`: allocates `size` bytes, not zeroed, from the libc context's
+// allocator. An allocation error trips an assertion instead of returning NULL.
+@(require, linkage="strong", link_name="malloc")
+malloc :: proc "c" (size: uint) -> rawptr {
+	context = g_ctx
+
+	block, alloc_err := runtime.mem_alloc_non_zeroed(int(size))
+	assert(alloc_err == nil, "allocation failure")
+
+	return raw_data(block)
+}
+
+// C `aligned_alloc`: allocates `size` bytes, not zeroed, with the requested
+// alignment. An allocation error trips an assertion instead of returning NULL.
+@(require, linkage="strong", link_name="aligned_alloc")
+aligned_alloc :: proc "c" (alignment: uint, size: uint) -> rawptr {
+	context = g_ctx
+
+	block, alloc_err := runtime.mem_alloc_non_zeroed(int(size), int(alignment))
+	assert(alloc_err == nil, "allocation failure")
+
+	return raw_data(block)
+}
+
+// C `free`: releases `ptr` through the libc context's allocator.
+// Any error reported by `runtime.mem_free` is discarded.
+@(require, linkage="strong", link_name="free")
+free :: proc "c" (ptr: rawptr) {
+	context = g_ctx
+	runtime.mem_free(ptr)
+}
+
+// C `realloc`: resizes the allocation at `ptr` to `new_size` bytes without
+// zeroing any new memory, and returns the (possibly moved) allocation.
+// A resize error trips an assertion instead of returning NULL.
+@(require, linkage="strong", link_name="realloc")
+realloc :: proc "c" (ptr: rawptr, new_size: uint) -> rawptr {
+	context = g_ctx
+	// -1 for the old_size, assumed to be wrapped with the mem.Compat_Allocator to get the right size.
+	// Note that realloc does not actually care about alignment and is allowed to just align it to something
+	// else than the original allocation.
+	resized, err := runtime.non_zero_mem_resize(ptr, -1, int(new_size))
+	// Success means no error. The previous check was inverted, so it
+	// panicked on every successful resize and let failures through.
+	assert(err == nil, "realloc failure")
+	return raw_data(resized)
+}
+
+// C `qsort`: sorts `num` elements of `size` bytes each, starting at `base`,
+// by adapting the C comparator to `core:sort`'s Interface. An element is
+// ordered before another when `cmp` returns a negative value for the pair.
+@(require, linkage="strong", link_name="qsort")
+qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) {
+	context = g_ctx
+
+	// The callbacks below cannot capture locals, so the arguments travel
+	// through the interface's `collection` pointer.
+	Inputs :: struct {
+		base: rawptr,
+		num:  uint,
+		size: uint,
+		cmp:  proc "c" (a, b: rawptr) -> i32,
+	}
+
+	// Address of the element at index `idx` in the caller's array.
+	element_at :: proc(inputs: ^Inputs, idx: int) -> rawptr {
+		return rawptr(uintptr(inputs.base) + (uintptr(idx) * uintptr(inputs.size)))
+	}
+
+	state := Inputs{base, num, size, cmp}
+
+	sort.sort({
+		collection = &state,
+		len = proc(it: sort.Interface) -> int {
+			return int((^Inputs)(it.collection).num)
+		},
+		less = proc(it: sort.Interface, i, j: int) -> bool {
+			inputs := (^Inputs)(it.collection)
+			lhs := element_at(inputs, i)
+			rhs := element_at(inputs, j)
+			return inputs.cmp(lhs, rhs) < 0
+		},
+		swap = proc(it: sort.Interface, i, j: int) {
+			inputs := (^Inputs)(it.collection)
+			lhs := element_at(inputs, i)
+			rhs := element_at(inputs, j)
+			slice.ptr_swap_non_overlapping(lhs, rhs, int(inputs.size))
+		},
+	})
+}
+
+// C `atoi`: parses with `atoll` and narrows the result to i32.
+@(require, linkage="strong", link_name="atoi")
+atoi :: proc "c" (str: cstring) -> i32 {
+	wide := atoll(str)
+	return i32(wide)
+}
+
+// C `atol`: parses with `atoll` and converts the result to c.long.
+@(require, linkage="strong", link_name="atol")
+atol :: proc "c" (str: cstring) -> c.long {
+	wide := atoll(str)
+	return c.long(wide)
+}
+
+// C `atoll`: skips leading whitespace and parses a base-10 integer.
+// The parser's success flag is ignored; whatever value it produced is returned.
+@(require, linkage="strong", link_name="atoll")
+atoll :: proc "c" (str: cstring) -> c.longlong {
+	context = g_ctx
+
+	text := strings.trim_left_space(string(str))
+	value, _ := strconv.parse_i64_of_base(text, 10)
+	return c.longlong(value)
+}
+
+// C `atof`: skips leading whitespace and parses an f64.
+// The parser's success flag is ignored; whatever value it produced is returned.
+@(require, linkage="strong", link_name="atof")
+atof :: proc "c" (str: cstring) -> f64 {
+	context = g_ctx
+
+	text := strings.trim_left_space(string(str))
+	value, _ := strconv.parse_f64(text)
+	return value
+}
+
+// C `strtol`: skips leading whitespace and parses an integer in `base`.
+//
+// When `str_end` is non-nil it receives a pointer to the first character
+// the parser did not consume. A nil `str_end` is permitted by C and is now
+// skipped rather than written through. The result is clamped to the range
+// of c.long.
+@(require, linkage="strong", link_name="strtol")
+strtol :: proc "c" (str: cstring, str_end: ^cstring, base: i32) -> c.long {
+	context = g_ctx
+
+	sstr := string(str)
+	sstr  = strings.trim_left_space(sstr)
+
+	// `n` receives the number of bytes of `sstr` that were consumed.
+	n: int
+	i, _ := strconv.parse_i64_of_base(sstr, int(base), &n)
+	if str_end != nil {
+		str_end^ = cstring(raw_data(sstr)[n:])
+	}
+	return c.long(clamp(i, i64(min(c.long)), i64(max(c.long))))
+}

+ 111 - 0
vendor/libc/string.odin

@@ -0,0 +1,111 @@
+package odin_libc
+
+import "base:intrinsics"
+
+import "core:c"
+import "core:strings"
+import "core:mem"
+
+// NOTE: already defined by Odin.
+// void *memcpy(void *, const void *, size_t);
+// void *memset(void *, int, size_t);
+
+// C `memcmp`: compares the first `count` bytes of both buffers with
+// `mem.compare` and returns its result as an i32.
+@(require, linkage="strong", link_name="memcmp")
+memcmp :: proc "c" (lhs: [^]byte, rhs: [^]byte, count: uint) -> i32 {
+	length := int(count)
+	// A count too large for `int` wraps negative and is rejected here.
+	assert_contextless(length >= 0)
+
+	left  := lhs[:length]
+	right := rhs[:length]
+	return i32(mem.compare(left, right))
+}
+
+// C `strlen`: length of the NUL-terminated string, excluding the terminator.
+@(require, linkage="strong", link_name="strlen")
+strlen :: proc "c" (str: cstring) -> c.ulong {
+	length := len(str)
+	return c.ulong(length)
+}
+
+// C `strchr`: pointer to the first occurrence of byte `ch` in `str`, or nil
+// if it does not occur. Searching for 0 returns a pointer to the terminator.
+@(require, linkage="strong", link_name="strchr")
+strchr :: proc "c" (str: cstring, ch: i32) -> cstring {
+	needle   := u8(ch)
+	haystack := string(str)
+	start    := raw_data(haystack)
+
+	if needle == 0 {
+		return cstring(start[len(haystack):])
+	}
+
+	if pos := strings.index_byte(haystack, needle); pos >= 0 {
+		return cstring(start[pos:])
+	}
+	return nil
+}
+
+// C `strrchr`: pointer to the last occurrence of byte `ch` in `str`, or nil
+// if it does not occur. Searching for 0 returns a pointer to the terminator.
+@(require, linkage="strong", link_name="strrchr")
+strrchr :: proc "c" (str: cstring, ch: i32) -> cstring {
+	needle   := u8(ch)
+	haystack := string(str)
+	start    := raw_data(haystack)
+
+	if needle == 0 {
+		return cstring(start[len(haystack):])
+	}
+
+	if pos := strings.last_index_byte(haystack, needle); pos >= 0 {
+		return cstring(start[pos:])
+	}
+	return nil
+}
+
+// C `strncpy`: copies at most `count` bytes of `src` into `dst`.
+//
+// If `src` is shorter than `count`, the rest of the `count`-byte destination
+// is filled with zero bytes. If `src` is `count` bytes or longer, `dst` is
+// not NUL-terminated. Returns `dst`.
+@(require, linkage="strong", link_name="strncpy")
+strncpy :: proc "c" (dst: [^]byte, src: cstring, count: uint) -> cstring {
+	icount := int(count)
+	assert_contextless(icount >= 0)
+	cnt := min(len(src), icount)
+	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), cnt)
+	// Pad the tail that follows the copied bytes. Zeroing from `dst` itself
+	// would erase the characters that were just copied.
+	intrinsics.mem_zero(dst[cnt:], icount-cnt)
+	return cstring(dst)
+}
+
+// C `strcpy`: copies `src`, terminator included, into `dst` and returns `dst`.
+// The two regions must not overlap.
+@(require, linkage="strong", link_name="strcpy")
+strcpy :: proc "c" (dst: [^]byte, src: cstring) -> cstring {
+	// One extra byte so the NUL terminator is copied as well.
+	total := len(src)+1
+	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), total)
+	return cstring(dst)
+}
+
+// C `strcspn`: length of the leading part of `dst` that contains none of the
+// characters in `src`; the whole length of `dst` if none of them occur.
+@(require, linkage="strong", link_name="strcspn")
+strcspn :: proc "c" (dst: cstring, src: cstring) -> uint {
+	context = g_ctx
+
+	subject := string(dst)
+	reject  := string(src)
+
+	first := strings.index_any(subject, reject)
+	if first != -1 {
+		return uint(first)
+	}
+	return len(subject)
+}
+
+// C `strncmp`: compares at most `count` bytes of the two strings, stopping
+// at a NUL terminator, and returns the result of `strings.compare`.
+@(require, linkage="strong", link_name="strncmp")
+strncmp :: proc "c" (lhs: cstring, rhs: cstring, count: uint) -> i32 {
+	limit := int(count)
+	// A count too large for `int` wraps negative and is rejected here.
+	assert_contextless(limit >= 0)
+
+	left  := strings.string_from_null_terminated_ptr(([^]byte)(lhs), limit)
+	right := strings.string_from_null_terminated_ptr(([^]byte)(rhs), limit)
+
+	ordering := strings.compare(left, right)
+	return i32(ordering)
+}
+
+// C `strcmp`: compares the two strings with `strings.compare`.
+@(require, linkage="strong", link_name="strcmp")
+strcmp :: proc "c" (lhs: cstring, rhs: cstring) -> i32 {
+	left  := string(lhs)
+	right := string(rhs)
+	return i32(strings.compare(left, right))
+}
+
+// C `strstr`: pointer to the first occurrence of `substr` inside `str`, or
+// nil if there is none. An empty `substr` matches at the start of `str`.
+@(require, linkage="strong", link_name="strstr")
+strstr :: proc "c" (str: cstring, substr: cstring) -> cstring {
+	if substr == "" {
+		return str
+	}
+
+	pos := strings.index(string(str), string(substr))
+	if pos >= 0 {
+		bytes := ([^]byte)(str)
+		return cstring(bytes[pos:])
+	}
+	return nil
+}
+

+ 44 - 13
vendor/stb/image/stb_image.odin

@@ -7,6 +7,7 @@ LIB :: (
 	     "../lib/stb_image.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -15,12 +16,19 @@ when LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
+}
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbi "../lib/stb_image_wasm.o"
+	foreign import stbi { LIB }
+} else when LIB != "" {
 	foreign import stbi { LIB }
 } else {
 	foreign import stbi "system:stb_image"
 }
 
+NO_STDIO :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+
 #assert(size_of(c.int) == size_of(b32))
 #assert(size_of(b32) == size_of(c.int))
 
@@ -33,14 +41,48 @@ Io_Callbacks :: struct {
 	eof:  proc "c" (user: rawptr) -> c.int,                             // returns nonzero if we are at end of file/data
 }
 
+// Entry points that take a file name or a ^c.FILE. They are declared only
+// when NO_STDIO is false, i.e. on every target except wasm32 / wasm64p32.
+when !NO_STDIO {
+	@(default_calling_convention="c", link_prefix="stbi_")
+	foreign stbi {
+		////////////////////////////////////
+		//
+		// 8-bits-per-channel interface
+		//
+		load           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
+		load_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
+
+		////////////////////////////////////
+		//
+		// 16-bits-per-channel interface
+		//
+		load_16           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
+		load_16_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
+
+		////////////////////////////////////
+		//
+		// float-per-channel interface
+		//
+		loadf           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
+		loadf_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
+
+		// HDR detection by file name or open file
+		is_hdr           :: proc(filename: cstring) -> c.int ---
+		is_hdr_from_file :: proc(f: ^c.FILE)        -> c.int ---
+
+		// get image dimensions & components without fully decoding
+		info           :: proc(filename: cstring, x, y, comp: ^c.int) -> c.int ---
+		info_from_file :: proc(f: ^c.FILE,        x, y, comp: ^c.int) -> c.int ---
+
+		// 16-bit detection by file name or open file
+		is_16_bit           :: proc(filename: cstring) -> b32 ---
+		is_16_bit_from_file :: proc(f: ^c.FILE)        -> b32 ---
+	}
+}
+
 @(default_calling_convention="c", link_prefix="stbi_")
 foreign stbi {
 	////////////////////////////////////
 	//
 	// 8-bits-per-channel interface
 	//
-	load                :: proc(filename: cstring,                 x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
-	load_from_file      :: proc(f: ^c.FILE,                        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 
@@ -50,8 +92,6 @@ foreign stbi {
 	//
 	// 16-bits-per-channel interface
 	//
-	load_16                :: proc(filename: cstring,           x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
-	load_16_from_file      :: proc(f: ^c.FILE,                  x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_memory    :: proc(buffer: [^]byte, len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_callbacks :: proc(clbk: ^Io_Callbacks,         x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 
@@ -59,8 +99,6 @@ foreign stbi {
 	//
 	// float-per-channel interface
 	//
-	loadf                 :: proc(filename: cstring,                 x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
-	loadf_from_file       :: proc(f: ^c.FILE,                        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_memory     :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_callbacks  :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 
@@ -73,9 +111,6 @@ foreign stbi {
 	is_hdr_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr) -> c.int ---
 	is_hdr_from_memory    :: proc(buffer: [^]byte, len: c.int)       -> c.int ---
 
-	is_hdr           :: proc(filename: cstring) -> c.int ---
-	is_hdr_from_file :: proc(f: ^c.FILE)        -> c.int ---
-
 	// get a VERY brief reason for failure
 	// NOT THREADSAFE
 	failure_reason :: proc() -> cstring ---
@@ -84,13 +119,9 @@ foreign stbi {
 	image_free :: proc(retval_from_load: rawptr) ---
 
 	// get image dimensions & components without fully decoding
-	info                :: proc(filename: cstring,                 x, y, comp: ^c.int) -> c.int ---
-	info_from_file      :: proc(f: ^c.FILE,                        x, y, comp: ^c.int) -> c.int ---
 	info_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, comp: ^c.int) -> c.int ---
 	info_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, comp: ^c.int) -> c.int ---
 	
-	is_16_bit             :: proc(filename: cstring) -> b32 ---
-	is_16_bit_from_file   :: proc(f: ^c.FILE) -> b32 ---
 	is_16_bit_from_memory :: proc(buffer: [^]byte, len: c.int) -> c.int ---
 
 	// for image formats that explicitly notate that they have premultiplied alpha,

+ 5 - 0
vendor/stb/image/stb_image_resize.odin

@@ -7,6 +7,7 @@ RESIZE_LIB :: (
 	     "../lib/stb_image_resize.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image_resize.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image_resize.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_resize_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -15,7 +16,11 @@ when RESIZE_LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
+}
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "../lib/stb_image_resize_wasm.o"
+} else when RESIZE_LIB != "" {
 	foreign import lib { RESIZE_LIB }
 } else {
 	foreign import lib "system:stb_image_resize"

+ 4 - 0
vendor/stb/image/stb_image_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package stb_image
+
+@(require) import _ "vendor:libc"

+ 16 - 6
vendor/stb/image/stb_image_write.odin

@@ -7,6 +7,7 @@ WRITE_LIB :: (
 	     "../lib/stb_image_write.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image_write.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image_write.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_write_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -15,7 +16,11 @@ when WRITE_LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
+}
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbiw "../lib/stb_image_write_wasm.o"
+} else when WRITE_LIB != "" {
 	foreign import stbiw { WRITE_LIB }
 } else {
 	foreign import stbiw "system:stb_image_write"
@@ -25,12 +30,6 @@ write_func :: proc "c" (ctx: rawptr, data: rawptr, size: c.int)
 
 @(default_calling_convention="c", link_prefix="stbi_")
 foreign stbiw {
-	write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
-	write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
-	write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
-	write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32)                             -> c.int ---
-	write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int ---
-	
 	write_png_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
 	write_bmp_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
 	write_tga_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
@@ -39,3 +38,14 @@ foreign stbiw {
 	
 	flip_vertically_on_write :: proc(flip_boolean: b32) ---
 }
+
+// Writers that take a destination `filename`. They live in their own foreign
+// block so that they are only bound when NO_STDIO is false; the callback-based
+// `*_to_func` writers are declared unconditionally.
+// NOTE(review): NO_STDIO is declared outside this hunk; presumably it is true
+// for wasm targets, where the library is built with -DSTBI_WRITE_NO_STDIO — confirm.
+when !NO_STDIO {
+	@(default_calling_convention="c", link_prefix="stbi_")
+	foreign stbiw {
+		write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
+		write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
+		write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
+		write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32)                             -> c.int ---
+		write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int ---
+	}
+}

BIN
vendor/stb/lib/stb_image_resize_wasm.o


BIN
vendor/stb/lib/stb_image_wasm.o


BIN
vendor/stb/lib/stb_image_write_wasm.o


BIN
vendor/stb/lib/stb_rect_pack_wasm.o


BIN
vendor/stb/lib/stb_sprintf_wasm.o


BIN
vendor/stb/lib/stb_truetype_wasm.o


+ 5 - 0
vendor/stb/rect_pack/stb_rect_pack.odin

@@ -9,6 +9,7 @@ LIB :: (
 	     "../lib/stb_rect_pack.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_rect_pack.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_rect_pack.a" when ODIN_OS == .Darwin
+	else "../lib/stb_rect_pack_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -16,7 +17,11 @@ when LIB != "" {
 	when !#exists(LIB) {
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
+}
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "../lib/stb_rect_pack_wasm.o"
+} else when LIB != "" {
 	foreign import lib { LIB }
 } else {
 	foreign import lib "system:stb_rect_pack"

+ 4 - 0
vendor/stb/rect_pack/stb_rect_pack_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package stb_rect_pack
+
+@(require) import _ "vendor:libc"

+ 37 - 0
vendor/stb/sprintf/stb_sprintf.odin

@@ -0,0 +1,37 @@
+package stb_sprintf
+
+import "core:c"
+
+// Path of the prebuilt stb_sprintf library for the current target, or "" when
+// no prebuilt library is listed for it. The OS checks are evaluated first, the
+// wasm architectures last.
+@(private)
+LIB :: (
+	     "../lib/stb_sprintf.lib"      when ODIN_OS == .Windows
+	else "../lib/stb_sprintf.a"        when ODIN_OS == .Linux
+	else "../lib/darwin/stb_sprintf.a" when ODIN_OS == .Darwin
+	else "../lib/stb_sprintf_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+	else ""
+)
+
+// When a prebuilt library is expected but the file is missing, stop the build
+// with instructions for compiling the STB libraries.
+when LIB != "" {
+	when !#exists(LIB) {
+		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
+	}
+}
+
+// Select what to link against: the wasm object file on wasm architectures,
+// otherwise the prebuilt LIB when one is listed, otherwise a system-provided
+// stb_sprintf.
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbpf "../lib/stb_sprintf_wasm.o"
+} else when LIB != "" {
+	foreign import stbpf { LIB }
+} else {
+	foreign import stbpf "system:stb_sprintf"
+}
+
+// Bindings to the stb_sprintf C functions; the C symbols carry the `stbsp_` prefix.
+//
+// NOTE(review): `vsprintf` takes `c.va_list` by value while `vsnprintf` and
+// `vsprintfcb` take `^c.va_list`, although all three C prototypes declare the
+// parameter as `va_list va` — confirm which form is intended before relying
+// on `vsprintf`.
+@(link_prefix="stbsp_", default_calling_convention="c")
+foreign stbpf {
+	// Formats the variadic arguments into `buf` according to `fmt`.
+	sprintf    :: proc(buf: [^]byte, fmt: cstring, #c_vararg args: ..any) -> i32 ---
+	// Like `sprintf`, limited to `count` bytes; the header documents that the
+	// result is always zero-terminated (unlike regular snprintf).
+	snprintf   :: proc(buf: [^]byte, count: i32, fmt: cstring, #c_vararg args: ..any) -> i32 ---
+	// `va_list` flavour of `sprintf`.
+	vsprintf   :: proc(buf: [^]byte, fmt: cstring, va: c.va_list) -> i32 ---
+	// `va_list` flavour of `snprintf`; always zero-terminates per the header.
+	vsnprintf  :: proc(buf: [^]byte, count: i32, fmt: cstring, va: ^c.va_list) -> i32 ---
+	// Formats into `buf` and hands the output to `callback` in chunks of up to
+	// STB_SPRINTF_MIN characters (512 unless overridden when the C library is
+	// built). `buf` must hold at least that many characters. `user` is passed
+	// through to the callback unchanged.
+	vsprintfcb :: proc(callback: SPRINTFCB, user: rawptr, buf: [^]byte, fmt: cstring, va: ^c.va_list) -> i32 ---
+
+	// Sets the characters used as the thousands separator (`comma`) and the
+	// decimal point (`period`). Declared and defined as public API in
+	// stb_sprintf.h (`stbsp_set_separators`).
+	set_separators :: proc(comma, period: c.char) ---
+}
+
+// Callback type used by `vsprintfcb`. It receives the buffer that was just
+// filled (`buf`, `len` bytes) together with the caller's `user` pointer, and
+// returns the next buffer to write into, or nil to stop converting.
+// NOTE(review): the C typedef returns a writable `char *`; `cstring` is used
+// here for the return type — confirm this is the intended Odin type.
+SPRINTFCB :: #type proc "c" (buf: [^]byte, user: rawptr, len: i32) -> cstring

+ 12 - 2
vendor/stb/src/Makefile

@@ -8,17 +8,24 @@ endif
 
 wasm:
 	mkdir -p ../lib
-	$(CC) -c -Os --target=wasm32 -nostdlib stb_truetype_wasm.c -o ../lib/stb_truetype_wasm.o
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image.c        -o ../lib/stb_image_wasm.o        -DSTBI_NO_STDIO
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_write.c  -o ../lib/stb_image_write_wasm.o  -DSTBI_WRITE_NO_STDIO 
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_resize.c -o ../lib/stb_image_resize_wasm.o
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_truetype.c     -o ../lib/stb_truetype_wasm.o
+	# $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_vorbis.c       -o ../lib/stb_vorbis_wasm.o       -DSTB_VORBIS_NO_STDIO
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_rect_pack.c    -o ../lib/stb_rect_pack_wasm.o
+	$(CC) -c -Os --target=wasm32                                          stb_sprintf.c      -o ../lib/stb_sprintf_wasm.o
 
 unix:
 	mkdir -p ../lib
-	$(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c
+	$(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c stb_sprintf.c
 	$(AR) rcs ../lib/stb_image.a        stb_image.o
 	$(AR) rcs ../lib/stb_image_write.a  stb_image_write.o
 	$(AR) rcs ../lib/stb_image_resize.a stb_image_resize.o
 	$(AR) rcs ../lib/stb_truetype.a     stb_truetype.o
 	$(AR) rcs ../lib/stb_rect_pack.a    stb_rect_pack.o
 	$(AR) rcs ../lib/stb_vorbis.a       stb_vorbis.o
+	$(AR) rcs ../lib/stb_sprintf.a      stb_sprintf.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image.so         -o ../lib/stb_image.so        stb_image.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_write.so   -o ../lib/stb_image_write.so  stb_image_write.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_resize.so  -o ../lib/stb_image_resize.so stb_image_resize.o
@@ -47,4 +54,7 @@ darwin:
 	$(CC) -arch x86_64 -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-x86_64.o -mmacosx-version-min=10.12
 	$(CC) -arch arm64  -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-arm64.o -mmacosx-version-min=10.12
 	lipo -create stb_vorbis-x86_64.o stb_vorbis-arm64.o -output ../lib/darwin/stb_vorbis.a
+	$(CC) -arch x86_64 -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-x86_64.o -mmacosx-version-min=10.12
+	$(CC) -arch arm64  -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-arm64.o -mmacosx-version-min=10.12
+	lipo -create stb_sprintf-x86_64.o stb_sprintf-arm64.o -output ../lib/darwin/stb_sprintf.a
 	rm *.o

+ 2 - 0
vendor/stb/src/stb_sprintf.c

@@ -0,0 +1,2 @@
+#define STB_SPRINTF_IMPLEMENTATION
+#include "stb_sprintf.h"

+ 1906 - 0
vendor/stb/src/stb_sprintf.h

@@ -0,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length read-backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do its work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses an internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptrdiff_t (%zd %td) as well, and also accepts %jd.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 4 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer)
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute)
+ #if __has_attribute(format)
+   #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v)  (void)(v)
+#else
+#define STBSP__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+static char stbsp__period = '.';
+static char stbsp__comma = ',';
+static struct
+{
+   short temp; // force next field to be 2-byte aligned
+   char pair[201];
+} stbsp__digitpair =
+{
+  0,
+   "00010203040506070809101112131415161718192021222324"
+   "25262728293031323334353637383940414243444546474849"
+   "50515253545556575859606162636465666768697071727374"
+   "75767778798081828384858687888990919293949596979899"
+};
+
+// Replaces the separator characters kept in the file-level statics:
+// `pcomma` overrides stbsp__comma (initially ',') and `pperiod` overrides
+// stbsp__period (initially '.').
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod)
+{
+   stbsp__comma = pcomma;
+   stbsp__period = pperiod;
+}
+
+#define STBSP__LEFTJUST 1
+#define STBSP__LEADINGPLUS 2
+#define STBSP__LEADINGSPACE 4
+#define STBSP__LEADING_0X 8
+#define STBSP__LEADINGZERO 16
+#define STBSP__INTMAX 32
+#define STBSP__TRIPLET_COMMA 64
+#define STBSP__NEGATIVE 128
+#define STBSP__METRIC_SUFFIX 256
+#define STBSP__HALFWIDTH 512
+#define STBSP__METRIC_NOSPACE 1024
+#define STBSP__METRIC_1024 2048
+#define STBSP__METRIC_JEDEC 4096
+
+// Writes the sign prefix selected by the flag word `fl` into `sign`.
+// sign[0] receives the prefix length (0 or 1); when it is 1, sign[1] holds the
+// character. Priority: '-' for STBSP__NEGATIVE, then ' ' for
+// STBSP__LEADINGSPACE, then '+' for STBSP__LEADINGPLUS. When none of the flags
+// is set only sign[0] is written.
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign)
+{
+   char prefix;
+
+   if (fl & STBSP__NEGATIVE)
+      prefix = '-';
+   else if (fl & STBSP__LEADINGSPACE)
+      prefix = ' ';
+   else if (fl & STBSP__LEADINGPLUS)
+      prefix = '+';
+   else {
+      // no sign requested: empty prefix
+      sign[0] = 0;
+      return;
+   }
+
+   sign[0] = 1;
+   sign[1] = prefix;
+}
+
+// Returns the length of the string `s`, looking at no more than `limit`
+// characters: the result is the offset of the first 0 byte, or `limit` when no
+// terminator is found within the first `limit` characters.
+// Marked STBSP__ASAN because the word-at-a-time scan below may read past the
+// terminator on purpose.
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit)
+{
+   char const * sn = s;
+
+   // get up to 4-byte alignment
+   for (;;) {
+      if (((stbsp__uintptr)sn & 3) == 0)
+         break;
+
+      if (!limit || *sn == 0)
+         return (stbsp__uint32)(sn - s);
+
+      ++sn;
+      --limit;
+   }
+
+   // scan over 4 bytes at a time to find terminating 0
+   // this will intentionally scan up to 3 bytes past the end of buffers,
+   // but because it works 4B aligned, it will never cross page boundaries
+   // (hence the STBSP__ASAN markup; the over-read here is intentional
+   // and harmless)
+   while (limit >= 4) {
+      stbsp__uint32 v = *(stbsp__uint32 *)sn;
+      // bit hack to find if there's a 0 byte in there
+      // (the expression is nonzero when some byte of v is 0; the exact
+      // position is then located by the byte loop below)
+      if ((v - 0x01010101) & (~v) & 0x80808080UL)
+         break;
+
+      sn += 4;
+      limit -= 4;
+   }
+
+   // handle the last few characters to find actual size
+   while (limit && *sn) {
+      ++sn;
+      --limit;
+   }
+
+   return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+   static char hex[] = "0123456789abcdefxp";
+   static char hexu[] = "0123456789ABCDEFXP";
+   char *bf;
+   char const *f;
+   int tlen = 0;
+
+   bf = buf;
+   f = fmt;
+   for (;;) {
+      stbsp__int32 fw, pr, tz;
+      stbsp__uint32 fl;
+
+      // macros for the callback buffer stuff
+      #define stbsp__chk_cb_bufL(bytes)                        \
+         {                                                     \
+            int len = (int)(bf - buf);                         \
+            if ((len + (bytes)) >= STB_SPRINTF_MIN) {          \
+               tlen += len;                                    \
+               if (0 == (bf = buf = callback(buf, user, len))) \
+                  goto done;                                   \
+            }                                                  \
+         }
+      #define stbsp__chk_cb_buf(bytes)    \
+         {                                \
+            if (callback) {               \
+               stbsp__chk_cb_bufL(bytes); \
+            }                             \
+         }
+      #define stbsp__flush_cb()                      \
+         {                                           \
+            stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+         } // flush if there is even one byte in the buffer
+      #define stbsp__cb_buf_clamp(cl, v)                \
+         cl = v;                                        \
+         if (callback) {                                \
+            int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+            if (cl > lg)                                \
+               cl = lg;                                 \
+         }
+
+      // fast copy everything up to the next % (or end of string)
+      for (;;) {
+         while (((stbsp__uintptr)f) & 3) {
+         schk1:
+            if (f[0] == '%')
+               goto scandd;
+         schk2:
+            if (f[0] == 0)
+               goto endfmt;
+            stbsp__chk_cb_buf(1);
+            *bf++ = f[0];
+            ++f;
+         }
+         for (;;) {
+            // Check if the next 4 bytes contain %(0x25) or end of string.
+            // Using the 'hasless' trick:
+            // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+            stbsp__uint32 v, c;
+            v = *(stbsp__uint32 *)f;
+            c = (~v) & 0x80808080;
+            if (((v ^ 0x25252525) - 0x01010101) & c)
+               goto schk1;
+            if ((v - 0x01010101) & c)
+               goto schk2;
+            if (callback)
+               if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+                  goto schk1;
+            #ifdef STB_SPRINTF_NOUNALIGNED
+                if(((stbsp__uintptr)bf) & 3) {
+                    bf[0] = f[0];
+                    bf[1] = f[1];
+                    bf[2] = f[2];
+                    bf[3] = f[3];
+                } else
+            #endif
+            {
+                *(stbsp__uint32 *)bf = v;
+            }
+            bf += 4;
+            f += 4;
+         }
+      }
+   scandd:
+
+      ++f;
+
+      // ok, we have a percent, read the modifiers first
+      fw = 0;
+      pr = -1;
+      fl = 0;
+      tz = 0;
+
+      // flags
+      for (;;) {
+         switch (f[0]) {
+         // if we have left justify
+         case '-':
+            fl |= STBSP__LEFTJUST;
+            ++f;
+            continue;
+         // if we have leading plus
+         case '+':
+            fl |= STBSP__LEADINGPLUS;
+            ++f;
+            continue;
+         // if we have leading space
+         case ' ':
+            fl |= STBSP__LEADINGSPACE;
+            ++f;
+            continue;
+         // if we have leading 0x
+         case '#':
+            fl |= STBSP__LEADING_0X;
+            ++f;
+            continue;
+         // if we have thousand commas
+         case '\'':
+            fl |= STBSP__TRIPLET_COMMA;
+            ++f;
+            continue;
+         // if we have kilo marker (none->kilo->kibi->jedec)
+         case '$':
+            if (fl & STBSP__METRIC_SUFFIX) {
+               if (fl & STBSP__METRIC_1024) {
+                  fl |= STBSP__METRIC_JEDEC;
+               } else {
+                  fl |= STBSP__METRIC_1024;
+               }
+            } else {
+               fl |= STBSP__METRIC_SUFFIX;
+            }
+            ++f;
+            continue;
+         // if we don't want space between metric suffix and number
+         case '_':
+            fl |= STBSP__METRIC_NOSPACE;
+            ++f;
+            continue;
+         // if we have leading zero
+         case '0':
+            fl |= STBSP__LEADINGZERO;
+            ++f;
+            goto flags_done;
+         default: goto flags_done;
+         }
+      }
+   flags_done:
+
+      // get the field width
+      if (f[0] == '*') {
+         fw = va_arg(va, stbsp__uint32);
+         ++f;
+      } else {
+         while ((f[0] >= '0') && (f[0] <= '9')) {
+            fw = fw * 10 + f[0] - '0';
+            f++;
+         }
+      }
+      // get the precision
+      if (f[0] == '.') {
+         ++f;
+         if (f[0] == '*') {
+            pr = va_arg(va, stbsp__uint32);
+            ++f;
+         } else {
+            pr = 0;
+            while ((f[0] >= '0') && (f[0] <= '9')) {
+               pr = pr * 10 + f[0] - '0';
+               f++;
+            }
+         }
+      }
+
+      // handle integer size overrides
+      switch (f[0]) {
+      // are we halfwidth?
+      case 'h':
+         fl |= STBSP__HALFWIDTH;
+         ++f;
+         if (f[0] == 'h')
+            ++f;  // QUARTERWIDTH
+         break;
+      // are we 64-bit (unix style)
+      case 'l':
+         fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+         ++f;
+         if (f[0] == 'l') {
+            fl |= STBSP__INTMAX;
+            ++f;
+         }
+         break;
+      // are we 64-bit on intmax? (c99)
+      case 'j':
+         fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      case 't':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit (msft style)
+      case 'I':
+         if ((f[1] == '6') && (f[2] == '4')) {
+            fl |= STBSP__INTMAX;
+            f += 3;
+         } else if ((f[1] == '3') && (f[2] == '2')) {
+            f += 3;
+         } else {
+            fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+            ++f;
+         }
+         break;
+      default: break;
+      }
+
+      // handle each replacement
+      switch (f[0]) {
+         #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+         char num[STBSP__NUMSZ];
+         char lead[8];
+         char tail[8];
+         char *s;
+         char const *h;
+         stbsp__uint32 l, n, cs;
+         stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+         double fv;
+#endif
+         stbsp__int32 dp;
+         char const *sn;
+
+      case 's':
+         // get the string
+         s = va_arg(va, char *);
+         if (s == 0)
+            s = (char *)"null";
+         // get the length, limited to desired precision
+         // always limit to ~0u chars since our counts are 32b
+         l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         // copy the string in
+         goto scopy;
+
+      case 'c': // char
+         // get the character
+         s = num + STBSP__NUMSZ - 1;
+         *s = (char)va_arg(va, int);
+         l = 1;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+
+      case 'n': // weird write-bytes specifier
+      {
+         int *d = va_arg(va, int *);
+         *d = tlen + (int)(bf - buf);
+      } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+      case 'A':              // float
+      case 'a':              // hex float
+      case 'G':              // float
+      case 'g':              // float
+      case 'E':              // float
+      case 'e':              // float
+      case 'f':              // float
+         va_arg(va, double); // eat it
+         s = (char *)"No float";
+         l = 8;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         cs = 0;
+         STBSP__NOTUSED(dp);
+         goto scopy;
+#else
+      case 'A': // hex float
+      case 'a': // hex float
+         h = (f[0] == 'A') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+            fl |= STBSP__NEGATIVE;
+
+         s = num + 64;
+
+         stbsp__lead_sign(fl, lead);
+
+         if (dp == -1023)
+            dp = (n64) ? -1022 : 0;
+         else
+            n64 |= (((stbsp__uint64)1) << 52);
+         n64 <<= (64 - 56);
+         if (pr < 15)
+            n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+         *s++ = '0';
+         *s++ = 'x';
+#else
+         lead[1 + lead[0]] = '0';
+         lead[2 + lead[0]] = 'x';
+         lead[0] += 2;
+#endif
+         *s++ = h[(n64 >> 60) & 15];
+         n64 <<= 4;
+         if (pr)
+            *s++ = stbsp__period;
+         sn = s;
+
+         // print the bits
+         n = pr;
+         if (n > 13)
+            n = 13;
+         if (pr > (stbsp__int32)n)
+            tz = pr - n;
+         pr = 0;
+         while (n--) {
+            *s++ = h[(n64 >> 60) & 15];
+            n64 <<= 4;
+         }
+
+         // print the expo
+         tail[1] = h[17];
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+         n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+
+         dp = (int)(s - sn);
+         l = (int)(s - (num + 64));
+         s = num + 64;
+         cs = 1 + (3 << 24);
+         goto scopy;
+
+      case 'G': // float
+      case 'g': // float
+         h = (f[0] == 'G') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6;
+         else if (pr == 0)
+            pr = 1; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+
+         // clamp the precision and delete extra zeros after clamp
+         n = pr;
+         if (l > (stbsp__uint32)pr)
+            l = pr;
+         while ((l > 1) && (pr) && (sn[l - 1] == '0')) {
+            --pr;
+            --l;
+         }
+
+         // should we use %e
+         if ((dp <= -4) || (dp > (stbsp__int32)n)) {
+            if (pr > (stbsp__int32)l)
+               pr = l - 1;
+            else if (pr)
+               --pr; // when using %e, there is one digit before the decimal
+            goto doexpfromg;
+         }
+         // this is the insane action to get the pr to match %g semantics for %f
+         if (dp > 0) {
+            pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+         } else {
+            pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+         }
+         goto dofloatfromg;
+
+      case 'E': // float
+      case 'e': // float
+         h = (f[0] == 'E') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+      doexpfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+         // handle leading chars
+         *s++ = sn[0];
+
+         if (pr)
+            *s++ = stbsp__period;
+
+         // handle after decimal
+         if ((l - 1) > (stbsp__uint32)pr)
+            l = pr + 1;
+         for (n = 1; n < l; n++)
+            *s++ = sn[n];
+         // trailing zeros
+         tz = pr - (l - 1);
+         pr = 0;
+         // dump expo
+         tail[1] = h[0xe];
+         dp -= 1;
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+         n = 5;
+#else
+         n = (dp >= 100) ? 5 : 4;
+#endif
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+         cs = 1 + (3 << 24); // how many tens
+         goto flt_lead;
+
+      case 'f': // float
+         fv = va_arg(va, double);
+      doafloat:
+         // do kilos
+         if (fl & STBSP__METRIC_SUFFIX) {
+            double divisor;
+            divisor = 1000.0f;
+            if (fl & STBSP__METRIC_1024)
+               divisor = 1024.0;
+            while (fl < 0x4000000) {
+               if ((fv < divisor) && (fv > -divisor))
+                  break;
+               fv /= divisor;
+               fl += 0x1000000;
+            }
+         }
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+            fl |= STBSP__NEGATIVE;
+      dofloatfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+
+         // handle the three decimal varieties
+         if (dp <= 0) {
+            stbsp__int32 i;
+            // handle 0.000*000xxxx
+            *s++ = '0';
+            if (pr)
+               *s++ = stbsp__period;
+            n = -dp;
+            if ((stbsp__int32)n > pr)
+               n = pr;
+            i = n;
+            while (i) {
+               if ((((stbsp__uintptr)s) & 3) == 0)
+                  break;
+               *s++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)s = 0x30303030;
+               s += 4;
+               i -= 4;
+            }
+            while (i) {
+               *s++ = '0';
+               --i;
+            }
+            if ((stbsp__int32)(l + n) > pr)
+               l = pr - n;
+            i = l;
+            while (i) {
+               *s++ = *sn++;
+               --i;
+            }
+            tz = pr - (n + l);
+            cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+         } else {
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+            if ((stbsp__uint32)dp >= l) {
+               // handle xxxx000*000.0
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= l)
+                        break;
+                  }
+               }
+               if (n < (stbsp__uint32)dp) {
+                  n = dp - n;
+                  if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                     while (n) {
+                        if ((((stbsp__uintptr)s) & 3) == 0)
+                           break;
+                        *s++ = '0';
+                        --n;
+                     }
+                     while (n >= 4) {
+                        *(stbsp__uint32 *)s = 0x30303030;
+                        s += 4;
+                        n -= 4;
+                     }
+                  }
+                  while (n) {
+                     if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                        cs = 0;
+                        *s++ = stbsp__comma;
+                     } else {
+                        *s++ = '0';
+                        --n;
+                     }
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr) {
+                  *s++ = stbsp__period;
+                  tz = pr;
+               }
+            } else {
+               // handle xxxxx.xxxx000*000
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= (stbsp__uint32)dp)
+                        break;
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr)
+                  *s++ = stbsp__period;
+               if ((l - dp) > (stbsp__uint32)pr)
+                  l = pr + dp;
+               while (n < l) {
+                  *s++ = sn[n];
+                  ++n;
+               }
+               tz = pr - (l - dp);
+            }
+         }
+         pr = 0;
+
+         // handle k,m,g,t
+         if (fl & STBSP__METRIC_SUFFIX) {
+            char idx;
+            idx = 1;
+            if (fl & STBSP__METRIC_NOSPACE)
+               idx = 0;
+            tail[0] = idx;
+            tail[1] = ' ';
+            {
+               if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+                  if (fl & STBSP__METRIC_1024)
+                     tail[idx + 1] = "_KMGT"[fl >> 24];
+                  else
+                     tail[idx + 1] = "_kMGT"[fl >> 24];
+                  idx++;
+                  // If printing kibits and not in jedec, add the 'i'.
+                  if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+                     tail[idx + 1] = 'i';
+                     idx++;
+                  }
+                  tail[0] = idx;
+               }
+            }
+         };
+
+      flt_lead:
+         // get the length that we copied
+         l = (stbsp__uint32)(s - (num + 64));
+         s = num + 64;
+         goto scopy;
+#endif
+
+      case 'B': // upper binary
+      case 'b': // lower binary
+         h = (f[0] == 'B') ? hexu : hex;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[0xb];
+         }
+         l = (8 << 4) | (1 << 8);
+         goto radixnum;
+
+      case 'o': // octal
+         h = hexu;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 1;
+            lead[1] = '0';
+         }
+         l = (3 << 4) | (3 << 8);
+         goto radixnum;
+
+      case 'p': // pointer
+         fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+         pr = sizeof(void *) * 2;
+         fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                                    // fall through - to X
+
+      case 'X': // upper hex
+      case 'x': // lower hex
+         h = (f[0] == 'X') ? hexu : hex;
+         l = (4 << 4) | (4 << 8);
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[16];
+         }
+      radixnum:
+         // get the number
+         if (fl & STBSP__INTMAX)
+            n64 = va_arg(va, stbsp__uint64);
+         else
+            n64 = va_arg(va, stbsp__uint32);
+
+         s = num + STBSP__NUMSZ;
+         dp = 0;
+         // clear tail, and clear leading if value is zero
+         tail[0] = 0;
+         if (n64 == 0) {
+            lead[0] = 0;
+            if (pr == 0) {
+               l = 0;
+               cs = 0;
+               goto scopy;
+            }
+         }
+         // convert to string
+         for (;;) {
+            *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+            n64 >>= (l >> 8);
+            if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+               break;
+            if (fl & STBSP__TRIPLET_COMMA) {
+               ++l;
+               if ((l & 15) == ((l >> 4) & 15)) {
+                  l &= ~15;
+                  *--s = stbsp__comma;
+               }
+            }
+         };
+         // get the tens and the comma pos
+         cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         // copy it
+         goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+         // get the integer and abs it
+         if (fl & STBSP__INTMAX) {
+            stbsp__int64 i64 = va_arg(va, stbsp__int64);
+            n64 = (stbsp__uint64)i64;
+            if ((f[0] != 'u') && (i64 < 0)) {
+               n64 = (stbsp__uint64)-i64;
+               fl |= STBSP__NEGATIVE;
+            }
+         } else {
+            stbsp__int32 i = va_arg(va, stbsp__int32);
+            n64 = (stbsp__uint32)i;
+            if ((f[0] != 'u') && (i < 0)) {
+               n64 = (stbsp__uint32)-i;
+               fl |= STBSP__NEGATIVE;
+            }
+         }
+
+#ifndef STB_SPRINTF_NOFLOAT
+         if (fl & STBSP__METRIC_SUFFIX) {
+            if (n64 < 1024)
+               pr = 0;
+            else if (pr == -1)
+               pr = 1;
+            fv = (double)(stbsp__int64)n64;
+            goto doafloat;
+         }
+#endif
+
+         // convert to string
+         s = num + STBSP__NUMSZ;
+         l = 0;
+
+         for (;;) {
+            // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+            char *o = s - 8;
+            if (n64 >= 100000000) {
+               n = (stbsp__uint32)(n64 % 100000000);
+               n64 /= 100000000;
+            } else {
+               n = (stbsp__uint32)n64;
+               n64 = 0;
+            }
+            if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+               do {
+                  s -= 2;
+                  *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+                  n /= 100;
+               } while (n);
+            }
+            while (n) {
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = (char)(n % 10) + '0';
+                  n /= 10;
+               }
+            }
+            if (n64 == 0) {
+               if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                  ++s;
+               break;
+            }
+            while (s != o)
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = '0';
+               }
+         }
+
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         if (l == 0) {
+            *--s = '0';
+            l = 1;
+         }
+         cs = l + (3 << 24);
+         if (pr < 0)
+            pr = 0;
+
+      scopy:
+         // get fw=leading/trailing space, pr=leading zeros
+         if (pr < (stbsp__int32)l)
+            pr = l;
+         n = pr + lead[0] + tail[0] + tz;
+         if (fw < (stbsp__int32)n)
+            fw = n;
+         fw -= n;
+         pr -= l;
+
+         // handle right justify and leading zeros
+         if ((fl & STBSP__LEFTJUST) == 0) {
+            if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+            {
+               pr = (fw > pr) ? fw : pr;
+               fw = 0;
+            } else {
+               fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+            }
+         }
+
+         // copy the spaces and/or zeros
+         if (fw + pr) {
+            stbsp__int32 i;
+            stbsp__uint32 c;
+
+            // copy leading spaces (or when doing %8.4d stuff)
+            if ((fl & STBSP__LEFTJUST) == 0)
+               while (fw > 0) {
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i) {
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  stbsp__chk_cb_buf(1);
+               }
+
+            // copy leader
+            sn = lead + 1;
+            while (lead[0]) {
+               stbsp__cb_buf_clamp(i, lead[0]);
+               lead[0] -= (char)i;
+               while (i) {
+                  *bf++ = *sn++;
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+
+            // copy leading zeros
+            c = cs >> 24;
+            cs &= 0xffffff;
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+            while (pr > 0) {
+               stbsp__cb_buf_clamp(i, pr);
+               pr -= i;
+               if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = '0';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x30303030;
+                     bf += 4;
+                     i -= 4;
+                  }
+               }
+               while (i) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+                     cs = 0;
+                     *bf++ = stbsp__comma;
+                  } else
+                     *bf++ = '0';
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+         }
+
+         // copy leader if there is still one
+         sn = lead + 1;
+         while (lead[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, lead[0]);
+            lead[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy the string
+         n = l;
+         while (n) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, n);
+            n -= i;
+            STBSP__UNALIGNED(while (i >= 4) {
+               *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+               bf += 4;
+               s += 4;
+               i -= 4;
+            })
+            while (i) {
+               *bf++ = *s++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy trailing zeros
+         while (tz) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tz);
+            tz -= i;
+            while (i) {
+               if ((((stbsp__uintptr)bf) & 3) == 0)
+                  break;
+               *bf++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)bf = 0x30303030;
+               bf += 4;
+               i -= 4;
+            }
+            while (i) {
+               *bf++ = '0';
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy tail if there is one
+         sn = tail + 1;
+         while (tail[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tail[0]);
+            tail[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // handle the left justify
+         if (fl & STBSP__LEFTJUST)
+            if (fw > 0) {
+               while (fw) {
+                  stbsp__int32 i;
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i--)
+                     *bf++ = ' ';
+                  stbsp__chk_cb_buf(1);
+               }
+            }
+         break;
+
+      default: // unknown, just copy code
+         s = num + STBSP__NUMSZ - 1;
+         *s = f[0];
+         l = 1;
+         fw = fl = 0;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      }
+      ++f;
+   }
+endfmt:
+
+   if (!callback)
+      *bf = 0;
+   else
+      stbsp__flush_cb();
+
+done:
+   return tlen + (int)(bf - buf);
+}
+
+// cleanup: remove the formatter-internal flag and helper macros named below so they are no longer defined past this point
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX // NOTE(review): STBSP__METRIC_NOSPACE, STBSP__METRIC_1024 and STBSP__METRIC_JEDEC are tested by the float path above but have no #undef here - confirm whether that is intentional
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+// Unbounded formatter: runs the core formatter with no callback so the output
+// goes straight into buf (the caller must provide enough room) and returns
+// whatever the core formatter returns.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{
+   va_list args;
+   int written;
+
+   va_start(args, fmt);
+   written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
+   va_end(args);
+
+   return written;
+}
+
+typedef struct stbsp__context { // state shared between vsnprintf and its output callbacks
+   char *buf;                  // next write position in the caller's destination buffer
+   int count;                  // bytes of destination space still available
+   int length;                 // running total of bytes the formatter has produced, including any that did not fit
+   char tmp[STB_SPRINTF_MIN];  // scratch area handed to the formatter when it cannot write straight into buf
+} stbsp__context;
+
+// Output callback for the bounded formatters. Adds len to the running total,
+// copies as much of buf as still fits into the destination, and returns the
+// buffer the formatter should fill next.
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *ctx = (stbsp__context *)user;
+
+   // the total counts everything produced, not just what gets stored
+   ctx->length += len;
+
+   // clamp to the space that is left
+   if (ctx->count < len)
+      len = ctx->count;
+
+   if (len != 0) {
+      // when the formatter wrote directly into the destination there is nothing to move
+      if (ctx->buf != buf) {
+         char const *src = buf;
+         char const *const stop = buf + len;
+         char *dst = ctx->buf;
+         do {
+            *dst++ = *src++;
+         } while (src < stop);
+      }
+      ctx->count -= len;
+      ctx->buf += len;
+   }
+
+   // hand out the destination itself only while a full STB_SPRINTF_MIN chunk fits
+   if ((ctx->count > 0) && (ctx->count >= STB_SPRINTF_MIN))
+      return ctx->buf;
+   return ctx->tmp;
+}
+
+// Output callback used when only the formatted length is wanted: nothing is
+// stored, the byte count is accumulated, and the formatter is always handed
+// the context's scratch buffer again.
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+   stbsp__context * ctx = (stbsp__context*)user;
+
+   (void) sizeof(buf); // buf is deliberately not read
+   ctx->length += len;
+
+   return ctx->tmp;
+}
+
+// Bounded formatter. Stores at most count bytes in buf (terminating zero
+// included) and returns the total number of characters the formatted text
+// contains, whether or not all of them fit.
+//   buf   - destination buffer; may be NULL when only the length is wanted
+//   count - size of buf in bytes; a value <= 0 means nothing can be stored
+//   fmt   - format string
+//   va    - arguments consumed by fmt
+// When count <= 0 or buf is NULL the call only measures and writes nothing.
+// Previously only the (count == 0, buf == NULL) pair took that path: a zero
+// count with a real buffer stored the terminator at buf[-1], and a negative
+// count made the priming callback call copy from a NULL source.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   c.length = 0;
+
+   if ( (count <= 0) || !buf )
+   {
+      // measure only: the counting callback never touches a destination
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+
+      // the zero-length callback call picks the first buffer (buf itself or c.tmp)
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate; count >= 1 here, so count - 1 is a valid index
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+// Variadic front end for the bounded formatter: packages the trailing
+// arguments into a va_list and forwards everything to vsnprintf.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{
+   va_list args;
+   int total;
+
+   va_start(args, fmt);
+   total = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, args);
+   va_end(args);
+
+   return total;
+}
+
+// va_list form of sprintf: unbounded write into buf through the core
+// formatter with no callback installed.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va)
+{
+   int const written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+   return written;
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies the 8 bytes of src into dest one char at a time, so a double's bit pattern can be moved to or from an integer without a type-punned access; both arguments have their address taken, so they must be lvalues (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src)                   \
+   {                                               \
+      int cn;                                      \
+      for (cn = 0; cn < 8; cn++)                   \
+         ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+   }
+
+// Splits a double into its raw fields.
+//   bits  - receives the low 52 bits of the value's bit pattern
+//   expo  - receives the 11-bit exponent field minus 1023
+//   value - the number to take apart
+// Returns the top (sign) bit of the bit pattern: 1 when set, 0 otherwise.
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+   stbsp__int64 raw = 0;
+
+   // grab the bit pattern byte by byte
+   STBSP__COPYFP(raw, value);
+
+   *expo = (stbsp__int32)(((raw >> 52) & 2047) - 1023);
+   *bits = raw & ((((stbsp__uint64)1) << 52) - 1);
+
+   return (stbsp__int32)((stbsp__uint64) raw >> 63);
+}
+
+static double const stbsp__bot[23] = { // 10^0 .. 10^22; entry i holds 10^i
+   1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+   1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = { // 10^-1 .. 10^-22; entry i holds 10^-(i+1)
+   1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+   1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = { // low-order term folded in alongside the matching stbsp__negbot entry by stbsp__raise_to_power10 (presumably that entry's representation error - confirm)
+   -5.551115123125783e-018,  -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+   4.5251888174113739e-024,  -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028,  2.0113352370744385e-029,
+   -3.0373745563400371e-030, 1.1806906454401013e-032,  -7.7705399876661076e-032, 2.0902213275965398e-033,  -7.1542424054621921e-034, -7.1542424054621926e-035,
+   2.4754073164739869e-036,  5.4846728545790429e-037,  9.2462547772103625e-038,  -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = { // 10^23, 10^46, ... 10^299; entry i holds 10^(23*(i+1))
+   1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = { // 10^-23, 10^-46, ... 10^-299; entry i holds 10^-(23*(i+1))
+   1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = { // low-order term paired with the matching stbsp__top entry in stbsp__raise_to_power10 (presumably its representation error - confirm)
+   8388608,
+   6.8601809640529717e+028,
+   -7.253143638152921e+052,
+   -4.3377296974619174e+075,
+   -1.5559416129466825e+098,
+   -3.2841562489204913e+121,
+   -3.7745893248228135e+144,
+   -1.7356668416969134e+167,
+   -3.8893577551088374e+190,
+   -9.9566444326005119e+213,
+   6.3641293062232429e+236,
+   -5.2069140800249813e+259,
+   -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = { // low-order term paired with the matching stbsp__negtop entry in stbsp__raise_to_power10 (presumably its representation error - confirm)
+   3.9565301985100693e-040,  -2.299904345391321e-063,  3.6506201437945798e-086,  1.1875228833981544e-109,
+   -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178,  -5.7778912386589953e-201,
+   7.4997100559334532e-224,  -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+   8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200) // this branch spells the 64-bit literals without the ULL suffix (presumably because that compiler lacks it - confirm)
+static stbsp__uint64 const stbsp__powten[20] = { // integer powers of ten, 10^0 .. 10^19; entry i holds 10^i
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000,
+   100000000000,
+   1000000000000,
+   10000000000000,
+   100000000000000,
+   1000000000000000,
+   10000000000000000,
+   100000000000000000,
+   1000000000000000000,
+   10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) // despite the name the value is 10^18, i.e. the smallest 19-digit integer
+#else
+static stbsp__uint64 const stbsp__powten[20] = { // integer powers of ten, 10^0 .. 10^19; entry i holds 10^i
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000ULL,
+   100000000000ULL,
+   1000000000000ULL,
+   10000000000000ULL,
+   100000000000000ULL,
+   1000000000000000ULL,
+   10000000000000000ULL,
+   100000000000000000ULL,
+   1000000000000000000ULL,
+   10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL) // despite the name the value is 10^18, i.e. the smallest 19-digit integer
+#endif
+
+#define stbsp__ddmulthi(oh, ol, xh, yh) /* oh = xh * yh; ol = low-order correction for that product, rebuilt from split halves of xh and yh (xh and yh must be lvalues) */ \
+   {                                                               \
+      double ahi = 0, alo, bhi = 0, blo;                           \
+      stbsp__int64 bt;                                             \
+      oh = xh * yh;                                                \
+      STBSP__COPYFP(bt, xh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* clear the low 27 bits of xh's bit pattern */ \
+      STBSP__COPYFP(ahi, bt);            /* ahi = xh with those bits cleared */ \
+      alo = xh - ahi;                    /* alo = what the mask removed */ \
+      STBSP__COPYFP(bt, yh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* same split for yh */   \
+      STBSP__COPYFP(bhi, bt);                                      \
+      blo = yh - bhi;                                              \
+      ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; /* partial products minus the rounded product */ \
+   }
+
+#define stbsp__ddtoS64(ob, xh, xl) /* ob = integer conversion of the pair (xh, xl): convert xh, then add the conversion of what is left over plus xl */ \
+   {                                        \
+      double ahi = 0, alo, vh, t;           \
+      ob = (stbsp__int64)xh;                \
+      vh = (double)ob;                      \
+      ahi = (xh - vh);                      /* part of xh lost by the integer conversion */ \
+      t = (ahi - xh);                       \
+      alo = (xh - (ahi - t)) - (vh + t);    /* correction for the subtraction above */ \
+      ob += (stbsp__int64)(ahi + alo + xl); \
+   }
+
+#define stbsp__ddrenorm(oh, ol) /* re-sum the pair: oh becomes oh + ol, ol becomes the part of ol that the sum did not absorb */ \
+   {                            \
+      double s;                 \
+      s = oh + ol;              \
+      ol = ol - (s - oh);       \
+      oh = s;                   \
+   }
+
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); /* adds both cross terms to ol; oh is not used; the body already ends in ';' */
+
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); /* adds the single cross term xh * yl to ol; oh is not used; the body already ends in ';' */
+
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350
+{ // multiplies d by 10^power using the power-of-ten tables and stores the result as a (high, low) pair of doubles through ohi and olo
+   double ph, pl; // running result: high part and low-order part
+   if ((power >= 0) && (power <= 22)) {
+      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); // powers 0..22 need a single table multiply
+   } else {
+      stbsp__int32 e, et, eb;
+      double p2h, p2l;
+
+      e = power;
+      if (power < 0)
+         e = -e; // e = |power|
+      et = (e * 0x2c9) >> 14; /* multiply-shift approximation of e / 23 (0x2c9 / 2^14 is about 1 / 23); a quotient, not a remainder */
+      if (et > 13)
+         et = 13; // stbsp__top and stbsp__negtop only have 13 entries
+      eb = e - (et * 23); // exponent left over after the steps of 23
+
+      ph = d;
+      pl = 0.0;
+      if (power < 0) {
+         if (eb) {
+            --eb; // stbsp__negbot[i] holds 10^-(i+1); NOTE(review): in range only while eb <= 21 here, i.e. |power| <= 321 once et is clamped - confirm against the "-323" above
+            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); // fold the table's low-order term into pl
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__negtop[i] holds 10^-(23*(i+1))
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      } else {
+         if (eb) {
+            e = eb;
+            if (eb > 22)
+               eb = 22; // stbsp__bot stops at 10^22, so a larger leftover is applied in two multiplies
+            e -= eb; // e = part of the leftover still to apply after the first multiply
+            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+            if (e) {
+               stbsp__ddrenorm(ph, pl);
+               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); // NOTE(review): stbsp__bot[e] is in range only while e <= 22, i.e. power <= 343 - confirm against the "+350" above
+               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+               ph = p2h;
+               pl = p2l;
+            }
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__top[i] holds 10^(23*(i+1))
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      }
+   }
+   stbsp__ddrenorm(ph, pl); // final re-sum so ph carries the rounded total and pl the remainder
+   *ohi = ph;
+   *olo = pl;
+}
+
+// Converts a double into a string of decimal digits plus a decimal-point position.
+//   start       - receives a pointer to the first character of the result; this points into `out`,
+//                 or at a string literal ("NaN" / "Inf") for the special values
+//   len         - receives the number of characters available at *start
+//   out         - scratch buffer; digits are written backwards starting from out + 64
+//   decimal_pos - receives the position of the decimal point relative to the digits, or
+//                 STBSP__SPECIAL when value is +/-INF or NAN
+//   value       - the number to convert
+//   frac_digits - normally the absolute number of digits after the decimal point to round to;
+//                 if bit 31 (0x80000000) is set, the low 31 bits plus one are instead the number
+//                 of significant digits to round to (the form wanted for %g and %e)
+// Returns the sign bit of value (non-zero when the sign bit is set, including for -0 and NaN).
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047);      // 11-bit biased exponent field
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63); // sign bit
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      // a non-zero mantissa (low 52 bits) selects "NaN", an all-zero mantissa selects "Inf"
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero (shifting out the sign bit leaves nothing)
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals: lower expo once for every leading zero bit of the mantissa
+      // (the mantissa is known to be non-zero here, so the loop terminates)
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get as much precision from the double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   // bit 31 is the "significant digits" flag, so the count lives in the remaining 31 bits (0x7fffffff);
+   // masking with fewer bits would silently truncate very large counts into the rounding range below
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7fffffff) + 1) : (tens + frac_digits);
+   if ((frac_digits < 24)) {
+      // dg = number of decimal digits currently held in bits
+      stbsp__uint32 dg = 1;
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) {
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         // rounding up carried into a new leading digit, so the decimal point moves too
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens;
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      // strip groups of three zeros with 64-bit math until the value fits in 32 bits
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      // then continue with cheaper 32-bit math
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string (digits are produced least-significant first, writing backwards from out + 64)
+   out += 64;
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      // emit two digits at a time from the digit-pair lookup table
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         // the pair-wise output may have produced one leading '0'; drop it
+         if ((e) && (out[0] == '0')) {
+            ++out;
+            --e;
+         }
+         break;
+      }
+      // more significant chunks follow, so zero-pad this chunk to its full 8 digits
+      while (out != o) {
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/

+ 0 - 46
vendor/stb/src/stb_truetype_wasm.c

@@ -1,46 +0,0 @@
-#include <stddef.h>
-
-void *stbtt_malloc(size_t size);
-void stbtt_free(void *ptr);
-
-void stbtt_qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*));
-
-double stbtt_floor(double x);
-double stbtt_ceil(double x);
-double stbtt_sqrt(double x);
-double stbtt_pow(double x, double y);
-double stbtt_fmod(double x, double y);
-double stbtt_cos(double x);
-double stbtt_acos(double x);
-double stbtt_fabs(double x);
-
-unsigned long stbtt_strlen(const char *str);
-
-void *memcpy(void *dst, const void *src, size_t count);
-void *memset(void *dst, int x, size_t count);
-
-#define STBRP_SORT stbtt_qsort
-#define STBRP_ASSERT(condition) ((void)0)
-
-#define STBTT_malloc(x,u)  ((void)(u),stbtt_malloc(x))
-#define STBTT_free(x,u)    ((void)(u),stbtt_free(x))
-
-#define STBTT_assert(condition) ((void)0)
-
-#define STBTT_ifloor(x)   ((int) stbtt_floor(x))
-#define STBTT_iceil(x)    ((int) stbtt_ceil(x))
-#define STBTT_sqrt(x)      stbtt_sqrt(x)
-#define STBTT_pow(x,y)     stbtt_pow(x,y)
-#define STBTT_fmod(x,y)    stbtt_fmod(x,y)
-#define STBTT_cos(x)       stbtt_cos(x)
-#define STBTT_acos(x)      stbtt_acos(x)
-#define STBTT_fabs(x)      stbtt_fabs(x)
-#define STBTT_strlen(x)    stbtt_strlen(x)
-#define STBTT_memcpy       memcpy
-#define STBTT_memset       memset
-
-#define STB_RECT_PACK_IMPLEMENTATION
-#include "stb_rect_pack.h"
-
-#define STB_TRUETYPE_IMPLEMENTATION
-#include "stb_truetype.h"

+ 5 - 2
vendor/stb/truetype/stb_truetype.odin

@@ -8,6 +8,7 @@ LIB :: (
 	     "../lib/stb_truetype.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_truetype.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_truetype.a" when ODIN_OS == .Darwin
+	else "../lib/stb_truetype_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 )
 
@@ -15,10 +16,12 @@ when LIB != "" {
 	when !#exists(LIB) {
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
+}
 
-	foreign import stbtt { LIB }
-} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
 	foreign import stbtt "../lib/stb_truetype_wasm.o"
+} else when LIB != "" {
+	foreign import stbtt { LIB }
 } else {
 	foreign import stbtt "system:stb_truetype"
 }

+ 1 - 79
vendor/stb/truetype/stb_truetype_wasm.odin

@@ -1,82 +1,4 @@
 #+build wasm32, wasm64p32
 package stb_truetype
 
-import "base:builtin"
-import "base:intrinsics"
-import "base:runtime"
-
-import "core:c"
-import "core:math"
-import "core:slice"
-import "core:sort"
-
-@(require, linkage="strong", link_name="stbtt_malloc")
-malloc :: proc "c" (size: uint) -> rawptr {
-	context = runtime.default_context()
-	ptr, _ := runtime.mem_alloc_non_zeroed(int(size))
-	return raw_data(ptr)
-}
-
-@(require, linkage="strong", link_name="stbtt_free")
-free :: proc "c" (ptr: rawptr) {
-	context = runtime.default_context()
-	builtin.free(ptr)
-}
-
-@(require, linkage="strong", link_name="stbtt_qsort")
-qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) {
-	context = runtime.default_context()
-
-	Inputs :: struct {
-		base: rawptr,
-		num:  uint,
-		size: uint,
-		cmp:  proc "c" (a, b: rawptr) -> i32,
-	}
-
-	sort.sort({
-		collection = &Inputs{base, num, size, cmp},
-		len = proc(it: sort.Interface) -> int {
-			inputs := (^Inputs)(it.collection)
-			return int(inputs.num)
-		},
-		less = proc(it: sort.Interface, i, j: int) -> bool {
-			inputs := (^Inputs)(it.collection)
-			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size)))
-			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size)))
-			return inputs.cmp(a, b) < 0
-		},
-		swap = proc(it: sort.Interface, i, j: int) {
-			inputs := (^Inputs)(it.collection)
-
-			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size)))
-			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size)))
-
-			slice.ptr_swap_non_overlapping(a, b, int(inputs.size))
-		},
-	})
-}
-
-@(require, linkage="strong", link_name="stbtt_floor")
-floor :: proc "c" (x: f64) -> f64 { return math.floor(x) }
-@(require, linkage="strong", link_name="stbtt_ceil")
-ceil :: proc "c" (x: f64) -> f64 { return math.ceil(x) }
-@(require, linkage="strong", link_name="stbtt_sqrt")
-sqrt :: proc "c" (x: f64) -> f64 { return math.sqrt(x) }
-@(require, linkage="strong", link_name="stbtt_pow")
-pow :: proc "c" (x, y: f64) -> f64 { return math.pow(x, y) }
-@(require, linkage="strong", link_name="stbtt_fmod")
-fmod :: proc "c" (x, y: f64) -> f64 { return math.mod(x, y) }
-@(require, linkage="strong", link_name="stbtt_cos")
-cos :: proc "c" (x: f64) -> f64 { return math.cos(x) }
-@(require, linkage="strong", link_name="stbtt_acos")
-acos :: proc "c" (x: f64) -> f64 { return math.acos(x) }
-@(require, linkage="strong", link_name="stbtt_fabs")
-fabs :: proc "c" (x: f64) -> f64 { return math.abs(x) }
-
-@(require, linkage="strong", link_name="stbtt_strlen")
-strlen :: proc "c" (str: cstring) -> c.ulong { return c.ulong(len(str)) }
-
-// NOTE: defined in runtime.
-// void *memcpy(void *dst, const void *src, size_t count);
-// void *memset(void *dst, int x, size_t count);
+@(require) import _ "vendor:libc"

Some files were not shown because too many files changed in this diff