Browse Source

Merge branch 'master' into file-tags-without-comments

Karl Zylinski 11 months ago
parent
commit
093ade0504
97 changed files with 7030 additions and 1020 deletions
  1. 2 2
      base/runtime/core_builtin.odin
  2. 7 4
      base/runtime/internal.odin
  3. 2 2
      core/bytes/bytes.odin
  4. 837 63
      core/mem/alloc.odin
  5. 912 245
      core/mem/allocators.odin
  6. 103 23
      core/mem/doc.odin
  7. 436 49
      core/mem/mem.odin
  8. 23 4
      core/mem/mutex_allocator.odin
  9. 85 11
      core/mem/raw.odin
  10. 243 104
      core/mem/rollback_stack_allocator.odin
  11. 98 21
      core/mem/tracking_allocator.odin
  12. 10 0
      core/odin/parser/parser.odin
  13. 1 1
      core/os/os_freebsd.odin
  14. 19 70
      core/os/os_js.odin
  15. 1 1
      core/os/os_netbsd.odin
  16. 7 7
      core/strings/strings.odin
  17. 42 41
      core/sync/chan/chan.odin
  18. 38 37
      core/sync/extended.odin
  19. 17 8
      core/sync/futex_darwin.odin
  20. 4 4
      core/sync/futex_freebsd.odin
  21. 4 4
      core/sync/futex_linux.odin
  22. 4 4
      core/sync/futex_netbsd.odin
  23. 4 4
      core/sync/futex_openbsd.odin
  24. 4 4
      core/sync/futex_wasm.odin
  25. 2 18
      core/sync/primitives.odin
  26. 2 2
      core/sync/primitives_atomic.odin
  27. 11 0
      core/sys/darwin/sync.odin
  28. 4 0
      core/sys/info/platform_darwin.odin
  29. 4 0
      core/testing/runner.odin
  30. 22 0
      core/testing/runner_windows.odin
  31. 11 0
      core/testing/signal_handler_libc.odin
  32. 8 4
      core/testing/testing.odin
  33. 6 6
      core/thread/thread.odin
  34. 1 0
      core/thread/thread_pool.odin
  35. 8 24
      core/thread/thread_unix.odin
  36. 3 3
      core/thread/thread_windows.odin
  37. 2 0
      src/bug_report.cpp
  38. 4 2
      src/build_settings.cpp
  39. 16 0
      src/check_builtin.cpp
  40. 6 1
      src/check_expr.cpp
  41. 14 4
      src/check_stmt.cpp
  42. 32 0
      src/check_type.cpp
  43. 52 19
      src/checker.cpp
  44. 1 0
      src/entity.cpp
  45. 10 2
      src/gb/gb.h
  46. 1 1
      src/llvm_backend.hpp
  47. 44 36
      src/llvm_backend_debug.cpp
  48. 57 5
      src/llvm_backend_proc.cpp
  49. 49 2
      src/main.cpp
  50. 20 11
      src/parser.cpp
  51. 20 13
      tests/core/flags/test_core_flags.odin
  52. 2 2
      tests/core/mem/test_mem_dynamic_pool.odin
  53. 2 0
      tests/core/normal.odin
  54. 274 0
      tests/core/sync/chan/test_core_sync_chan.odin
  55. 714 0
      tests/core/sync/test_core_sync.odin
  56. 3 0
      tests/core/sys/posix/structs.odin
  57. 18 4
      vendor/box2d/box2d.odin
  58. 4 0
      vendor/box2d/box2d_wasm.odin
  59. 2 0
      vendor/box2d/build_box2d.sh
  60. BIN
      vendor/box2d/lib/box2d_wasm.o
  61. BIN
      vendor/box2d/lib/box2d_wasm_simd.o
  62. 32 0
      vendor/box2d/wasm.Makefile
  63. 5 0
      vendor/cgltf/cgltf.odin
  64. 4 0
      vendor/cgltf/cgltf_wasm.odin
  65. BIN
      vendor/cgltf/lib/cgltf_wasm.o
  66. 4 0
      vendor/cgltf/src/Makefile
  67. 12 0
      vendor/libc/README.md
  68. 15 0
      vendor/libc/assert.odin
  69. 16 0
      vendor/libc/include/assert.h
  70. 21 0
      vendor/libc/include/math.h
  71. 47 0
      vendor/libc/include/stdio.h
  72. 19 0
      vendor/libc/include/stdlib.h
  73. 21 0
      vendor/libc/include/string.h
  74. 25 0
      vendor/libc/libc.odin
  75. 100 0
      vendor/libc/math.odin
  76. 106 0
      vendor/libc/stdio.odin
  77. 119 0
      vendor/libc/stdlib.odin
  78. 111 0
      vendor/libc/string.odin
  79. 44 13
      vendor/stb/image/stb_image.odin
  80. 5 0
      vendor/stb/image/stb_image_resize.odin
  81. 4 0
      vendor/stb/image/stb_image_wasm.odin
  82. 16 6
      vendor/stb/image/stb_image_write.odin
  83. BIN
      vendor/stb/lib/stb_image_resize_wasm.o
  84. BIN
      vendor/stb/lib/stb_image_wasm.o
  85. BIN
      vendor/stb/lib/stb_image_write_wasm.o
  86. BIN
      vendor/stb/lib/stb_rect_pack_wasm.o
  87. BIN
      vendor/stb/lib/stb_sprintf_wasm.o
  88. BIN
      vendor/stb/lib/stb_truetype_wasm.o
  89. 5 0
      vendor/stb/rect_pack/stb_rect_pack.odin
  90. 4 0
      vendor/stb/rect_pack/stb_rect_pack_wasm.odin
  91. 37 0
      vendor/stb/sprintf/stb_sprintf.odin
  92. 12 2
      vendor/stb/src/Makefile
  93. 2 0
      vendor/stb/src/stb_sprintf.c
  94. 1906 0
      vendor/stb/src/stb_sprintf.h
  95. 0 46
      vendor/stb/src/stb_truetype_wasm.c
  96. 5 2
      vendor/stb/truetype/stb_truetype.odin
  97. 1 79
      vendor/stb/truetype/stb_truetype_wasm.odin

+ 2 - 2
base/runtime/core_builtin.odin

@@ -913,7 +913,7 @@ card :: proc "contextless" (s: $S/bit_set[$E; $U]) -> int {
 
 
 @builtin
 @builtin
 @(disabled=ODIN_DISABLE_ASSERT)
 @(disabled=ODIN_DISABLE_ASSERT)
-assert :: proc(condition: bool, message := "", loc := #caller_location) {
+assert :: proc(condition: bool, message := #caller_expression(condition), loc := #caller_location) {
 	if !condition {
 	if !condition {
 		// NOTE(bill): This is wrapped in a procedure call
 		// NOTE(bill): This is wrapped in a procedure call
 		// to improve performance to make the CPU not
 		// to improve performance to make the CPU not
@@ -952,7 +952,7 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! {
 
 
 @builtin
 @builtin
 @(disabled=ODIN_DISABLE_ASSERT)
 @(disabled=ODIN_DISABLE_ASSERT)
-assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) {
+assert_contextless :: proc "contextless" (condition: bool, message := #caller_expression(condition), loc := #caller_location) {
 	if !condition {
 	if !condition {
 		// NOTE(bill): This is wrapped in a procedure call
 		// NOTE(bill): This is wrapped in a procedure call
 		// to improve performance to make the CPU not
 		// to improve performance to make the CPU not

+ 7 - 4
base/runtime/internal.odin

@@ -118,16 +118,15 @@ mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> r
 DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
 DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
 
 
 mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
-	if size == 0 {
-		return nil, nil
-	}
-	if allocator.procedure == nil {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
+	if size == 0 || allocator.procedure == nil{
 		return nil, nil
 		return nil, nil
 	}
 	}
 	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
 	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
 }
 }
 
 
 mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
 		return nil, nil
 	}
 	}
@@ -135,6 +134,7 @@ mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, a
 }
 }
 
 
 mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
 		return nil, nil
 	}
 	}
@@ -174,6 +174,7 @@ mem_free_all :: #force_inline proc(allocator := context.allocator, loc := #calle
 }
 }
 
 
 _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
 _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if allocator.procedure == nil {
 	if allocator.procedure == nil {
 		return nil, nil
 		return nil, nil
 	}
 	}
@@ -215,9 +216,11 @@ _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignmen
 }
 }
 
 
 mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
 mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
 }
 }
 non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
 non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
 }
 }
 
 

+ 2 - 2
core/bytes/bytes.odin

@@ -334,7 +334,7 @@ Inputs:
 Returns:
 Returns:
 - index: The index of the byte `c`, or -1 if it was not found.
 - index: The index of the byte `c`, or -1 if it was not found.
 */
 */
-index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check {
+index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds_check {
 	i, l := 0, len(s)
 	i, l := 0, len(s)
 
 
 	// Guard against small strings.  On modern systems, it is ALWAYS
 	// Guard against small strings.  On modern systems, it is ALWAYS
@@ -469,7 +469,7 @@ Inputs:
 Returns:
 Returns:
 - index: The index of the byte `c`, or -1 if it was not found.
 - index: The index of the byte `c`, or -1 if it was not found.
 */
 */
-last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check {
+last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_check {
 	i := len(s)
 	i := len(s)
 
 
 	// Guard against small strings.  On modern systems, it is ALWAYS
 	// Guard against small strings.  On modern systems, it is ALWAYS

File diff suppressed because it is too large
+ 837 - 63
core/mem/alloc.odin


File diff suppressed because it is too large
+ 912 - 245
core/mem/allocators.odin


+ 103 - 23
core/mem/doc.odin

@@ -1,34 +1,114 @@
 /*
 /*
-package mem implements various types of allocators.
+The `mem` package implements various allocators and provides utility procedures
+for dealing with memory, pointers and slices.
 
 
+The documentation below describes basic concepts, applicable to the `mem`
+package.
 
 
-An example of how to use the `Tracking_Allocator` to track subsequent allocations
-in your program and report leaks and bad frees:
+## Pointers, multipointers, and slices
 
 
-Example:
-	package foo
+A *pointer* is an abstraction of an *address*, a numeric value representing the
+location of an object in memory. That object is said to be *pointed to* by the
+pointer. To obtain the address of a pointer, cast it to `uintptr`.
 
 
-	import "core:mem"
-	import "core:fmt"
+A multipointer is a pointer that points to multiple objects. Unlike a pointer,
+a multipointer can be indexed, but does not have a definite length. A slice is
+a pointer that points to multiple objects equipped with the length, specifying
+the amount of objects a slice points to.
 
 
-	_main :: proc() {
-		// do stuff
-	}
+When an object's value is read through a pointer, that operation is called a
+*load* operation. When memory is written through a pointer, that operation is
+called a *store* operation. Both of these operations can be called a *memory
+access operation*.
 
 
-	main :: proc() {
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		defer mem.tracking_allocator_destroy(&track)
-		context.allocator = mem.tracking_allocator(&track)
+## Allocators
 
 
-		_main()
+In C and C++ memory models, allocations of objects in memory are typically
+treated individually with a generic allocator (the `malloc` procedure), which in
+some scenarios can lead to poor cache utilization, slowdowns on individual
+objects' memory management and growing complexity of the code needing to keep
+track of the pointers and their lifetimes.
 
 
-		for _, leak in track.allocation_map {
-			fmt.printf("%v leaked %m\n", leak.location, leak.size)
-		}
-		for bad_free in track.bad_free_array {
-			fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
-		}
-	}
+Using different kinds of *allocators* for different purposes can solve these
+problems. The allocators are typically optimized for specific use-cases and
+can potentially simplify the memory management code.
+
+For example, in the context of making a game, having an Arena allocator could
+simplify allocations of any temporary memory, because the programmer doesn't
+have to keep track of which objects need to be freed every time they are
+allocated, because at the end of every frame the whole allocator is reset to
+its initial state and all objects are freed at once.
+
+The allocators have different kinds of restrictions on object lifetimes, sizes,
+alignment and can be a significant gain, if used properly. Odin supports
+allocators on a language level.
+
+Operations such as `new`, `free` and `delete` by default will use
+`context.allocator`, which can be overridden by the user. When an override
+happens all called procedures will inherit the new context and use the same
+allocator.
+
+We will define one concept to simplify the description of some allocator-related
+procedures, which is ownership. If the memory was allocated via a specific
+allocator, that allocator is said to be the *owner* of that memory region. To
+note, unlike Rust, in Odin the memory ownership model is not strict.
+
+## Alignment
+
+An address is said to be *aligned to `N` bytes*, if the address's numeric
+value is divisible by `N`. The number `N` in this case can be referred to as
+the *alignment boundary*. Typically an alignment is a power of two integer
+value.
+
+A *natural alignment* of an object is typically equal to its size. For example
+a 16 bit integer has a natural alignment of 2 bytes. When an object is not
+located on its natural alignment boundary, accesses to that object are
+considered *unaligned*.
+
+Some machines issue a hardware **exception**, or experience **slowdowns** when a
+memory access operation occurs from an unaligned address. Examples of such
+operations are:
+
+- SIMD instructions on x86. These instructions require all memory accesses to be
+  on an address that is aligned to 16 bytes.
+- On ARM unaligned loads have an extra cycle penalty.
+
+As such, many operations that allocate memory in this package allow the caller
+to explicitly specify the alignment of allocated pointers/slices. The default
+alignment for all operations is specified in a constant `mem.DEFAULT_ALIGNMENT`.
+
+## Zero by default
+
+Whenever new memory is allocated, via an allocator, or on the stack, by default
+Odin will zero-initialize that memory, even if it wasn't explicitly
+initialized. This allows for some convenience in certain scenarios and ease of
+debugging, which will not be described in detail here.
+
+However zero-initialization can be a cause of slowdowns, when allocating large
+buffers. For this reason, allocators have `*_non_zeroed` modes of allocation
+that allow the user to request uninitialized memory and will avoid a
+relatively expensive zero-filling of the buffer.
+
+## Naming conventions
+
+The word `size` is used to denote the **size in bytes**. The word `length` is
+used to denote the count of objects.
+
+The allocation procedures use the following conventions:
+
+- If the name contains `alloc_bytes` or `resize_bytes`, then the procedure takes
+  in slice parameters and returns slices.
+- If the procedure name contains `alloc` or `resize`, then the procedure takes
+  in a raw pointer and returns raw pointers.
+- If the procedure name contains `free_bytes`, then the procedure takes in a
+  slice.
+- If the procedure name contains `free`, then the procedure takes in a pointer.
+
+Higher-level allocation procedures follow the following naming scheme:
+
+- `new`: Allocates a single object
+- `free`: Free a single object (opposite of `new`)
+- `make`: Allocate a group of objects
+- `delete`: Free a group of objects (opposite of `make`)
 */
 */
 package mem
 package mem

+ 436 - 49
core/mem/mem.odin

@@ -3,49 +3,185 @@ package mem
 import "base:runtime"
 import "base:runtime"
 import "base:intrinsics"
 import "base:intrinsics"
 
 
-Byte     :: runtime.Byte
+/*
+The size, in bytes, of a single byte.
+
+This constant is equal to the value of `1`.
+*/
+Byte :: runtime.Byte
+
+/*
+The size, in bytes, of one kilobyte.
+
+This constant is equal to the amount of bytes in one kilobyte (also known as
+kibibyte), which is equal to 1024 bytes.
+*/
 Kilobyte :: runtime.Kilobyte
 Kilobyte :: runtime.Kilobyte
+
+/*
+The size, in bytes, of one megabyte.
+
+This constant is equal to the amount of bytes in one megabyte (also known as
+mebibyte), which is equal to 1024 kilobytes.
+*/
 Megabyte :: runtime.Megabyte
 Megabyte :: runtime.Megabyte
+
+/*
+The size, in bytes, of one gigabyte.
+
+This constant is equal to the amount of bytes in one gigabyte (also known as
+gibibyte), which is equal to 1024 megabytes.
+*/
 Gigabyte :: runtime.Gigabyte
 Gigabyte :: runtime.Gigabyte
+
+/*
+The size, in bytes, of one terabyte.
+
+This constant is equal to the amount of bytes in one terabyte (also known as
+tebibyte), which is equal to 1024 gigabytes.
+*/
 Terabyte :: runtime.Terabyte
 Terabyte :: runtime.Terabyte
+
+/*
+The size, in bytes, of one petabyte.
+
+This constant is equal to the amount of bytes in one petabyte (also known as
+pebibyte), which is equal to 1024 terabytes.
+*/
 Petabyte :: runtime.Petabyte
 Petabyte :: runtime.Petabyte
-Exabyte  :: runtime.Exabyte
 
 
+/*
+The size, in bytes, of one exabyte.
+
+This constant is equal to the amount of bytes in one exabyte (also known as
+exbibyte), which is equal to 1024 petabytes.
+*/
+Exabyte :: runtime.Exabyte
+
+/*
+Set each byte of a memory range to a specific value.
+
+This procedure copies value specified by the `value` parameter into each of the
+`len` bytes of a memory range, located at address `data`.
+
+This procedure returns the pointer to `data`.
+*/
 set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr {
 set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr {
 	return runtime.memset(data, i32(value), len)
 	return runtime.memset(data, i32(value), len)
 }
 }
+
+/*
+Set each byte of a memory range to zero.
+
+This procedure copies the value `0` into the `len` bytes of a memory range,
+starting at address `data`.
+
+This procedure returns the pointer to `data`.
+*/
 zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	intrinsics.mem_zero(data, len)
 	intrinsics.mem_zero(data, len)
 	return data
 	return data
 }
 }
+
+/*
+Set each byte of a memory range to zero.
+
+This procedure copies the value `0` into the `len` bytes of a memory range,
+starting at address `data`.
+
+This procedure returns the pointer to `data`.
+
+Unlike the `zero()` procedure, which can be optimized away or reordered by the
+compiler under certain circumstances, `zero_explicit()` procedure can not be
+optimized away or reordered with other memory access operations, and the
+compiler assumes volatile semantics of the memory.
+*/
 zero_explicit :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 zero_explicit :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	// This routine tries to avoid the compiler optimizing away the call,
 	// This routine tries to avoid the compiler optimizing away the call,
-	// so that it is always executed.  It is intended to provided
+	// so that it is always executed.  It is intended to provide
 	// equivalent semantics to those provided by the C11 Annex K 3.7.4.1
 	// equivalent semantics to those provided by the C11 Annex K 3.7.4.1
 	// memset_s call.
 	// memset_s call.
 	intrinsics.mem_zero_volatile(data, len) // Use the volatile mem_zero
 	intrinsics.mem_zero_volatile(data, len) // Use the volatile mem_zero
 	intrinsics.atomic_thread_fence(.Seq_Cst) // Prevent reordering
 	intrinsics.atomic_thread_fence(.Seq_Cst) // Prevent reordering
 	return data
 	return data
 }
 }
+
+/*
+Zero-fill the memory of an object.
+
+This procedure sets each byte of the object pointed to by the pointer `item`
+to zero, and returns the pointer to `item`.
+*/
 zero_item :: proc "contextless" (item: $P/^$T) -> P {
 zero_item :: proc "contextless" (item: $P/^$T) -> P {
 	intrinsics.mem_zero(item, size_of(T))
 	intrinsics.mem_zero(item, size_of(T))
 	return item
 	return item
 }
 }
+
+/*
+Zero-fill the memory of the slice.
+
+This procedure sets each byte of the slice pointed to by the slice `data`
+to zero, and returns the slice `data`.
+*/
 zero_slice :: proc "contextless" (data: $T/[]$E) -> T {
 zero_slice :: proc "contextless" (data: $T/[]$E) -> T {
 	zero(raw_data(data), size_of(E)*len(data))
 	zero(raw_data(data), size_of(E)*len(data))
 	return data
 	return data
 }
 }
 
 
+/*
+Copy bytes from one memory range to another.
 
 
+This procedure copies `len` bytes of data, from the memory range pointed to by
+the `src` pointer into the memory range pointed to by the `dst` pointer, and
+returns the `dst` pointer.
+*/
 copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 	intrinsics.mem_copy(dst, src, len)
 	intrinsics.mem_copy(dst, src, len)
 	return dst
 	return dst
 }
 }
+
+/*
+Copy bytes between two non-overlapping memory ranges.
+
+This procedure copies `len` bytes of data, from the memory range pointed to by
+the `src` pointer into the memory range pointed to by the `dst` pointer, and
+returns the `dst` pointer.
+
+This is a slightly more optimized version of the `copy` procedure that requires
+that memory ranges specified by the parameters to this procedure are not
+overlapping. If the memory ranges specified by `dst` and `src` pointers overlap,
+the behavior of this function may be unpredictable.
+*/
 copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 	intrinsics.mem_copy_non_overlapping(dst, src, len)
 	intrinsics.mem_copy_non_overlapping(dst, src, len)
 	return dst
 	return dst
 }
 }
 
 
+/*
+Compare two memory ranges defined by slices.
+
+This procedure performs a byte-by-byte comparison between memory ranges
+specified by slices `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `min(len(a), len(b))` bytes, is compared between `a` and `b`.
+  - If the byte in slice `a` is smaller than a byte in slice `b`, then comparison
+  stops and this procedure returns `-1`.
+  - If the byte in slice `a` is bigger than a byte in slice `b`, then comparison
+  stops and this procedure returns `+1`.
+  - Otherwise the comparison continues until `min(len(a), len(b))` bytes are compared.
+2. If all the bytes in the range are equal, then the lengths of the slices are
+  compared.
+  - If the length of slice `a` is smaller than the length of slice `b`, then `-1` is returned.
+  - If the length of slice `b` is smaller than the length of slice `a`, then `+1` is returned.
+  - Otherwise `0` is returned.
+*/
 @(require_results)
 @(require_results)
 compare :: proc "contextless" (a, b: []byte) -> int {
 compare :: proc "contextless" (a, b: []byte) -> int {
 	res := compare_byte_ptrs(raw_data(a), raw_data(b), min(len(a), len(b)))
 	res := compare_byte_ptrs(raw_data(a), raw_data(b), min(len(a), len(b)))
@@ -57,16 +193,89 @@ compare :: proc "contextless" (a, b: []byte) -> int {
 	return res
 	return res
 }
 }
 
 
+/*
+Compare two memory ranges defined by byte pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes, is compared between `a` and `b`.
+  - If the byte in `a` is smaller than a byte in `b`, then comparison stops
+  and this procedure returns `-1`.
+  - If the byte in `a` is bigger than a byte in `b`, then comparison stops
+  and this procedure returns `+1`.
+  - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
 @(require_results)
 @(require_results)
 compare_byte_ptrs :: proc "contextless" (a, b: ^byte, n: int) -> int #no_bounds_check {
 compare_byte_ptrs :: proc "contextless" (a, b: ^byte, n: int) -> int #no_bounds_check {
 	return runtime.memory_compare(a, b, n)
 	return runtime.memory_compare(a, b, n)
 }
 }
 
 
+/*
+Compare two memory ranges defined by pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value, specifying their relative
+ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger"  than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes, is compared between `a` and `b`.
+  - If the byte in `a` is smaller than a byte in `b`, then comparison stops
+  and this procedure returns `-1`.
+  - If the byte in `a` is bigger than a byte in `b`, then comparison stops
+  and this procedure returns `+1`.
+  - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
+@(require_results)
+compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
+	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
+}
+
+/*
+Check whether two objects are equal on binary level.
+
+This procedure checks whether the memory ranges occupied by objects `a` and
+`b` are equal. See `compare_byte_ptrs()` for how this comparison is done.
+*/
+@(require_results)
+simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
+	a, b := a, b
+	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
+}
+
+/*
+Check if the memory range defined by a slice is zero-filled.
+
+This procedure checks whether every byte, pointed to by the slice, specified
+by the parameter `data`, is zero. If all bytes of the slice are zero, this
+procedure returns `true`. Otherwise this procedure returns `false`.
+*/
 @(require_results)
 @(require_results)
 check_zero :: proc(data: []byte) -> bool {
 check_zero :: proc(data: []byte) -> bool {
 	return check_zero_ptr(raw_data(data), len(data))
 	return check_zero_ptr(raw_data(data), len(data))
 }
 }
 
 
+/*
+Check if the memory range defined by a pointer is zero-filled.
+
+This procedure checks whether each of the `len` bytes, starting at address
+`ptr` is zero. If all bytes of this range are zero, this procedure returns
+`true`. Otherwise this procedure returns `false`.
+*/
 @(require_results)
 @(require_results)
 check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	switch {
 	switch {
@@ -81,57 +290,99 @@ check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	case 4: return intrinsics.unaligned_load((^u32)(ptr)) == 0
 	case 4: return intrinsics.unaligned_load((^u32)(ptr)) == 0
 	case 8: return intrinsics.unaligned_load((^u64)(ptr)) == 0
 	case 8: return intrinsics.unaligned_load((^u64)(ptr)) == 0
 	}
 	}
-
 	start := uintptr(ptr)
 	start := uintptr(ptr)
 	start_aligned := align_forward_uintptr(start, align_of(uintptr))
 	start_aligned := align_forward_uintptr(start, align_of(uintptr))
 	end := start + uintptr(len)
 	end := start + uintptr(len)
 	end_aligned := align_backward_uintptr(end, align_of(uintptr))
 	end_aligned := align_backward_uintptr(end, align_of(uintptr))
-
 	for b in start..<start_aligned {
 	for b in start..<start_aligned {
 		if (^byte)(b)^ != 0 {
 		if (^byte)(b)^ != 0 {
 			return false
 			return false
 		}
 		}
 	}
 	}
-
 	for b := start_aligned; b < end_aligned; b += size_of(uintptr) {
 	for b := start_aligned; b < end_aligned; b += size_of(uintptr) {
 		if (^uintptr)(b)^ != 0 {
 		if (^uintptr)(b)^ != 0 {
 			return false
 			return false
 		}
 		}
 	}
 	}
-
 	for b in end_aligned..<end {
 	for b in end_aligned..<end {
 		if (^byte)(b)^ != 0 {
 		if (^byte)(b)^ != 0 {
 			return false
 			return false
 		}
 		}
 	}
 	}
-
 	return true
 	return true
 }
 }
 
 
-@(require_results)
-simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
-	a, b := a, b
-	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
-}
+/*
+Offset a given pointer by a given amount.
 
 
-@(require_results)
-compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
-	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
-}
+This procedure offsets the pointer `ptr` to an object of type `T`, by the amount
+of bytes specified by `offset*size_of(T)`, and returns the resulting pointer.
 
 
+**Note**: Prefer to use multipointer types, if possible.
+*/
 ptr_offset :: intrinsics.ptr_offset
 ptr_offset :: intrinsics.ptr_offset
+
+/*
+Offset a given pointer by a given amount backwards.
+
+This procedure offsets the pointer `ptr` to an object of type `T`, by the amount
+of bytes specified by `offset*size_of(T)` in the negative direction, and
+returns the resulting pointer.
+*/
 ptr_sub :: intrinsics.ptr_sub
 ptr_sub :: intrinsics.ptr_sub
 
 
+/*
+Construct a slice from pointer and length.
+
+This procedure creates a slice, that points to `len` amount of objects located
+at an address, specified by `ptr`.
+*/
 @(require_results)
 @(require_results)
 slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T {
 slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T {
 	return ([^]T)(ptr)[:len]
 	return ([^]T)(ptr)[:len]
 }
 }
 
 
+/*
+Construct a byte slice from raw pointer and length.
+
+This procedure creates a byte slice, that points to `len` amount of bytes
+located at an address specified by `data`.
+*/
 @(require_results)
 @(require_results)
 byte_slice :: #force_inline proc "contextless" (data: rawptr, #any_int len: int) -> []byte {
 byte_slice :: #force_inline proc "contextless" (data: rawptr, #any_int len: int) -> []byte {
 	return ([^]u8)(data)[:max(len, 0)]
 	return ([^]u8)(data)[:max(len, 0)]
 }
 }
 
 
+/*
+Create a byte slice from pointer and length.
+
+This procedure creates a byte slice, pointing to `len` objects, starting from
+the address specified by `ptr`.
+*/
+@(require_results)
+ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte {
+	return transmute([]byte)Raw_Slice{ptr, len*size_of(T)}
+}
+
+/*
+Obtain the slice, pointing to the contents of `any`.
+
+This procedure returns the slice, pointing to the contents of the specified
+value of the `any` type.
+*/
+@(require_results)
+any_to_bytes :: proc "contextless" (val: any) -> []byte {
+	ti := type_info_of(val.id)
+	size := ti != nil ? ti.size : 0
+	return transmute([]byte)Raw_Slice{val.data, size}
+}
+
+/*
+Obtain a byte slice from any slice.
+
+This procedure returns a byte slice that points to the same bytes as the slice
+specified by `slice`.
+*/
 @(require_results)
 @(require_results)
 slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte {
 slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte {
 	s := transmute(Raw_Slice)slice
 	s := transmute(Raw_Slice)slice
@@ -139,6 +390,15 @@ slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte {
 	return transmute([]byte)s
 	return transmute([]byte)s
 }
 }
 
 
+/*
+Transmute slice to a different type.
+
+This procedure performs an operation similar to transmute, returning a slice of
+type `T` that points to the same bytes as the slice specified by `slice`
+parameter. Unlike plain transmute operation, this procedure adjusts the length
+of the resulting slice, such that the resulting slice points to the correct
+amount of objects to cover the memory region pointed to by `slice`.
+*/
 @(require_results)
 @(require_results)
 slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T {
 slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T {
 	when size_of(A) == 0 || size_of(B) == 0 {
 	when size_of(A) == 0 || size_of(B) == 0 {
@@ -150,12 +410,25 @@ slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T {
 	}
 	}
 }
 }
 
 
+/*
+Obtain data and length of a slice.
+
+This procedure returns the pointer to the start of the memory region pointed to
+by slice `slice` and the length of the slice.
+*/
 @(require_results)
 @(require_results)
 slice_to_components :: proc "contextless" (slice: $E/[]$T) -> (data: ^T, len: int) {
 slice_to_components :: proc "contextless" (slice: $E/[]$T) -> (data: ^T, len: int) {
 	s := transmute(Raw_Slice)slice
 	s := transmute(Raw_Slice)slice
 	return (^T)(s.data), s.len
 	return (^T)(s.data), s.len
 }
 }
 
 
+/*
+Create a dynamic array from slice.
+
+This procedure creates a dynamic array, using slice `backing` as the backing
+buffer for the dynamic array. The resulting dynamic array can not grow beyond
+the size of the specified slice.
+*/
 @(require_results)
 @(require_results)
 buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E {
 buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E {
 	return transmute([dynamic]E)Raw_Dynamic_Array{
 	return transmute([dynamic]E)Raw_Dynamic_Array{
@@ -169,19 +442,12 @@ buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E {
 	}
 	}
 }
 }
 
 
-@(require_results)
-ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte {
-	return transmute([]byte)Raw_Slice{ptr, len*size_of(T)}
-}
-
-@(require_results)
-any_to_bytes :: proc "contextless" (val: any) -> []byte {
-	ti := type_info_of(val.id)
-	size := ti != nil ? ti.size : 0
-	return transmute([]byte)Raw_Slice{val.data, size}
-}
-
+/*
+Check whether a number is a power of two.
 
 
+This procedure checks whether a given pointer-sized unsigned integer contains
+a power-of-two value.
+*/
 @(require_results)
 @(require_results)
 is_power_of_two :: proc "contextless" (x: uintptr) -> bool {
 is_power_of_two :: proc "contextless" (x: uintptr) -> bool {
 	if x <= 0 {
 	if x <= 0 {
@@ -190,66 +456,167 @@ is_power_of_two :: proc "contextless" (x: uintptr) -> bool {
 	return (x & (x-1)) == 0
 	return (x & (x-1)) == 0
 }
 }
 
 
-@(require_results)
-align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
-	return rawptr(align_forward_uintptr(uintptr(ptr), align))
+/*
+Check if a pointer is aligned.
+
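+This procedure checks whether a pointer `x` is aligned to a boundary specified
+by `align`, and returns `true` if the pointer is aligned, and `false` otherwise.
+
+NOTE(review): the mask below evaluates as `(1 << align) - 1`, so `align` is
+effectively treated as a power-of-two exponent rather than a byte boundary
+(e.g. `align = 8` tests 256-byte alignment). `uintptr(align) - 1` looks like
+the intended mask - confirm against callers.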
+*/
+is_aligned :: proc "contextless" (x: rawptr, align: int) -> bool {
+	p := uintptr(x)
+	return (p & (1<<uintptr(align) - 1)) == 0
 }
 }
 
 
+/*
+Align uintptr forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
 align_forward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
 align_forward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
 	assert(is_power_of_two(align))
 	assert(is_power_of_two(align))
+	return (ptr + align-1) & ~(align-1)
+}
 
 
-	p := ptr
-	modulo := p & (align-1)
-	if modulo != 0 {
-		p += align - modulo
-	}
-	return p
+/*
+Align pointer forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
+@(require_results)
+align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
+	return rawptr(align_forward_uintptr(uintptr(ptr), align))
 }
 }
 
 
+/*
+Align int forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
 align_forward_int :: proc(ptr, align: int) -> int {
 align_forward_int :: proc(ptr, align: int) -> int {
 	return int(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 	return int(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 }
 }
+
+/*
+Align uint forward.
+
+This procedure returns the next address after `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
 align_forward_uint :: proc(ptr, align: uint) -> uint {
 align_forward_uint :: proc(ptr, align: uint) -> uint {
 	return uint(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 	return uint(align_forward_uintptr(uintptr(ptr), uintptr(align)))
 }
 }
 
 
+/*
+Align uintptr backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
-align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
-	return rawptr(align_backward_uintptr(uintptr(ptr), align))
+align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
+	assert(is_power_of_two(align))
+	return ptr & ~(align-1)
 }
 }
 
 
+/*
+Align rawptr backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
-align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr {
-	return align_forward_uintptr(ptr - align + 1, align)
+align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr {
+	return rawptr(align_backward_uintptr(uintptr(ptr), align))
 }
 }
 
 
+/*
+Align int backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
 align_backward_int :: proc(ptr, align: int) -> int {
 align_backward_int :: proc(ptr, align: int) -> int {
 	return int(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 	return int(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 }
 }
+
+/*
+Align uint backwards.
+
+This procedure returns the previous address before `ptr`, that is located on the
+alignment boundary specified by `align`. If `ptr` is already aligned to `align`
+bytes, `ptr` is returned.
+
+The specified alignment must be a power of 2.
+*/
 @(require_results)
 @(require_results)
 align_backward_uint :: proc(ptr, align: uint) -> uint {
 align_backward_uint :: proc(ptr, align: uint) -> uint {
 	return uint(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 	return uint(align_backward_uintptr(uintptr(ptr), uintptr(align)))
 }
 }
 
 
+/*
+Create a context with a given allocator.
+
+This procedure returns a copy of the current context with the allocator replaced
+by the allocator `a`.
+*/
 @(require_results)
 @(require_results)
 context_from_allocator :: proc(a: Allocator) -> type_of(context) {
 context_from_allocator :: proc(a: Allocator) -> type_of(context) {
 	context.allocator = a
 	context.allocator = a
 	return context
 	return context
 }
 }
 
 
+/*
+Copy the value from a pointer into a value.
+
+This procedure copies the object of type `T` pointed to by the pointer `ptr`
+into a new stack-allocated value and returns that value.
+*/
 @(require_results)
 @(require_results)
 reinterpret_copy :: proc "contextless" ($T: typeid, ptr: rawptr) -> (value: T) {
 reinterpret_copy :: proc "contextless" ($T: typeid, ptr: rawptr) -> (value: T) {
 	copy(&value, ptr, size_of(T))
 	copy(&value, ptr, size_of(T))
 	return
 	return
 }
 }
 
 
+/*
+Dynamic array with a fixed capacity buffer.
 
 
+This type represents dynamic arrays with a fixed-size backing buffer. Once the
+maximum capacity has been reached, further allocations from fixed byte buffers
+return `nil` and no error.
+*/
 Fixed_Byte_Buffer :: distinct [dynamic]byte
 Fixed_Byte_Buffer :: distinct [dynamic]byte
 
 
+/*
+Create a fixed byte buffer from a slice.
+*/
 @(require_results)
 @(require_results)
 make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buffer {
 make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buffer {
 	s := transmute(Raw_Slice)backing
 	s := transmute(Raw_Slice)backing
@@ -264,40 +631,60 @@ make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buf
 	return transmute(Fixed_Byte_Buffer)d
 	return transmute(Fixed_Byte_Buffer)d
 }
 }
 
 
+/*
+General-purpose align formula.
 
 
-
+This procedure is equivalent to `align_forward`, but it does not require the
+alignment to be a power of two.
+*/
 @(require_results)
 @(require_results)
 align_formula :: proc "contextless" (size, align: int) -> int {
 align_formula :: proc "contextless" (size, align: int) -> int {
 	result := size + align-1
 	result := size + align-1
 	return result - result%align
 	return result - result%align
 }
 }
 
 
+/*
+Calculate the padding for header preceding aligned data.
+
+This procedure returns the padding, following the specified pointer `ptr` that
+will be able to fit in a header of the size `header_size`, immediately
+preceding the memory region, aligned on a boundary specified by `align`. See
+the following diagram for a visual representation.
+
+        header size
+	    |<------>|
+	+---+--------+------------- - - -
+	    | HEADER |  DATA...
+	+---+--------+------------- - - -
+	^            ^
+	|<---------->|
+	|  padding   |
+	ptr          aligned ptr
+
+The function takes in `ptr` and `header_size`, as well as the required
+alignment for `DATA`. The return value of the function is the padding between
+`ptr` and `aligned_ptr` that will be able to fit the header.
+*/
 @(require_results)
 @(require_results)
 calc_padding_with_header :: proc "contextless" (ptr: uintptr, align: uintptr, header_size: int) -> int {
 calc_padding_with_header :: proc "contextless" (ptr: uintptr, align: uintptr, header_size: int) -> int {
 	p, a := ptr, align
 	p, a := ptr, align
 	modulo := p & (a-1)
 	modulo := p & (a-1)
-
 	padding := uintptr(0)
 	padding := uintptr(0)
 	if modulo != 0 {
 	if modulo != 0 {
 		padding = a - modulo
 		padding = a - modulo
 	}
 	}
-
 	needed_space := uintptr(header_size)
 	needed_space := uintptr(header_size)
 	if padding < needed_space {
 	if padding < needed_space {
 		needed_space -= padding
 		needed_space -= padding
-
 		if needed_space & (a-1) > 0 {
 		if needed_space & (a-1) > 0 {
 			padding += align * (1+(needed_space/align))
 			padding += align * (1+(needed_space/align))
 		} else {
 		} else {
 			padding += align * (needed_space/align)
 			padding += align * (needed_space/align)
 		}
 		}
 	}
 	}
-
 	return int(padding)
 	return int(padding)
 }
 }
 
 
-
-
 @(require_results, deprecated="prefer 'slice.clone'")
 @(require_results, deprecated="prefer 'slice.clone'")
 clone_slice :: proc(slice: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> (new_slice: T) {
 clone_slice :: proc(slice: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> (new_slice: T) {
 	new_slice, _ = make(T, len(slice), allocator, loc)
 	new_slice, _ = make(T, len(slice), allocator, loc)

+ 23 - 4
core/mem/mutex_allocator.odin

@@ -3,17 +3,31 @@ package mem
 
 
 import "core:sync"
 import "core:sync"
 
 
+/*
+The data for mutex allocator.
+*/
 Mutex_Allocator :: struct {
 Mutex_Allocator :: struct {
 	backing: Allocator,
 	backing: Allocator,
 	mutex:   sync.Mutex,
 	mutex:   sync.Mutex,
 }
 }
 
 
+/*
+Initialize the mutex allocator.
+
+This procedure initializes the mutex allocator using `backing_allocator` as the
+allocator that will be used to pass all allocation requests through.
+*/
 mutex_allocator_init :: proc(m: ^Mutex_Allocator, backing_allocator: Allocator) {
 mutex_allocator_init :: proc(m: ^Mutex_Allocator, backing_allocator: Allocator) {
 	m.backing = backing_allocator
 	m.backing = backing_allocator
 	m.mutex = {}
 	m.mutex = {}
 }
 }
 
 
+/*
+Mutex allocator.
 
 
+The mutex allocator is a wrapper for allocators that is used to serialize all
+allocator requests across multiple threads.
+*/
 @(require_results)
 @(require_results)
 mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator {
 mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator {
 	return Allocator{
 	return Allocator{
@@ -22,11 +36,16 @@ mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator {
 	}
 	}
 }
 }
 
 
-mutex_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                             size, alignment: int,
-                             old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) {
+mutex_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size: int,
+	alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
 	m := (^Mutex_Allocator)(allocator_data)
 	m := (^Mutex_Allocator)(allocator_data)
-
 	sync.mutex_guard(&m.mutex)
 	sync.mutex_guard(&m.mutex)
 	return m.backing.procedure(m.backing.data, mode, size, alignment, old_memory, old_size, loc)
 	return m.backing.procedure(m.backing.data, mode, size, alignment, old_memory, old_size, loc)
 }
 }

+ 85 - 11
core/mem/raw.odin

@@ -3,26 +3,100 @@ package mem
 import "base:builtin"
 import "base:builtin"
 import "base:runtime"
 import "base:runtime"
 
 
-Raw_Any           :: runtime.Raw_Any
-Raw_String        :: runtime.Raw_String
-Raw_Cstring       :: runtime.Raw_Cstring
-Raw_Slice         :: runtime.Raw_Slice
+/*
+Memory layout of the `any` type.
+*/
+Raw_Any :: runtime.Raw_Any
+
+/*
+Memory layout of the `string` type.
+*/
+Raw_String :: runtime.Raw_String
+
+/*
+Memory layout of the `cstring` type.
+*/
+Raw_Cstring :: runtime.Raw_Cstring
+
+/*
+Memory layout of `[]T` types.
+*/
+Raw_Slice :: runtime.Raw_Slice
+
+/*
+Memory layout of `[dynamic]T` types.
+*/
 Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array
 Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array
-Raw_Map           :: runtime.Raw_Map
-Raw_Soa_Pointer   :: runtime.Raw_Soa_Pointer
 
 
-Raw_Complex32     :: runtime.Raw_Complex32
-Raw_Complex64     :: runtime.Raw_Complex64
-Raw_Complex128    :: runtime.Raw_Complex128
-Raw_Quaternion64  :: runtime.Raw_Quaternion64
+/*
+Memory layout of `map[K]V` types.
+*/
+Raw_Map :: runtime.Raw_Map
+
+/*
+Memory layout of `#soa` pointer types.
+*/
+Raw_Soa_Pointer :: runtime.Raw_Soa_Pointer
+
+/*
+Memory layout of the `complex32` type.
+*/
+Raw_Complex32 :: runtime.Raw_Complex32
+
+/*
+Memory layout of the `complex64` type.
+*/
+Raw_Complex64 :: runtime.Raw_Complex64
+
+/*
+Memory layout of the `complex128` type.
+*/
+Raw_Complex128 :: runtime.Raw_Complex128
+
+/*
+Memory layout of the `quaternion64` type.
+*/
+Raw_Quaternion64 :: runtime.Raw_Quaternion64
+
+/*
+Memory layout of the `quaternion128` type.
+*/
 Raw_Quaternion128 :: runtime.Raw_Quaternion128
 Raw_Quaternion128 :: runtime.Raw_Quaternion128
+
+/*
+Memory layout of the `quaternion256` type.
+*/
 Raw_Quaternion256 :: runtime.Raw_Quaternion256
 Raw_Quaternion256 :: runtime.Raw_Quaternion256
-Raw_Quaternion64_Vector_Scalar  :: runtime.Raw_Quaternion64_Vector_Scalar
+
+/*
+Memory layout of the `quaternion64` type, split into vector and scalar parts.
+*/
+Raw_Quaternion64_Vector_Scalar :: runtime.Raw_Quaternion64_Vector_Scalar
+
+/*
+Memory layout of the `quaternion128` type, split into vector and scalar parts.
+*/
 Raw_Quaternion128_Vector_Scalar :: runtime.Raw_Quaternion128_Vector_Scalar
 Raw_Quaternion128_Vector_Scalar :: runtime.Raw_Quaternion128_Vector_Scalar
+
+/*
+Memory layout of the `quaternion256` type, split into vector and scalar parts.
+*/
 Raw_Quaternion256_Vector_Scalar :: runtime.Raw_Quaternion256_Vector_Scalar
 Raw_Quaternion256_Vector_Scalar :: runtime.Raw_Quaternion256_Vector_Scalar
 
 
+/*
+Create a value of the any type.
+
+This procedure creates a value with type `any` that points to an object with
+typeid `id` located at an address specified by `data`.
+*/
 make_any :: proc "contextless" (data: rawptr, id: typeid) -> any {
 make_any :: proc "contextless" (data: rawptr, id: typeid) -> any {
 	return transmute(any)Raw_Any{data, id}
 	return transmute(any)Raw_Any{data, id}
 }
 }
 
 
+/*
+Obtain pointer to the data.
+
+This procedure returns the pointer to the data of a slice, string, or a dynamic
+array.
+*/
 raw_data :: builtin.raw_data
 raw_data :: builtin.raw_data

+ 243 - 104
core/mem/rollback_stack_allocator.odin

@@ -1,52 +1,36 @@
 package mem
 package mem
 
 
-// The Rollback Stack Allocator was designed for the test runner to be fast,
-// able to grow, and respect the Tracking Allocator's requirement for
-// individual frees. It is not overly concerned with fragmentation, however.
-//
-// It has support for expansion when configured with a block allocator and
-// limited support for out-of-order frees.
-//
-// Allocation has constant-time best and usual case performance.
-// At worst, it is linear according to the number of memory blocks.
-//
-// Allocation follows a first-fit strategy when there are multiple memory
-// blocks.
-//
-// Freeing has constant-time best and usual case performance.
-// At worst, it is linear according to the number of memory blocks and number
-// of freed items preceding the last item in a block.
-//
-// Resizing has constant-time performance, if it's the last item in a block, or
-// the new size is smaller. Naturally, this becomes linear-time if there are
-// multiple blocks to search for the pointer's owning block. Otherwise, the
-// allocator defaults to a combined alloc & free operation internally.
-//
-// Out-of-order freeing is accomplished by collapsing a run of freed items
-// from the last allocation backwards.
-//
-// Each allocation has an overhead of 8 bytes and any extra bytes to satisfy
-// the requested alignment.
-
 import "base:runtime"
 import "base:runtime"
 
 
+/*
+Rollback stack default block size.
+*/
 ROLLBACK_STACK_DEFAULT_BLOCK_SIZE :: 4 * Megabyte
 ROLLBACK_STACK_DEFAULT_BLOCK_SIZE :: 4 * Megabyte
 
 
-// This limitation is due to the size of `prev_ptr`, but it is only for the
-// head block; any allocation in excess of the allocator's `block_size` is
-// valid, so long as the block allocator can handle it.
-//
-// This is because allocations over the block size are not split up if the item
-// within is freed; they are immediately returned to the block allocator.
-ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte
+/*
+Rollback stack max head block size.
 
 
+This limitation is due to the size of `prev_ptr`, but it is only for the
+head block; any allocation in excess of the allocator's `block_size` is
+valid, so long as the block allocator can handle it.
+
+This is because allocations over the block size are not split up if the item
+within is freed; they are immediately returned to the block allocator.
+*/
+ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte
 
 
+/*
+Allocation header of the rollback stack allocator.
+*/
 Rollback_Stack_Header :: bit_field u64 {
 Rollback_Stack_Header :: bit_field u64 {
 	prev_offset:  uintptr | 32,
 	prev_offset:  uintptr | 32,
 	is_free:         bool |  1,
 	is_free:         bool |  1,
 	prev_ptr:     uintptr | 31,
 	prev_ptr:     uintptr | 31,
 }
 }
 
 
+/*
+Block header of the rollback stack allocator.
+*/
 Rollback_Stack_Block :: struct {
 Rollback_Stack_Block :: struct {
 	next_block: ^Rollback_Stack_Block,
 	next_block: ^Rollback_Stack_Block,
 	last_alloc: rawptr,
 	last_alloc: rawptr,
@@ -54,13 +38,15 @@ Rollback_Stack_Block :: struct {
 	buffer: []byte,
 	buffer: []byte,
 }
 }
 
 
+/*
+Rollback stack allocator data.
+*/
 Rollback_Stack :: struct {
 Rollback_Stack :: struct {
 	head: ^Rollback_Stack_Block,
 	head: ^Rollback_Stack_Block,
 	block_size: int,
 	block_size: int,
 	block_allocator: Allocator,
 	block_allocator: Allocator,
 }
 }
 
 
-
 @(private="file", require_results)
 @(private="file", require_results)
 rb_ptr_in_bounds :: proc(block: ^Rollback_Stack_Block, ptr: rawptr) -> bool {
 rb_ptr_in_bounds :: proc(block: ^Rollback_Stack_Block, ptr: rawptr) -> bool {
 	start := raw_data(block.buffer)
 	start := raw_data(block.buffer)
@@ -110,6 +96,9 @@ rb_rollback_block :: proc(block: ^Rollback_Stack_Block, header: ^Rollback_Stack_
 	}
 	}
 }
 }
 
 
+/*
+Free memory to a rollback stack allocator.
+*/
 @(private="file", require_results)
 @(private="file", require_results)
 rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error {
 rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error {
 	parent, block, header := rb_find_ptr(stack, ptr) or_return
 	parent, block, header := rb_find_ptr(stack, ptr) or_return
@@ -128,6 +117,9 @@ rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error {
 	return nil
 	return nil
 }
 }
 
 
+/*
+Free all memory owned by the rollback stack allocator.
+*/
 @(private="file")
 @(private="file")
 rb_free_all :: proc(stack: ^Rollback_Stack) {
 rb_free_all :: proc(stack: ^Rollback_Stack) {
 	for block := stack.head.next_block; block != nil; /**/ {
 	for block := stack.head.next_block; block != nil; /**/ {
@@ -141,45 +133,75 @@ rb_free_all :: proc(stack: ^Rollback_Stack) {
 	stack.head.offset = 0
 	stack.head.offset = 0
 }
 }
 
 
-@(private="file", require_results)
-rb_resize :: proc(stack: ^Rollback_Stack, ptr: rawptr, old_size, size, alignment: int) -> (result: []byte, err: Allocator_Error) {
-	if ptr != nil {
-		if block, _, ok := rb_find_last_alloc(stack, ptr); ok {
-			// `block.offset` should never underflow because it is contingent
-			// on `old_size` in the first place, assuming sane arguments.
-			assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.")
-
-			if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) {
-				// Prevent singleton allocations from fragmenting by forbidding
-				// them to shrink, removing the possibility of overflow bugs.
-				if len(block.buffer) <= stack.block_size {
-					block.offset += cast(uintptr)size - cast(uintptr)old_size
-				}
-				#no_bounds_check return (cast([^]byte)ptr)[:size], nil
-			}
-		}
+/*
+Allocate memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	if bytes != nil {
+		zero_slice(bytes)
 	}
 	}
+	return raw_data(bytes), err
+}
 
 
-	result = rb_alloc(stack, size, alignment) or_return
-	runtime.mem_copy_non_overlapping(raw_data(result), ptr, old_size)
-	err = rb_free(stack, ptr)
+/*
+Allocate memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_bytes :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	if bytes != nil {
+		zero_slice(bytes)
+	}
+	return bytes, err
+}
 
 
-	return
+/*
+Allocate non-initialized memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
+	return raw_data(bytes), err
 }
 }
 
 
-@(private="file", require_results)
-rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byte, err: Allocator_Error) {
+/*
+Allocate non-initialized memory using the rollback stack allocator.
+*/
+@(require_results)
+rb_alloc_bytes_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
+	assert(size >= 0, "Size must be positive or zero.", loc)
+	assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc)
 	parent: ^Rollback_Stack_Block
 	parent: ^Rollback_Stack_Block
 	for block := stack.head; /**/; block = block.next_block {
 	for block := stack.head; /**/; block = block.next_block {
 		when !ODIN_DISABLE_ASSERT {
 		when !ODIN_DISABLE_ASSERT {
 			allocated_new_block: bool
 			allocated_new_block: bool
 		}
 		}
-
 		if block == nil {
 		if block == nil {
 			if stack.block_allocator.procedure == nil {
 			if stack.block_allocator.procedure == nil {
 				return nil, .Out_Of_Memory
 				return nil, .Out_Of_Memory
 			}
 			}
-
 			minimum_size_required := size_of(Rollback_Stack_Header) + size + alignment - 1
 			minimum_size_required := size_of(Rollback_Stack_Header) + size + alignment - 1
 			new_block_size := max(minimum_size_required, stack.block_size)
 			new_block_size := max(minimum_size_required, stack.block_size)
 			block = rb_make_block(new_block_size, stack.block_allocator) or_return
 			block = rb_make_block(new_block_size, stack.block_allocator) or_return
@@ -188,10 +210,8 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt
 				allocated_new_block = true
 				allocated_new_block = true
 			}
 			}
 		}
 		}
-
 		start := raw_data(block.buffer)[block.offset:]
 		start := raw_data(block.buffer)[block.offset:]
 		padding := cast(uintptr)calc_padding_with_header(cast(uintptr)start, cast(uintptr)alignment, size_of(Rollback_Stack_Header))
 		padding := cast(uintptr)calc_padding_with_header(cast(uintptr)start, cast(uintptr)alignment, size_of(Rollback_Stack_Header))
-
 		if block.offset + padding + cast(uintptr)size > cast(uintptr)len(block.buffer) {
 		if block.offset + padding + cast(uintptr)size > cast(uintptr)len(block.buffer) {
 			when !ODIN_DISABLE_ASSERT {
 			when !ODIN_DISABLE_ASSERT {
 				if allocated_new_block {
 				if allocated_new_block {
@@ -201,54 +221,150 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt
 			parent = block
 			parent = block
 			continue
 			continue
 		}
 		}
-
 		header := cast(^Rollback_Stack_Header)(start[padding - size_of(Rollback_Stack_Header):])
 		header := cast(^Rollback_Stack_Header)(start[padding - size_of(Rollback_Stack_Header):])
 		ptr := start[padding:]
 		ptr := start[padding:]
-
 		header^ = {
 		header^ = {
 			prev_offset = block.offset,
 			prev_offset = block.offset,
 			prev_ptr = uintptr(0) if block.last_alloc == nil else cast(uintptr)block.last_alloc - cast(uintptr)raw_data(block.buffer),
 			prev_ptr = uintptr(0) if block.last_alloc == nil else cast(uintptr)block.last_alloc - cast(uintptr)raw_data(block.buffer),
 			is_free = false,
 			is_free = false,
 		}
 		}
-
 		block.last_alloc = ptr
 		block.last_alloc = ptr
 		block.offset += padding + cast(uintptr)size
 		block.offset += padding + cast(uintptr)size
-
 		if len(block.buffer) > stack.block_size {
 		if len(block.buffer) > stack.block_size {
 			// This block exceeds the allocator's standard block size and is considered a singleton.
 			// This block exceeds the allocator's standard block size and is considered a singleton.
 			// Prevent any further allocations on it.
 			// Prevent any further allocations on it.
 			block.offset = cast(uintptr)len(block.buffer)
 			block.offset = cast(uintptr)len(block.buffer)
 		}
 		}
-		
 		#no_bounds_check return ptr[:size], nil
 		#no_bounds_check return ptr[:size], nil
 	}
 	}
-
 	return nil, .Out_Of_Memory
 	return nil, .Out_Of_Memory
 }
 }
 
 
+/*
+Resize an allocation owned by rollback stack allocator.
+*/
+@(require_results)
+rb_resize :: proc(
+	stack: ^Rollback_Stack,
+	old_ptr: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc)
+	if bytes != nil {
+		if old_ptr == nil {
+			zero_slice(bytes)
+		} else if size > old_size {
+			zero_slice(bytes[old_size:])
+		}
+	}
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator.
+*/
+@(require_results)
+rb_resize_bytes :: proc(
+	stack: ^Rollback_Stack,
+	old_memory: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]u8, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, old_memory, size, alignment, loc)
+	if bytes != nil {
+		if old_memory == nil {
+			zero_slice(bytes)
+		} else if size > len(old_memory) {
+			zero_slice(bytes[len(old_memory):])
+		}
+	}
+	return bytes, err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator without explicit
+zero-initialization.
+*/
+@(require_results)
+rb_resize_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	old_ptr: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc)
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation owned by rollback stack allocator without explicit
+zero-initialization.
+*/
+@(require_results)
+rb_resize_bytes_non_zeroed :: proc(
+	stack: ^Rollback_Stack,
+	old_memory: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
+	old_size := len(old_memory)
+	ptr := raw_data(old_memory)
+	assert(size >= 0, "Size must be positive or zero.", loc)
+	assert(old_size >= 0, "Old size must be positive or zero.", loc)
+	assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc)
+	if ptr != nil {
+		if block, _, ok := rb_find_last_alloc(stack, ptr); ok {
+			// `block.offset` should never underflow because it is contingent
+			// on `old_size` in the first place, assuming sane arguments.
+			assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.")
+			if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) {
+				// Prevent singleton allocations from fragmenting by forbidding
+				// them to shrink, removing the possibility of overflow bugs.
+				if len(block.buffer) <= stack.block_size {
+					block.offset += cast(uintptr)size - cast(uintptr)old_size
+				}
+				#no_bounds_check return (ptr)[:size], nil
+			}
+		}
+	}
+	result = rb_alloc_bytes_non_zeroed(stack, size, alignment) or_return
+	runtime.mem_copy_non_overlapping(raw_data(result), ptr, old_size)
+	err = rb_free(stack, ptr)
+	return
+}
+
 @(private="file", require_results)
 @(private="file", require_results)
 rb_make_block :: proc(size: int, allocator: Allocator) -> (block: ^Rollback_Stack_Block, err: Allocator_Error) {
 rb_make_block :: proc(size: int, allocator: Allocator) -> (block: ^Rollback_Stack_Block, err: Allocator_Error) {
 	buffer := runtime.mem_alloc(size_of(Rollback_Stack_Block) + size, align_of(Rollback_Stack_Block), allocator) or_return
 	buffer := runtime.mem_alloc(size_of(Rollback_Stack_Block) + size, align_of(Rollback_Stack_Block), allocator) or_return
-
 	block = cast(^Rollback_Stack_Block)raw_data(buffer)
 	block = cast(^Rollback_Stack_Block)raw_data(buffer)
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
 	return
 	return
 }
 }
 
 
-
+/*
+Initialize the rollback stack allocator using a fixed backing buffer.
+*/
 rollback_stack_init_buffered :: proc(stack: ^Rollback_Stack, buffer: []byte, location := #caller_location) {
 rollback_stack_init_buffered :: proc(stack: ^Rollback_Stack, buffer: []byte, location := #caller_location) {
 	MIN_SIZE :: size_of(Rollback_Stack_Block) + size_of(Rollback_Stack_Header) + size_of(rawptr)
 	MIN_SIZE :: size_of(Rollback_Stack_Block) + size_of(Rollback_Stack_Header) + size_of(rawptr)
 	assert(len(buffer) >= MIN_SIZE, "User-provided buffer to Rollback Stack Allocator is too small.", location)
 	assert(len(buffer) >= MIN_SIZE, "User-provided buffer to Rollback Stack Allocator is too small.", location)
-
 	block := cast(^Rollback_Stack_Block)raw_data(buffer)
 	block := cast(^Rollback_Stack_Block)raw_data(buffer)
 	block^ = {}
 	block^ = {}
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
 	#no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):]
-
 	stack^ = {}
 	stack^ = {}
 	stack.head = block
 	stack.head = block
 	stack.block_size = len(block.buffer)
 	stack.block_size = len(block.buffer)
 }
 }
 
 
+/*
+Initialize the rollback stack allocator using a backing block allocator.
+*/
 rollback_stack_init_dynamic :: proc(
 rollback_stack_init_dynamic :: proc(
 	stack: ^Rollback_Stack,
 	stack: ^Rollback_Stack,
 	block_size : int = ROLLBACK_STACK_DEFAULT_BLOCK_SIZE,
 	block_size : int = ROLLBACK_STACK_DEFAULT_BLOCK_SIZE,
@@ -261,22 +377,25 @@ rollback_stack_init_dynamic :: proc(
 		// size is insufficient; check only on platforms with big enough ints.
 		// size is insufficient; check only on platforms with big enough ints.
 		assert(block_size <= ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE, "Rollback Stack Allocators cannot support head blocks larger than 2 gigabytes.", location)
 		assert(block_size <= ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE, "Rollback Stack Allocators cannot support head blocks larger than 2 gigabytes.", location)
 	}
 	}
-
 	block := rb_make_block(block_size, block_allocator) or_return
 	block := rb_make_block(block_size, block_allocator) or_return
-
 	stack^ = {}
 	stack^ = {}
 	stack.head = block
 	stack.head = block
 	stack.block_size = block_size
 	stack.block_size = block_size
 	stack.block_allocator = block_allocator
 	stack.block_allocator = block_allocator
-
 	return nil
 	return nil
 }
 }
 
 
+/*
+Initialize the rollback stack.
+*/
 rollback_stack_init :: proc {
 rollback_stack_init :: proc {
 	rollback_stack_init_buffered,
 	rollback_stack_init_buffered,
 	rollback_stack_init_dynamic,
 	rollback_stack_init_dynamic,
 }
 }
 
 
+/*
+Destroy a rollback stack.
+*/
 rollback_stack_destroy :: proc(stack: ^Rollback_Stack) {
 rollback_stack_destroy :: proc(stack: ^Rollback_Stack) {
 	if stack.block_allocator.procedure != nil {
 	if stack.block_allocator.procedure != nil {
 		rb_free_all(stack)
 		rb_free_all(stack)
@@ -285,6 +404,37 @@ rollback_stack_destroy :: proc(stack: ^Rollback_Stack) {
 	stack^ = {}
 	stack^ = {}
 }
 }
 
 
+/*
+Rollback stack allocator.
+
+The Rollback Stack Allocator was designed for the test runner to be fast,
+able to grow, and respect the Tracking Allocator's requirement for
+individual frees. It is not overly concerned with fragmentation, however.
+
+It has support for expansion when configured with a block allocator and
+limited support for out-of-order frees.
+
+Allocation has constant-time best and usual case performance.
+At worst, it is linear according to the number of memory blocks.
+
+Allocation follows a first-fit strategy when there are multiple memory
+blocks.
+
+Freeing has constant-time best and usual case performance.
+At worst, it is linear according to the number of memory blocks and number
+of freed items preceding the last item in a block.
+
+Resizing has constant-time performance, if it's the last item in a block, or
+the new size is smaller. Naturally, this becomes linear-time if there are
+multiple blocks to search for the pointer's owning block. Otherwise, the
+allocator defaults to a combined alloc & free operation internally.
+
+Out-of-order freeing is accomplished by collapsing a run of freed items
+from the last allocation backwards.
+
+Each allocation has an overhead of 8 bytes and any extra bytes to satisfy
+the requested alignment.
+*/
 @(require_results)
 @(require_results)
 rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator {
 rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator {
 	return Allocator {
 	return Allocator {
@@ -294,48 +444,37 @@ rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator {
 }
 }
 
 
 @(require_results)
 @(require_results)
-rollback_stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                                      size, alignment: int,
-                                      old_memory: rawptr, old_size: int, location := #caller_location,
+rollback_stack_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size, alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
 ) -> (result: []byte, err: Allocator_Error) {
 ) -> (result: []byte, err: Allocator_Error) {
 	stack := cast(^Rollback_Stack)allocator_data
 	stack := cast(^Rollback_Stack)allocator_data
-
 	switch mode {
 	switch mode {
-	case .Alloc, .Alloc_Non_Zeroed:
-		assert(size >= 0, "Size must be positive or zero.", location)
-		assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location)
-		result = rb_alloc(stack, size, alignment) or_return
-
-		if mode == .Alloc {
-			zero_slice(result)
-		}
-
+	case .Alloc:
+		return rb_alloc_bytes(stack, size, alignment, loc)
+	case .Alloc_Non_Zeroed:
+		return rb_alloc_bytes_non_zeroed(stack, size, alignment, loc)
 	case .Free:
 	case .Free:
-		err = rb_free(stack, old_memory)
-
+		return nil, rb_free(stack, old_memory)
 	case .Free_All:
 	case .Free_All:
 		rb_free_all(stack)
 		rb_free_all(stack)
-
-	case .Resize, .Resize_Non_Zeroed:
-		assert(size >= 0, "Size must be positive or zero.", location)
-		assert(old_size >= 0, "Old size must be positive or zero.", location)
-		assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location)
-		result = rb_resize(stack, old_memory, old_size, size, alignment) or_return
-
-		#no_bounds_check if mode == .Resize && size > old_size {
-			zero_slice(result[old_size:])
-		}
-
+		return nil, nil
+	case .Resize:
+		return rb_resize_bytes(stack, byte_slice(old_memory, old_size), size, alignment, loc)
+	case .Resize_Non_Zeroed:
+		return rb_resize_bytes_non_zeroed(stack, byte_slice(old_memory, old_size), size, alignment, loc)
 	case .Query_Features:
 	case .Query_Features:
 		set := (^Allocator_Mode_Set)(old_memory)
 		set := (^Allocator_Mode_Set)(old_memory)
 		if set != nil {
 		if set != nil {
 			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed}
 			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed}
 		}
 		}
 		return nil, nil
 		return nil, nil
-
 	case .Query_Info:
 	case .Query_Info:
 		return nil, .Mode_Not_Implemented
 		return nil, .Mode_Not_Implemented
 	}
 	}
-
 	return
 	return
 }
 }

+ 98 - 21
core/mem/tracking_allocator.odin

@@ -4,50 +4,85 @@ package mem
 import "base:runtime"
 import "base:runtime"
 import "core:sync"
 import "core:sync"
 
 
+/*
+Allocation entry for the tracking allocator.
+
+This structure stores the data related to an allocation.
+*/
 Tracking_Allocator_Entry :: struct {
 Tracking_Allocator_Entry :: struct {
-	memory:    rawptr,
-	size:      int,
+	// Pointer to an allocated region.
+	memory: rawptr,
+	// Size of the allocated memory region.
+	size: int,
+	// Requested alignment.
 	alignment: int,
 	alignment: int,
-	mode:      Allocator_Mode,
-	err:       Allocator_Error,
+	// Mode of the operation.
+	mode: Allocator_Mode,
+	// Error.
+	err: Allocator_Error,
+	// Location of the allocation.
 	location:  runtime.Source_Code_Location,
 	location:  runtime.Source_Code_Location,
 }
 }
+
+/*
+Bad free entry for a tracking allocator.
+*/
 Tracking_Allocator_Bad_Free_Entry :: struct {
 Tracking_Allocator_Bad_Free_Entry :: struct {
-	memory:   rawptr,
+	// Pointer on which the free operation was called.
+	memory: rawptr,
+	// The source location of where the operation was called.
 	location: runtime.Source_Code_Location,
 	location: runtime.Source_Code_Location,
 }
 }
+
+/*
+Tracking allocator data.
+*/
 Tracking_Allocator :: struct {
 Tracking_Allocator :: struct {
-	backing:           Allocator,
-	allocation_map:    map[rawptr]Tracking_Allocator_Entry,
-	bad_free_array:    [dynamic]Tracking_Allocator_Bad_Free_Entry,
-	mutex:             sync.Mutex,
+	backing: Allocator,
+	allocation_map: map[rawptr]Tracking_Allocator_Entry,
+	bad_free_array: [dynamic]Tracking_Allocator_Bad_Free_Entry,
+	mutex: sync.Mutex,
 	clear_on_free_all: bool,
 	clear_on_free_all: bool,
-
-	total_memory_allocated:   i64,
-	total_allocation_count:   i64,
-	total_memory_freed:       i64,
-	total_free_count:         i64,
-	peak_memory_allocated:    i64,
+	total_memory_allocated: i64,
+	total_allocation_count: i64,
+	total_memory_freed: i64,
+	total_free_count: i64,
+	peak_memory_allocated: i64,
 	current_memory_allocated: i64,
 	current_memory_allocated: i64,
 }
 }
 
 
+/*
+Initialize the tracking allocator.
+
+This procedure initializes the tracking allocator `t` with a backing allocator
+specified with `backing_allocator`. The `internals_allocator` will be used to
+allocate the tracked data.
+*/
 tracking_allocator_init :: proc(t: ^Tracking_Allocator, backing_allocator: Allocator, internals_allocator := context.allocator) {
 tracking_allocator_init :: proc(t: ^Tracking_Allocator, backing_allocator: Allocator, internals_allocator := context.allocator) {
 	t.backing = backing_allocator
 	t.backing = backing_allocator
 	t.allocation_map.allocator = internals_allocator
 	t.allocation_map.allocator = internals_allocator
 	t.bad_free_array.allocator = internals_allocator
 	t.bad_free_array.allocator = internals_allocator
-
 	if .Free_All in query_features(t.backing) {
 	if .Free_All in query_features(t.backing) {
 		t.clear_on_free_all = true
 		t.clear_on_free_all = true
 	}
 	}
 }
 }
 
 
+/*
+Destroy the tracking allocator.
+*/
 tracking_allocator_destroy :: proc(t: ^Tracking_Allocator) {
 tracking_allocator_destroy :: proc(t: ^Tracking_Allocator) {
 	delete(t.allocation_map)
 	delete(t.allocation_map)
 	delete(t.bad_free_array)
 	delete(t.bad_free_array)
 }
 }
 
 
+/*
+Clear the tracking allocator.
+
+This procedure clears the tracked data from a tracking allocator.
 
 
-// Clear only the current allocation data while keeping the totals intact.
+**Note**: This procedure clears only the current allocation data while keeping
+the totals intact.
+*/
 tracking_allocator_clear :: proc(t: ^Tracking_Allocator) {
 tracking_allocator_clear :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_lock(&t.mutex)
 	sync.mutex_lock(&t.mutex)
 	clear(&t.allocation_map)
 	clear(&t.allocation_map)
@@ -56,7 +91,11 @@ tracking_allocator_clear :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_unlock(&t.mutex)
 	sync.mutex_unlock(&t.mutex)
 }
 }
 
 
-// Reset all of a Tracking Allocator's allocation data back to zero.
+/*
+Reset the tracking allocator.
+
+Reset all of a Tracking Allocator's allocation data back to zero.
+*/
 tracking_allocator_reset :: proc(t: ^Tracking_Allocator) {
 tracking_allocator_reset :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_lock(&t.mutex)
 	sync.mutex_lock(&t.mutex)
 	clear(&t.allocation_map)
 	clear(&t.allocation_map)
@@ -70,6 +109,39 @@ tracking_allocator_reset :: proc(t: ^Tracking_Allocator) {
 	sync.mutex_unlock(&t.mutex)
 	sync.mutex_unlock(&t.mutex)
 }
 }
 
 
+/*
+Tracking allocator.
+
+The tracking allocator is an allocator wrapper that tracks memory allocations.
+This allocator stores all the allocations in a map. Whenever a pointer that's
+not inside of the map is freed, an entry is added to `bad_free_array`.
+
+An example of how to use the `Tracking_Allocator` to track subsequent allocations
+in your program and report leaks and bad frees:
+
+Example:
+
+	package foo
+
+	import "core:mem"
+	import "core:fmt"
+
+	main :: proc() {
+		track: mem.Tracking_Allocator
+		mem.tracking_allocator_init(&track, context.allocator)
+		defer mem.tracking_allocator_destroy(&track)
+		context.allocator = mem.tracking_allocator(&track)
+
+		do_stuff()
+
+		for _, leak in track.allocation_map {
+			fmt.printf("%v leaked %m\n", leak.location, leak.size)
+		}
+		for bad_free in track.bad_free_array {
+			fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
+		}
+	}
+*/
 @(require_results)
 @(require_results)
 tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator {
 tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator {
 	return Allocator{
 	return Allocator{
@@ -78,9 +150,14 @@ tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator {
 	}
 	}
 }
 }
 
 
-tracking_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                                size, alignment: int,
-                                old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) {
+tracking_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size, alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> (result: []byte, err: Allocator_Error) {
 	track_alloc :: proc(data: ^Tracking_Allocator, entry: ^Tracking_Allocator_Entry) {
 	track_alloc :: proc(data: ^Tracking_Allocator, entry: ^Tracking_Allocator_Entry) {
 		data.total_memory_allocated += i64(entry.size)
 		data.total_memory_allocated += i64(entry.size)
 		data.total_allocation_count += 1
 		data.total_allocation_count += 1

+ 10 - 0
core/odin/parser/parser.odin

@@ -2302,6 +2302,16 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 			bd.name = name.text
 			bd.name = name.text
 			return bd
 			return bd
 
 
+		case "caller_expression":
+			bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name))
+			bd.tok  = tok
+			bd.name = name.text
+
+			if peek_token_kind(p, .Open_Paren) {
+				return parse_call_expr(p, bd)
+			}
+			return bd
+
 		case "location", "exists", "load", "load_directory", "load_hash", "hash", "assert", "panic", "defined", "config":
 		case "location", "exists", "load", "load_directory", "load_hash", "hash", "assert", "panic", "defined", "config":
 			bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name))
 			bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name))
 			bd.tok  = tok
 			bd.tok  = tok

+ 1 - 1
core/os/os_freebsd.odin

@@ -920,7 +920,7 @@ get_page_size :: proc() -> int {
 _processor_core_count :: proc() -> int {
 _processor_core_count :: proc() -> int {
 	count : int = 0
 	count : int = 0
 	count_size := size_of(count)
 	count_size := size_of(count)
-	if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 {
+	if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 {
 		if count > 0 {
 		if count > 0 {
 			return count
 			return count
 		}
 		}

+ 19 - 70
core/os/os_js.odin

@@ -3,33 +3,38 @@ package os
 
 
 import "base:runtime"
 import "base:runtime"
 
 
+foreign import "odin_env"
+
 @(require_results)
 @(require_results)
 is_path_separator :: proc(c: byte) -> bool {
 is_path_separator :: proc(c: byte) -> bool {
 	return c == '/' || c == '\\'
 	return c == '/' || c == '\\'
 }
 }
 
 
+Handle :: distinct u32
+
+stdout: Handle = 1
+stderr: Handle = 2
+
 @(require_results)
 @(require_results)
 open :: proc(path: string, mode: int = O_RDONLY, perm: int = 0) -> (Handle, Error) {
 open :: proc(path: string, mode: int = O_RDONLY, perm: int = 0) -> (Handle, Error) {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
 
 
 close :: proc(fd: Handle) -> Error {
 close :: proc(fd: Handle) -> Error {
-	unimplemented("core:os procedure not supported on JS target")
+	return nil
 }
 }
 
 
 flush :: proc(fd: Handle) -> (err: Error) {
 flush :: proc(fd: Handle) -> (err: Error) {
-	unimplemented("core:os procedure not supported on JS target")
+	return nil
 }
 }
 
 
-
-
 write :: proc(fd: Handle, data: []byte) -> (int, Error) {
 write :: proc(fd: Handle, data: []byte) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-@(private="file")
-read_console :: proc(handle: Handle, b: []byte) -> (n: int, err: Error) {
-	unimplemented("core:os procedure not supported on JS target")
+	foreign odin_env {
+		@(link_name="write")
+		_write :: proc "contextless" (fd: Handle, p: []byte) ---
+	}
+	_write(fd, data)
+	return len(data), nil
 }
 }
 
 
 read :: proc(fd: Handle, data: []byte) -> (int, Error) {
 read :: proc(fd: Handle, data: []byte) -> (int, Error) {
@@ -45,19 +50,6 @@ file_size :: proc(fd: Handle) -> (i64, Error) {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
 
 
-
-@(private)
-MAX_RW :: 1<<30
-
-@(private)
-pread :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-@(private)
-pwrite :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
 read_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error) {
 read_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error) {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
@@ -65,16 +57,6 @@ write_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error)
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
 
 
-stdout: Handle = 1
-stderr: Handle = 2
-
-@(require_results)
-get_std_handle :: proc "contextless" (h: uint) -> Handle {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-
 @(require_results)
 @(require_results)
 exists :: proc(path: string) -> bool {
 exists :: proc(path: string) -> bool {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
@@ -90,9 +72,6 @@ is_dir :: proc(path: string) -> bool {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
 
 
-// NOTE(tetra): GetCurrentDirectory is not thread safe with SetCurrentDirectory and GetFullPathName
-//@private cwd_lock := win32.SRWLOCK{} // zero is initialized
-
 @(require_results)
 @(require_results)
 get_current_directory :: proc(allocator := context.allocator) -> string {
 get_current_directory :: proc(allocator := context.allocator) -> string {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
@@ -118,18 +97,6 @@ remove_directory :: proc(path: string) -> (err: Error) {
 }
 }
 
 
 
 
-
-@(private, require_results)
-is_abs :: proc(path: string) -> bool {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-@(private, require_results)
-fix_long_path :: proc(path: string) -> string {
-	unimplemented("core:os procedure not supported on JS target")
-}
-
-
 link :: proc(old_name, new_name: string) -> (err: Error) {
 link :: proc(old_name, new_name: string) -> (err: Error) {
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
@@ -169,7 +136,6 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F
 	unimplemented("core:os procedure not supported on JS target")
 	unimplemented("core:os procedure not supported on JS target")
 }
 }
 
 
-Handle    :: distinct uintptr
 File_Time :: distinct u64
 File_Time :: distinct u64
 
 
 _Platform_Error :: enum i32 {
 _Platform_Error :: enum i32 {
@@ -254,12 +220,7 @@ WSAECONNRESET             :: Platform_Error.WSAECONNRESET
 ERROR_FILE_IS_PIPE        :: General_Error.File_Is_Pipe
 ERROR_FILE_IS_PIPE        :: General_Error.File_Is_Pipe
 ERROR_FILE_IS_NOT_DIR     :: General_Error.Not_Dir
 ERROR_FILE_IS_NOT_DIR     :: General_Error.Not_Dir
 
 
-// "Argv" arguments converted to Odin strings
-args := _alloc_command_line_arguments()
-
-
-
-
+args: []string
 
 
 @(require_results)
 @(require_results)
 last_write_time :: proc(fd: Handle) -> (File_Time, Error) {
 last_write_time :: proc(fd: Handle) -> (File_Time, Error) {
@@ -279,26 +240,14 @@ get_page_size :: proc() -> int {
 
 
 @(private, require_results)
 @(private, require_results)
 _processor_core_count :: proc() -> int {
 _processor_core_count :: proc() -> int {
-	unimplemented("core:os procedure not supported on JS target")
+	return 1
 }
 }
 
 
 exit :: proc "contextless" (code: int) -> ! {
 exit :: proc "contextless" (code: int) -> ! {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
+	unimplemented_contextless("core:os procedure not supported on JS target")
 }
 }
 
 
-
-
 @(require_results)
 @(require_results)
 current_thread_id :: proc "contextless" () -> int {
 current_thread_id :: proc "contextless" () -> int {
-	context = runtime.default_context()
-	unimplemented("core:os procedure not supported on JS target")
+	return 0
 }
 }
-
-
-
-@(require_results)
-_alloc_command_line_arguments :: proc() -> []string {
-	return nil
-}
-

+ 1 - 1
core/os/os_netbsd.odin

@@ -978,7 +978,7 @@ get_page_size :: proc() -> int {
 _processor_core_count :: proc() -> int {
 _processor_core_count :: proc() -> int {
 	count : int = 0
 	count : int = 0
 	count_size := size_of(count)
 	count_size := size_of(count)
-	if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 {
+	if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 {
 		if count > 0 {
 		if count > 0 {
 			return count
 			return count
 		}
 		}

+ 7 - 7
core/strings/strings.odin

@@ -93,7 +93,7 @@ Inputs:
 Returns:
 Returns:
 - res: A string created from the null-terminated byte pointer and length
 - res: A string created from the null-terminated byte pointer and length
 */
 */
-string_from_null_terminated_ptr :: proc(ptr: [^]byte, len: int) -> (res: string) {
+string_from_null_terminated_ptr :: proc "contextless" (ptr: [^]byte, len: int) -> (res: string) {
 	s := string(ptr[:len])
 	s := string(ptr[:len])
 	s = truncate_to_byte(s, 0)
 	s = truncate_to_byte(s, 0)
 	return s
 	return s
@@ -139,7 +139,7 @@ NOTE: Failure to find the byte results in returning the entire string.
 Returns:
 Returns:
 - res: The truncated string
 - res: The truncated string
 */
 */
-truncate_to_byte :: proc(str: string, b: byte) -> (res: string) {
+truncate_to_byte :: proc "contextless" (str: string, b: byte) -> (res: string) {
 	n := index_byte(str, b)
 	n := index_byte(str, b)
 	if n < 0 {
 	if n < 0 {
 		n = len(str)
 		n = len(str)
@@ -261,7 +261,7 @@ Inputs:
 Returns:
 Returns:
 - result: `-1` if `lhs` comes first, `1` if `rhs` comes first, or `0` if they are equal
 - result: `-1` if `lhs` comes first, `1` if `rhs` comes first, or `0` if they are equal
 */
 */
-compare :: proc(lhs, rhs: string) -> (result: int) {
+compare :: proc "contextless" (lhs, rhs: string) -> (result: int) {
 	return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs)
 	return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs)
 }
 }
 /*
 /*
@@ -1447,7 +1447,7 @@ Output:
 	-1
 	-1
 
 
 */
 */
-index_byte :: proc(s: string, c: byte) -> (res: int) {
+index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) {
 	return #force_inline bytes.index_byte(transmute([]u8)s, c)
 	return #force_inline bytes.index_byte(transmute([]u8)s, c)
 }
 }
 /*
 /*
@@ -1482,7 +1482,7 @@ Output:
 	-1
 	-1
 
 
 */
 */
-last_index_byte :: proc(s: string, c: byte) -> (res: int) {
+last_index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) {
 	return #force_inline bytes.last_index_byte(transmute([]u8)s, c)
 	return #force_inline bytes.last_index_byte(transmute([]u8)s, c)
 }
 }
 /*
 /*
@@ -1576,8 +1576,8 @@ Output:
 	-1
 	-1
 
 
 */
 */
-index :: proc(s, substr: string) -> (res: int) {
-	hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
+index :: proc "contextless" (s, substr: string) -> (res: int) {
+	hash_str_rabin_karp :: proc "contextless" (s: string) -> (hash: u32 = 0, pow: u32 = 1) {
 		for i := 0; i < len(s); i += 1 {
 		for i := 0; i < len(s); i += 1 {
 			hash = hash*PRIME_RABIN_KARP + u32(s[i])
 			hash = hash*PRIME_RABIN_KARP + u32(s[i])
 		}
 		}

+ 42 - 41
core/sync/chan/chan.odin

@@ -22,19 +22,17 @@ Raw_Chan :: struct {
 	allocator:       runtime.Allocator,
 	allocator:       runtime.Allocator,
 	allocation_size: int,
 	allocation_size: int,
 	msg_size:        u16,
 	msg_size:        u16,
-	closed:          b16, // atomic
+	closed:          b16, // guarded by `mutex`
 	mutex:           sync.Mutex,
 	mutex:           sync.Mutex,
 	r_cond:          sync.Cond,
 	r_cond:          sync.Cond,
 	w_cond:          sync.Cond,
 	w_cond:          sync.Cond,
-	r_waiting:       int,  // atomic
-	w_waiting:       int,  // atomic
+	r_waiting:       int,  // guarded by `mutex`
+	w_waiting:       int,  // guarded by `mutex`
 
 
 	// Buffered
 	// Buffered
 	queue: ^Raw_Queue,
 	queue: ^Raw_Queue,
 
 
 	// Unbuffered
 	// Unbuffered
-	r_mutex:         sync.Mutex,
-	w_mutex:         sync.Mutex,
 	unbuffered_data: rawptr,
 	unbuffered_data: rawptr,
 }
 }
 
 
@@ -164,27 +162,30 @@ send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool) {
 	}
 	}
 	if c.queue != nil { // buffered
 	if c.queue != nil { // buffered
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
-		for c.queue.len == c.queue.cap {
-			sync.atomic_add(&c.w_waiting, 1)
+		for !c.closed && c.queue.len == c.queue.cap {
+			c.w_waiting += 1
 			sync.wait(&c.w_cond, &c.mutex)
 			sync.wait(&c.w_cond, &c.mutex)
-			sync.atomic_sub(&c.w_waiting, 1)
+			c.w_waiting -= 1
+		}
+
+		if c.closed {
+			return false
 		}
 		}
 
 
 		ok = raw_queue_push(c.queue, msg_in)
 		ok = raw_queue_push(c.queue, msg_in)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 			sync.signal(&c.r_cond)
 		}
 		}
 	} else if c.unbuffered_data != nil { // unbuffered
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.w_mutex)
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
 
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return false
 			return false
 		}
 		}
 
 
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
-		sync.atomic_add(&c.w_waiting, 1)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		c.w_waiting += 1
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 			sync.signal(&c.r_cond)
 		}
 		}
 		sync.wait(&c.w_cond, &c.mutex)
 		sync.wait(&c.w_cond, &c.mutex)
@@ -201,13 +202,13 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) {
 	if c.queue != nil { // buffered
 	if c.queue != nil { // buffered
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
 		for c.queue.len == 0 {
 		for c.queue.len == 0 {
-			if sync.atomic_load(&c.closed) {
+			if c.closed {
 				return
 				return
 			}
 			}
 
 
-			sync.atomic_add(&c.r_waiting, 1)
+			c.r_waiting += 1
 			sync.wait(&c.r_cond, &c.mutex)
 			sync.wait(&c.r_cond, &c.mutex)
-			sync.atomic_sub(&c.r_waiting, 1)
+			c.r_waiting -= 1
 		}
 		}
 
 
 		msg := raw_queue_pop(c.queue)
 		msg := raw_queue_pop(c.queue)
@@ -215,27 +216,26 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) {
 			mem.copy(msg_out, msg, int(c.msg_size))
 			mem.copy(msg_out, msg, int(c.msg_size))
 		}
 		}
 
 
-		if sync.atomic_load(&c.w_waiting) > 0 {
+		if c.w_waiting > 0 {
 			sync.signal(&c.w_cond)
 			sync.signal(&c.w_cond)
 		}
 		}
 		ok = true
 		ok = true
 	} else if c.unbuffered_data != nil { // unbuffered
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.r_mutex)
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
 
 
-		for !sync.atomic_load(&c.closed) &&
-		    sync.atomic_load(&c.w_waiting) == 0 {
-			sync.atomic_add(&c.r_waiting, 1)
+		for !c.closed &&
+			c.w_waiting == 0 {
+			c.r_waiting += 1
 			sync.wait(&c.r_cond, &c.mutex)
 			sync.wait(&c.r_cond, &c.mutex)
-			sync.atomic_sub(&c.r_waiting, 1)
+			c.r_waiting -= 1
 		}
 		}
 
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return
 			return
 		}
 		}
 
 
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
-		sync.atomic_sub(&c.w_waiting, 1)
+		c.w_waiting -= 1
 
 
 		sync.signal(&c.w_cond)
 		sync.signal(&c.w_cond)
 		ok = true
 		ok = true
@@ -255,21 +255,24 @@ try_send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool)
 			return false
 			return false
 		}
 		}
 
 
+		if c.closed {
+			return false
+		}
+
 		ok = raw_queue_push(c.queue, msg_in)
 		ok = raw_queue_push(c.queue, msg_in)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 			sync.signal(&c.r_cond)
 		}
 		}
 	} else if c.unbuffered_data != nil { // unbuffered
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.w_mutex)
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
 
 
-		if sync.atomic_load(&c.closed) {
+		if c.closed {
 			return false
 			return false
 		}
 		}
 
 
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
 		mem.copy(c.unbuffered_data, msg_in, int(c.msg_size))
-		sync.atomic_add(&c.w_waiting, 1)
-		if sync.atomic_load(&c.r_waiting) > 0 {
+		c.w_waiting += 1
+		if c.r_waiting > 0 {
 			sync.signal(&c.r_cond)
 			sync.signal(&c.r_cond)
 		}
 		}
 		sync.wait(&c.w_cond, &c.mutex)
 		sync.wait(&c.w_cond, &c.mutex)
@@ -294,21 +297,19 @@ try_recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> bool {
 			mem.copy(msg_out, msg, int(c.msg_size))
 			mem.copy(msg_out, msg, int(c.msg_size))
 		}
 		}
 
 
-		if sync.atomic_load(&c.w_waiting) > 0 {
+		if c.w_waiting > 0 {
 			sync.signal(&c.w_cond)
 			sync.signal(&c.w_cond)
 		}
 		}
 		return true
 		return true
 	} else if c.unbuffered_data != nil { // unbuffered
 	} else if c.unbuffered_data != nil { // unbuffered
-		sync.guard(&c.r_mutex)
 		sync.guard(&c.mutex)
 		sync.guard(&c.mutex)
 
 
-		if sync.atomic_load(&c.closed) ||
-		   sync.atomic_load(&c.w_waiting) == 0 {
+		if c.closed || c.w_waiting == 0 {
 			return false
 			return false
 		}
 		}
 
 
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
 		mem.copy(msg_out, c.unbuffered_data, int(c.msg_size))
-		sync.atomic_sub(&c.w_waiting, 1)
+		c.w_waiting -= 1
 
 
 		sync.signal(&c.w_cond)
 		sync.signal(&c.w_cond)
 		return true
 		return true
@@ -351,10 +352,10 @@ close :: proc "contextless" (c: ^Raw_Chan) -> bool {
 		return false
 		return false
 	}
 	}
 	sync.guard(&c.mutex)
 	sync.guard(&c.mutex)
-	if sync.atomic_load(&c.closed) {
+	if c.closed {
 		return false
 		return false
 	}
 	}
-	sync.atomic_store(&c.closed, true)
+	c.closed = true
 	sync.broadcast(&c.r_cond)
 	sync.broadcast(&c.r_cond)
 	sync.broadcast(&c.w_cond)
 	sync.broadcast(&c.w_cond)
 	return true
 	return true
@@ -366,7 +367,7 @@ is_closed :: proc "contextless" (c: ^Raw_Chan) -> bool {
 		return true
 		return true
 	}
 	}
 	sync.guard(&c.mutex)
 	sync.guard(&c.mutex)
-	return bool(sync.atomic_load(&c.closed))
+	return bool(c.closed)
 }
 }
 
 
 
 
@@ -423,9 +424,9 @@ raw_queue_pop :: proc "contextless" (q: ^Raw_Queue) -> (data: rawptr) {
 can_recv :: proc "contextless" (c: ^Raw_Chan) -> bool {
 can_recv :: proc "contextless" (c: ^Raw_Chan) -> bool {
 	sync.guard(&c.mutex)
 	sync.guard(&c.mutex)
 	if is_buffered(c) {
 	if is_buffered(c) {
-		return len(c) > 0
+		return c.queue.len > 0
 	}
 	}
-	return sync.atomic_load(&c.w_waiting) > 0
+	return c.w_waiting > 0
 }
 }
 
 
 
 
@@ -435,7 +436,7 @@ can_send :: proc "contextless" (c: ^Raw_Chan) -> bool {
 	if is_buffered(c) {
 	if is_buffered(c) {
 		return c.queue.len < c.queue.cap
 		return c.queue.len < c.queue.cap
 	}
 	}
-	return sync.atomic_load(&c.r_waiting) > 0
+	return c.w_waiting == 0
 }
 }
 
 
 
 
@@ -484,4 +485,4 @@ select_raw :: proc "odin" (recvs: []^Raw_Chan, sends: []^Raw_Chan, send_msgs: []
 		ok = send_raw(sends[sel.idx], send_msgs[sel.idx])
 		ok = send_raw(sends[sel.idx], send_msgs[sel.idx])
 	}
 	}
 	return
 	return
-}
+}

+ 38 - 37
core/sync/extended.odin

@@ -8,7 +8,7 @@ _ :: vg
 Wait group.
 Wait group.
 
 
 Wait group is a synchronization primitive used by the waiting thread to wait,
 Wait group is a synchronization primitive used by the waiting thread to wait,
-until a all working threads finish work.
+until all working threads finish work.
 
 
 The waiting thread first sets the number of working threads it will expect to
 The waiting thread first sets the number of working threads it will expect to
 wait for using `wait_group_add` call, and start waiting using `wait_group_wait`
 wait for using `wait_group_add` call, and start waiting using `wait_group_wait`
@@ -35,7 +35,7 @@ Wait_Group :: struct #no_copy {
 /*
 /*
 Increment an internal counter of a wait group.
 Increment an internal counter of a wait group.
 
 
-This procedure atomicaly increments a number to the specified wait group's
+This procedure atomically adds a specified amount to the specified wait group's
 internal counter. This operation can be done on any
 internal counter. This operation can be done on any
 thread.
 thread.
 */
 */
@@ -48,12 +48,12 @@ wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 
 
 	atomic_add(&wg.counter, delta)
 	atomic_add(&wg.counter, delta)
 	if wg.counter < 0 {
 	if wg.counter < 0 {
-		_panic("sync.Wait_Group negative counter")
+		panic_contextless("sync.Wait_Group negative counter")
 	}
 	}
 	if wg.counter == 0 {
 	if wg.counter == 0 {
 		cond_broadcast(&wg.cond)
 		cond_broadcast(&wg.cond)
 		if wg.counter != 0 {
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 		}
 	}
 	}
 }
 }
@@ -81,7 +81,7 @@ wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	if wg.counter != 0 {
 	if wg.counter != 0 {
 		cond_wait(&wg.cond, &wg.mutex)
 		cond_wait(&wg.cond, &wg.mutex)
 		if wg.counter != 0 {
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 		}
 	}
 	}
 }
 }
@@ -105,7 +105,7 @@ wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: t
 			return false
 			return false
 		}
 		}
 		if wg.counter != 0 {
 		if wg.counter != 0 {
-			_panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
+			panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait")
 		}
 		}
 	}
 	}
 	return true
 	return true
@@ -121,7 +121,7 @@ When `barrier_wait` procedure is called by any thread, that thread will block
 the execution, until all threads associated with the barrier reach the same
 the execution, until all threads associated with the barrier reach the same
 point of execution and also call `barrier_wait`.
 point of execution and also call `barrier_wait`.
 
 
-when barrier is initialized, a `thread_count` parameter is passed, signifying
+When a barrier is initialized, a `thread_count` parameter is passed, signifying
 the amount of participant threads of the barrier. The barrier also keeps track
 the amount of participant threads of the barrier. The barrier also keeps track
 of an internal atomic counter. When a thread calls `barrier_wait`, the internal
 of an internal atomic counter. When a thread calls `barrier_wait`, the internal
 counter is incremented. When the internal counter reaches `thread_count`, it is
 counter is incremented. When the internal counter reaches `thread_count`, it is
@@ -208,7 +208,7 @@ Represents a thread synchronization primitive that, when signalled, releases one
 single waiting thread and then resets automatically to a state where it can be
 single waiting thread and then resets automatically to a state where it can be
 signalled again.
 signalled again.
 
 
-When a thread calls `auto_reset_event_wait`, it's execution will be blocked,
+When a thread calls `auto_reset_event_wait`, its execution will be blocked,
 until the event is signalled by another thread. The call to
 until the event is signalled by another thread. The call to
 `auto_reset_event_signal` wakes up exactly one thread waiting for the event.
 `auto_reset_event_signal` wakes up exactly one thread waiting for the event.
 */
 */
@@ -228,15 +228,15 @@ thread.
 */
 */
 auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) {
 	old_status := atomic_load_explicit(&e.status, .Relaxed)
 	old_status := atomic_load_explicit(&e.status, .Relaxed)
+	new_status := old_status + 1 if old_status < 1 else 1
 	for {
 	for {
-		new_status := old_status + 1 if old_status < 1 else 1
 		if _, ok := atomic_compare_exchange_weak_explicit(&e.status, old_status, new_status, .Release, .Relaxed); ok {
 		if _, ok := atomic_compare_exchange_weak_explicit(&e.status, old_status, new_status, .Release, .Relaxed); ok {
 			break
 			break
 		}
 		}
-
-		if old_status < 0 {
-			sema_post(&e.sema)
-		}
+		cpu_relax()
+	}
+	if old_status < 0 {
+		sema_post(&e.sema)
 	}
 	}
 }
 }
 
 
@@ -297,7 +297,7 @@ waiting to acquire the lock, exactly one of those threads is unblocked and
 allowed into the critical section.
 allowed into the critical section.
 */
 */
 ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
-	atomic_add_explicit(&m.serving, 1, .Relaxed)
+	atomic_add_explicit(&m.serving, 1, .Release)
 }
 }
 
 
 /*
 /*
@@ -331,8 +331,8 @@ Benaphore.
 
 
 A benaphore is a combination of an atomic variable and a semaphore that can
 A benaphore is a combination of an atomic variable and a semaphore that can
 improve locking efficiency in a no-contention system. Acquiring a benaphore
 improve locking efficiency in a no-contention system. Acquiring a benaphore
-lock doesn't call into an internal semaphore, if no other thread in a middle of
-a critical section.
+lock doesn't call into an internal semaphore, if no other thread is in the
+middle of a critical section.
 
 
 Once a lock on a benaphore is acquired by a thread, no other thread is allowed
 Once a lock on a benaphore is acquired by a thread, no other thread is allowed
 into any critical sections, associated with the same benaphore, until the lock
 into any critical sections, associated with the same benaphore, until the lock
@@ -355,7 +355,7 @@ from entering any critical sections associated with the same benaphore, until
 the lock is released.
 the lock is released.
 */
 */
 benaphore_lock :: proc "contextless" (b: ^Benaphore) {
 benaphore_lock :: proc "contextless" (b: ^Benaphore) {
-	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
+	if atomic_add_explicit(&b.counter, 1, .Acquire) > 0 {
 		sema_wait(&b.sema)
 		sema_wait(&b.sema)
 	}
 	}
 }
 }
@@ -381,10 +381,10 @@ Release a lock on a benaphore.
 
 
 This procedure releases a lock on the specified benaphore. If any of the threads
 This procedure releases a lock on the specified benaphore. If any of the threads
 are waiting on the lock, exactly one thread is allowed into a critical section
 are waiting on the lock, exactly one thread is allowed into a critical section
-associated with the same banaphore.
+associated with the same benaphore.
 */
 */
 benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
 benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
-	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
+	if atomic_sub_explicit(&b.counter, 1, .Release) > 1 {
 		sema_post(&b.sema)
 		sema_post(&b.sema)
 	}
 	}
 }
 }
@@ -418,8 +418,8 @@ benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool {
 /*
 /*
 Recursive benaphore.
 Recursive benaphore.
 
 
-Recurisve benaphore is just like a plain benaphore, except it allows reentrancy
-into the critical section.
+A recursive benaphore is just like a plain benaphore, except it allows
+reentrancy into the critical section.
 
 
 When a lock is acquired on a benaphore, all other threads attempting to
 When a lock is acquired on a benaphore, all other threads attempting to
 acquire a lock on the same benaphore will be blocked from any critical sections,
 acquire a lock on the same benaphore will be blocked from any critical sections,
@@ -449,13 +449,15 @@ recursive benaphore, until the lock is released.
 */
 */
 recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	tid := current_thread_id()
-	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
-		if tid != b.owner {
-			sema_wait(&b.sema)
+	check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) {
+		atomic_add_explicit(&b.counter, 1, .Relaxed)
+		if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok {
+			break check_owner
 		}
 		}
+		sema_wait(&b.sema)
+		atomic_store_explicit(&b.owner, tid, .Release)
 	}
 	}
 	// inside the lock
 	// inside the lock
-	b.owner = tid
 	b.recursion += 1
 	b.recursion += 1
 }
 }
 
 
@@ -472,15 +474,14 @@ benaphore, until the lock is released.
 */
 */
 recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 	tid := current_thread_id()
 	tid := current_thread_id()
-	if b.owner == tid {
-		atomic_add_explicit(&b.counter, 1, .Acquire)
-	}
-
-	if v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire); v != 0 {
+	check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) {
+		if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok {
+			atomic_add_explicit(&b.counter, 1, .Relaxed)
+			break check_owner
+		}
 		return false
 		return false
 	}
 	}
 	// inside the lock
 	// inside the lock
-	b.owner = tid
 	b.recursion += 1
 	b.recursion += 1
 	return true
 	return true
 }
 }
@@ -494,14 +495,14 @@ for other threads for entering.
 */
 */
 recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	tid := current_thread_id()
-	_assert(tid == b.owner, "tid != b.owner")
+	assert_contextless(tid == atomic_load_explicit(&b.owner, .Relaxed), "tid != b.owner")
 	b.recursion -= 1
 	b.recursion -= 1
 	recursion := b.recursion
 	recursion := b.recursion
+
 	if recursion == 0 {
 	if recursion == 0 {
-		b.owner = 0
-	}
-	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
-		if recursion == 0 {
+		if atomic_sub_explicit(&b.counter, 1, .Relaxed) == 1 {
+			atomic_store_explicit(&b.owner, 0, .Release)
+		} else {
 			sema_post(&b.sema)
 			sema_post(&b.sema)
 		}
 		}
 	}
 	}
@@ -740,4 +741,4 @@ Make event available.
 one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) {
 one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) {
 	atomic_store_explicit(&e.state, 1, .Release)
 	atomic_store_explicit(&e.state, 1, .Release)
 	futex_broadcast(&e.state)
 	futex_broadcast(&e.state)
-}
+}

+ 17 - 8
core/sync/futex_darwin.odin

@@ -12,6 +12,8 @@ foreign System {
 	// __ulock_wait is not available on 10.15
 	// __ulock_wait is not available on 10.15
 	// See https://github.com/odin-lang/Odin/issues/1959
 	// See https://github.com/odin-lang/Odin/issues/1959
 	__ulock_wait  :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_us: u32) -> c.int ---
 	__ulock_wait  :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_us: u32) -> c.int ---
+	// >= MacOS 11.
+	__ulock_wait2 :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_ns: u64, value2: u64) -> c.int ---
 	__ulock_wake  :: proc "c" (operation: u32, addr: rawptr, wake_value: u64) -> c.int ---
 	__ulock_wake  :: proc "c" (operation: u32, addr: rawptr, wake_value: u64) -> c.int ---
 }
 }
 
 
@@ -48,22 +50,29 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		case -ETIMEDOUT:
 		case -ETIMEDOUT:
 			return false
 			return false
 		case:
 		case:
-			_panic("darwin.os_sync_wait_on_address_with_timeout failure")
+			panic_contextless("darwin.os_sync_wait_on_address_with_timeout failure")
 		}
 		}
 	} else {
 	} else {
 
 
-	timeout_ns := u32(duration)
-	s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns)
+	when darwin.ULOCK_WAIT_2_AVAILABLE {
+		timeout_ns := u64(duration)
+		s := __ulock_wait2(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns, 0)
+	} else {
+		timeout_us := u32(duration / time.Microsecond)
+		s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_us)
+	}
+
 	if s >= 0 {
 	if s >= 0 {
 		return true
 		return true
 	}
 	}
+
 	switch s {
 	switch s {
 	case EINTR, EFAULT:
 	case EINTR, EFAULT:
 		return true
 		return true
 	case ETIMEDOUT:
 	case ETIMEDOUT:
 		return false
 		return false
 	case:
 	case:
-		_panic("futex_wait failure")
+		panic_contextless("futex_wait failure")
 	}
 	}
 	return true
 	return true
 
 
@@ -83,7 +92,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 			case -ENOENT:
 			case -ENOENT:
 				return
 				return
 			case:
 			case:
-				_panic("darwin.os_sync_wake_by_address_any failure")
+				panic_contextless("darwin.os_sync_wake_by_address_any failure")
 			}
 			}
 		}
 		}
 	} else {
 	} else {
@@ -99,7 +108,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 		case ENOENT:
 		case ENOENT:
 			return
 			return
 		case:
 		case:
-			_panic("futex_wake_single failure")
+			panic_contextless("futex_wake_single failure")
 		}
 		}
 	}
 	}
 
 
@@ -119,7 +128,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) {
 			case -ENOENT:
 			case -ENOENT:
 				return
 				return
 			case:
 			case:
-				_panic("darwin.os_sync_wake_by_address_all failure")
+				panic_contextless("darwin.os_sync_wake_by_address_all failure")
 			}
 			}
 		}
 		}
 	} else {
 	} else {
@@ -135,7 +144,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) {
 		case ENOENT:
 		case ENOENT:
 			return
 			return
 		case:
 		case:
-			_panic("futex_wake_all failure")
+			panic_contextless("futex_wake_all failure")
 		}
 		}
 	}
 	}
 
 

+ 4 - 4
core/sync/futex_freebsd.odin

@@ -21,7 +21,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 			continue
 			continue
 		}
 		}
 
 
-		_panic("_futex_wait failure")
+		panic_contextless("_futex_wait failure")
 	}
 	}
 
 
 	unreachable()
 	unreachable()
@@ -44,14 +44,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		return false
 		return false
 	}
 	}
 
 
-	_panic("_futex_wait_with_timeout failure")
+	panic_contextless("_futex_wait_with_timeout failure")
 }
 }
 
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	errno := freebsd._umtx_op(f, .WAKE, 1, nil, nil)
 	errno := freebsd._umtx_op(f, .WAKE, 1, nil, nil)
 
 
 	if errno != nil {
 	if errno != nil {
-		_panic("_futex_signal failure")
+		panic_contextless("_futex_signal failure")
 	}
 	}
 }
 }
 
 
@@ -59,6 +59,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	errno := freebsd._umtx_op(f, .WAKE, cast(c.ulong)max(i32), nil, nil)
 	errno := freebsd._umtx_op(f, .WAKE, cast(c.ulong)max(i32), nil, nil)
 
 
 	if errno != nil {
 	if errno != nil {
-		_panic("_futex_broadcast failure")
+		panic_contextless("_futex_broadcast failure")
 	}
 	}
 }
 }

+ 4 - 4
core/sync/futex_linux.odin

@@ -15,7 +15,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool {
 		return true
 		return true
 	case:
 	case:
 		// TODO(flysand): More descriptive panic messages based on the value of `errno`
 		// TODO(flysand): More descriptive panic messages based on the value of `errno`
-		_panic("futex_wait failure")
+		panic_contextless("futex_wait failure")
 	}
 	}
 }
 }
 
 
@@ -34,7 +34,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 	case .NONE, .EINTR, .EAGAIN:
 	case .NONE, .EINTR, .EAGAIN:
 		return true
 		return true
 	case:
 	case:
-		_panic("futex_wait_with_timeout failure")
+		panic_contextless("futex_wait_with_timeout failure")
 	}
 	}
 }
 }
 
 
@@ -44,7 +44,7 @@ _futex_signal :: proc "contextless" (futex: ^Futex) {
 	case .NONE:
 	case .NONE:
 		return
 		return
 	case:
 	case:
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 	}
 }
 }
 
 
@@ -57,6 +57,6 @@ _futex_broadcast :: proc "contextless" (futex: ^Futex)  {
 	case .NONE:
 	case .NONE:
 		return
 		return
 	case:
 	case:
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 	}
 }
 }

+ 4 - 4
core/sync/futex_netbsd.odin

@@ -35,7 +35,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool {
 		case EINTR, EAGAIN:
 		case EINTR, EAGAIN:
 			return true
 			return true
 		case:
 		case:
-			_panic("futex_wait failure")
+			panic_contextless("futex_wait failure")
 		}	
 		}	
 	}
 	}
 	return true
 	return true
@@ -55,7 +55,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 		case ETIMEDOUT:
 		case ETIMEDOUT:
 			return false
 			return false
 		case:
 		case:
-			_panic("futex_wait_with_timeout failure")
+			panic_contextless("futex_wait_with_timeout failure")
 		}
 		}
 	}
 	}
 	return true
 	return true
@@ -63,12 +63,12 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du
 
 
 _futex_signal :: proc "contextless" (futex: ^Futex) {
 _futex_signal :: proc "contextless" (futex: ^Futex) {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, 1, 0, 0, 0); !ok {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, 1, 0, 0, 0); !ok {
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 	}
 }
 }
 
 
 _futex_broadcast :: proc "contextless" (futex: ^Futex)  {
 _futex_broadcast :: proc "contextless" (futex: ^Futex)  {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, uintptr(max(i32)), 0, 0, 0); !ok {
 	if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, uintptr(max(i32)), 0, 0, 0); !ok {
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 	}
 }
 }

+ 4 - 4
core/sync/futex_openbsd.odin

@@ -36,7 +36,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 		return false
 		return false
 	}
 	}
 
 
-	_panic("futex_wait failure")
+	panic_contextless("futex_wait failure")
 }
 }
 
 
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
@@ -62,14 +62,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 		return false
 		return false
 	}
 	}
 
 
-	_panic("futex_wait_with_timeout failure")
+	panic_contextless("futex_wait_with_timeout failure")
 }
 }
 
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, 1, nil)
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, 1, nil)
 
 
 	if res == -1 {
 	if res == -1 {
-		_panic("futex_wake_single failure")
+		panic_contextless("futex_wake_single failure")
 	}
 	}
 }
 }
 
 
@@ -77,6 +77,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex)  {
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, u32(max(i32)), nil)
 	res := _unix_futex(f, FUTEX_WAKE_PRIVATE, u32(max(i32)), nil)
 
 
 	if res == -1 {
 	if res == -1 {
-		_panic("_futex_wake_all failure")
+		panic_contextless("_futex_wake_all failure")
 	}
 	}
 }
 }

+ 4 - 4
core/sync/futex_wasm.odin

@@ -10,7 +10,7 @@ import "core:time"
 
 
 _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 	when !intrinsics.has_target_feature("atomics") {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 	} else {
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
 		return s != 0
 		return s != 0
@@ -19,7 +19,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
 
 
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	when !intrinsics.has_target_feature("atomics") {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 	} else {
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
 		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
 		return s != 0
 		return s != 0
@@ -28,7 +28,7 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati
 
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
 _futex_signal :: proc "contextless" (f: ^Futex) {
 	when !intrinsics.has_target_feature("atomics") {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 	} else {
 		loop: for {
 		loop: for {
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
@@ -41,7 +41,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) {
 
 
 _futex_broadcast :: proc "contextless" (f: ^Futex) {
 _futex_broadcast :: proc "contextless" (f: ^Futex) {
 	when !intrinsics.has_target_feature("atomics") {
 	when !intrinsics.has_target_feature("atomics") {
-		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+		panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
 	} else {
 	} else {
 		loop: for {
 		loop: for {
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))
 			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))

+ 2 - 18
core/sync/primitives.odin

@@ -1,6 +1,5 @@
 package sync
 package sync
 
 
-import "base:runtime"
 import "core:time"
 import "core:time"
 
 
 /*
 /*
@@ -390,7 +389,7 @@ recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 A condition variable.
 A condition variable.
 
 
 `Cond` implements a condition variable, a rendezvous point for threads waiting
 `Cond` implements a condition variable, a rendezvous point for threads waiting
-for signalling the occurence of an event. Condition variables are used on
+for signalling the occurrence of an event. Condition variables are used in
 conjunction with mutexes to provide a shared access to one or more shared
 conjunction with mutexes to provide a shared access to one or more shared
 variable.
 variable.
 
 
@@ -560,7 +559,7 @@ futex_wait :: proc "contextless" (f: ^Futex, expected: u32) {
 		return
 		return
 	}
 	}
 	ok := _futex_wait(f, expected)
 	ok := _futex_wait(f, expected)
-	_assert(ok, "futex_wait failure")
+	assert_contextless(ok, "futex_wait failure")
 }
 }
 
 
 /*
 /*
@@ -597,18 +596,3 @@ Wake up multiple threads waiting on a futex.
 futex_broadcast :: proc "contextless" (f: ^Futex) {
 futex_broadcast :: proc "contextless" (f: ^Futex) {
 	_futex_broadcast(f)
 	_futex_broadcast(f)
 }
 }
-
-
-@(private)
-_assert :: proc "contextless" (cond: bool, msg: string) {
-	if !cond {
-		_panic(msg)
-	}
-}
-
-@(private)
-_panic :: proc "contextless" (msg: string) -> ! {
-	runtime.print_string(msg)
-	runtime.print_byte('\n')
-	runtime.trap()
-}

+ 2 - 2
core/sync/primitives_atomic.odin

@@ -240,7 +240,7 @@ atomic_recursive_mutex_lock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 
 
 atomic_recursive_mutex_unlock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 atomic_recursive_mutex_unlock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) {
 	tid := current_thread_id()
 	tid := current_thread_id()
-	_assert(tid == m.owner, "tid != m.owner")
+	assert_contextless(tid == m.owner, "tid != m.owner")
 	m.recursion -= 1
 	m.recursion -= 1
 	recursion := m.recursion
 	recursion := m.recursion
 	if recursion == 0 {
 	if recursion == 0 {
@@ -361,7 +361,7 @@ atomic_sema_wait_with_timeout :: proc "contextless" (s: ^Atomic_Sema, duration:
 			if !futex_wait_with_timeout(&s.count, u32(original_count), remaining) {
 			if !futex_wait_with_timeout(&s.count, u32(original_count), remaining) {
 				return false
 				return false
 			}
 			}
-			original_count = s.count
+			original_count = atomic_load_explicit(&s.count, .Relaxed)
 		}
 		}
 		if original_count == atomic_compare_exchange_strong_explicit(&s.count, original_count, original_count-1, .Acquire, .Acquire) {
 		if original_count == atomic_compare_exchange_strong_explicit(&s.count, original_count, original_count-1, .Acquire, .Acquire) {
 			return true
 			return true

+ 11 - 0
core/sys/darwin/sync.odin

@@ -5,6 +5,7 @@ foreign import system "system:System.framework"
 // #define OS_WAIT_ON_ADDR_AVAILABILITY \
 // #define OS_WAIT_ON_ADDR_AVAILABILITY \
 // 	__API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
 // 	__API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
 when ODIN_OS == .Darwin {
 when ODIN_OS == .Darwin {
+
 	when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 17_04_00 {
 	when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 17_04_00 {
 		WAIT_ON_ADDRESS_AVAILABLE :: true
 		WAIT_ON_ADDRESS_AVAILABLE :: true
 	} else when ODIN_MINIMUM_OS_VERSION >= 14_04_00 {
 	} else when ODIN_MINIMUM_OS_VERSION >= 14_04_00 {
@@ -12,8 +13,18 @@ when ODIN_OS == .Darwin {
 	} else {
 	} else {
 		WAIT_ON_ADDRESS_AVAILABLE :: false
 		WAIT_ON_ADDRESS_AVAILABLE :: false
 	}
 	}
+
+	when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 14_00_00 {
+		ULOCK_WAIT_2_AVAILABLE :: true
+	} else when ODIN_MINIMUM_OS_VERSION >= 11_00_00 {
+		ULOCK_WAIT_2_AVAILABLE :: true
+	} else {
+		ULOCK_WAIT_2_AVAILABLE :: false
+	}
+
 } else {
 } else {
 	WAIT_ON_ADDRESS_AVAILABLE :: false
 	WAIT_ON_ADDRESS_AVAILABLE :: false
+	ULOCK_WAIT_2_AVAILABLE    :: false
 }
 }
 
 
 os_sync_wait_on_address_flag :: enum u32 {
 os_sync_wait_on_address_flag :: enum u32 {

+ 4 - 0
core/sys/info/platform_darwin.odin

@@ -530,6 +530,10 @@ macos_release_map: map[string]Darwin_To_Release = {
 	"23F79"      = {{23, 5, 0}, "macOS", {"Sonoma",         {14, 5, 0}}},
 	"23F79"      = {{23, 5, 0}, "macOS", {"Sonoma",         {14, 5, 0}}},
 	"23G80"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 0}}},
 	"23G80"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 0}}},
 	"23G93"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 1}}},
 	"23G93"      = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 6, 1}}},
+	"23H124"     = {{23, 6, 0}, "macOS", {"Sonoma",         {14, 7, 0}}},
+
+	// MacOS Sequoia
+	"24A335"     = {{24, 0, 0}, "macOS", {"Sequoia",        {15, 0, 0}}},
 }
 }
 
 
 @(private)
 @(private)

+ 4 - 0
core/testing/runner.odin

@@ -204,6 +204,10 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 		}
 		}
 	}
 	}
 
 
+	when ODIN_OS == .Windows {
+		console_ansi_init()
+	}
+
 	stdout := io.to_writer(os.stream_from_handle(os.stdout))
 	stdout := io.to_writer(os.stream_from_handle(os.stdout))
 	stderr := io.to_writer(os.stream_from_handle(os.stderr))
 	stderr := io.to_writer(os.stream_from_handle(os.stderr))
 
 

+ 22 - 0
core/testing/runner_windows.odin

@@ -0,0 +1,22 @@
+//+private
+package testing
+
+import win32 "core:sys/windows"
+
+console_ansi_init :: proc() {
+	stdout := win32.GetStdHandle(win32.STD_OUTPUT_HANDLE)
+	if stdout != win32.INVALID_HANDLE && stdout != nil {
+		old_console_mode: u32
+		if win32.GetConsoleMode(stdout, &old_console_mode) {
+			win32.SetConsoleMode(stdout, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+		}
+	}
+
+	stderr := win32.GetStdHandle(win32.STD_ERROR_HANDLE)
+	if stderr != win32.INVALID_HANDLE && stderr != nil {
+		old_console_mode: u32
+		if win32.GetConsoleMode(stderr, &old_console_mode) {
+			win32.SetConsoleMode(stderr, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+		}
+	}
+}

+ 11 - 0
core/testing/signal_handler_libc.odin

@@ -26,6 +26,8 @@ import "core:os"
 
 
 @(private="file", thread_local)
 @(private="file", thread_local)
 local_test_index: libc.sig_atomic_t
 local_test_index: libc.sig_atomic_t
+@(private="file", thread_local)
+local_test_index_set: bool
 
 
 // Windows does not appear to have a SIGTRAP, so this is defined here, instead
 // Windows does not appear to have a SIGTRAP, so this is defined here, instead
 // of in the libc package, just so there's no confusion about it being
 // of in the libc package, just so there's no confusion about it being
@@ -45,6 +47,13 @@ stop_runner_callback :: proc "c" (sig: libc.int) {
 
 
 @(private="file")
 @(private="file")
 stop_test_callback :: proc "c" (sig: libc.int) {
 stop_test_callback :: proc "c" (sig: libc.int) {
+	if !local_test_index_set {
+		// We're a thread created by a test thread.
+		//
+		// There's nothing we can do to inform the test runner about who
+		// signalled, so hopefully the test will handle their own sub-threads.
+		return
+	}
 	if local_test_index == -1 {
 	if local_test_index == -1 {
 		// We're the test runner, and we ourselves have caught a signal from
 		// We're the test runner, and we ourselves have caught a signal from
 		// which there is no recovery.
 		// which there is no recovery.
@@ -114,6 +123,7 @@ This is a dire bug and should be reported to the Odin developers.
 
 
 _setup_signal_handler :: proc() {
 _setup_signal_handler :: proc() {
 	local_test_index = -1
 	local_test_index = -1
+	local_test_index_set = true
 
 
 	// Catch user interrupt / CTRL-C.
 	// Catch user interrupt / CTRL-C.
 	libc.signal(libc.SIGINT, stop_runner_callback)
 	libc.signal(libc.SIGINT, stop_runner_callback)
@@ -135,6 +145,7 @@ _setup_signal_handler :: proc() {
 
 
 _setup_task_signal_handler :: proc(test_index: int) {
 _setup_task_signal_handler :: proc(test_index: int) {
 	local_test_index = cast(libc.sig_atomic_t)test_index
 	local_test_index = cast(libc.sig_atomic_t)test_index
+	local_test_index_set = true
 }
 }
 
 
 _should_stop_runner :: proc() -> bool {
 _should_stop_runner :: proc() -> bool {

+ 8 - 4
core/testing/testing.odin

@@ -105,9 +105,13 @@ cleanup :: proc(t: ^T, procedure: proc(rawptr), user_data: rawptr) {
 	append(&t.cleanups, Internal_Cleanup{procedure, user_data, context})
 	append(&t.cleanups, Internal_Cleanup{procedure, user_data, context})
 }
 }
 
 
-expect :: proc(t: ^T, ok: bool, msg: string = "", loc := #caller_location) -> bool {
+expect :: proc(t: ^T, ok: bool, msg := "", expr := #caller_expression(ok), loc := #caller_location) -> bool {
 	if !ok {
 	if !ok {
-		log.error(msg, location=loc)
+		if msg == "" {
+			log.errorf("expected %v to be true", expr, location=loc)
+		} else {
+			log.error(msg, location=loc)
+		}
 	}
 	}
 	return ok
 	return ok
 }
 }
@@ -119,10 +123,10 @@ expectf :: proc(t: ^T, ok: bool, format: string, args: ..any, loc := #caller_loc
 	return ok
 	return ok
 }
 }
 
 
-expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) {
+expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location, value_expr := #caller_expression(value)) -> bool where intrinsics.type_is_comparable(T) {
 	ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected)
 	ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected)
 	if !ok {
 	if !ok {
-		log.errorf("expected %v, got %v", expected, value, location=loc)
+		log.errorf("expected %v to be %v, got %v", value_expr, expected, value, location=loc)
 	}
 	}
 	return ok
 	return ok
 }
 }

+ 6 - 6
core/thread/thread.odin

@@ -272,7 +272,7 @@ create_and_start :: proc(fn: proc(), init_context: Maybe(runtime.Context) = nil,
 	t := create(thread_proc, priority)
 	t := create(thread_proc, priority)
 	t.data = rawptr(fn)
 	t.data = rawptr(fn)
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 	t.init_context = init_context
 	t.init_context = init_context
 	start(t)
 	start(t)
@@ -307,7 +307,7 @@ create_and_start_with_data :: proc(data: rawptr, fn: proc(data: rawptr), init_co
 	t.user_index = 1
 	t.user_index = 1
 	t.user_args[0] = data
 	t.user_args[0] = data
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 	t.init_context = init_context
 	t.init_context = init_context
 	start(t)
 	start(t)
@@ -347,7 +347,7 @@ create_and_start_with_poly_data :: proc(data: $T, fn: proc(data: T), init_contex
 	mem.copy(&t.user_args[0], &data, size_of(T))
 	mem.copy(&t.user_args[0], &data, size_of(T))
 
 
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 
 
 	t.init_context = init_context
 	t.init_context = init_context
@@ -394,7 +394,7 @@ create_and_start_with_poly_data2 :: proc(arg1: $T1, arg2: $T2, fn: proc(T1, T2),
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg2))
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg2))
 
 
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 
 
 	t.init_context = init_context
 	t.init_context = init_context
@@ -443,7 +443,7 @@ create_and_start_with_poly_data3 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, fn: pr
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg3))
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg3))
 
 
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 
 
 	t.init_context = init_context
 	t.init_context = init_context
@@ -494,7 +494,7 @@ create_and_start_with_poly_data4 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, arg4:
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg4))
 	_  = copy(user_args[n:], mem.ptr_to_bytes(&arg4))
 
 
 	if self_cleanup {
 	if self_cleanup {
-		t.flags += {.Self_Cleanup}
+		intrinsics.atomic_or(&t.flags, {.Self_Cleanup})
 	}
 	}
 
 
 	t.init_context = init_context
 	t.init_context = init_context

+ 1 - 0
core/thread/thread_pool.odin

@@ -60,6 +60,7 @@ pool_thread_runner :: proc(t: ^Thread) {
 		if task, ok := pool_pop_waiting(pool); ok {
 		if task, ok := pool_pop_waiting(pool); ok {
 			data.task = task
 			data.task = task
 			pool_do_work(pool, task)
 			pool_do_work(pool, task)
+			sync.guard(&pool.mutex)
 			data.task = {}
 			data.task = {}
 		}
 		}
 	}
 	}

+ 8 - 24
core/thread/thread_unix.odin

@@ -5,18 +5,14 @@ package thread
 import "base:runtime"
 import "base:runtime"
 import "core:sync"
 import "core:sync"
 import "core:sys/unix"
 import "core:sys/unix"
-import "core:time"
 
 
 _IS_SUPPORTED :: true
 _IS_SUPPORTED :: true
 
 
-CAS :: sync.atomic_compare_exchange_strong
-
 // NOTE(tetra): Aligned here because of core/unix/pthread_linux.odin/pthread_t.
 // NOTE(tetra): Aligned here because of core/unix/pthread_linux.odin/pthread_t.
 // Also see core/sys/darwin/mach_darwin.odin/semaphore_t.
 // Also see core/sys/darwin/mach_darwin.odin/semaphore_t.
 Thread_Os_Specific :: struct #align(16) {
 Thread_Os_Specific :: struct #align(16) {
 	unix_thread: unix.pthread_t, // NOTE: very large on Darwin, small on Linux.
 	unix_thread: unix.pthread_t, // NOTE: very large on Darwin, small on Linux.
-	cond:        sync.Cond,
-	mutex:       sync.Mutex,
+	start_ok:    sync.Sema,
 }
 }
 //
 //
 // Creates a thread which will run the given procedure.
 // Creates a thread which will run the given procedure.
@@ -29,14 +25,10 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 		// We need to give the thread a moment to start up before we enable cancellation.
 		// We need to give the thread a moment to start up before we enable cancellation.
 		can_set_thread_cancel_state := unix.pthread_setcancelstate(unix.PTHREAD_CANCEL_ENABLE, nil) == 0
 		can_set_thread_cancel_state := unix.pthread_setcancelstate(unix.PTHREAD_CANCEL_ENABLE, nil) == 0
 
 
-		sync.lock(&t.mutex)
-
 		t.id = sync.current_thread_id()
 		t.id = sync.current_thread_id()
 
 
-		for (.Started not_in sync.atomic_load(&t.flags)) {
-			// HACK: use a timeout so in the event that the condition is signalled at THIS comment's exact point
-			// (after checking flags, before starting the wait) it gets itself out of that deadlock after a ms.
-			sync.wait_with_timeout(&t.cond, &t.mutex, time.Millisecond)
+		if .Started not_in sync.atomic_load(&t.flags) {
+			sync.wait(&t.start_ok)
 		}
 		}
 
 
 		if .Joined in sync.atomic_load(&t.flags) {
 		if .Joined in sync.atomic_load(&t.flags) {
@@ -66,8 +58,6 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 
 
 		sync.atomic_or(&t.flags, { .Done })
 		sync.atomic_or(&t.flags, { .Done })
 
 
-		sync.unlock(&t.mutex)
-
 		if .Self_Cleanup in sync.atomic_load(&t.flags) {
 		if .Self_Cleanup in sync.atomic_load(&t.flags) {
 			res := unix.pthread_detach(t.unix_thread)
 			res := unix.pthread_detach(t.unix_thread)
 			assert_contextless(res == 0)
 			assert_contextless(res == 0)
@@ -132,7 +122,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 
 
 _start :: proc(t: ^Thread) {
 _start :: proc(t: ^Thread) {
 	sync.atomic_or(&t.flags, { .Started })
 	sync.atomic_or(&t.flags, { .Started })
-	sync.signal(&t.cond)
+	sync.post(&t.start_ok)
 }
 }
 
 
 _is_done :: proc(t: ^Thread) -> bool {
 _is_done :: proc(t: ^Thread) -> bool {
@@ -140,24 +130,18 @@ _is_done :: proc(t: ^Thread) -> bool {
 }
 }
 
 
 _join :: proc(t: ^Thread) {
 _join :: proc(t: ^Thread) {
-	// sync.guard(&t.mutex)
-
 	if unix.pthread_equal(unix.pthread_self(), t.unix_thread) {
 	if unix.pthread_equal(unix.pthread_self(), t.unix_thread) {
 		return
 		return
 	}
 	}
 
 
-	// Preserve other flags besides `.Joined`, like `.Started`.
-	unjoined := sync.atomic_load(&t.flags) - {.Joined}
-	joined   := unjoined + {.Joined}
-
-	// Try to set `t.flags` from unjoined to joined. If it returns joined,
-	// it means the previous value had that flag set and we can return.
-	if res, ok := CAS(&t.flags, unjoined, joined); res == joined && !ok {
+	// If the previous value was already `Joined`, then we can return.
+	if .Joined in sync.atomic_or(&t.flags, {.Joined}) {
 		return
 		return
 	}
 	}
+
 	// Prevent non-started threads from blocking main thread with initial wait
 	// Prevent non-started threads from blocking main thread with initial wait
 	// condition.
 	// condition.
-	if .Started not_in unjoined {
+	if .Started not_in sync.atomic_load(&t.flags) {
 		_start(t)
 		_start(t)
 	}
 	}
 	unix.pthread_join(t.unix_thread, nil)
 	unix.pthread_join(t.unix_thread, nil)

+ 3 - 3
core/thread/thread_windows.odin

@@ -27,7 +27,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 	__windows_thread_entry_proc :: proc "system" (t_: rawptr) -> win32.DWORD {
 	__windows_thread_entry_proc :: proc "system" (t_: rawptr) -> win32.DWORD {
 		t := (^Thread)(t_)
 		t := (^Thread)(t_)
 
 
-		if .Joined in t.flags {
+		if .Joined in sync.atomic_load(&t.flags) {
 			return 0
 			return 0
 		}
 		}
 
 
@@ -48,9 +48,9 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread {
 			t.procedure(t)
 			t.procedure(t)
 		}
 		}
 
 
-		intrinsics.atomic_store(&t.flags, t.flags + {.Done})
+		intrinsics.atomic_or(&t.flags, {.Done})
 
 
-		if .Self_Cleanup in t.flags {
+		if .Self_Cleanup in sync.atomic_load(&t.flags) {
 			win32.CloseHandle(t.win32_thread)
 			win32.CloseHandle(t.win32_thread)
 			t.win32_thread = win32.INVALID_HANDLE
 			t.win32_thread = win32.INVALID_HANDLE
 			// NOTE(ftphikari): It doesn't matter which context 'free' received, right?
 			// NOTE(ftphikari): It doesn't matter which context 'free' received, right?

+ 2 - 0
src/bug_report.cpp

@@ -919,6 +919,8 @@ gb_internal void report_os_info() {
 			{"23F79",    {23,  5,  0}, "macOS", {"Sonoma",        {14,  5,  0}}},
 			{"23F79",    {23,  5,  0}, "macOS", {"Sonoma",        {14,  5,  0}}},
 			{"23G80",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  0}}},
 			{"23G80",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  0}}},
 			{"23G93",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  1}}},
 			{"23G93",    {23,  6,  0}, "macOS", {"Sonoma",        {14,  6,  1}}},
+			{"23H124",   {23,  6,  0}, "macOS", {"Sonoma",        {14,  7,  0}}},
+			{"24A335",   {24,  0,  0}, "macOS", {"Sequoia",       {15,  0,  0}}},
 		};
 		};
 
 
 
 

+ 4 - 2
src/build_settings.cpp

@@ -285,6 +285,7 @@ enum VetFlags : u64 {
 	VetFlag_Deprecated      = 1u<<7,
 	VetFlag_Deprecated      = 1u<<7,
 	VetFlag_Cast            = 1u<<8,
 	VetFlag_Cast            = 1u<<8,
 	VetFlag_Tabs            = 1u<<9,
 	VetFlag_Tabs            = 1u<<9,
+	VetFlag_UnusedProcedures = 1u<<10,
 
 
 	VetFlag_Unused = VetFlag_UnusedVariables|VetFlag_UnusedImports,
 	VetFlag_Unused = VetFlag_UnusedVariables|VetFlag_UnusedImports,
 
 
@@ -316,6 +317,8 @@ u64 get_vet_flag_from_name(String const &name) {
 		return VetFlag_Cast;
 		return VetFlag_Cast;
 	} else if (name == "tabs") {
 	} else if (name == "tabs") {
 		return VetFlag_Tabs;
 		return VetFlag_Tabs;
+	} else if (name == "unused-procedures") {
+		return VetFlag_UnusedProcedures;
 	}
 	}
 	return VetFlag_NONE;
 	return VetFlag_NONE;
 }
 }
@@ -383,6 +386,7 @@ struct BuildContext {
 
 
 	u64 vet_flags;
 	u64 vet_flags;
 	u32 sanitizer_flags;
 	u32 sanitizer_flags;
+	StringSet vet_packages;
 
 
 	bool   has_resource;
 	bool   has_resource;
 	String link_flags;
 	String link_flags;
@@ -1462,8 +1466,6 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta
 		bc->thread_count = gb_max(bc->affinity.thread_count, 1);
 		bc->thread_count = gb_max(bc->affinity.thread_count, 1);
 	}
 	}
 
 
-	string_set_init(&bc->custom_attributes);
-
 	bc->ODIN_VENDOR  = str_lit("odin");
 	bc->ODIN_VENDOR  = str_lit("odin");
 	bc->ODIN_VERSION = ODIN_VERSION;
 	bc->ODIN_VERSION = ODIN_VERSION;
 	bc->ODIN_ROOT    = odin_root_dir();
 	bc->ODIN_ROOT    = odin_root_dir();

+ 16 - 0
src/check_builtin.cpp

@@ -1632,6 +1632,22 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o
 
 
 		operand->type = t_source_code_location;
 		operand->type = t_source_code_location;
 		operand->mode = Addressing_Value;
 		operand->mode = Addressing_Value;
+	} else if (name == "caller_expression") {
+		if (ce->args.count > 1) {
+			error(ce->args[0], "'#caller_expression' expects either 0 or 1 arguments, got %td", ce->args.count);
+		}
+		if (ce->args.count > 0) {
+			Ast *arg = ce->args[0];
+			Operand o = {};
+			Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true);
+			if (e == nullptr || (e->flags & EntityFlag_Param) == 0) {
+				error(ce->args[0], "'#caller_expression' expected a valid earlier parameter name");
+			}
+			arg->Ident.entity = e;
+		}
+
+		operand->type = t_string;
+		operand->mode = Addressing_Value;
 	} else if (name == "exists") {
 	} else if (name == "exists") {
 		if (ce->args.count != 1) {
 		if (ce->args.count != 1) {
 			error(ce->close, "'#exists' expects 1 argument, got %td", ce->args.count);
 			error(ce->close, "'#exists' expects 1 argument, got %td", ce->args.count);

+ 6 - 1
src/check_expr.cpp

@@ -7807,7 +7807,8 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c
 		    name == "load" ||
 		    name == "load" ||
 		    name == "load_directory" ||
 		    name == "load_directory" ||
 		    name == "load_hash" ||
 		    name == "load_hash" ||
-		    name == "hash"
+		    name == "hash" ||
+		    name == "caller_expression"
 		) {
 		) {
 			operand->mode = Addressing_Builtin;
 			operand->mode = Addressing_Builtin;
 			operand->builtin_id = BuiltinProc_DIRECTIVE;
 			operand->builtin_id = BuiltinProc_DIRECTIVE;
@@ -8725,6 +8726,10 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A
 		error(node, "#caller_location may only be used as a default argument parameter");
 		error(node, "#caller_location may only be used as a default argument parameter");
 		o->type = t_source_code_location;
 		o->type = t_source_code_location;
 		o->mode = Addressing_Value;
 		o->mode = Addressing_Value;
+	} else if (name == "caller_expression") {
+		error(node, "#caller_expression may only be used as a default argument parameter");
+		o->type = t_string;
+		o->mode = Addressing_Value;
 	} else {
 	} else {
 		if (name == "location") {
 		if (name == "location") {
 			init_core_source_code_location(c->checker);
 			init_core_source_code_location(c->checker);

+ 14 - 4
src/check_stmt.cpp

@@ -1641,6 +1641,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 
 	Ast *expr = unparen_expr(rs->expr);
 	Ast *expr = unparen_expr(rs->expr);
 
 
+	Operand rhs_operand = {};
+
 	bool is_range = false;
 	bool is_range = false;
 	bool is_possibly_addressable = true;
 	bool is_possibly_addressable = true;
 	isize max_val_count = 2;
 	isize max_val_count = 2;
@@ -1698,7 +1700,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 					}
 					}
 				}
 				}
 			}
 			}
-			bool is_ptr = is_type_pointer(type_deref(operand.type));
+			bool is_ptr = is_type_pointer(operand.type);
 			Type *t = base_type(type_deref(operand.type));
 			Type *t = base_type(type_deref(operand.type));
 
 
 			switch (t->kind) {
 			switch (t->kind) {
@@ -1750,16 +1752,19 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 				break;
 				break;
 
 
 			case Type_DynamicArray:
 			case Type_DynamicArray:
+				is_possibly_addressable = true;
 				array_add(&vals, t->DynamicArray.elem);
 				array_add(&vals, t->DynamicArray.elem);
 				array_add(&vals, t_int);
 				array_add(&vals, t_int);
 				break;
 				break;
 
 
 			case Type_Slice:
 			case Type_Slice:
+				is_possibly_addressable = true;
 				array_add(&vals, t->Slice.elem);
 				array_add(&vals, t->Slice.elem);
 				array_add(&vals, t_int);
 				array_add(&vals, t_int);
 				break;
 				break;
 
 
 			case Type_Map:
 			case Type_Map:
+				is_possibly_addressable = true;
 				is_map = true;
 				is_map = true;
 				array_add(&vals, t->Map.key);
 				array_add(&vals, t->Map.key);
 				array_add(&vals, t->Map.value);
 				array_add(&vals, t->Map.value);
@@ -1781,6 +1786,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 
 			case Type_Tuple:
 			case Type_Tuple:
 				{
 				{
+					is_possibly_addressable = false;
+
 					isize count = t->Tuple.variables.count;
 					isize count = t->Tuple.variables.count;
 					if (count < 1) {
 					if (count < 1) {
 						ERROR_BLOCK();
 						ERROR_BLOCK();
@@ -1810,8 +1817,6 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 						array_add(&vals, e->type);
 						array_add(&vals, e->type);
 					}
 					}
 
 
-					is_possibly_addressable = false;
-
 					bool do_break = false;
 					bool do_break = false;
 					for (isize i = rs->vals.count-1; i >= 0; i--) {
 					for (isize i = rs->vals.count-1; i >= 0; i--) {
 						if (rs->vals[i] != nullptr && count < i+2) {
 						if (rs->vals[i] != nullptr && count < i+2) {
@@ -1831,6 +1836,11 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 
 
 			case Type_Struct:
 			case Type_Struct:
 				if (t->Struct.soa_kind != StructSoa_None) {
 				if (t->Struct.soa_kind != StructSoa_None) {
+					if (t->Struct.soa_kind == StructSoa_Fixed) {
+						is_possibly_addressable = operand.mode == Addressing_Variable || is_ptr;
+					} else {
+						is_possibly_addressable = true;
+					}
 					is_soa = true;
 					is_soa = true;
 					array_add(&vals, t->Struct.soa_elem);
 					array_add(&vals, t->Struct.soa_elem);
 					array_add(&vals, t_int);
 					array_add(&vals, t_int);
@@ -1907,7 +1917,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
 					if (is_possibly_addressable && i == addressable_index) {
 					if (is_possibly_addressable && i == addressable_index) {
 						entity->flags &= ~EntityFlag_Value;
 						entity->flags &= ~EntityFlag_Value;
 					} else {
 					} else {
-						char const *idx_name = is_map ? "key" : is_bit_set ? "element" : "index";
+						char const *idx_name = is_map ? "key" : (is_bit_set || i == 0) ? "element" : "index";
 						error(token, "The %s variable '%.*s' cannot be made addressable", idx_name, LIT(str));
 						error(token, "The %s variable '%.*s' cannot be made addressable", idx_name, LIT(str));
 					}
 					}
 				}
 				}

+ 32 - 0
src/check_type.cpp

@@ -1605,6 +1605,25 @@ gb_internal bool is_expr_from_a_parameter(CheckerContext *ctx, Ast *expr) {
 	return false;
 	return false;
 }
 }
 
 
+gb_internal bool is_caller_expression(Ast *expr) {
+	if (expr->kind == Ast_BasicDirective && expr->BasicDirective.name.string == "caller_expression") {
+		return true;
+	}
+
+	Ast *call = unparen_expr(expr);
+	if (call->kind != Ast_CallExpr) {
+		return false;
+	}
+
+	ast_node(ce, CallExpr, call);
+	if (ce->proc->kind != Ast_BasicDirective) {
+		return false;
+	}
+
+	ast_node(bd, BasicDirective, ce->proc);
+	String name = bd->name.string;
+	return name == "caller_expression";
+}
 
 
 gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_type, Type **out_type_, Ast *expr, bool allow_caller_location) {
 gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_type, Type **out_type_, Ast *expr, bool allow_caller_location) {
 	ParameterValue param_value = {};
 	ParameterValue param_value = {};
@@ -1626,7 +1645,19 @@ gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_
 		if (in_type) {
 		if (in_type) {
 			check_assignment(ctx, &o, in_type, str_lit("parameter value"));
 			check_assignment(ctx, &o, in_type, str_lit("parameter value"));
 		}
 		}
+	} else if (is_caller_expression(expr)) {
+		if (expr->kind != Ast_BasicDirective) {
+			check_builtin_procedure_directive(ctx, &o, expr, t_string);
+		}
+
+		param_value.kind = ParameterValue_Expression;
+		o.type = t_string;
+		o.mode = Addressing_Value;
+		o.expr = expr;
 
 
+		if (in_type) {
+			check_assignment(ctx, &o, in_type, str_lit("parameter value"));
+		}
 	} else {
 	} else {
 		if (in_type) {
 		if (in_type) {
 			check_expr_with_type_hint(ctx, &o, expr, in_type);
 			check_expr_with_type_hint(ctx, &o, expr, in_type);
@@ -1858,6 +1889,7 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
 			case ParameterValue_Nil:
 			case ParameterValue_Nil:
 				break;
 				break;
 			case ParameterValue_Location:
 			case ParameterValue_Location:
+			case ParameterValue_Expression:
 			case ParameterValue_Value:
 			case ParameterValue_Value:
 				gbString str = type_to_string(type);
 				gbString str = type_to_string(type);
 				error(params[i], "A default value for a parameter must not be a polymorphic constant type, got %s", str);
 				error(params[i], "A default value for a parameter must not be a polymorphic constant type, got %s", str);

+ 52 - 19
src/checker.cpp

@@ -533,18 +533,13 @@ gb_internal u64 check_vet_flags(CheckerContext *c) {
 	    c->curr_proc_decl->proc_lit) {
 	    c->curr_proc_decl->proc_lit) {
 		file = c->curr_proc_decl->proc_lit->file();
 		file = c->curr_proc_decl->proc_lit->file();
 	}
 	}
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
+
+	return ast_file_vet_flags(file);
 }
 }
 
 
 gb_internal u64 check_vet_flags(Ast *node) {
 gb_internal u64 check_vet_flags(Ast *node) {
 	AstFile *file = node->file();
 	AstFile *file = node->file();
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
+	return ast_file_vet_flags(file);
 }
 }
 
 
 enum VettedEntityKind {
 enum VettedEntityKind {
@@ -681,20 +676,45 @@ gb_internal bool check_vet_unused(Checker *c, Entity *e, VettedEntity *ve) {
 	return false;
 	return false;
 }
 }
 
 
-gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
-	bool vet_unused = (vet_flags & VetFlag_Unused) != 0;
-	bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0;
-
+gb_internal void check_scope_usage_internal(Checker *c, Scope *scope, u64 vet_flags, bool per_entity) {
+	u64 original_vet_flags = vet_flags;
 	Array<VettedEntity> vetted_entities = {};
 	Array<VettedEntity> vetted_entities = {};
 	array_init(&vetted_entities, heap_allocator());
 	array_init(&vetted_entities, heap_allocator());
+	defer (array_free(&vetted_entities));
 
 
 	rw_mutex_shared_lock(&scope->mutex);
 	rw_mutex_shared_lock(&scope->mutex);
 	for (auto const &entry : scope->elements) {
 	for (auto const &entry : scope->elements) {
 		Entity *e = entry.value;
 		Entity *e = entry.value;
 		if (e == nullptr) continue;
 		if (e == nullptr) continue;
+
+		vet_flags = original_vet_flags;
+		if (per_entity) {
+			vet_flags = ast_file_vet_flags(e->file);
+		}
+
+		bool vet_unused = (vet_flags & VetFlag_Unused) != 0;
+		bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0;
+		bool vet_unused_procedures = (vet_flags & VetFlag_UnusedProcedures) != 0;
+
 		VettedEntity ve_unused = {};
 		VettedEntity ve_unused = {};
 		VettedEntity ve_shadowed = {};
 		VettedEntity ve_shadowed = {};
-		bool is_unused = vet_unused && check_vet_unused(c, e, &ve_unused);
+		bool is_unused = false;
+		if (vet_unused && check_vet_unused(c, e, &ve_unused)) {
+			is_unused = true;
+		} else if (vet_unused_procedures &&
+		           e->kind == Entity_Procedure) {
+			if (e->flags&EntityFlag_Used) {
+				is_unused = false;
+			} else if (e->flags & EntityFlag_Require) {
+				is_unused = false;
+			} else if (e->pkg && e->pkg->kind == Package_Init && e->token.string == "main") {
+				is_unused = false;
+			} else {
+				is_unused = true;
+				ve_unused.kind = VettedEntity_Unused;
+				ve_unused.entity = e;
+			}
+		}
 		bool is_shadowed = vet_shadowing && check_vet_shadowing(c, e, &ve_shadowed);
 		bool is_shadowed = vet_shadowing && check_vet_shadowing(c, e, &ve_shadowed);
 		if (is_unused && is_shadowed) {
 		if (is_unused && is_shadowed) {
 			VettedEntity ve_both = ve_shadowed;
 			VettedEntity ve_both = ve_shadowed;
@@ -717,13 +737,18 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 	}
 	}
 	rw_mutex_shared_unlock(&scope->mutex);
 	rw_mutex_shared_unlock(&scope->mutex);
 
 
-	gb_sort(vetted_entities.data, vetted_entities.count, gb_size_of(VettedEntity), vetted_entity_variable_pos_cmp);
+	array_sort(vetted_entities, vetted_entity_variable_pos_cmp);
 
 
 	for (auto const &ve : vetted_entities) {
 	for (auto const &ve : vetted_entities) {
 		Entity *e = ve.entity;
 		Entity *e = ve.entity;
 		Entity *other = ve.other;
 		Entity *other = ve.other;
 		String name = e->token.string;
 		String name = e->token.string;
 
 
+		vet_flags = original_vet_flags;
+		if (per_entity) {
+			vet_flags = ast_file_vet_flags(e->file);
+		}
+
 		if (ve.kind == VettedEntity_Shadowed_And_Unused) {
 		if (ve.kind == VettedEntity_Shadowed_And_Unused) {
 			error(e->token, "'%.*s' declared but not used, possibly shadows declaration at line %d", LIT(name), other->token.pos.line);
 			error(e->token, "'%.*s' declared but not used, possibly shadows declaration at line %d", LIT(name), other->token.pos.line);
 		} else if (vet_flags) {
 		} else if (vet_flags) {
@@ -732,6 +757,9 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 				if (e->kind == Entity_Variable && (vet_flags & VetFlag_UnusedVariables) != 0) {
 				if (e->kind == Entity_Variable && (vet_flags & VetFlag_UnusedVariables) != 0) {
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 				}
 				}
+				if (e->kind == Entity_Procedure && (vet_flags & VetFlag_UnusedProcedures) != 0) {
+					error(e->token, "'%.*s' declared but not used", LIT(name));
+				}
 				if ((e->kind == Entity_ImportName || e->kind == Entity_LibraryName) && (vet_flags & VetFlag_UnusedImports) != 0) {
 				if ((e->kind == Entity_ImportName || e->kind == Entity_LibraryName) && (vet_flags & VetFlag_UnusedImports) != 0) {
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 					error(e->token, "'%.*s' declared but not used", LIT(name));
 				}
 				}
@@ -749,7 +777,11 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
 		}
 		}
 	}
 	}
 
 
-	array_free(&vetted_entities);
+}
+
+
+gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) {
+	check_scope_usage_internal(c, scope, vet_flags, false);
 
 
 	for (Scope *child = scope->head_child; child != nullptr; child = child->next) {
 	for (Scope *child = scope->head_child; child != nullptr; child = child->next) {
 		if (child->flags & (ScopeFlag_Proc|ScopeFlag_Type|ScopeFlag_File)) {
 		if (child->flags & (ScopeFlag_Proc|ScopeFlag_Type|ScopeFlag_File)) {
@@ -6497,12 +6529,13 @@ gb_internal void check_parsed_files(Checker *c) {
 	TIME_SECTION("check scope usage");
 	TIME_SECTION("check scope usage");
 	for (auto const &entry : c->info.files) {
 	for (auto const &entry : c->info.files) {
 		AstFile *f = entry.value;
 		AstFile *f = entry.value;
-		u64 vet_flags = build_context.vet_flags;
-		if (f->vet_flags_set) {
-			vet_flags = f->vet_flags;
-		}
+		u64 vet_flags = ast_file_vet_flags(f);
 		check_scope_usage(c, f->scope, vet_flags);
 		check_scope_usage(c, f->scope, vet_flags);
 	}
 	}
+	for (auto const &entry : c->info.packages) {
+		AstPackage *pkg = entry.value;
+		check_scope_usage_internal(c, pkg->scope, 0, true);
+	}
 
 
 	TIME_SECTION("add basic type information");
 	TIME_SECTION("add basic type information");
 	// Add "Basic" type information
 	// Add "Basic" type information

+ 1 - 0
src/entity.cpp

@@ -104,6 +104,7 @@ enum ParameterValueKind {
 	ParameterValue_Constant,
 	ParameterValue_Constant,
 	ParameterValue_Nil,
 	ParameterValue_Nil,
 	ParameterValue_Location,
 	ParameterValue_Location,
+	ParameterValue_Expression,
 	ParameterValue_Value,
 	ParameterValue_Value,
 };
 };
 
 

+ 10 - 2
src/gb/gb.h

@@ -3195,11 +3195,11 @@ void gb_affinity_init(gbAffinity *a) {
 	a->core_count       = 1;
 	a->core_count       = 1;
 	a->threads_per_core = 1;
 	a->threads_per_core = 1;
 
 
-	if (sysctlbyname("hw.logicalcpu", &count, &count_size, NULL, 0) == 0) {
+	if (sysctlbyname("kern.smp.cpus", &count, &count_size, NULL, 0) == 0) {
 		if (count > 0) {
 		if (count > 0) {
 			a->thread_count = count;
 			a->thread_count = count;
 			// Get # of physical cores
 			// Get # of physical cores
-			if (sysctlbyname("hw.physicalcpu", &count, &count_size, NULL, 0) == 0) {
+			if (sysctlbyname("kern.smp.cores", &count, &count_size, NULL, 0) == 0) {
 				if (count > 0) {
 				if (count > 0) {
 					a->core_count = count;
 					a->core_count = count;
 					a->threads_per_core = a->thread_count / count;
 					a->threads_per_core = a->thread_count / count;
@@ -3210,6 +3210,14 @@ void gb_affinity_init(gbAffinity *a) {
 				}
 				}
 			}
 			}
 		}
 		}
+	} else if (sysctlbyname("hw.ncpu", &count, &count_size, NULL, 0) == 0) {
+		// SMP disabled or unavailable.
+		if (count > 0) {
+			a->is_accurate      = true;
+			a->thread_count     = count;
+			a->core_count       = count;
+			a->threads_per_core = 1;
+		}
 	}
 	}
 
 
 }
 }

+ 1 - 1
src/llvm_backend.hpp

@@ -528,7 +528,7 @@ gb_internal lbAddr lb_store_range_stmt_val(lbProcedure *p, Ast *stmt_val, lbValu
 gb_internal lbValue lb_emit_source_code_location_const(lbProcedure *p, String const &procedure, TokenPos const &pos);
 gb_internal lbValue lb_emit_source_code_location_const(lbProcedure *p, String const &procedure, TokenPos const &pos);
 gb_internal lbValue lb_const_source_code_location_const(lbModule *m, String const &procedure, TokenPos const &pos);
 gb_internal lbValue lb_const_source_code_location_const(lbModule *m, String const &procedure, TokenPos const &pos);
 
 
-gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos);
+gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast *call_expression);
 
 
 gb_internal lbValue lb_equal_proc_for_type(lbModule *m, Type *type);
 gb_internal lbValue lb_equal_proc_for_type(lbModule *m, Type *type);
 gb_internal lbValue lb_hasher_proc_for_type(lbModule *m, Type *type);
 gb_internal lbValue lb_hasher_proc_for_type(lbModule *m, Type *type);

+ 44 - 36
src/llvm_backend_debug.cpp

@@ -552,6 +552,48 @@ gb_internal LLVMMetadataRef lb_debug_bitset(lbModule *m, Type *type, String name
 	return final_decl;
 	return final_decl;
 }
 }
 
 
+gb_internal LLVMMetadataRef lb_debug_bitfield(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) {
+	Type *bt = base_type(type);
+	GB_ASSERT(bt->kind == Type_BitField);
+
+	lb_debug_file_line(m, bt->BitField.node, &file, &line);
+
+	u64 size_in_bits = 8*type_size_of(bt);
+	u32 align_in_bits = 8*cast(u32)type_align_of(bt);
+
+    unsigned element_count = cast(unsigned)bt->BitField.fields.count;
+    LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count);
+
+    u64 offset_in_bits = 0;
+    for (unsigned i = 0; i < element_count; i++) {
+        Entity *f = bt->BitField.fields[i];
+        u8 bit_size = bt->BitField.bit_sizes[i];
+        GB_ASSERT(f->kind == Entity_Variable);
+        String name = f->token.string;
+        elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, line,
+            bit_size, offset_in_bits, 0,
+            LLVMDIFlagZero, lb_debug_type(m, f->type)
+        );
+
+        offset_in_bits += bit_size;
+    }
+
+	LLVMMetadataRef final_decl = LLVMDIBuilderCreateStructType(
+		m->debug_builder, scope,
+		cast(char const *)name.text, cast(size_t)name.len,
+		file, line,
+		size_in_bits, align_in_bits,
+		LLVMDIFlagZero,
+		nullptr,
+		elements, element_count,
+		0,
+		nullptr,
+		"", 0
+	);
+	lb_set_llvm_metadata(m, type, final_decl);
+	return final_decl;
+}
+
 gb_internal LLVMMetadataRef lb_debug_enum(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) {
 gb_internal LLVMMetadataRef lb_debug_enum(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) {
 	Type *bt = base_type(type);
 	Type *bt = base_type(type);
 	GB_ASSERT(bt->kind == Type_Enum);
 	GB_ASSERT(bt->kind == Type_Enum);
@@ -816,6 +858,7 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
 	case Type_Union:        return lb_debug_union(        m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_Union:        return lb_debug_union(        m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_BitSet:       return lb_debug_bitset(       m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_BitSet:       return lb_debug_bitset(       m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_Enum:         return lb_debug_enum(         m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 	case Type_Enum:         return lb_debug_enum(         m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
+	case Type_BitField:     return lb_debug_bitfield(     m, type,       make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0);
 
 
 	case Type_Tuple:
 	case Type_Tuple:
 		if (type->Tuple.variables.count == 1) {
 		if (type->Tuple.variables.count == 1) {
@@ -901,42 +944,6 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
 			lb_debug_type(m, type->Matrix.elem),
 			lb_debug_type(m, type->Matrix.elem),
 			subscripts, gb_count_of(subscripts));
 			subscripts, gb_count_of(subscripts));
 	}
 	}
-
-	case Type_BitField: {
-		LLVMMetadataRef parent_scope = nullptr;
-		LLVMMetadataRef scope = nullptr;
-		LLVMMetadataRef file = nullptr;
-		unsigned line = 0;
-		u64 size_in_bits = 8*cast(u64)type_size_of(type);
-		u32 align_in_bits = 8*cast(u32)type_align_of(type);
-		LLVMDIFlags flags = LLVMDIFlagZero;
-
-		unsigned element_count = cast(unsigned)type->BitField.fields.count;
-		LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count);
-
-		u64 offset_in_bits = 0;
-		for (unsigned i = 0; i < element_count; i++) {
-			Entity *f = type->BitField.fields[i];
-			u8 bit_size = type->BitField.bit_sizes[i];
-			GB_ASSERT(f->kind == Entity_Variable);
-			String name = f->token.string;
-			unsigned field_line = 0;
-			LLVMDIFlags field_flags = LLVMDIFlagZero;
-			elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, field_line,
-				bit_size, offset_in_bits, offset_in_bits,
-				field_flags, lb_debug_type(m, f->type)
-			);
-
-			offset_in_bits += bit_size;
-		}
-
-
-		return LLVMDIBuilderCreateStructType(m->debug_builder, parent_scope, "", 0, file, line,
-			size_in_bits, align_in_bits, flags,
-			nullptr, elements, element_count, 0, nullptr,
-			"", 0
-		);
-	}
 	}
 	}
 
 
 	GB_PANIC("Invalid type %s", type_to_string(type));
 	GB_PANIC("Invalid type %s", type_to_string(type));
@@ -1022,6 +1029,7 @@ gb_internal LLVMMetadataRef lb_debug_type(lbModule *m, Type *type) {
 		case Type_Union:        return lb_debug_union(m, type, name, scope, file, line);
 		case Type_Union:        return lb_debug_union(m, type, name, scope, file, line);
 		case Type_BitSet:       return lb_debug_bitset(m, type, name, scope, file, line);
 		case Type_BitSet:       return lb_debug_bitset(m, type, name, scope, file, line);
 		case Type_Enum:         return lb_debug_enum(m, type, name, scope, file, line);
 		case Type_Enum:         return lb_debug_enum(m, type, name, scope, file, line);
+		case Type_BitField:     return lb_debug_bitfield(m, type, name, scope, file, line);
 		}
 		}
 	}
 	}
 
 

+ 57 - 5
src/llvm_backend_proc.cpp

@@ -699,7 +699,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) {
 					}
 					}
 
 
 					if (e->Variable.param_value.kind != ParameterValue_Invalid) {
 					if (e->Variable.param_value.kind != ParameterValue_Invalid) {
-						lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, e->token.pos);
+						GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Location);
+						GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Expression);
+						lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, nullptr, nullptr);
 						lb_addr_store(p, res, c);
 						lb_addr_store(p, res, c);
 					}
 					}
 
 
@@ -3420,7 +3422,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
 }
 }
 
 
 
 
-gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos) {
+gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast* call_expression) {
 	switch (param_value.kind) {
 	switch (param_value.kind) {
 	case ParameterValue_Constant:
 	case ParameterValue_Constant:
 		if (is_type_constant_type(parameter_type)) {
 		if (is_type_constant_type(parameter_type)) {
@@ -3446,8 +3448,60 @@ gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type,
 			if (p->entity != nullptr) {
 			if (p->entity != nullptr) {
 				proc_name = p->entity->token.string;
 				proc_name = p->entity->token.string;
 			}
 			}
+
+			ast_node(ce, CallExpr, call_expression);
+			TokenPos pos = ast_token(ce->proc).pos;
+
 			return lb_emit_source_code_location_as_global(p, proc_name, pos);
 			return lb_emit_source_code_location_as_global(p, proc_name, pos);
 		}
 		}
+	case ParameterValue_Expression:
+		{
+			Ast *orig = param_value.original_ast_expr;
+			if (orig->kind == Ast_BasicDirective) {
+				gbString expr = expr_to_string(call_expression, temporary_allocator());
+				return lb_const_string(p->module, make_string_c(expr));
+			}
+
+			isize param_idx = -1;
+			String param_str = {0};
+			{
+				Ast *call = unparen_expr(orig);
+				GB_ASSERT(call->kind == Ast_CallExpr);
+				ast_node(ce, CallExpr, call);
+				GB_ASSERT(ce->proc->kind == Ast_BasicDirective);
+				GB_ASSERT(ce->args.count == 1);
+				Ast *target = ce->args[0];
+				GB_ASSERT(target->kind == Ast_Ident);
+				String target_str = target->Ident.token.string;
+
+				param_idx = lookup_procedure_parameter(procedure_type, target_str);
+				param_str = target_str;
+			}
+			GB_ASSERT(param_idx >= 0);
+
+
+			Ast *target_expr = nullptr;
+			ast_node(ce, CallExpr, call_expression);
+
+			if (ce->split_args->positional.count > param_idx) {
+				target_expr = ce->split_args->positional[param_idx];
+			}
+
+			for_array(i, ce->split_args->named) {
+				Ast *arg = ce->split_args->named[i];
+				ast_node(fv, FieldValue, arg);
+				GB_ASSERT(fv->field->kind == Ast_Ident);
+				String name = fv->field->Ident.token.string;
+				if (name == param_str) {
+					target_expr = fv->value;
+					break;
+				}
+			}
+
+			gbString expr = expr_to_string(target_expr, temporary_allocator());
+			return lb_const_string(p->module, make_string_c(expr));
+		}
+
 	case ParameterValue_Value:
 	case ParameterValue_Value:
 		return lb_build_expr(p, param_value.ast_value);
 		return lb_build_expr(p, param_value.ast_value);
 	}
 	}
@@ -3739,8 +3793,6 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 		}
 		}
 	}
 	}
 
 
-	TokenPos pos = ast_token(ce->proc).pos;
-
 
 
 	if (pt->params != nullptr)  {
 	if (pt->params != nullptr)  {
 		isize min_count = pt->params->Tuple.variables.count;
 		isize min_count = pt->params->Tuple.variables.count;
@@ -3764,7 +3816,7 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
 					args[arg_index] = lb_const_nil(p->module, e->type);
 					args[arg_index] = lb_const_nil(p->module, e->type);
 					break;
 					break;
 				case Entity_Variable:
 				case Entity_Variable:
-					args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pos);
+					args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pt, expr);
 					break;
 					break;
 
 
 				case Entity_Constant:
 				case Entity_Constant:

+ 49 - 2
src/main.cpp

@@ -340,12 +340,14 @@ enum BuildFlagKind {
 	BuildFlag_VetUnused,
 	BuildFlag_VetUnused,
 	BuildFlag_VetUnusedImports,
 	BuildFlag_VetUnusedImports,
 	BuildFlag_VetUnusedVariables,
 	BuildFlag_VetUnusedVariables,
+	BuildFlag_VetUnusedProcedures,
 	BuildFlag_VetUsingStmt,
 	BuildFlag_VetUsingStmt,
 	BuildFlag_VetUsingParam,
 	BuildFlag_VetUsingParam,
 	BuildFlag_VetStyle,
 	BuildFlag_VetStyle,
 	BuildFlag_VetSemicolon,
 	BuildFlag_VetSemicolon,
 	BuildFlag_VetCast,
 	BuildFlag_VetCast,
 	BuildFlag_VetTabs,
 	BuildFlag_VetTabs,
+	BuildFlag_VetPackages,
 
 
 	BuildFlag_CustomAttribute,
 	BuildFlag_CustomAttribute,
 	BuildFlag_IgnoreUnknownAttributes,
 	BuildFlag_IgnoreUnknownAttributes,
@@ -547,6 +549,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 	add_flag(&build_flags, BuildFlag_Vet,                     str_lit("vet"),                       BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_Vet,                     str_lit("vet"),                       BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnused,               str_lit("vet-unused"),                BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnused,               str_lit("vet-unused"),                BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedVariables,      str_lit("vet-unused-variables"),      BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedVariables,      str_lit("vet-unused-variables"),      BuildFlagParam_None,    Command__does_check);
+	add_flag(&build_flags, BuildFlag_VetUnusedProcedures,     str_lit("vet-unused-procedures"),     BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedImports,        str_lit("vet-unused-imports"),        BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUnusedImports,        str_lit("vet-unused-imports"),        BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetShadowing,            str_lit("vet-shadowing"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetShadowing,            str_lit("vet-shadowing"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUsingStmt,            str_lit("vet-using-stmt"),            BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetUsingStmt,            str_lit("vet-using-stmt"),            BuildFlagParam_None,    Command__does_check);
@@ -555,6 +558,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 	add_flag(&build_flags, BuildFlag_VetSemicolon,            str_lit("vet-semicolon"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetSemicolon,            str_lit("vet-semicolon"),             BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetCast,                 str_lit("vet-cast"),                  BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetCast,                 str_lit("vet-cast"),                  BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetTabs,                 str_lit("vet-tabs"),                  BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_VetTabs,                 str_lit("vet-tabs"),                  BuildFlagParam_None,    Command__does_check);
+	add_flag(&build_flags, BuildFlag_VetPackages,             str_lit("vet-packages"),              BuildFlagParam_String,  Command__does_check);
 
 
 	add_flag(&build_flags, BuildFlag_CustomAttribute,         str_lit("custom-attribute"),          BuildFlagParam_String,  Command__does_check, true);
 	add_flag(&build_flags, BuildFlag_CustomAttribute,         str_lit("custom-attribute"),          BuildFlagParam_String,  Command__does_check, true);
 	add_flag(&build_flags, BuildFlag_IgnoreUnknownAttributes, str_lit("ignore-unknown-attributes"), BuildFlagParam_None,    Command__does_check);
 	add_flag(&build_flags, BuildFlag_IgnoreUnknownAttributes, str_lit("ignore-unknown-attributes"), BuildFlagParam_None,    Command__does_check);
@@ -1220,6 +1224,36 @@ gb_internal bool parse_build_flags(Array<String> args) {
 						case BuildFlag_VetSemicolon:       build_context.vet_flags |= VetFlag_Semicolon;       break;
 						case BuildFlag_VetSemicolon:       build_context.vet_flags |= VetFlag_Semicolon;       break;
 						case BuildFlag_VetCast:            build_context.vet_flags |= VetFlag_Cast;            break;
 						case BuildFlag_VetCast:            build_context.vet_flags |= VetFlag_Cast;            break;
 						case BuildFlag_VetTabs:            build_context.vet_flags |= VetFlag_Tabs;            break;
 						case BuildFlag_VetTabs:            build_context.vet_flags |= VetFlag_Tabs;            break;
+						case BuildFlag_VetUnusedProcedures:
+							build_context.vet_flags |= VetFlag_UnusedProcedures;
+							if (!set_flags[BuildFlag_VetPackages]) {
+								gb_printf_err("-%.*s must be used with -vet-packages\n", LIT(name));
+								bad_flags = true;
+							}
+							break;
+
+						case BuildFlag_VetPackages:
+							{
+								GB_ASSERT(value.kind == ExactValue_String);
+								String val = value.value_string;
+								String_Iterator it = {val, 0};
+								for (;;) {
+									String pkg = string_split_iterator(&it, ',');
+									if (pkg.len == 0) {
+										break;
+									}
+
+									pkg = string_trim_whitespace(pkg);
+									if (!string_is_valid_identifier(pkg)) {
+										gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(pkg));
+										bad_flags = true;
+										continue;
+									}
+
+									string_set_add(&build_context.vet_packages, pkg);
+								}
+							}
+							break;
 
 
 						case BuildFlag_CustomAttribute:
 						case BuildFlag_CustomAttribute:
 							{
 							{
@@ -1234,7 +1268,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 
 
 									attr = string_trim_whitespace(attr);
 									attr = string_trim_whitespace(attr);
 									if (!string_is_valid_identifier(attr)) {
 									if (!string_is_valid_identifier(attr)) {
-										gb_printf_err("-custom-attribute '%.*s' must be a valid identifier\n", LIT(attr));
+										gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(attr));
 										bad_flags = true;
 										bad_flags = true;
 										continue;
 										continue;
 									}
 									}
@@ -2364,7 +2398,7 @@ gb_internal void print_show_help(String const arg0, String const &command) {
 		print_usage_line(0, "");
 		print_usage_line(0, "");
 
 
 		print_usage_line(1, "-vet-unused");
 		print_usage_line(1, "-vet-unused");
-		print_usage_line(2, "Checks for unused declarations.");
+		print_usage_line(2, "Checks for unused declarations (variables and imports).");
 		print_usage_line(0, "");
 		print_usage_line(0, "");
 
 
 		print_usage_line(1, "-vet-unused-variables");
 		print_usage_line(1, "-vet-unused-variables");
@@ -2406,6 +2440,16 @@ gb_internal void print_show_help(String const arg0, String const &command) {
 		print_usage_line(1, "-vet-tabs");
 		print_usage_line(1, "-vet-tabs");
 		print_usage_line(2, "Errs when the use of tabs has not been used for indentation.");
 		print_usage_line(2, "Errs when the use of tabs has not been used for indentation.");
 		print_usage_line(0, "");
 		print_usage_line(0, "");
+
+		print_usage_line(1, "-vet-packages:<comma-separated-strings>");
+		print_usage_line(2, "Sets which packages by name will be vetted.");
+		print_usage_line(2, "Files with specific +vet tags will not be ignored if they are not in the packages set.");
+		print_usage_line(0, "");
+
+		print_usage_line(1, "-vet-unused-procedures");
+		print_usage_line(2, "Checks for unused procedures.");
+		print_usage_line(2, "Must be used with -vet-packages or specified on a per file with +vet tags.");
+		print_usage_line(0, "");
 	}
 	}
 
 
 	if (check) {
 	if (check) {
@@ -3150,6 +3194,9 @@ int main(int arg_count, char const **arg_ptr) {
 
 
 	build_context.command = command;
 	build_context.command = command;
 
 
+	string_set_init(&build_context.custom_attributes);
+	string_set_init(&build_context.vet_packages);
+
 	if (!parse_build_flags(args)) {
 	if (!parse_build_flags(args)) {
 		return 1;
 		return 1;
 	}
 	}

+ 20 - 11
src/parser.cpp

@@ -1,10 +1,28 @@
 #include "parser_pos.cpp"
 #include "parser_pos.cpp"
 
 
+// Reports whether `file` belongs to the set of packages selected for vetting.
+//
+// A null file, a file with no package, or an empty `build_context.vet_packages`
+// set all count as "in"; otherwise the file's package name must be in the set.
+gb_internal bool in_vet_packages(AstFile *file) {
+	if (file == nullptr || file->pkg == nullptr) {
+		return true;
+	}
+	if (build_context.vet_packages.entries.count != 0) {
+		return string_set_exists(&build_context.vet_packages, file->pkg->name);
+	}
+	// No package filter was given, so nothing is excluded.
+	return true;
+}
+
 gb_internal u64 ast_file_vet_flags(AstFile *f) {
 gb_internal u64 ast_file_vet_flags(AstFile *f) {
 	if (f != nullptr && f->vet_flags_set) {
 	if (f != nullptr && f->vet_flags_set) {
 		return f->vet_flags;
 		return f->vet_flags;
 	}
 	}
-	return build_context.vet_flags;
+
+	bool found = in_vet_packages(f);
+	if (found) {
+		return build_context.vet_flags;
+	}
+	return 0;
 }
 }
 
 
 gb_internal bool ast_file_vet_style(AstFile *f) {
 gb_internal bool ast_file_vet_style(AstFile *f) {
@@ -5378,18 +5396,9 @@ gb_internal Ast *parse_stmt(AstFile *f) {
 }
 }
 
 
 
 
-
-gb_internal u64 check_vet_flags(AstFile *file) {
-	if (file && file->vet_flags_set) {
-		return file->vet_flags;
-	}
-	return build_context.vet_flags;
-}
-
-
 gb_internal void parse_enforce_tabs(AstFile *f) {
 gb_internal void parse_enforce_tabs(AstFile *f) {
 	// Checks to see if tabs have been used for indentation
 	// Checks to see if tabs have been used for indentation
-	if ((check_vet_flags(f) & VetFlag_Tabs) == 0) {
+	if ((ast_file_vet_flags(f) & VetFlag_Tabs) == 0) {
 		return;
 		return;
 	}
 	}
 
 

+ 20 - 13
tests/core/flags/test_core_flags.odin

@@ -12,6 +12,26 @@ import "core:strings"
 import "core:testing"
 import "core:testing"
 import "core:time/datetime"
 import "core:time/datetime"
 
 
+Custom_Data :: struct {
+	a: int,
+}
+
+@(init)
+init_custom_type_setter :: proc() {
+	// NOTE: Registration happens in an `@(init)` proc so that it stays out of
+	// the flow of the multi-threaded test runner, which prevents data races
+	// that `-sanitize:thread` could otherwise report.
+	//
+	// As a consequence, every test in this file sees the `Custom_Data` setter.
+	custom_data_setter :: proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) {
+		// Anything that is not `Custom_Data` is reported as not handled.
+		if data_type != Custom_Data {
+			return "", false, nil
+		}
+		target := cast(^Custom_Data)data
+		target.a = 32
+		return "", true, nil
+	}
+	flags.register_type_setter(custom_data_setter)
+}
+
 @(test)
 @(test)
 test_no_args :: proc(t: ^testing.T) {
 test_no_args :: proc(t: ^testing.T) {
 	S :: struct {
 	S :: struct {
@@ -1230,9 +1250,6 @@ test_net :: proc(t: ^testing.T) {
 @(test)
 @(test)
 test_custom_type_setter :: proc(t: ^testing.T) {
 test_custom_type_setter :: proc(t: ^testing.T) {
 	Custom_Bool :: distinct bool
 	Custom_Bool :: distinct bool
-	Custom_Data :: struct {
-		a: int,
-	}
 
 
 	S :: struct {
 	S :: struct {
 		a: Custom_Data,
 		a: Custom_Data,
@@ -1240,16 +1257,6 @@ test_custom_type_setter :: proc(t: ^testing.T) {
 	}
 	}
 	s: S
 	s: S
 
 
-	// NOTE: Mind that this setter is global state, and the test runner is multi-threaded.
-	// It should be fine so long as all type setter tests are in this one test proc.
-	flags.register_type_setter(proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) {
-		if data_type == Custom_Data {
-			(cast(^Custom_Data)data).a = 32
-			return "", true, nil
-		}
-		return "", false, nil
-	})
-	defer flags.register_type_setter(nil)
 	args := [?]string { "-a:hellope", "-b:true" }
 	args := [?]string { "-a:hellope", "-b:true" }
 	result := flags.parse(&s, args[:])
 	result := flags.parse(&s, args[:])
 	testing.expect_value(t, result, nil)
 	testing.expect_value(t, result, nil)

+ 2 - 2
tests/core/mem/test_mem_dynamic_pool.odin

@@ -6,7 +6,7 @@ import "core:mem"
 
 
 expect_pool_allocation :: proc(t: ^testing.T, expected_used_bytes, num_bytes, alignment: int) {
 expect_pool_allocation :: proc(t: ^testing.T, expected_used_bytes, num_bytes, alignment: int) {
 	pool: mem.Dynamic_Pool
 	pool: mem.Dynamic_Pool
-	mem.dynamic_pool_init(pool = &pool, alignment = alignment)
+	mem.dynamic_pool_init(&pool, alignment = alignment)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 
 
 	element, err := mem.alloc(num_bytes, alignment, pool_allocator)
 	element, err := mem.alloc(num_bytes, alignment, pool_allocator)
@@ -48,7 +48,7 @@ expect_pool_allocation_out_of_band :: proc(t: ^testing.T, num_bytes, out_band_si
 	testing.expect(t, num_bytes >= out_band_size, "Sanity check failed, your test call is flawed! Make sure that num_bytes >= out_band_size!")
 	testing.expect(t, num_bytes >= out_band_size, "Sanity check failed, your test call is flawed! Make sure that num_bytes >= out_band_size!")
 
 
 	pool: mem.Dynamic_Pool
 	pool: mem.Dynamic_Pool
-	mem.dynamic_pool_init(pool = &pool, out_band_size = out_band_size)
+	mem.dynamic_pool_init(&pool, out_band_size = out_band_size)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 	pool_allocator := mem.dynamic_pool_allocator(&pool)
 
 
 	element, err := mem.alloc(num_bytes, allocator = pool_allocator)
 	element, err := mem.alloc(num_bytes, allocator = pool_allocator)

+ 2 - 0
tests/core/normal.odin

@@ -39,6 +39,8 @@ download_assets :: proc() {
 @(require) import "slice"
 @(require) import "slice"
 @(require) import "strconv"
 @(require) import "strconv"
 @(require) import "strings"
 @(require) import "strings"
+@(require) import "sync"
+@(require) import "sync/chan"
 @(require) import "sys/posix"
 @(require) import "sys/posix"
 @(require) import "sys/windows"
 @(require) import "sys/windows"
 @(require) import "text/i18n"
 @(require) import "text/i18n"

+ 274 - 0
tests/core/sync/chan/test_core_sync_chan.odin

@@ -0,0 +1,274 @@
+package test_core_sync_chan
+
+import "base:runtime"
+import "base:intrinsics"
+import "core:log"
+import "core:math/rand"
+import "core:sync/chan"
+import "core:testing"
+import "core:thread"
+import "core:time"
+
+
+Message_Type :: enum i32 {
+	Result,
+	Add,
+	Multiply,
+	Subtract,
+	Divide,
+	End,
+}
+
+Message :: struct {
+	type: Message_Type,
+	i: i64,
+}
+
+Comm :: struct {
+	host: chan.Chan(Message),
+	client: chan.Chan(Message),
+	manual_buffering: bool,
+}
+
+BUFFER_SIZE :: 8
+MAX_RAND    :: 32
+FAIL_TIME   :: 1 * time.Second
+SLEEP_TIME  :: 1 * time.Millisecond
+
+// Thread body for the "client" side of the channel tests.
+//
+// Receives `Message`s from `data.host` and applies each arithmetic operation
+// to a running total that starts at 0, until an `.End` message arrives or the
+// receive loop ends. The total is then sent back on `data.client` as a
+// `.Result` message, and `data.client` is closed.
+//
+// When `data.manual_buffering` is set, the proc yields until the relevant
+// channel reports readiness before every receive and before the final send.
+comm_client :: proc(th: ^thread.Thread) {
+	data := cast(^Comm)th.data
+	manual_buffering := data.manual_buffering
+
+	n: i64
+
+	for manual_buffering && !chan.can_recv(data.host) {
+		thread.yield()
+	}
+
+	recv_loop: for msg in chan.recv(data.host) {
+		#partial switch msg.type {
+		case .Add:      n += msg.i
+		case .Multiply: n *= msg.i
+		case .Subtract: n -= msg.i
+		case .Divide:   n /= msg.i
+		case .End:
+			break recv_loop
+		case:
+			// `.Result` is only ever sent by the client, never to it.
+			panic("Unknown message type for client.")
+		}
+
+		for manual_buffering && !chan.can_recv(data.host) {
+			thread.yield()
+		}
+	}
+
+	// The result goes out on `data.client`, so that is the channel whose
+	// readiness has to be polled here (previously this polled `data.host`).
+	for manual_buffering && !chan.can_send(data.client) {
+		thread.yield()
+	}
+
+	chan.send(data.client, Message{.Result, n})
+	chan.close(data.client)
+}
+
+// Drives the sending side of the test protocol on `host`.
+//
+// Sends `.Add 1`, then `1+2*BUFFER_SIZE` messages with a random operation and
+// a random operand in `1..=MAX_RAND`, then `.End`, and finally closes `host`.
+// Every operation is also applied to a local tally, which is returned as
+// `expected` so the caller can compare it with the receiver's result.
+//
+// With `manual_buffering` set, the proc yields until `chan.can_send(host)`
+// before each send and expects `chan.len(host) == 0` before each random send.
+send_messages :: proc(t: ^testing.T, host: chan.Chan(Message), manual_buffering: bool = false) -> (expected: i64) {
+	// The opening message is always `.Add 1`, so the tally starts at 1.
+	first := Message{.Add, 1}
+	expected = 1
+	for manual_buffering && !chan.can_send(host) {
+		thread.yield()
+	}
+	chan.send(host, first)
+	log.debug(first)
+
+	for _ in 0..<1+2*BUFFER_SIZE {
+		// The operand is drawn before the operation, and is never zero.
+		operand := 1 + rand.int63_max(MAX_RAND)
+		op: Message_Type
+		switch rand.int_max(4) {
+		case 0:
+			op = .Add
+			expected += operand
+		case 1:
+			op = .Multiply
+			expected *= operand
+		case 2:
+			op = .Subtract
+			expected -= operand
+		case 3:
+			op = .Divide
+			expected /= operand
+		}
+		msg := Message{op, operand}
+
+		for manual_buffering && !chan.can_send(host) {
+			thread.yield()
+		}
+		if manual_buffering {
+			testing.expect(t, chan.len(host) == 0)
+		}
+
+		chan.send(host, msg)
+		log.debug(msg)
+	}
+
+	last := Message{.End, 0}
+	for manual_buffering && !chan.can_send(host) {
+		thread.yield()
+	}
+	chan.send(host, last)
+	log.debug(last)
+	chan.close(host)
+
+	return expected
+}
+
+@test // Runs the arithmetic protocol over two buffered channels, then checks operations on the closed channels.
+test_chan_buffered :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME) // FAIL_TIME is 1 second
+
+	comm: Comm // manual_buffering stays false for the buffered test
+	alloc_err: runtime.Allocator_Error
+	comm.host,   alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	comm.client, alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer {
+		chan.destroy(comm.host)
+		chan.destroy(comm.client)
+	}
+
+	testing.expect(t, chan.is_buffered(comm.host)) // freshly created channels: buffered, empty, capacity BUFFER_SIZE
+	testing.expect(t, chan.is_buffered(comm.client))
+	testing.expect(t, !chan.is_unbuffered(comm.host))
+	testing.expect(t, !chan.is_unbuffered(comm.client))
+	testing.expect_value(t, chan.len(comm.host), 0)
+	testing.expect_value(t, chan.len(comm.client), 0)
+	testing.expect_value(t, chan.cap(comm.host), BUFFER_SIZE)
+	testing.expect_value(t, chan.cap(comm.client), BUFFER_SIZE)
+
+	reckoner := thread.create(comm_client) // the thread that tallies the messages and reports the result
+	defer thread.destroy(reckoner)
+	reckoner.data = &comm // comm_client casts th.data back to ^Comm
+	thread.start(reckoner)
+
+	expected := send_messages(t, comm.host, manual_buffering = false) // also closes comm.host
+
+	// Sleep so we can give the other thread enough time to buffer its message.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.len(comm.client), 1) // the single `.Result` message from comm_client
+	result, ok := chan.try_recv(comm.client)
+
+	// One more sleep to ensure it has enough time to close.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.is_closed(comm.client), true)
+	testing.expect_value(t, ok, true)
+	testing.expect_value(t, result.i, expected)
+	log.debug(result, expected)
+
+	// Make sure sending to closed channels fails.
+	testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false)
+	_, ok = chan.recv(comm.host);       testing.expect_value(t, ok, false) // both channels are closed and drained, so receives are expected to fail too
+	_, ok = chan.recv(comm.client);     testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.host);   testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false)
+}
+
+@test // Runs the arithmetic protocol over two unbuffered channels, then checks operations on the closed channels.
+test_chan_unbuffered :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME) // FAIL_TIME is 1 second
+
+	comm: Comm
+	comm.manual_buffering = true // makes comm_client poll can_recv/can_send before each channel operation
+	alloc_err: runtime.Allocator_Error
+	comm.host,   alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	comm.client, alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator)
+	assert(alloc_err == nil, "allocation failed")
+	defer {
+		chan.destroy(comm.host)
+		chan.destroy(comm.client)
+	}
+
+	testing.expect(t, !chan.is_buffered(comm.host)) // freshly created channels: unbuffered, empty, capacity 0
+	testing.expect(t, !chan.is_buffered(comm.client))
+	testing.expect(t, chan.is_unbuffered(comm.host))
+	testing.expect(t, chan.is_unbuffered(comm.client))
+	testing.expect_value(t, chan.len(comm.host), 0)
+	testing.expect_value(t, chan.len(comm.client), 0)
+	testing.expect_value(t, chan.cap(comm.host), 0)
+	testing.expect_value(t, chan.cap(comm.client), 0)
+
+	reckoner := thread.create(comm_client) // the thread that tallies the messages and reports the result
+	defer thread.destroy(reckoner)
+	reckoner.data = &comm // comm_client casts th.data back to ^Comm
+	thread.start(reckoner)
+
+	for !chan.can_send(comm.client) {
+		thread.yield()
+	}
+
+	expected := send_messages(t, comm.host) // NOTE(review): manual_buffering defaults to false here although comm.manual_buffering is true - confirm this is intended
+	testing.expect_value(t, chan.is_closed(comm.host), true) // send_messages closes the channel it was given
+
+	for !chan.can_recv(comm.client) {
+		thread.yield()
+	}
+
+	result, ok := chan.try_recv(comm.client) // the `.Result` message from comm_client
+	testing.expect_value(t, ok, true)
+	testing.expect_value(t, result.i, expected)
+	log.debug(result, expected)
+
+	// Sleep so we can give the other thread enough time to close its side
+	// after we've received its message.
+	time.sleep(SLEEP_TIME)
+
+	testing.expect_value(t, chan.is_closed(comm.client), true)
+
+	// Make sure sending and receiving on closed channels fails.
+	testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false)
+	testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false)
+	_, ok = chan.recv(comm.host);       testing.expect_value(t, ok, false)
+	_, ok = chan.recv(comm.client);     testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.host);   testing.expect_value(t, ok, false)
+	_, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false)
+}
+
+@test // Sending on a buffered channel that is both full and closed must return false; the fail timeout catches a hang.
+test_full_buffered_closed_chan_deadlock :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME) // FAIL_TIME is 1 second
+
+	ch, alloc_err := chan.create_buffered(chan.Chan(int), 1, context.allocator) // capacity 1, so a single send fills it
+	assert(alloc_err == nil, "allocation failed")
+	defer chan.destroy(ch)
+
+	testing.expect(t, chan.can_send(ch))
+	testing.expect(t, chan.send(ch, 32)) // fills the only slot
+	testing.expect(t, chan.close(ch))
+	testing.expect(t, !chan.send(ch, 32)) // full and closed: expected to be rejected
+}
+
+// This test guarantees a buffered channel's messages can still be received
+// even after closing. This is currently how the API works. If that changes,
+// this test will need to change.
+@test
+test_accept_message_from_closed_buffered_chan :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME) // FAIL_TIME is 1 second
+
+	ch, alloc_err := chan.create_buffered(chan.Chan(int), 2, context.allocator) // capacity 2: room for both messages below
+	assert(alloc_err == nil, "allocation failed")
+	defer chan.destroy(ch)
+
+	testing.expect(t, chan.can_send(ch))
+	testing.expect(t, chan.send(ch, 32))
+	testing.expect(t, chan.send(ch, 64))
+	testing.expect(t, chan.close(ch)) // close while both messages are still queued
+	result, ok := chan.recv(ch) // expected to yield the first queued value
+	testing.expect_value(t, result, 32)
+	testing.expect(t, ok)
+	result, ok = chan.try_recv(ch) // expected to yield the second queued value
+	testing.expect_value(t, result, 64)
+	testing.expect(t, ok)
+}

+ 714 - 0
tests/core/sync/test_core_sync.odin

@@ -0,0 +1,714 @@
+// NOTE(Feoramund): These tests should be run a few hundred times, with and
+// without `-sanitize:thread` enabled, to ensure maximum safety.
+//
+// Keep in mind that running with the debug logs uncommented can result in
+// failures disappearing due to the delay of sending the log message causing
+// different synchronization patterns.
+
+package test_core_sync
+
+import "base:intrinsics"
+// import "core:log"
+import "core:sync"
+import "core:testing"
+import "core:thread"
+import "core:time"
+
+FAIL_TIME        :: 1 * time.Second
+SLEEP_TIME       :: 1 * time.Millisecond
+SMALL_SLEEP_TIME :: 10 * time.Microsecond
+
+// This needs to be high enough to cause a data race if any of the
+// synchronization primitives fail.
+THREADS :: 8
+
+// Block until every thread in `threads` reports that it is done, then join
+// and destroy each of them.
+//
+// Polling by hand avoids depending on a `Wait_Group` or a similar primitive,
+// which are themselves under test in this file.
+//
+// Waiting on every thread matters: without the explicit check a thread could
+// still finish after the test does, even though it is joined to the test
+// thread.
+wait_for :: proc(threads: []^thread.Thread) {
+	for {
+		finished := 0
+		for th in threads {
+			if thread.is_done(th) {
+				finished += 1
+			}
+		}
+		if finished == len(threads) {
+			break
+		}
+		// Not everyone is done yet; give the other threads a chance to run.
+		thread.yield()
+	}
+
+	for th in threads {
+		thread.join(th)
+		thread.destroy(th)
+	}
+}
+
+//
+// core:sync/primitives.odin
+//
+
+// Starts THREADS threads that each increment a shared counter while holding
+// a `sync.Mutex`, then expects the counter to equal THREADS.
+@test
+test_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Mutex,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("MUTEX-%v> locking", th.id)
+		sync.mutex_lock(&data.m)
+		data.number += 1
+		// log.debugf("MUTEX-%v> unlocking", th.id)
+		sync.mutex_unlock(&data.m)
+		// log.debugf("MUTEX-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Exercises both sides of `sync.RW_Mutex`: workers read `number1` under a
+// shared lock on `m1` and add what they read to `number2` under an exclusive
+// lock on `m2`. The main thread holds `m1` exclusively until it has written
+// `number1 = 1`, so the expected sum is THREADS.
+@test
+test_rw_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m1: sync.RW_Mutex,
+		m2: sync.RW_Mutex,
+		number1: int,
+		number2: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.rw_mutex_shared_lock(&data.m1)
+		n := data.number1
+		sync.rw_mutex_shared_unlock(&data.m1)
+
+		sync.rw_mutex_lock(&data.m2)
+		data.number2 += n
+		sync.rw_mutex_unlock(&data.m2)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	// Taken before any worker starts; released only after `number1` is set.
+	sync.rw_mutex_lock(&data.m1)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	data.number1 = 1
+	sync.rw_mutex_unlock(&data.m1)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number2, THREADS)
+}
+
+// Each worker acquires a `sync.Recursive_Mutex` several times over (one
+// try-lock, three locks, another try-lock), increments a shared counter, and
+// then releases it once per acquisition that succeeded. The counter must end
+// up equal to THREADS.
+@test
+test_recursive_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Recursive_Mutex,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("REC_MUTEX-%v> locking", th.id)
+		tried1 := sync.recursive_mutex_try_lock(&data.m)
+		for _ in 0..<3 {
+			sync.recursive_mutex_lock(&data.m)
+		}
+		tried2 := sync.recursive_mutex_try_lock(&data.m)
+		// log.debugf("REC_MUTEX-%v> locked", th.id)
+		data.number += 1
+		// log.debugf("REC_MUTEX-%v> unlocking", th.id)
+		for _ in 0..<3 {
+			sync.recursive_mutex_unlock(&data.m)
+		}
+		// Only undo the try-locks that actually acquired the mutex.
+		if tried1 { sync.recursive_mutex_unlock(&data.m) }
+		if tried2 { sync.recursive_mutex_unlock(&data.m) }
+		// log.debugf("REC_MUTEX-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Workers wait on a `sync.Cond` until `i` becomes 1 and then add `i` to a
+// shared counter. The main thread holds the mutex while starting the workers,
+// sets `i = 1`, releases the mutex and broadcasts. The counter must equal
+// THREADS.
+@test
+test_cond :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		c: sync.Cond,
+		m: sync.Mutex,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.mutex_lock(&data.m)
+
+		// Re-check the predicate after every wake-up.
+		for intrinsics.atomic_load(&data.i) != 1 {
+			sync.cond_wait(&data.c, &data.m)
+		}
+
+		data.number += intrinsics.atomic_load(&data.i)
+
+		sync.mutex_unlock(&data.m)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+	data.i = -1
+
+	sync.mutex_lock(&data.m)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	time.sleep(SLEEP_TIME)
+	data.i = 1
+	sync.mutex_unlock(&data.m)
+	sync.cond_broadcast(&data.c)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Waits on a condition variable that nobody signals, with a timeout of
+// SLEEP_TIME. There are no assertions; the call only has to return.
+@test
+test_cond_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	c: sync.Cond
+	m: sync.Mutex
+	sync.mutex_lock(&m)
+	sync.cond_wait_with_timeout(&c, &m, SLEEP_TIME)
+}
+
+// Workers wait on a `sync.Sema`, increment a shared counter and post the
+// semaphore again. The main thread posts once after starting them all, and
+// the counter must end up equal to THREADS.
+@test
+test_semaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		s: sync.Sema,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("SEM-%v> waiting", th.id)
+		sync.sema_wait(&data.s)
+		data.number += 1
+		// log.debugf("SEM-%v> posting", th.id)
+		sync.sema_post(&data.s)
+		// log.debugf("SEM-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+	// The single post that starts the chain of workers.
+	sync.sema_post(&data.s)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Waits on a semaphore that is never posted, with a timeout of SLEEP_TIME.
+// There are no assertions; the call only has to return.
+@test
+test_semaphore_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	s: sync.Sema
+	sync.sema_wait_with_timeout(&s, SLEEP_TIME)
+}
+
+// Workers wait on a `sync.Futex` with an expected value of 3, then atomically
+// add `i` to a shared counter. The main thread sets `i = 1`, changes the
+// futex value and broadcasts. The counter must equal THREADS.
+@test
+test_futex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		f: sync.Futex,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("FUTEX-%v> waiting", th.id)
+		sync.futex_wait(&data.f, 3)
+		// log.debugf("FUTEX-%v> done", th.id)
+
+		n := data.i
+		intrinsics.atomic_add(&data.number, n)
+	}
+
+	data: Data
+	data.i = -1
+	data.f = 3
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	data.i = 1
+	// Change the futex variable to keep late-starters from stalling.
+	data.f = 0
+	sync.futex_broadcast(&data.f)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Waits on a futex whose value matches the expected value and that nobody
+// signals, with a timeout of SLEEP_TIME. There are no assertions; the call
+// only has to return.
+@test
+test_futex_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	f: sync.Futex = 1
+	sync.futex_wait_with_timeout(&f, 1, SLEEP_TIME)
+}
+
+//
+// core:sync/extended.odin
+//
+
+// Uses two `sync.Wait_Group`s as a start gate and a finish gate. Workers
+// block on `step1`, atomically add `i` to a shared counter and then mark
+// themselves done on `step2`. The main thread opens `step1` after setting
+// `i = 1` and waits on `step2`. Both group counters must be zero afterwards
+// and the shared counter must equal THREADS.
+@test
+test_wait_group :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		step1: sync.Wait_Group,
+		step2: sync.Wait_Group,
+		i: int,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.wait_group_wait(&data.step1)
+
+		n := data.i
+		intrinsics.atomic_add(&data.number, n)
+
+		sync.wait_group_done(&data.step2)
+	}
+
+	data: Data
+	data.i = -1
+	threads: [THREADS]^thread.Thread
+
+	// step1 is released by the main thread; step2 by every worker.
+	sync.wait_group_add(&data.step1, 1)
+	sync.wait_group_add(&data.step2, THREADS)
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	time.sleep(SMALL_SLEEP_TIME)
+	data.i = 1
+	sync.wait_group_done(&data.step1)
+
+	sync.wait_group_wait(&data.step2)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.step1.counter, 0)
+	testing.expect_value(t, data.step2.counter, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Calls the timed wait on a zero-valued wait group with a timeout of
+// SLEEP_TIME. There are no assertions; the call only has to return.
+@test
+test_wait_group_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	wg: sync.Wait_Group
+	sync.wait_group_wait_with_timeout(&wg, SLEEP_TIME)
+}
+
+// All workers plus the main thread meet at a `sync.Barrier`. The main thread
+// sets `i = 1` before it arrives, and each worker atomically adds `i` to a
+// shared counter after passing. Checks the barrier's bookkeeping fields and
+// that the counter equals THREADS.
+@test
+test_barrier :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Barrier,
+		i: int,
+		number: int,
+
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		sync.barrier_wait(&data.b)
+
+		intrinsics.atomic_add(&data.number, data.i)
+	}
+
+	data: Data
+	data.i = -1
+	threads: [THREADS]^thread.Thread
+
+	sync.barrier_init(&data.b, THREADS + 1) // +1 for this thread, of course.
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+	time.sleep(SMALL_SLEEP_TIME)
+	data.i = 1
+	sync.barrier_wait(&data.b)
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.b.index, 0)
+	testing.expect_value(t, data.b.generation_id, 1)
+	testing.expect_value(t, data.b.thread_count, THREADS + 1)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Workers wait on a `sync.Auto_Reset_Event`, increment a shared counter and
+// signal the event again, so one signal cascades through every worker. The
+// main thread sends the first signal only once all workers are queued.
+@test
+test_auto_reset :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		a: sync.Auto_Reset_Event,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("AUR-%v> entering", th.id)
+		sync.auto_reset_event_wait(&data.a)
+		// log.debugf("AUR-%v> adding", th.id)
+		data.number += 1
+		// log.debugf("AUR-%v> signalling", th.id)
+		sync.auto_reset_event_signal(&data.a)
+		// log.debugf("AUR-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	// There is a chance that this test can stall if a signal is sent before
+	// all threads are queued, because it's possible for some number of threads
+	// to get to the waiting state, the signal to fire, all of the waited
+	// threads to pass successfully, then the other threads come in with no-one
+	// to run a signal.
+	//
+	// So we'll just test a fully-waited queue of cascading threads.
+	for {
+		// Spin until the event's status reads -THREADS, which this test
+		// treats as "every worker is waiting".
+		status := intrinsics.atomic_load(&data.a.status)
+		if status == -THREADS {
+			// log.debug("All Auto_Reset_Event threads have queued.")
+			break
+		}
+		intrinsics.cpu_relax()
+	}
+
+	sync.auto_reset_event_signal(&data.a)
+
+	wait_for(threads[:])
+
+	// The last thread should leave this primitive in a signalled state.
+	testing.expect_value(t, data.a.status, 1)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Signals an `Auto_Reset_Event` before anyone waits on it. The following
+// wait must return, and the status must be back at 0 afterwards.
+@test
+test_auto_reset_already_signalled :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	a: sync.Auto_Reset_Event
+	sync.auto_reset_event_signal(&a)
+	sync.auto_reset_event_wait(&a)
+	testing.expect_value(t, a.status, 0)
+}
+
+// Each worker increments a shared counter while holding a
+// `sync.Ticket_Mutex`. Afterwards the mutex's `ticket` and `serving` fields
+// and the counter must all equal THREADS.
+@test
+test_ticket_mutex :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		m: sync.Ticket_Mutex,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("TIC-%i> entering", th.id)
+		// intrinsics.debug_trap()
+		sync.ticket_mutex_lock(&data.m)
+		// log.debugf("TIC-%i> locked", th.id)
+		data.number += 1
+		// log.debugf("TIC-%i> unlocking", th.id)
+		sync.ticket_mutex_unlock(&data.m)
+		// log.debugf("TIC-%i> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.m.ticket, THREADS)
+	testing.expect_value(t, data.m.serving, THREADS)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Each worker increments a shared counter while holding a `sync.Benaphore`.
+// Afterwards the benaphore's counter must be 0 and the shared counter must
+// equal THREADS.
+@test
+test_benaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Benaphore,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+		sync.benaphore_lock(&data.b)
+		data.number += 1
+		sync.benaphore_unlock(&data.b)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.b.counter, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Same shape as the recursive mutex test, for `sync.Recursive_Benaphore`:
+// each worker acquires it repeatedly (try-lock, three locks, try-lock),
+// increments a shared counter, then releases once per successful
+// acquisition. The benaphore must end up completely unowned.
+@test
+test_recursive_benaphore :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		b: sync.Recursive_Benaphore,
+		number: int,
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+
+		// log.debugf("REC_BEP-%i> entering", th.id)
+		tried1 := sync.recursive_benaphore_try_lock(&data.b)
+		for _ in 0..<3 {
+			sync.recursive_benaphore_lock(&data.b)
+		}
+		tried2 := sync.recursive_benaphore_try_lock(&data.b)
+		// log.debugf("REC_BEP-%i> locked", th.id)
+		data.number += 1
+		for _ in 0..<3 {
+			sync.recursive_benaphore_unlock(&data.b)
+		}
+		// Only undo the try-locks that actually acquired the benaphore.
+		if tried1 { sync.recursive_benaphore_unlock(&data.b) }
+		if tried2 { sync.recursive_benaphore_unlock(&data.b) }
+		// log.debugf("REC_BEP-%i> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	// The benaphore should be unowned at the end.
+	testing.expect_value(t, data.b.counter, 0)
+	testing.expect_value(t, data.b.owner, 0)
+	testing.expect_value(t, data.b.recursion, 0)
+	testing.expect_value(t, data.number, THREADS)
+}
+
+// Every worker calls `sync.once_do_with_data_contextless` with the same
+// `sync.Once` and a callback that increments a counter. The counter must be
+// exactly 1 afterwards and the `Once` must be marked done.
+@test
+test_once :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		once: sync.Once,
+		number: int,
+	}
+
+	// The callback handed to the `Once`.
+	write :: proc "contextless" (data: rawptr) {
+		data := cast(^Data)data
+		data.number += 1
+	}
+
+	p :: proc(th: ^thread.Thread) {
+		data := cast(^Data)th.data
+		// log.debugf("ONCE-%v> entering", th.id)
+		sync.once_do_with_data_contextless(&data.once, write, data)
+		// log.debugf("ONCE-%v> leaving", th.id)
+	}
+
+	data: Data
+	threads: [THREADS]^thread.Thread
+
+	for &v in threads {
+		v = thread.create(p)
+		v.data = &data
+		v.init_context = context
+		thread.start(v)
+	}
+
+	wait_for(threads[:])
+
+	testing.expect_value(t, data.once.done, true)
+	testing.expect_value(t, data.number, 1)
+}
+
+// The main thread parks on a `sync.Parker` while a helper thread sleeps for
+// SLEEP_TIME, unparks it and increments a counter. Afterwards the parker's
+// state must be 0 and the counter must be 1.
+@test
+test_park :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		car: sync.Parker,
+		number: int,
+	}
+
+	data: Data
+
+	th := thread.create_and_start_with_data(&data, proc(data: rawptr) {
+		data := cast(^Data)data
+		time.sleep(SLEEP_TIME)
+		sync.unpark(&data.car)
+		data.number += 1
+	})
+
+	sync.park(&data.car)
+
+	wait_for([]^thread.Thread{ th })
+
+	// Local name for the state value expected once the parker is idle again.
+	PARKER_EMPTY :: 0
+	testing.expect_value(t, data.car.state, PARKER_EMPTY)
+	testing.expect_value(t, data.number, 1)
+}
+
+// Parks on a `sync.Parker` that nobody unparks, with a timeout of SLEEP_TIME.
+// There are no assertions; the call only has to return.
+@test
+test_park_with_timeout :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	car: sync.Parker
+	sync.park_with_timeout(&car, SLEEP_TIME)
+}
+
+// The main thread waits on a `sync.One_Shot_Event` while a helper thread
+// sleeps for SLEEP_TIME, signals it and increments a counter. Afterwards the
+// event's state must be 1 and the counter must be 1.
+@test
+test_one_shot_event :: proc(t: ^testing.T) {
+	testing.set_fail_timeout(t, FAIL_TIME)
+
+	Data :: struct {
+		event: sync.One_Shot_Event,
+		number: int,
+	}
+
+	data: Data
+
+	th := thread.create_and_start_with_data(&data, proc(data: rawptr) {
+		data := cast(^Data)data
+		time.sleep(SLEEP_TIME)
+		sync.one_shot_event_signal(&data.event)
+		data.number += 1
+	})
+
+	sync.one_shot_event_wait(&data.event)
+
+	wait_for([]^thread.Thread{ th })
+
+	testing.expect_value(t, data.event.state, 1)
+	testing.expect_value(t, data.number, 1)
+}

+ 3 - 0
tests/core/sys/posix/structs.odin

@@ -63,6 +63,9 @@ execute_struct_checks :: proc(t: ^testing.T) {
 		waiting: for {
 		waiting: for {
 			status: i32
 			status: i32
 			wpid := posix.waitpid(pid, &status, {})
 			wpid := posix.waitpid(pid, &status, {})
+			// An interrupted waitpid() returns -1 and sets errno to EINTR. The
+			// `status` out-parameter holds the child's wait status, never an
+			// errno value, so check the return value and errno before retrying.
+			if wpid == -1 && posix.errno() == .EINTR {
+				continue
+			}
 			if !testing.expectf(t, wpid != -1, "waitpid() failure: %v", posix.strerror()) {
 			if !testing.expectf(t, wpid != -1, "waitpid() failure: %v", posix.strerror()) {
 				return false
 				return false
 			}
 			}

+ 18 - 4
vendor/box2d/box2d.odin

@@ -3,7 +3,11 @@ package vendor_box2d
 import "base:intrinsics"
 import "base:intrinsics"
 import "core:c"
 import "core:c"
 
 
-@(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2"
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	@(private) VECTOR_EXT :: "_simd" when #config(VENDOR_BOX2D_ENABLE_SIMD128, intrinsics.has_target_feature("simd128")) else ""
+} else {
+	@(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2"
+}
 
 
 when ODIN_OS == .Windows {
 when ODIN_OS == .Windows {
 	@(private) LIB_PATH :: "lib/box2d_windows_amd64_" + VECTOR_EXT + ".lib"
 	@(private) LIB_PATH :: "lib/box2d_windows_amd64_" + VECTOR_EXT + ".lib"
@@ -13,6 +17,8 @@ when ODIN_OS == .Windows {
 	@(private) LIB_PATH :: "lib/box2d_darwin_amd64_" + VECTOR_EXT + ".a"
 	@(private) LIB_PATH :: "lib/box2d_darwin_amd64_" + VECTOR_EXT + ".a"
 } else when ODIN_ARCH == .amd64 {
 } else when ODIN_ARCH == .amd64 {
 	@(private) LIB_PATH :: "lib/box2d_other_amd64_" + VECTOR_EXT + ".a"
 	@(private) LIB_PATH :: "lib/box2d_other_amd64_" + VECTOR_EXT + ".a"
+} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	@(private) LIB_PATH :: "lib/box2d_wasm" + VECTOR_EXT + ".o"
 } else {
 } else {
 	@(private) LIB_PATH :: "lib/box2d_other.a"
 	@(private) LIB_PATH :: "lib/box2d_other.a"
 }
 }
@@ -21,8 +27,16 @@ when !#exists(LIB_PATH) {
 	#panic("Could not find the compiled box2d libraries at \"" + LIB_PATH + "\", they can be compiled by running the `build.sh` script at `" + ODIN_ROOT + "vendor/box2d/build_box2d.sh\"`")
 	#panic("Could not find the compiled box2d libraries at \"" + LIB_PATH + "\", they can be compiled by running the `build.sh` script at `" + ODIN_ROOT + "vendor/box2d/build_box2d.sh\"`")
 }
 }
 
 
-foreign import lib {
-	LIB_PATH,
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	when VECTOR_EXT == "_simd" {
+		foreign import lib "lib/box2d_wasm_simd.o"
+	} else {
+		foreign import lib "lib/box2d_wasm.o"
+	}
+} else {
+	foreign import lib {
+		LIB_PATH,
+	}
 }
 }
 
 
 
 
@@ -1520,4 +1534,4 @@ IsValid :: proc{
 	Joint_IsValid,
 	Joint_IsValid,
 
 
 	IsValidRay,
 	IsValidRay,
-}
+}

+ 4 - 0
vendor/box2d/box2d_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package vendor_box2d
+
+@(require) import _ "vendor:libc"

+ 2 - 0
vendor/box2d/build_box2d.sh

@@ -68,5 +68,7 @@ esac
 
 
 cd ..
 cd ..
 
 
+make -f wasm.Makefile
+
 rm -rf v3.0.0.tar.gz
 rm -rf v3.0.0.tar.gz
 rm -rf box2d-3.0.0
 rm -rf box2d-3.0.0

BIN
vendor/box2d/lib/box2d_wasm.o


BIN
vendor/box2d/lib/box2d_wasm_simd.o


+ 32 - 0
vendor/box2d/wasm.Makefile

@@ -0,0 +1,32 @@
+# Custom Makefile to build box2d for Odin's WASM targets.
+# I tried to make a cmake toolchain file for this / use cmake but this is far easier.
+# NOTE: We are pretending to be emscripten to box2d so it takes WASM code paths, but we don't actually use emscripten.
+
+# CC = $(shell brew --prefix llvm)/bin/clang
+# LD = $(shell brew --prefix llvm)/bin/wasm-ld
+
+VERSION   = 3.0.0
+SRCS      = $(wildcard box2d-$(VERSION)/src/*.c)
+OBJS_SIMD = $(SRCS:.c=_simd.o)
+OBJS      = $(SRCS:.c=.o)
+SYSROOT   = $(shell odin root)/vendor/libc
+CFLAGS    = -Ibox2d-$(VERSION)/include -Ibox2d-$(VERSION)/Extern/simde --target=wasm32 -D__EMSCRIPTEN__ -DNDEBUG -O3 --sysroot=$(SYSROOT)
+
+# Build both flavours of the library, then remove the intermediate objects.
+# The cleanup lives in this recipe rather than being a prerequisite, so that
+# `make -j` cannot run it while the objects are still being built or linked.
+all: lib/box2d_wasm.o lib/box2d_wasm_simd.o
+	rm -rf $(OBJS) $(OBJS_SIMD)
+
+# Plain WASM object for each box2d source file.
+%.o: %.c
+	$(CC) -c $(CFLAGS) $< -o $@
+
+# Same sources, compiled with the WASM SIMD128 extension enabled.
+%_simd.o: %.c
+	$(CC) -c $(CFLAGS) -msimd128 $< -o $@
+
+# Relocatable link (-r) of all objects into one file Odin can foreign import.
+lib/box2d_wasm.o: $(OBJS)
+	$(LD) -r -o lib/box2d_wasm.o $(OBJS)
+
+lib/box2d_wasm_simd.o: $(OBJS_SIMD)
+	$(LD) -r -o lib/box2d_wasm_simd.o $(OBJS_SIMD)
+
+clean:
+	rm -rf $(OBJS) $(OBJS_SIMD)
+
+.PHONY: all clean

+ 5 - 0
vendor/cgltf/cgltf.odin

@@ -5,6 +5,7 @@ LIB :: (
 	     "lib/cgltf.lib"      when ODIN_OS == .Windows
 	     "lib/cgltf.lib"      when ODIN_OS == .Windows
 	else "lib/cgltf.a"        when ODIN_OS == .Linux
 	else "lib/cgltf.a"        when ODIN_OS == .Linux
 	else "lib/darwin/cgltf.a" when ODIN_OS == .Darwin
 	else "lib/darwin/cgltf.a" when ODIN_OS == .Darwin
+	else "lib/cgltf_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -13,7 +14,11 @@ when LIB != "" {
 		// Windows library is shipped with the compiler, so a Windows specific message should not be needed.
 		// Windows library is shipped with the compiler, so a Windows specific message should not be needed.
 		#panic("Could not find the compiled cgltf library, it can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/cgltf/src\"`")
 		#panic("Could not find the compiled cgltf library, it can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/cgltf/src\"`")
 	}
 	}
+}
 
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "lib/cgltf_wasm.o"
+} else when LIB != "" {
 	foreign import lib { LIB }
 	foreign import lib { LIB }
 } else {
 } else {
 	foreign import lib "system:cgltf"
 	foreign import lib "system:cgltf"

+ 4 - 0
vendor/cgltf/cgltf_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package cgltf
+
+@(require) import _ "vendor:libc"

BIN
vendor/cgltf/lib/cgltf_wasm.o


+ 4 - 0
vendor/cgltf/src/Makefile

@@ -6,6 +6,10 @@ else
 all: unix
 all: unix
 endif
 endif
 
 
+wasm:
+	mkdir -p ../lib
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc cgltf.c -o ../lib/cgltf_wasm.o
+
 unix:
 unix:
 	mkdir -p ../lib
 	mkdir -p ../lib
 	$(CC) -c -O2 -Os -fPIC cgltf.c 	
 	$(CC) -c -O2 -Os -fPIC cgltf.c 	

+ 12 - 0
vendor/libc/README.md

@@ -0,0 +1,12 @@
+# vendor:libc
+
+A (very small) subset of a libc implementation over Odin libraries.
+This is mainly intended for use in Odin WASM builds to allow using libraries like box2d, cgltf etc. without emscripten hacks.
+
+You can use this with clang by doing `clang -c --target=wasm32 --sysroot=$(odin root)/vendor/libc` (+ all other flags and inputs).
+This will (if all the libc usage of the library is implemented) spit out a `.o` file you can use with the foreign import system.
+If you then also make sure this package is included in the Odin side of the project (`@(require) import "vendor:libc"`) you will be able
+to compile to WASM like Odin expects.
+
+This is currently used by `vendor:box2d`, `vendor:stb/image`, `vendor:stb/truetype`, `vendor:stb/rect_pack`, and `vendor:cgltf`.
+You can see how building works by looking at those.

+ 15 - 0
vendor/libc/assert.odin

@@ -0,0 +1,15 @@
+package odin_libc
+
+import "base:runtime"
+
+// Target of the C `assert` macro in `include/assert.h`: turns the C-side
+// function name, file, line and expression text into an Odin source location
+// and passes them to the stored context's assertion failure procedure.
+@(require, linkage="strong", link_name="__odin_libc_assert_fail")
+__odin_libc_assert_fail :: proc "c" (func: cstring, file: cstring, line: i32, expr: cstring) -> ! {
+	context = g_ctx
+
+	origin: runtime.Source_Code_Location
+	origin.file_path = string(file)
+	origin.procedure = string(func)
+	origin.line      = line
+	origin.column    = 0
+
+	context.assertion_failure_proc("runtime assertion", string(expr), origin)
+}

+ 16 - 0
vendor/libc/include/assert.h

@@ -0,0 +1,16 @@
+/*
+ * Minimal <assert.h> for the Odin libc shim.
+ *
+ * There is deliberately no include guard: standard C requires `assert` to be
+ * redefined from the current state of NDEBUG each time this header is
+ * included. The #undef makes that work when NDEBUG changed between includes.
+ */
+#undef assert
+
+#ifdef NDEBUG
+#define	assert(e)	((void)0)
+#else
+
+/* Prefer the short file name when the compiler provides it. */
+#ifdef __FILE_NAME__
+#define __ASSERT_FILE_NAME __FILE_NAME__
+#else /* __FILE_NAME__ */
+#define __ASSERT_FILE_NAME __FILE__
+#endif /* __FILE_NAME__ */
+
+/* Implemented on the Odin side (vendor/libc/assert.odin); does not return. */
+void __odin_libc_assert_fail(const char *, const char *, int, const char *);
+
+#define	assert(e) \
+    (__builtin_expect(!(e), 0) ? __odin_libc_assert_fail(__func__, __ASSERT_FILE_NAME, __LINE__, #e) : (void)0)
+
+#endif /* NDEBUG */

+ 21 - 0
vendor/libc/include/math.h

@@ -0,0 +1,21 @@
+/*
+ * Subset of <math.h> backed by the Odin procedures in vendor/libc/math.odin.
+ * Only the functions declared here are implemented.
+ */
+#include <stdbool.h>
+
+/* Single-precision functions. */
+float sqrtf(float);
+float cosf(float);
+float sinf(float);
+float atan2f(float, float);
+/* Plain functions taking float here, not the type-generic macros of standard C. */
+bool isnan(float);
+bool isinf(float);
+/* Double-precision functions. */
+double floor(double x);
+double ceil(double x);
+double sqrt(double x);
+double pow(double x, double y);
+double fmod(double x, double y);
+double cos(double x);
+double acos(double x);
+double fabs(double x);
+int abs(int);
+double ldexp(double, int);
+double exp(double);
+/*
+ * NOTE(review): standard C declares log() and sin() with double. The float
+ * signatures below match the f32 implementations in math.odin; changing them
+ * needs both sides updated together and the prebuilt objects rebuilt.
+ */
+float log(float);
+float sin(float);

+ 47 - 0
vendor/libc/include/stdio.h

@@ -0,0 +1,47 @@
+/*
+ * Subset of <stdio.h> backed by the Odin procedures in vendor/libc/stdio.odin.
+ */
+#include <stddef.h>
+#include <stdarg.h>
+
+#pragma once
+
+/* Opaque to C: the Odin side treats a FILE * as an integer (OS handle + 1). */
+typedef struct {} FILE;
+
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+/* The Odin implementation subtracts 1, so these map to OS handles 1 and 2. */
+#define stdout ((FILE *)2)
+#define stderr ((FILE *)3)
+
+/* The mode string is never modified, so it is const as in standard C. */
+FILE *fopen(const char *, const char *);
+int fclose(FILE *);
+int fseek(FILE *, long, int);
+long ftell(FILE *);
+size_t fread(void *, size_t, size_t, FILE *);
+size_t fwrite(const void *, size_t, size_t, FILE *);
+
+int vfprintf(FILE *, const char *, va_list);
+int vsnprintf(char *, size_t, const char *, va_list);
+
+/*
+ * The variadic entry points are defined inline here and forward to the
+ * va_list versions above, which are the ones implemented on the Odin side.
+ */
+static inline int snprintf(char *buf, size_t size, const char *fmt, ...) {
+	va_list args;
+	va_start(args, fmt);
+	int result = vsnprintf(buf, size, fmt, args);
+	va_end(args);
+	return result;
+}
+
+static inline int fprintf(FILE *f, const char *fmt, ...) {
+	va_list args;
+	va_start(args, fmt);
+	int result = vfprintf(f, fmt, args);
+	va_end(args);
+	return result;
+}
+
+static inline int printf(const char *fmt, ...) {
+	va_list args;
+	va_start(args, fmt);
+	int result = vfprintf(stdout, fmt, args);
+	va_end(args);
+	return result;
+}

+ 19 - 0
vendor/libc/include/stdlib.h

@@ -0,0 +1,19 @@
+/*
+ * Subset of <stdlib.h> for the Odin libc shim; only the functions declared
+ * here are available.
+ */
+#include <stddef.h>
+
+/* Memory management. */
+void *malloc(size_t size);
+
+void *aligned_alloc(size_t alignment, size_t size);
+
+void free(void *);
+
+void *realloc(void *, size_t);
+
+/* Sorting. */
+void qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*));
+
+/* String to number conversions. */
+int atoi(const char *);
+long atol(const char *);
+long long atoll(const char *);
+
+double atof(const char *);
+
+long strtol(const char *, char **, int);

+ 21 - 0
vendor/libc/include/string.h

@@ -0,0 +1,21 @@
+/*
+ * Subset of <string.h> for the Odin libc shim. The prototypes follow the
+ * standard C signatures so they agree with the compiler's built-in
+ * declarations.
+ */
+#include <stddef.h>
+
+void *memcpy(void *, const void *, size_t);
+void *memset(void *, int, size_t);
+/* The source is only read, so it is const as in standard C. */
+void *memmove(void *, const void *, size_t);
+int memcmp(const void *, const void *, size_t);
+
+size_t strlen(const char *str);
+
+char *strchr(const char *, int);
+char *strrchr(const char *, int);
+
+char *strncpy(char *, const char *, size_t);
+char *strcpy(char *, const char *);
+
+size_t strcspn(const char *, const char *);
+
+int strcmp(const char *, const char *);
+int strncmp(const char *, const char *, size_t);
+
+char *strstr(const char *, const char *);

+ 25 - 0
vendor/libc/libc.odin

@@ -0,0 +1,25 @@
+package odin_libc
+
+import "base:runtime"
+
+import "core:mem"
+
+@(private)
+g_ctx: runtime.Context
+@(private)
+g_allocator: mem.Compat_Allocator
+
+// Runs at program start-up: stores the startup context in `g_ctx` for use by
+// the "c" calling convention procedures in this package, with its allocator
+// replaced by a `mem.Compat_Allocator` wrapper.
+@(init)
+init_context :: proc() {
+	// The wrapper is what lets `realloc` mimic C semantics.
+	mem.compat_allocator_init(&g_allocator, context.allocator)
+
+	g_ctx = context
+	g_ctx.allocator = mem.compat_allocator(&g_allocator)
+}
+
+// Replaces the context used by every libc procedure in this package.
+// Defaults to the caller's current context.
+//
+// NOTE: the allocator must respect an `old_size` of `-1` on resizes!
+// (`realloc` passes -1 because C callers do not supply the old size.)
+set_context :: proc(ctx := context) {
+	g_ctx = ctx
+}

+ 100 - 0
vendor/libc/math.odin

@@ -0,0 +1,100 @@
+package odin_libc
+
+import "base:builtin"
+
+import "core:math"
+
+// C math functions exported under their libc symbol names. Each one forwards
+// directly to the matching `core:math` (or builtin) procedure.
+
+// --- f32 variants ---
+
+@(require, linkage="strong", link_name="sqrtf")
+sqrtf :: proc "c" (v: f32) -> f32 {
+	return math.sqrt(v)
+}
+
+@(require, linkage="strong", link_name="cosf")
+cosf :: proc "c" (v: f32) -> f32 {
+	return math.cos(v)
+}
+
+@(require, linkage="strong", link_name="sinf")
+sinf :: proc "c" (v: f32) -> f32 {
+	return math.sin(v)
+}
+
+@(require, linkage="strong", link_name="atan2f")
+atan2f :: proc "c" (v: f32, v2: f32) -> f32 {
+	return math.atan2(v, v2)
+}
+
+// Exported as plain functions taking f32, matching `include/math.h`.
+@(require, linkage="strong", link_name="isnan")
+isnan :: proc "c" (v: f32) -> bool {
+	return math.is_nan(v)
+}
+
+@(require, linkage="strong", link_name="isinf")
+isinf :: proc "c" (v: f32) -> bool {
+	return math.is_inf(v)
+}
+
+// --- f64 variants ---
+
+@(require, linkage="strong", link_name="sqrt")
+sqrt :: proc "c" (x: f64) -> f64 {
+	return math.sqrt(x)
+}
+
+@(require, linkage="strong", link_name="floor")
+floor :: proc "c" (x: f64) -> f64 {
+	return math.floor(x)
+}
+
+@(require, linkage="strong", link_name="ceil")
+ceil :: proc "c" (x: f64) -> f64 {
+	return math.ceil(x)
+}
+
+@(require, linkage="strong", link_name="pow")
+pow :: proc "c" (x, y: f64) -> f64 {
+	return math.pow(x, y)
+}
+
+@(require, linkage="strong", link_name="fmod")
+fmod :: proc "c" (x, y: f64) -> f64 {
+	return math.mod(x, y)
+}
+
+@(require, linkage="strong", link_name="cos")
+cos :: proc "c" (x: f64) -> f64 {
+	return math.cos(x)
+}
+
+@(require, linkage="strong", link_name="acos")
+acos :: proc "c" (x: f64) -> f64 {
+	return math.acos(x)
+}
+
+@(require, linkage="strong", link_name="fabs")
+fabs :: proc "c" (x: f64) -> f64 {
+	return math.abs(x)
+}
+
+// Integer absolute value; uses the builtin rather than `core:math`.
+@(require, linkage="strong", link_name="abs")
+abs :: proc "c" (x: i32) -> i32 {
+	return builtin.abs(x)
+}
+
+@(require, linkage="strong", link_name="ldexp")
+ldexp :: proc "c" (x: f64, y: i32) -> f64{
+	return math.ldexp(x, int(y))
+}
+
+@(require, linkage="strong", link_name="exp")
+exp :: proc "c" (x: f64) -> f64 {
+	return math.exp(x)
+}
+
+// NOTE(review): standard C `log` and `sin` take and return double. These use
+// f32 to match the declarations in `include/math.h`; changing them needs the
+// header and the prebuilt objects updated at the same time.
+@(require, linkage="strong", link_name="log")
+log :: proc "c" (x: f32) -> f32 {
+	return math.ln(x)
+}
+
+@(require, linkage="strong", link_name="sin")
+sin :: proc "c" (x: f32) -> f32 {
+	return math.sin(x)
+}

+ 106 - 0
vendor/libc/stdio.odin

@@ -0,0 +1,106 @@
+package odin_libc
+
+import "core:c"
+import "core:io"
+import "core:os"
+
+import stb "vendor:stb/sprintf"
+
+FILE :: uintptr
+
+// C `fopen`. Not implemented yet: calling it hits `unimplemented`. It is
+// exported so that C code referencing `fopen` still links.
+@(require, linkage="strong", link_name="fopen")
+fopen :: proc "c" (path: cstring, mode: cstring) -> FILE {
+	context = g_ctx
+	unimplemented("odin_libc.fopen")
+}
+
+// C `fseek`: moves the position of `file` via `os.seek`.
+// Returns 0 on success and -1 if the seek reported an error.
+@(require, linkage="strong", link_name="fseek")
+fseek :: proc "c" (file: FILE, offset: c.long, whence: i32) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	if _, seek_err := os.seek(os.Handle(file-1), i64(offset), int(whence)); seek_err != nil {
+		return -1
+	}
+	return 0
+}
+
+// C `ftell`: reports the current position of `file` by seeking zero bytes
+// from the current position. Returns -1 if the seek reported an error.
+@(require, linkage="strong", link_name="ftell")
+ftell :: proc "c" (file: FILE) -> c.long {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	position, seek_err := os.seek(os.Handle(file-1), 0, os.SEEK_CUR)
+	if seek_err == nil {
+		return c.long(position)
+	}
+	return -1
+}
+
+// C `fclose`: closes the OS handle behind `file`.
+// Returns 0 on success and -1 if closing reported an error.
+@(require, linkage="strong", link_name="fclose")
+fclose :: proc "c" (file: FILE) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	close_err := os.close(os.Handle(file-1))
+	if close_err == nil {
+		return 0
+	}
+	return -1
+}
+
+// C `fread`: reads up to `count` items of `size` bytes each from `file` into
+// `buffer`.
+//
+// Returns the number of complete items read, as C requires. The request
+// covers `size * count` bytes. Returns 0 when either argument is 0 or when
+// that product would overflow `uint`.
+@(require, linkage="strong", link_name="fread")
+fread :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint {
+	context = g_ctx
+
+	if size == 0 || count == 0 || count > max(uint) / size {
+		return 0
+	}
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+	n, _   := os.read(handle, buffer[:size * count])
+
+	// `n` is a byte count; convert it to whole items.
+	return uint(max(0, n)) / size
+}
+
+// C `fwrite`: writes `count` items of `size` bytes each from `buffer` to
+// `file`.
+//
+// Returns the number of complete items written, as C requires. The request
+// covers `size * count` bytes. Returns 0 when either argument is 0 or when
+// that product would overflow `uint`.
+@(require, linkage="strong", link_name="fwrite")
+fwrite :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint {
+	context = g_ctx
+
+	if size == 0 || count == 0 || count > max(uint) / size {
+		return 0
+	}
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+	n, _   := os.write(handle, buffer[:size * count])
+
+	// `n` is a byte count; convert it to whole items.
+	return uint(max(0, n)) / size
+}
+
+// C `vsnprintf`, forwarded to stb's implementation. stb takes the buffer
+// size as an `i32`, so a `count` that turns negative after conversion trips
+// the assertion.
+@(require, linkage="strong", link_name="vsnprintf")
+vsnprintf :: proc "c" (buf: [^]byte, count: uint, fmt: cstring, args: ^c.va_list) -> i32 {
+	limit := i32(count)
+	assert_contextless(limit >= 0)
+
+	return stb.vsnprintf(buf, limit, fmt, args)
+}
+
+// C `vfprintf`: formats with stb's `vsnprintf` and writes the result to the
+// OS handle behind `file`.
+//
+// The text is first formatted into a stack buffer. If it does not fit, a heap
+// buffer of the full length plus one byte for the NUL terminator is allocated
+// and the text is formatted again.
+//
+// Returns the number of bytes written, -1 if the write failed, or the
+// non-positive value reported by the formatter.
+@(require, linkage="strong", link_name="vfprintf")
+vfprintf :: proc "c" (file: FILE, fmt: cstring, args: ^c.va_list) -> i32 {
+	context = g_ctx
+
+	// A FILE value is the OS handle plus one.
+	handle := os.Handle(file-1)
+
+	MAX_STACK :: 4096
+
+	stack_buf: [MAX_STACK]byte = ---
+
+	// `n` is the full formatted length, even if the output was truncated.
+	n := stb.vsnprintf(&stack_buf[0], MAX_STACK, fmt, args)
+	if n <= 0 {
+		return n
+	}
+
+	// Tracked separately from `buf` so the heap allocation is always freed,
+	// including when the text is exactly MAX_STACK bytes long.
+	heap_buf: []byte
+	defer if heap_buf != nil {
+		delete(heap_buf)
+	}
+
+	buf: []byte
+	if n >= MAX_STACK {
+		// The formatter always NUL-terminates, so it needs n+1 bytes to
+		// emit all n characters.
+		// NOTE(review): `args` is reused after the first formatting pass;
+		// presumably fine for this target's va_list, confirm.
+		heap_buf = make([]byte, int(n) + 1)
+		n2 := stb.vsnprintf(raw_data(heap_buf), i32(len(heap_buf)), fmt, args)
+		assert(n == n2)
+		buf = heap_buf[:n]
+	} else {
+		buf = stack_buf[:n]
+	}
+
+	_, err := io.write_full(os.stream_from_handle(handle), buf)
+	if err != nil {
+		return -1
+	}
+
+	return i32(len(buf))
+}

+ 119 - 0
vendor/libc/stdlib.odin

@@ -0,0 +1,119 @@
+package odin_libc
+
+import "base:runtime"
+
+import "core:c"
+import "core:slice"
+import "core:sort"
+import "core:strconv"
+import "core:strings"
+
+// malloc allocates `size` bytes of non-zeroed memory through `runtime.mem_alloc_non_zeroed`
+// under `g_ctx`. An allocation error trips an assertion instead of returning nil.
+@(require, linkage="strong", link_name="malloc")
+malloc :: proc "c" (size: uint) -> rawptr {
+	context = g_ctx
+	block, alloc_err := runtime.mem_alloc_non_zeroed(int(size))
+	assert(alloc_err == nil, "allocation failure")
+	return raw_data(block)
+}
+
+// aligned_alloc allocates `size` bytes of non-zeroed memory with the requested `alignment`
+// through `runtime.mem_alloc_non_zeroed` under `g_ctx`. An allocation error trips an assertion.
+@(require, linkage="strong", link_name="aligned_alloc")
+aligned_alloc :: proc "c" (alignment: uint, size: uint) -> rawptr {
+	context = g_ctx
+	block, alloc_err := runtime.mem_alloc_non_zeroed(int(size), int(alignment))
+	assert(alloc_err == nil, "allocation failure")
+	return raw_data(block)
+}
+
+// free releases `ptr` by passing it to `runtime.mem_free` under `g_ctx`.
+@(require, linkage="strong", link_name="free")
+free :: proc "c" (ptr: rawptr) {
+	context = g_ctx
+	runtime.mem_free(ptr)
+}
+
+// realloc resizes the allocation at `ptr` to `new_size` bytes without zeroing new memory and
+// returns the (possibly moved) allocation. A resize error trips an assertion.
+@(require, linkage="strong", link_name="realloc")
+realloc :: proc "c" (ptr: rawptr, new_size: uint) -> rawptr {
+	context = g_ctx
+	// -1 for the old_size, assumed to be wrapped with the mem.Compat_Allocator to get the right size.
+	// Note that realloc does not actually care about alignment and is allowed to just align it to something
+	// else than the original allocation.
+	resized, err := runtime.non_zero_mem_resize(ptr, -1, int(new_size))
+	// Succeed when there is NO error; the previous `err != nil` check fired on every successful call.
+	assert(err == nil, "realloc failure")
+	return raw_data(resized)
+}
+
+// qsort sorts `num` elements of `size` bytes each, starting at `base`, using `cmp` for ordering.
+//
+// The arguments are bundled into a local struct and handed to `sort.sort` through the
+// interface's `collection` pointer; the callbacks recover them from there because they
+// cannot capture locals.
+@(require, linkage="strong", link_name="qsort")
+qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) {
+	context = g_ctx
+
+	Inputs :: struct {
+		base: rawptr,
+		num:  uint,
+		size: uint,
+		cmp:  proc "c" (a, b: rawptr) -> i32,
+	}
+
+	sort.sort({
+		collection = &Inputs{base, num, size, cmp},
+		len = proc(it: sort.Interface) -> int {
+			state := (^Inputs)(it.collection)
+			return int(state.num)
+		},
+		less = proc(it: sort.Interface, i, j: int) -> bool {
+			state  := (^Inputs)(it.collection)
+			start  := uintptr(state.base)
+			stride := uintptr(state.size)
+			// Element k lives at base + k*size.
+			lhs := rawptr(start + (uintptr(i) * stride))
+			rhs := rawptr(start + (uintptr(j) * stride))
+			return state.cmp(lhs, rhs) < 0
+		},
+		swap = proc(it: sort.Interface, i, j: int) {
+			state  := (^Inputs)(it.collection)
+			start  := uintptr(state.base)
+			stride := uintptr(state.size)
+
+			lhs := rawptr(start + (uintptr(i) * stride))
+			rhs := rawptr(start + (uintptr(j) * stride))
+
+			slice.ptr_swap_non_overlapping(lhs, rhs, int(state.size))
+		},
+	})
+}
+
+// atoi parses `str` with `atoll` and converts the result to i32.
+@(require, linkage="strong", link_name="atoi")
+atoi :: proc "c" (str: cstring) -> i32 {
+	wide := atoll(str)
+	return i32(wide)
+}
+
+// atol parses `str` with `atoll` and converts the result to c.long.
+@(require, linkage="strong", link_name="atol")
+atol :: proc "c" (str: cstring) -> c.long {
+	wide := atoll(str)
+	return c.long(wide)
+}
+
+// atoll skips leading whitespace in `str` and parses the rest as a base-10 signed integer.
+// The parser's success flag is ignored; whatever value it produced is returned.
+@(require, linkage="strong", link_name="atoll")
+atoll :: proc "c" (str: cstring) -> c.longlong {
+	context = g_ctx
+
+	text := strings.trim_left_space(string(str))
+	value, _ := strconv.parse_i64_of_base(text, 10)
+	return c.longlong(value)
+}
+
+// atof skips leading whitespace in `str` and parses the rest as an f64.
+// The parser's success flag is ignored; whatever value it produced is returned.
+@(require, linkage="strong", link_name="atof")
+atof :: proc "c" (str: cstring) -> f64 {
+	context = g_ctx
+
+	text := strings.trim_left_space(string(str))
+	value, _ := strconv.parse_f64(text)
+	return value
+}
+
+// strtol skips leading whitespace in `str`, parses an integer in the given `base`, and returns it
+// clamped to the range of c.long.
+//
+// If `str_end` is non-nil, it is set to point just past the characters consumed by the parser.
+// C permits callers to pass NULL for `str_end`, in which case nothing is stored.
+@(require, linkage="strong", link_name="strtol")
+strtol :: proc "c" (str: cstring, str_end: ^cstring, base: i32) -> c.long {
+	context = g_ctx
+
+	sstr := string(str)
+	sstr  = strings.trim_left_space(sstr)
+
+	n: int
+	i, _ := strconv.parse_i64_of_base(sstr, int(base), &n)
+	// Only report the end position when the caller asked for it; writing through nil would crash.
+	if str_end != nil {
+		str_end^ = cstring(raw_data(sstr)[n:])
+	}
+	return c.long(clamp(i, i64(min(c.long)), i64(max(c.long))))
+}

+ 111 - 0
vendor/libc/string.odin

@@ -0,0 +1,111 @@
+package odin_libc
+
+import "base:intrinsics"
+
+import "core:c"
+import "core:strings"
+import "core:mem"
+
+// NOTE: already defined by Odin.
+// void *memcpy(void *, const void *, size_t);
+// void *memset(void *, int, size_t);
+
+// memcmp compares the first `count` bytes of `lhs` and `rhs` with `mem.compare` and returns
+// its result as an i32.
+@(require, linkage="strong", link_name="memcmp")
+memcmp :: proc "c" (lhs: [^]byte, rhs: [^]byte, count: uint) -> i32 {
+	n := int(count)
+	assert_contextless(n >= 0)
+	left, right := lhs[:n], rhs[:n]
+	return i32(mem.compare(left, right))
+}
+
+// strlen returns the length of `str` as given by the built-in `len`, converted to c.ulong.
+@(require, linkage="strong", link_name="strlen")
+strlen :: proc "c" (str: cstring) -> c.ulong {
+	n := len(str)
+	return c.ulong(n)
+}
+
+// strchr returns a pointer to the first byte in `str` equal to `ch` (truncated to u8), or nil if
+// there is none. Searching for 0 yields a pointer to the string's terminator.
+@(require, linkage="strong", link_name="strchr")
+strchr :: proc "c" (str: cstring, ch: i32) -> cstring {
+	needle := u8(ch)
+	text   := string(str)
+	start  := raw_data(text)
+
+	if needle == 0 {
+		return cstring(start[len(text):])
+	}
+
+	pos := strings.index_byte(text, needle)
+	if pos >= 0 {
+		return cstring(start[pos:])
+	}
+	return nil
+}
+
+// strrchr returns a pointer to the last byte in `str` equal to `ch` (truncated to u8), or nil if
+// there is none. Searching for 0 yields a pointer to the string's terminator.
+@(require, linkage="strong", link_name="strrchr")
+strrchr :: proc "c" (str: cstring, ch: i32) -> cstring {
+	needle := u8(ch)
+	text   := string(str)
+	start  := raw_data(text)
+
+	if needle == 0 {
+		return cstring(start[len(text):])
+	}
+
+	pos := strings.last_index_byte(text, needle)
+	if pos >= 0 {
+		return cstring(start[pos:])
+	}
+	return nil
+}
+
+// strncpy copies at most `count` bytes of `src` into `dst` and returns `dst`.
+//
+// When `src` is shorter than `count`, the rest of the `count` bytes in `dst` are filled with
+// zeros. When `src` is `count` bytes or longer, `dst` is left without a terminator.
+@(require, linkage="strong", link_name="strncpy")
+strncpy :: proc "c" (dst: [^]byte, src: cstring, count: uint) -> cstring {
+	icount := int(count)
+	assert_contextless(icount >= 0)
+	cnt := min(len(src), icount)
+	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), cnt)
+	// Pad the tail *after* the copied bytes; zeroing from `dst` itself would wipe the copy.
+	intrinsics.mem_zero(dst[cnt:], icount-cnt)
+	return cstring(dst)
+}
+
+// strcpy copies `src`, including its terminating zero byte, into `dst` and returns `dst`.
+// The copy is non-overlapping.
+@(require, linkage="strong", link_name="strcpy")
+strcpy :: proc "c" (dst: [^]byte, src: cstring) -> cstring {
+	// +1 so the terminator is copied along with the characters.
+	total := len(src) + 1
+	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), total)
+	return cstring(dst)
+}
+
+// strcspn returns the index of the first character of `dst` that `strings.index_any` finds in
+// `src`, or the length of `dst` when it reports none (-1).
+@(require, linkage="strong", link_name="strcspn")
+strcspn :: proc "c" (dst: cstring, src: cstring) -> uint {
+	context = g_ctx
+	haystack := string(dst)
+	pos := strings.index_any(haystack, string(src))
+	if pos != -1 {
+		return uint(pos)
+	}
+	return len(haystack)
+}
+
+// strncmp compares `lhs` and `rhs`, considering at most `count` bytes of each (and stopping at a
+// terminator found earlier), and returns the result of `strings.compare` as an i32.
+@(require, linkage="strong", link_name="strncmp")
+strncmp :: proc "c" (lhs: cstring, rhs: cstring, count: uint) -> i32 {
+	limit := int(count)
+	assert_contextless(limit >= 0)
+	left  := strings.string_from_null_terminated_ptr(([^]byte)(lhs), limit)
+	right := strings.string_from_null_terminated_ptr(([^]byte)(rhs), limit)
+	order := strings.compare(left, right)
+	return i32(order)
+}
+
+// strcmp compares `lhs` and `rhs` with `strings.compare` and returns the result as an i32.
+@(require, linkage="strong", link_name="strcmp")
+strcmp :: proc "c" (lhs: cstring, rhs: cstring) -> i32 {
+	left, right := string(lhs), string(rhs)
+	return i32(strings.compare(left, right))
+}
+
+// strstr returns a pointer to the first occurrence of `substr` inside `str`, or nil if it does
+// not occur. An empty `substr` returns `str` itself.
+@(require, linkage="strong", link_name="strstr")
+strstr :: proc "c" (str: cstring, substr: cstring) -> cstring {
+	if substr == "" {
+		return str
+	}
+
+	pos := strings.index(string(str), string(substr))
+	if pos >= 0 {
+		return cstring(([^]byte)(str)[pos:])
+	}
+	return nil
+}
+

+ 44 - 13
vendor/stb/image/stb_image.odin

@@ -7,6 +7,7 @@ LIB :: (
 	     "../lib/stb_image.lib"      when ODIN_OS == .Windows
 	     "../lib/stb_image.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image.a"        when ODIN_OS == .Linux
 	else "../lib/stb_image.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image.a" when ODIN_OS == .Darwin
 	else "../lib/darwin/stb_image.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -15,12 +16,19 @@ when LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
 	}
+}
 
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbi "../lib/stb_image_wasm.o"
+	foreign import stbi { LIB }
+} else when LIB != "" {
 	foreign import stbi { LIB }
 	foreign import stbi { LIB }
 } else {
 } else {
 	foreign import stbi "system:stb_image"
 	foreign import stbi "system:stb_image"
 }
 }
 
 
+NO_STDIO :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+
 #assert(size_of(c.int) == size_of(b32))
 #assert(size_of(c.int) == size_of(b32))
 #assert(size_of(b32) == size_of(c.int))
 #assert(size_of(b32) == size_of(c.int))
 
 
@@ -33,14 +41,48 @@ Io_Callbacks :: struct {
 	eof:  proc "c" (user: rawptr) -> c.int,                             // returns nonzero if we are at end of file/data
 	eof:  proc "c" (user: rawptr) -> c.int,                             // returns nonzero if we are at end of file/data
 }
 }
 
 
+// File-name and `^c.FILE` based entry points. These are only declared when NO_STDIO is false;
+// the memory- and callback-based variants live in the unconditional `foreign stbi` block.
+when !NO_STDIO {
+	@(default_calling_convention="c", link_prefix="stbi_")
+	foreign stbi {
+		////////////////////////////////////
+		//
+		// 8-bits-per-channel interface
+		//
+		load           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
+		load_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
+
+		////////////////////////////////////
+		//
+		// 16-bits-per-channel interface
+		//
+		load_16           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
+		load_16_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
+
+		////////////////////////////////////
+		//
+		// float-per-channel interface
+		//
+		loadf           :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
+		loadf_from_file :: proc(f: ^c.FILE,        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
+
+		is_hdr           :: proc(filename: cstring) -> c.int ---
+		is_hdr_from_file :: proc(f: ^c.FILE)        -> c.int ---
+
+		// get image dimensions & components without fully decoding
+		info           :: proc(filename: cstring, x, y, comp: ^c.int) -> c.int ---
+		info_from_file :: proc(f: ^c.FILE,        x, y, comp: ^c.int) -> c.int ---
+
+		is_16_bit           :: proc(filename: cstring) -> b32 ---
+		is_16_bit_from_file :: proc(f: ^c.FILE)        -> b32 ---
+	}
+}
+
 @(default_calling_convention="c", link_prefix="stbi_")
 @(default_calling_convention="c", link_prefix="stbi_")
 foreign stbi {
 foreign stbi {
 	////////////////////////////////////
 	////////////////////////////////////
 	//
 	//
 	// 8-bits-per-channel interface
 	// 8-bits-per-channel interface
 	//
 	//
-	load                :: proc(filename: cstring,                 x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
-	load_from_file      :: proc(f: ^c.FILE,                        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 	load_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte ---
 
 
@@ -50,8 +92,6 @@ foreign stbi {
 	//
 	//
 	// 16-bits-per-channel interface
 	// 16-bits-per-channel interface
 	//
 	//
-	load_16                :: proc(filename: cstring,           x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
-	load_16_from_file      :: proc(f: ^c.FILE,                  x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_memory    :: proc(buffer: [^]byte, len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_memory    :: proc(buffer: [^]byte, len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_callbacks :: proc(clbk: ^Io_Callbacks,         x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 	load_16_from_callbacks :: proc(clbk: ^Io_Callbacks,         x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 ---
 
 
@@ -59,8 +99,6 @@ foreign stbi {
 	//
 	//
 	// float-per-channel interface
 	// float-per-channel interface
 	//
 	//
-	loadf                 :: proc(filename: cstring,                 x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
-	loadf_from_file       :: proc(f: ^c.FILE,                        x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_memory     :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_memory     :: proc(buffer: [^]byte, len: c.int,       x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_callbacks  :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 	loadf_from_callbacks  :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 ---
 
 
@@ -73,9 +111,6 @@ foreign stbi {
 	is_hdr_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr) -> c.int ---
 	is_hdr_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr) -> c.int ---
 	is_hdr_from_memory    :: proc(buffer: [^]byte, len: c.int)       -> c.int ---
 	is_hdr_from_memory    :: proc(buffer: [^]byte, len: c.int)       -> c.int ---
 
 
-	is_hdr           :: proc(filename: cstring) -> c.int ---
-	is_hdr_from_file :: proc(f: ^c.FILE)        -> c.int ---
-
 	// get a VERY brief reason for failure
 	// get a VERY brief reason for failure
 	// NOT THREADSAFE
 	// NOT THREADSAFE
 	failure_reason :: proc() -> cstring ---
 	failure_reason :: proc() -> cstring ---
@@ -84,13 +119,9 @@ foreign stbi {
 	image_free :: proc(retval_from_load: rawptr) ---
 	image_free :: proc(retval_from_load: rawptr) ---
 
 
 	// get image dimensions & components without fully decoding
 	// get image dimensions & components without fully decoding
-	info                :: proc(filename: cstring,                 x, y, comp: ^c.int) -> c.int ---
-	info_from_file      :: proc(f: ^c.FILE,                        x, y, comp: ^c.int) -> c.int ---
 	info_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, comp: ^c.int) -> c.int ---
 	info_from_memory    :: proc(buffer: [^]byte, len: c.int,       x, y, comp: ^c.int) -> c.int ---
 	info_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, comp: ^c.int) -> c.int ---
 	info_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, comp: ^c.int) -> c.int ---
 	
 	
-	is_16_bit             :: proc(filename: cstring) -> b32 ---
-	is_16_bit_from_file   :: proc(f: ^c.FILE) -> b32 ---
 	is_16_bit_from_memory :: proc(buffer: [^]byte, len: c.int) -> c.int ---
 	is_16_bit_from_memory :: proc(buffer: [^]byte, len: c.int) -> c.int ---
 
 
 	// for image formats that explicitly notate that they have premultiplied alpha,
 	// for image formats that explicitly notate that they have premultiplied alpha,

+ 5 - 0
vendor/stb/image/stb_image_resize.odin

@@ -7,6 +7,7 @@ RESIZE_LIB :: (
 	     "../lib/stb_image_resize.lib"      when ODIN_OS == .Windows
 	     "../lib/stb_image_resize.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image_resize.a"        when ODIN_OS == .Linux
 	else "../lib/stb_image_resize.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image_resize.a" when ODIN_OS == .Darwin
 	else "../lib/darwin/stb_image_resize.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_resize_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -15,7 +16,11 @@ when RESIZE_LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
 	}
+}
 
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "../lib/stb_image_resize_wasm.o"
+} else when RESIZE_LIB != "" {
 	foreign import lib { RESIZE_LIB }
 	foreign import lib { RESIZE_LIB }
 } else {
 } else {
 	foreign import lib "system:stb_image_resize"
 	foreign import lib "system:stb_image_resize"

+ 4 - 0
vendor/stb/image/stb_image_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package stb_image
+
+@(require) import _ "vendor:libc"

+ 16 - 6
vendor/stb/image/stb_image_write.odin

@@ -7,6 +7,7 @@ WRITE_LIB :: (
 	     "../lib/stb_image_write.lib"      when ODIN_OS == .Windows
 	     "../lib/stb_image_write.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_image_write.a"        when ODIN_OS == .Linux
 	else "../lib/stb_image_write.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_image_write.a" when ODIN_OS == .Darwin
 	else "../lib/darwin/stb_image_write.a" when ODIN_OS == .Darwin
+	else "../lib/stb_image_write_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -15,7 +16,11 @@ when WRITE_LIB != "" {
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		// The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
 	}
+}
 
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbiw "../lib/stb_image_write_wasm.o"
+} else when WRITE_LIB != "" {
 	foreign import stbiw { WRITE_LIB }
 	foreign import stbiw { WRITE_LIB }
 } else {
 } else {
 	foreign import stbiw "system:stb_image_write"
 	foreign import stbiw "system:stb_image_write"
@@ -25,12 +30,6 @@ write_func :: proc "c" (ctx: rawptr, data: rawptr, size: c.int)
 
 
 @(default_calling_convention="c", link_prefix="stbi_")
 @(default_calling_convention="c", link_prefix="stbi_")
 foreign stbiw {
 foreign stbiw {
-	write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
-	write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
-	write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
-	write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32)                             -> c.int ---
-	write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int ---
-	
 	write_png_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
 	write_png_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
 	write_bmp_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
 	write_bmp_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
 	write_tga_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
 	write_tga_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr)                             -> c.int ---
@@ -39,3 +38,14 @@ foreign stbiw {
 	
 	
 	flip_vertically_on_write :: proc(flip_boolean: b32) ---
 	flip_vertically_on_write :: proc(flip_boolean: b32) ---
 }
 }
+
+// File-name based writers. These are only declared when NO_STDIO is false; the `*_to_func`
+// variants in the unconditional `foreign stbiw` block are always declared.
+// NOTE(review): NO_STDIO is not defined in this file — presumably a package-level constant; confirm.
+when !NO_STDIO {
+	@(default_calling_convention="c", link_prefix="stbi_")
+	foreign stbiw {
+		write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int)     -> c.int ---
+		write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
+		write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr)                             -> c.int ---
+		write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32)                             -> c.int ---
+		write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int ---
+	}
+}

BIN
vendor/stb/lib/stb_image_resize_wasm.o


BIN
vendor/stb/lib/stb_image_wasm.o


BIN
vendor/stb/lib/stb_image_write_wasm.o


BIN
vendor/stb/lib/stb_rect_pack_wasm.o


BIN
vendor/stb/lib/stb_sprintf_wasm.o


BIN
vendor/stb/lib/stb_truetype_wasm.o


+ 5 - 0
vendor/stb/rect_pack/stb_rect_pack.odin

@@ -9,6 +9,7 @@ LIB :: (
 	     "../lib/stb_rect_pack.lib"      when ODIN_OS == .Windows
 	     "../lib/stb_rect_pack.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_rect_pack.a"        when ODIN_OS == .Linux
 	else "../lib/stb_rect_pack.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_rect_pack.a" when ODIN_OS == .Darwin
 	else "../lib/darwin/stb_rect_pack.a" when ODIN_OS == .Darwin
+	else "../lib/stb_rect_pack_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -16,7 +17,11 @@ when LIB != "" {
 	when !#exists(LIB) {
 	when !#exists(LIB) {
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
 	}
+}
 
 
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import lib "../lib/stb_rect_pack_wasm.o"
+} else when LIB != "" {
 	foreign import lib { LIB }
 	foreign import lib { LIB }
 } else {
 } else {
 	foreign import lib "system:stb_rect_pack"
 	foreign import lib "system:stb_rect_pack"

+ 4 - 0
vendor/stb/rect_pack/stb_rect_pack_wasm.odin

@@ -0,0 +1,4 @@
+//+build wasm32, wasm64p32
+package stb_rect_pack
+
+@(require) import _ "vendor:libc"

+ 37 - 0
vendor/stb/sprintf/stb_sprintf.odin

@@ -0,0 +1,37 @@
+package stb_sprintf
+
+import "core:c"
+
+// Path of the prebuilt stb_sprintf library for the current target, relative to this file.
+// Empty when no prebuilt library is known for the target.
+@(private)
+LIB :: (
+	     "../lib/stb_sprintf.lib"      when ODIN_OS == .Windows
+	else "../lib/stb_sprintf.a"        when ODIN_OS == .Linux
+	else "../lib/darwin/stb_sprintf.a" when ODIN_OS == .Darwin
+	else "../lib/stb_sprintf_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+	else ""
+)
+
+// Fail the build with a hint on how to compile the libraries when the expected file is missing.
+when LIB != "" {
+	when !#exists(LIB) {
+		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
+	}
+}
+
+// Select what to link against: the wasm object on wasm targets, the prebuilt library when one is
+// known, otherwise a system-provided stb_sprintf.
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	foreign import stbpf "../lib/stb_sprintf_wasm.o"
+} else when LIB != "" {
+	foreign import stbpf { LIB }
+} else {
+	foreign import stbpf "system:stb_sprintf"
+}
+
+// Bindings to the stb_sprintf formatting functions. With the `stbsp_` link prefix, each name
+// below links against the correspondingly prefixed symbol (e.g. `sprintf` -> `stbsp_sprintf`).
+// NOTE(review): `vsprintf` takes `c.va_list` by value while `vsnprintf` and `vsprintfcb` take
+// `^c.va_list` — confirm this difference is intentional.
+@(link_prefix="stbsp_", default_calling_convention="c")
+foreign stbpf {
+	sprintf    :: proc(buf: [^]byte, fmt: cstring, #c_vararg args: ..any) -> i32 ---
+	snprintf   :: proc(buf: [^]byte, count: i32, fmt: cstring, #c_vararg args: ..any) -> i32 ---
+	vsprintf   :: proc(buf: [^]byte, fmt: cstring, va: c.va_list) -> i32 ---
+	vsnprintf  :: proc(buf: [^]byte, count: i32, fmt: cstring, va: ^c.va_list) -> i32 ---
+	vsprintfcb :: proc(callback: SPRINTFCB, user: rawptr, buf: [^]byte, fmt: cstring, va: ^c.va_list) -> i32 ---
+}
+
+// Callback type accepted by `vsprintfcb`: receives a buffer, the caller's `user` pointer and a
+// length, and returns a cstring.
+SPRINTFCB :: #type proc "c" (buf: [^]byte, user: rawptr, len: i32) -> cstring

+ 12 - 2
vendor/stb/src/Makefile

@@ -8,17 +8,24 @@ endif
 
 
 wasm:
 wasm:
 	mkdir -p ../lib
 	mkdir -p ../lib
-	$(CC) -c -Os --target=wasm32 -nostdlib stb_truetype_wasm.c -o ../lib/stb_truetype_wasm.o
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image.c        -o ../lib/stb_image_wasm.o        -DSTBI_NO_STDIO
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_write.c  -o ../lib/stb_image_write_wasm.o  -DSTBI_WRITE_NO_STDIO 
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_resize.c -o ../lib/stb_image_resize_wasm.o
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_truetype.c     -o ../lib/stb_truetype_wasm.o
+	# $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_vorbis.c       -o ../lib/stb_vorbis_wasm.o       -DSTB_VORBIS_NO_STDIO
+	$(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_rect_pack.c    -o ../lib/stb_rect_pack_wasm.o
+	$(CC) -c -Os --target=wasm32                                          stb_sprintf.c      -o ../lib/stb_sprintf_wasm.o
 
 
 unix:
 unix:
 	mkdir -p ../lib
 	mkdir -p ../lib
-	$(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c
+	$(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c stb_sprintf.c
 	$(AR) rcs ../lib/stb_image.a        stb_image.o
 	$(AR) rcs ../lib/stb_image.a        stb_image.o
 	$(AR) rcs ../lib/stb_image_write.a  stb_image_write.o
 	$(AR) rcs ../lib/stb_image_write.a  stb_image_write.o
 	$(AR) rcs ../lib/stb_image_resize.a stb_image_resize.o
 	$(AR) rcs ../lib/stb_image_resize.a stb_image_resize.o
 	$(AR) rcs ../lib/stb_truetype.a     stb_truetype.o
 	$(AR) rcs ../lib/stb_truetype.a     stb_truetype.o
 	$(AR) rcs ../lib/stb_rect_pack.a    stb_rect_pack.o
 	$(AR) rcs ../lib/stb_rect_pack.a    stb_rect_pack.o
 	$(AR) rcs ../lib/stb_vorbis.a       stb_vorbis.o
 	$(AR) rcs ../lib/stb_vorbis.a       stb_vorbis.o
+	$(AR) rcs ../lib/stb_sprintf.a      stb_sprintf.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image.so         -o ../lib/stb_image.so        stb_image.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image.so         -o ../lib/stb_image.so        stb_image.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_write.so   -o ../lib/stb_image_write.so  stb_image_write.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_write.so   -o ../lib/stb_image_write.so  stb_image_write.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_resize.so  -o ../lib/stb_image_resize.so stb_image_resize.o
 	#$(CC) -fPIC -shared -Wl,-soname=stb_image_resize.so  -o ../lib/stb_image_resize.so stb_image_resize.o
@@ -47,4 +54,7 @@ darwin:
 	$(CC) -arch x86_64 -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-x86_64.o -mmacosx-version-min=10.12
 	$(CC) -arch x86_64 -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-x86_64.o -mmacosx-version-min=10.12
 	$(CC) -arch arm64  -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-arm64.o -mmacosx-version-min=10.12
 	$(CC) -arch arm64  -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-arm64.o -mmacosx-version-min=10.12
 	lipo -create stb_vorbis-x86_64.o stb_vorbis-arm64.o -output ../lib/darwin/stb_vorbis.a
 	lipo -create stb_vorbis-x86_64.o stb_vorbis-arm64.o -output ../lib/darwin/stb_vorbis.a
+	$(CC) -arch x86_64 -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-x86_64.o -mmacosx-version-min=10.12
+	$(CC) -arch arm64  -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-arm64.o -mmacosx-version-min=10.12
+	lipo -create stb_sprintf-x86_64.o stb_sprintf-arm64.o -output ../lib/darwin/stb_sprintf.a
 	rm *.o
 	rm *.o

+ 2 - 0
vendor/stb/src/stb_sprintf.c

@@ -0,0 +1,2 @@
+#define STB_SPRINTF_IMPLEMENTATION
+#include "stb_sprintf.h"

+ 1906 - 0
vendor/stb/src/stb_sprintf.h

@@ -0,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length read-backs (%n), etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do its work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses an internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptrdiff_t (%zd %td), plus the 'j' modifier (%jd), as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 4 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer)
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute)
+ #if __has_attribute(format)
+   #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v)  (void)(v)
+#else
+#define STBSP__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+static char stbsp__period = '.';
+static char stbsp__comma = ',';
+static struct
+{
+   short temp; // force next field to be 2-byte aligned
+   char pair[201];
+} stbsp__digitpair =
+{
+  0,
+   "00010203040506070809101112131415161718192021222324"
+   "25262728293031323334353637383940414243444546474849"
+   "50515253545556575859606162636465666768697071727374"
+   "75767778798081828384858687888990919293949596979899"
+};
+
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) // replaces the two file-static separator characters used by later formatting calls
+{
+   stbsp__period = pperiod; // character written where the float paths emit the decimal point
+   stbsp__comma = pcomma; // character written between digit groups when the ' (thousands) flag is set
+}
+
+#define STBSP__LEFTJUST 1
+#define STBSP__LEADINGPLUS 2
+#define STBSP__LEADINGSPACE 4
+#define STBSP__LEADING_0X 8
+#define STBSP__LEADINGZERO 16
+#define STBSP__INTMAX 32
+#define STBSP__TRIPLET_COMMA 64
+#define STBSP__NEGATIVE 128
+#define STBSP__METRIC_SUFFIX 256
+#define STBSP__HALFWIDTH 512
+#define STBSP__METRIC_NOSPACE 1024
+#define STBSP__METRIC_1024 2048
+#define STBSP__METRIC_JEDEC 4096
+
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) // builds the sign prefix for a number from the flag bits in fl: sign[0] = prefix length (0 or 1), sign[1] = prefix character
+{
+   sign[0] = 0; // default: no prefix character
+   if (fl & STBSP__NEGATIVE) { // a negative value always gets '-', whatever other flags are set
+      sign[0] = 1;
+      sign[1] = '-';
+   } else if (fl & STBSP__LEADINGSPACE) { // NOTE(review): tested before LEADINGPLUS, so "%+ d" yields ' '; C printf ignores the space flag when '+' is also given - confirm this is intended
+      sign[0] = 1;
+      sign[1] = ' ';
+   } else if (fl & STBSP__LEADINGPLUS) { // non-negative value with the '+' flag only
+      sign[0] = 1;
+      sign[1] = '+';
+   }
+}
+
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) // returns the number of characters before the first 0 byte in s, counting at most limit characters
+{
+   char const * sn = s; // scan cursor; every return value is sn - s
+
+   // get up to 4-byte alignment
+   for (;;) {
+      if (((stbsp__uintptr)sn & 3) == 0)
+         break;
+
+      if (!limit || *sn == 0) // limit used up or terminator found before the cursor became aligned
+         return (stbsp__uint32)(sn - s);
+
+      ++sn;
+      --limit;
+   }
+
+   // scan over 4 bytes at a time to find terminating 0
+   // this will intentionally scan up to 3 bytes past the end of buffers,
+   // but because it works 4B aligned, it will never cross page boundaries
+   // (hence the STBSP__ASAN markup; the over-read here is intentional
+   // and harmless)
+   while (limit >= 4) {
+      stbsp__uint32 v = *(stbsp__uint32 *)sn; // load the next four characters as one aligned 32-bit word
+      // bit hack to find if there's a 0 byte in there
+      if ((v - 0x01010101) & (~v) & 0x80808080UL)
+         break; // some byte of this word is 0; the byte-wise loop below pins down which one
+
+      sn += 4;
+      limit -= 4;
+   }
+
+   // handle the last few characters to find actual size
+   while (limit && *sn) {
+      ++sn;
+      --limit;
+   }
+
+   return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+   static char hex[] = "0123456789abcdefxp";
+   static char hexu[] = "0123456789ABCDEFXP";
+   char *bf;
+   char const *f;
+   int tlen = 0;
+
+   bf = buf;
+   f = fmt;
+   for (;;) {
+      stbsp__int32 fw, pr, tz;
+      stbsp__uint32 fl;
+
+      // macros for the callback buffer stuff
+      #define stbsp__chk_cb_bufL(bytes)                        \
+         {                                                     \
+            int len = (int)(bf - buf);                         \
+            if ((len + (bytes)) >= STB_SPRINTF_MIN) {          \
+               tlen += len;                                    \
+               if (0 == (bf = buf = callback(buf, user, len))) \
+                  goto done;                                   \
+            }                                                  \
+         }
+      #define stbsp__chk_cb_buf(bytes)    \
+         {                                \
+            if (callback) {               \
+               stbsp__chk_cb_bufL(bytes); \
+            }                             \
+         }
+      #define stbsp__flush_cb()                      \
+         {                                           \
+            stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+         } // flush if there is even one byte in the buffer
+      #define stbsp__cb_buf_clamp(cl, v)                \
+         cl = v;                                        \
+         if (callback) {                                \
+            int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+            if (cl > lg)                                \
+               cl = lg;                                 \
+         }
+
+      // fast copy everything up to the next % (or end of string)
+      for (;;) {
+         while (((stbsp__uintptr)f) & 3) {
+         schk1:
+            if (f[0] == '%')
+               goto scandd;
+         schk2:
+            if (f[0] == 0)
+               goto endfmt;
+            stbsp__chk_cb_buf(1);
+            *bf++ = f[0];
+            ++f;
+         }
+         for (;;) {
+            // Check if the next 4 bytes contain %(0x25) or end of string.
+            // Using the 'hasless' trick:
+            // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+            stbsp__uint32 v, c;
+            v = *(stbsp__uint32 *)f;
+            c = (~v) & 0x80808080;
+            if (((v ^ 0x25252525) - 0x01010101) & c)
+               goto schk1;
+            if ((v - 0x01010101) & c)
+               goto schk2;
+            if (callback)
+               if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+                  goto schk1;
+            #ifdef STB_SPRINTF_NOUNALIGNED
+                if(((stbsp__uintptr)bf) & 3) {
+                    bf[0] = f[0];
+                    bf[1] = f[1];
+                    bf[2] = f[2];
+                    bf[3] = f[3];
+                } else
+            #endif
+            {
+                *(stbsp__uint32 *)bf = v;
+            }
+            bf += 4;
+            f += 4;
+         }
+      }
+   scandd:
+
+      ++f;
+
+      // ok, we have a percent, read the modifiers first
+      fw = 0;
+      pr = -1;
+      fl = 0;
+      tz = 0;
+
+      // flags
+      for (;;) {
+         switch (f[0]) {
+         // if we have left justify
+         case '-':
+            fl |= STBSP__LEFTJUST;
+            ++f;
+            continue;
+         // if we have leading plus
+         case '+':
+            fl |= STBSP__LEADINGPLUS;
+            ++f;
+            continue;
+         // if we have leading space
+         case ' ':
+            fl |= STBSP__LEADINGSPACE;
+            ++f;
+            continue;
+         // if we have leading 0x
+         case '#':
+            fl |= STBSP__LEADING_0X;
+            ++f;
+            continue;
+         // if we have thousand commas
+         case '\'':
+            fl |= STBSP__TRIPLET_COMMA;
+            ++f;
+            continue;
+         // if we have kilo marker (none->kilo->kibi->jedec)
+         case '$':
+            if (fl & STBSP__METRIC_SUFFIX) {
+               if (fl & STBSP__METRIC_1024) {
+                  fl |= STBSP__METRIC_JEDEC;
+               } else {
+                  fl |= STBSP__METRIC_1024;
+               }
+            } else {
+               fl |= STBSP__METRIC_SUFFIX;
+            }
+            ++f;
+            continue;
+         // if we don't want space between metric suffix and number
+         case '_':
+            fl |= STBSP__METRIC_NOSPACE;
+            ++f;
+            continue;
+         // if we have leading zero
+         case '0':
+            fl |= STBSP__LEADINGZERO;
+            ++f;
+            goto flags_done;
+         default: goto flags_done;
+         }
+      }
+   flags_done:
+
+      // get the field width
+      if (f[0] == '*') {
+         fw = va_arg(va, stbsp__uint32);
+         ++f;
+      } else {
+         while ((f[0] >= '0') && (f[0] <= '9')) {
+            fw = fw * 10 + f[0] - '0';
+            f++;
+         }
+      }
+      // get the precision
+      if (f[0] == '.') {
+         ++f;
+         if (f[0] == '*') {
+            pr = va_arg(va, stbsp__uint32);
+            ++f;
+         } else {
+            pr = 0;
+            while ((f[0] >= '0') && (f[0] <= '9')) {
+               pr = pr * 10 + f[0] - '0';
+               f++;
+            }
+         }
+      }
+
+      // handle integer size overrides
+      switch (f[0]) {
+      // are we halfwidth?
+      case 'h':
+         fl |= STBSP__HALFWIDTH;
+         ++f;
+         if (f[0] == 'h')
+            ++f;  // QUARTERWIDTH
+         break;
+      // are we 64-bit (unix style)
+      case 'l':
+         fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+         ++f;
+         if (f[0] == 'l') {
+            fl |= STBSP__INTMAX;
+            ++f;
+         }
+         break;
+      // are we 64-bit on intmax? (c99)
+      case 'j':
+         fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      case 't':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit (msft style)
+      case 'I':
+         if ((f[1] == '6') && (f[2] == '4')) {
+            fl |= STBSP__INTMAX;
+            f += 3;
+         } else if ((f[1] == '3') && (f[2] == '2')) {
+            f += 3;
+         } else {
+            fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+            ++f;
+         }
+         break;
+      default: break;
+      }
+
+      // handle each replacement
+      switch (f[0]) {
+         #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+         char num[STBSP__NUMSZ];
+         char lead[8];
+         char tail[8];
+         char *s;
+         char const *h;
+         stbsp__uint32 l, n, cs;
+         stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+         double fv;
+#endif
+         stbsp__int32 dp;
+         char const *sn;
+
+      case 's':
+         // get the string
+         s = va_arg(va, char *);
+         if (s == 0)
+            s = (char *)"null";
+         // get the length, limited to desired precision
+         // always limit to ~0u chars since our counts are 32b
+         l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         // copy the string in
+         goto scopy;
+
+      case 'c': // char
+         // get the character
+         s = num + STBSP__NUMSZ - 1;
+         *s = (char)va_arg(va, int);
+         l = 1;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+
+      case 'n': // weird write-bytes specifier
+      {
+         int *d = va_arg(va, int *);
+         *d = tlen + (int)(bf - buf);
+      } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+      case 'A':              // float
+      case 'a':              // hex float
+      case 'G':              // float
+      case 'g':              // float
+      case 'E':              // float
+      case 'e':              // float
+      case 'f':              // float
+         va_arg(va, double); // eat it
+         s = (char *)"No float";
+         l = 8;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         cs = 0;
+         STBSP__NOTUSED(dp);
+         goto scopy;
+#else
+      case 'A': // hex float
+      case 'a': // hex float
+         h = (f[0] == 'A') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+            fl |= STBSP__NEGATIVE;
+
+         s = num + 64;
+
+         stbsp__lead_sign(fl, lead);
+
+         if (dp == -1023)
+            dp = (n64) ? -1022 : 0;
+         else
+            n64 |= (((stbsp__uint64)1) << 52);
+         n64 <<= (64 - 56);
+         if (pr < 15)
+            n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+         *s++ = '0';
+         *s++ = 'x';
+#else
+         lead[1 + lead[0]] = '0';
+         lead[2 + lead[0]] = 'x';
+         lead[0] += 2;
+#endif
+         *s++ = h[(n64 >> 60) & 15];
+         n64 <<= 4;
+         if (pr)
+            *s++ = stbsp__period;
+         sn = s;
+
+         // print the bits
+         n = pr;
+         if (n > 13)
+            n = 13;
+         if (pr > (stbsp__int32)n)
+            tz = pr - n;
+         pr = 0;
+         while (n--) {
+            *s++ = h[(n64 >> 60) & 15];
+            n64 <<= 4;
+         }
+
+         // print the expo
+         tail[1] = h[17];
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+         n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+
+         dp = (int)(s - sn);
+         l = (int)(s - (num + 64));
+         s = num + 64;
+         cs = 1 + (3 << 24);
+         goto scopy;
+
+      case 'G': // float
+      case 'g': // float
+         h = (f[0] == 'G') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6;
+         else if (pr == 0)
+            pr = 1; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+
+         // clamp the precision and delete extra zeros after clamp
+         n = pr;
+         if (l > (stbsp__uint32)pr)
+            l = pr;
+         while ((l > 1) && (pr) && (sn[l - 1] == '0')) {
+            --pr;
+            --l;
+         }
+
+         // should we use %e
+         if ((dp <= -4) || (dp > (stbsp__int32)n)) {
+            if (pr > (stbsp__int32)l)
+               pr = l - 1;
+            else if (pr)
+               --pr; // when using %e, there is one digit before the decimal
+            goto doexpfromg;
+         }
+         // this is the insane action to get the pr to match %g semantics for %f
+         if (dp > 0) {
+            pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+         } else {
+            pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+         }
+         goto dofloatfromg;
+
+      case 'E': // float
+      case 'e': // float
+         h = (f[0] == 'E') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+      doexpfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+         // handle leading chars
+         *s++ = sn[0];
+
+         if (pr)
+            *s++ = stbsp__period;
+
+         // handle after decimal
+         if ((l - 1) > (stbsp__uint32)pr)
+            l = pr + 1;
+         for (n = 1; n < l; n++)
+            *s++ = sn[n];
+         // trailing zeros
+         tz = pr - (l - 1);
+         pr = 0;
+         // dump expo
+         tail[1] = h[0xe];
+         dp -= 1;
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+         n = 5;
+#else
+         n = (dp >= 100) ? 5 : 4;
+#endif
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+         cs = 1 + (3 << 24); // how many tens
+         goto flt_lead;
+
+      case 'f': // float
+         fv = va_arg(va, double);
+      doafloat:
+         // do kilos
+         if (fl & STBSP__METRIC_SUFFIX) {
+            double divisor;
+            divisor = 1000.0f;
+            if (fl & STBSP__METRIC_1024)
+               divisor = 1024.0;
+            while (fl < 0x4000000) {
+               if ((fv < divisor) && (fv > -divisor))
+                  break;
+               fv /= divisor;
+               fl += 0x1000000;
+            }
+         }
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+            fl |= STBSP__NEGATIVE;
+      dofloatfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+
+         // handle the three decimal varieties
+         if (dp <= 0) {
+            stbsp__int32 i;
+            // handle 0.000*000xxxx
+            *s++ = '0';
+            if (pr)
+               *s++ = stbsp__period;
+            n = -dp;
+            if ((stbsp__int32)n > pr)
+               n = pr;
+            i = n;
+            while (i) {
+               if ((((stbsp__uintptr)s) & 3) == 0)
+                  break;
+               *s++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)s = 0x30303030;
+               s += 4;
+               i -= 4;
+            }
+            while (i) {
+               *s++ = '0';
+               --i;
+            }
+            if ((stbsp__int32)(l + n) > pr)
+               l = pr - n;
+            i = l;
+            while (i) {
+               *s++ = *sn++;
+               --i;
+            }
+            tz = pr - (n + l);
+            cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+         } else {
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+            if ((stbsp__uint32)dp >= l) {
+               // handle xxxx000*000.0
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= l)
+                        break;
+                  }
+               }
+               if (n < (stbsp__uint32)dp) {
+                  n = dp - n;
+                  if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                     while (n) {
+                        if ((((stbsp__uintptr)s) & 3) == 0)
+                           break;
+                        *s++ = '0';
+                        --n;
+                     }
+                     while (n >= 4) {
+                        *(stbsp__uint32 *)s = 0x30303030;
+                        s += 4;
+                        n -= 4;
+                     }
+                  }
+                  while (n) {
+                     if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                        cs = 0;
+                        *s++ = stbsp__comma;
+                     } else {
+                        *s++ = '0';
+                        --n;
+                     }
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr) {
+                  *s++ = stbsp__period;
+                  tz = pr;
+               }
+            } else {
+               // handle xxxxx.xxxx000*000
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= (stbsp__uint32)dp)
+                        break;
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr)
+                  *s++ = stbsp__period;
+               if ((l - dp) > (stbsp__uint32)pr)
+                  l = pr + dp;
+               while (n < l) {
+                  *s++ = sn[n];
+                  ++n;
+               }
+               tz = pr - (l - dp);
+            }
+         }
+         pr = 0;
+
+         // handle k,m,g,t
+         if (fl & STBSP__METRIC_SUFFIX) {
+            char idx;
+            idx = 1;
+            if (fl & STBSP__METRIC_NOSPACE)
+               idx = 0;
+            tail[0] = idx;
+            tail[1] = ' ';
+            {
+               if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+                  if (fl & STBSP__METRIC_1024)
+                     tail[idx + 1] = "_KMGT"[fl >> 24];
+                  else
+                     tail[idx + 1] = "_kMGT"[fl >> 24];
+                  idx++;
+                  // If printing kibits and not in jedec, add the 'i'.
+                  if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+                     tail[idx + 1] = 'i';
+                     idx++;
+                  }
+                  tail[0] = idx;
+               }
+            }
+         };
+
+      flt_lead:
+         // get the length that we copied
+         l = (stbsp__uint32)(s - (num + 64));
+         s = num + 64;
+         goto scopy;
+#endif
+
+      case 'B': // upper binary
+      case 'b': // lower binary
+         h = (f[0] == 'B') ? hexu : hex;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[0xb];
+         }
+         l = (8 << 4) | (1 << 8);
+         goto radixnum;
+
+      case 'o': // octal
+         h = hexu;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 1;
+            lead[1] = '0';
+         }
+         l = (3 << 4) | (3 << 8);
+         goto radixnum;
+
+      case 'p': // pointer
+         fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+         pr = sizeof(void *) * 2;
+         fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                                    // fall through - to X
+
+      case 'X': // upper hex
+      case 'x': // lower hex
+         h = (f[0] == 'X') ? hexu : hex;
+         l = (4 << 4) | (4 << 8);
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[16];
+         }
+      radixnum:
+         // get the number
+         if (fl & STBSP__INTMAX)
+            n64 = va_arg(va, stbsp__uint64);
+         else
+            n64 = va_arg(va, stbsp__uint32);
+
+         s = num + STBSP__NUMSZ;
+         dp = 0;
+         // clear tail, and clear leading if value is zero
+         tail[0] = 0;
+         if (n64 == 0) {
+            lead[0] = 0;
+            if (pr == 0) {
+               l = 0;
+               cs = 0;
+               goto scopy;
+            }
+         }
+         // convert to string
+         for (;;) {
+            *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+            n64 >>= (l >> 8);
+            if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+               break;
+            if (fl & STBSP__TRIPLET_COMMA) {
+               ++l;
+               if ((l & 15) == ((l >> 4) & 15)) {
+                  l &= ~15;
+                  *--s = stbsp__comma;
+               }
+            }
+         };
+         // get the tens and the comma pos
+         cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         // copy it
+         goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+         // get the integer and abs it
+         if (fl & STBSP__INTMAX) {
+            stbsp__int64 i64 = va_arg(va, stbsp__int64);
+            n64 = (stbsp__uint64)i64;
+            if ((f[0] != 'u') && (i64 < 0)) {
+               n64 = (stbsp__uint64)-i64;
+               fl |= STBSP__NEGATIVE;
+            }
+         } else {
+            stbsp__int32 i = va_arg(va, stbsp__int32);
+            n64 = (stbsp__uint32)i;
+            if ((f[0] != 'u') && (i < 0)) {
+               n64 = (stbsp__uint32)-i;
+               fl |= STBSP__NEGATIVE;
+            }
+         }
+
+#ifndef STB_SPRINTF_NOFLOAT
+         if (fl & STBSP__METRIC_SUFFIX) {
+            if (n64 < 1024)
+               pr = 0;
+            else if (pr == -1)
+               pr = 1;
+            fv = (double)(stbsp__int64)n64;
+            goto doafloat;
+         }
+#endif
+
+         // convert to string
+         s = num + STBSP__NUMSZ;
+         l = 0;
+
+         for (;;) {
+            // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+            char *o = s - 8;
+            if (n64 >= 100000000) {
+               n = (stbsp__uint32)(n64 % 100000000);
+               n64 /= 100000000;
+            } else {
+               n = (stbsp__uint32)n64;
+               n64 = 0;
+            }
+            if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+               do {
+                  s -= 2;
+                  *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+                  n /= 100;
+               } while (n);
+            }
+            while (n) {
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = (char)(n % 10) + '0';
+                  n /= 10;
+               }
+            }
+            if (n64 == 0) {
+               if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                  ++s;
+               break;
+            }
+            while (s != o)
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = '0';
+               }
+         }
+
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         if (l == 0) {
+            *--s = '0';
+            l = 1;
+         }
+         cs = l + (3 << 24);
+         if (pr < 0)
+            pr = 0;
+
+      scopy:
+         // get fw=leading/trailing space, pr=leading zeros
+         if (pr < (stbsp__int32)l)
+            pr = l;
+         n = pr + lead[0] + tail[0] + tz;
+         if (fw < (stbsp__int32)n)
+            fw = n;
+         fw -= n;
+         pr -= l;
+
+         // handle right justify and leading zeros
+         if ((fl & STBSP__LEFTJUST) == 0) {
+            if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+            {
+               pr = (fw > pr) ? fw : pr;
+               fw = 0;
+            } else {
+               fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+            }
+         }
+
+         // copy the spaces and/or zeros
+         if (fw + pr) {
+            stbsp__int32 i;
+            stbsp__uint32 c;
+
+            // copy leading spaces (or when doing %8.4d stuff)
+            if ((fl & STBSP__LEFTJUST) == 0)
+               while (fw > 0) {
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i) {
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  stbsp__chk_cb_buf(1);
+               }
+
+            // copy leader
+            sn = lead + 1;
+            while (lead[0]) {
+               stbsp__cb_buf_clamp(i, lead[0]);
+               lead[0] -= (char)i;
+               while (i) {
+                  *bf++ = *sn++;
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+
+            // copy leading zeros
+            c = cs >> 24;
+            cs &= 0xffffff;
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+            while (pr > 0) {
+               stbsp__cb_buf_clamp(i, pr);
+               pr -= i;
+               if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = '0';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x30303030;
+                     bf += 4;
+                     i -= 4;
+                  }
+               }
+               while (i) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+                     cs = 0;
+                     *bf++ = stbsp__comma;
+                  } else
+                     *bf++ = '0';
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+         }
+
+         // copy leader if there is still one
+         sn = lead + 1;
+         while (lead[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, lead[0]);
+            lead[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy the string
+         n = l;
+         while (n) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, n);
+            n -= i;
+            STBSP__UNALIGNED(while (i >= 4) {
+               *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+               bf += 4;
+               s += 4;
+               i -= 4;
+            })
+            while (i) {
+               *bf++ = *s++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy trailing zeros
+         while (tz) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tz);
+            tz -= i;
+            while (i) {
+               if ((((stbsp__uintptr)bf) & 3) == 0)
+                  break;
+               *bf++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)bf = 0x30303030;
+               bf += 4;
+               i -= 4;
+            }
+            while (i) {
+               *bf++ = '0';
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy tail if there is one
+         sn = tail + 1;
+         while (tail[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tail[0]);
+            tail[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // handle the left justify
+         if (fl & STBSP__LEFTJUST)
+            if (fw > 0) {
+               while (fw) {
+                  stbsp__int32 i;
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i--)
+                     *bf++ = ' ';
+                  stbsp__chk_cb_buf(1);
+               }
+            }
+         break;
+
+      default: // unknown, just copy code
+         s = num + STBSP__NUMSZ - 1;
+         *s = f[0];
+         l = 1;
+         fw = fl = 0;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      }
+      ++f;
+   }
+endfmt:
+
+   if (!callback)
+      *bf = 0;
+   else
+      stbsp__flush_cb();
+
+done:
+   return tlen + (int)(bf - buf);
+}
+
+// cleanup
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+// Unbounded formatted print into buf: forwards the variadic arguments to the
+// core formatter with no callback and no user pointer, and returns its result.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{
+   va_list args;
+   int written;
+
+   va_start(args, fmt);
+   // callback == 0 makes the formatter write directly into buf and zero-terminate it
+   written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
+   va_end(args);
+
+   return written;
+}
+
+typedef struct stbsp__context {
+   char *buf;   // next write position in the caller's destination buffer (advanced by stbsp__clamp_callback)
+   int count;   // capacity still available at buf (decremented by stbsp__clamp_callback)
+   int length;  // running total of every chunk length reported to the callbacks, whether or not it was stored
+   char tmp[STB_SPRINTF_MIN]; // scratch chunk handed back to the formatter when buf cannot be written into directly
+} stbsp__context;
+
+// Output callback used by the bounded (snprintf-style) entry points.
+//   buf  - chunk the formatter has just filled (either c->buf itself or c->tmp)
+//   user - the stbsp__context tracking the destination buffer
+//   len  - number of bytes in the chunk
+// Adds len to the running total, stores as much of the chunk as still fits in
+// the destination, and returns the buffer the formatter should fill next:
+// the destination itself when at least STB_SPRINTF_MIN bytes remain, otherwise
+// the context's scratch buffer.
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *c = (stbsp__context *)user;
+   c->length += len;
+
+   if (len > c->count)
+      len = c->count;
+
+   // len goes negative here only when the remaining capacity is negative
+   // (a negative count from the caller). Testing "len > 0" instead of "len"
+   // keeps the do/while below from copying with a bogus range and keeps
+   // c->buf from being moved backwards.
+   if (len > 0) {
+      if (buf != c->buf) {
+         // the chunk was built in the scratch buffer: copy it into place
+         const char *s, *se;
+         char *d;
+         d = c->buf;
+         s = buf;
+         se = buf + len;
+         do {
+            *d++ = *s++;
+         } while (s < se);
+      }
+      c->buf += len;
+      c->count -= len;
+   }
+
+   if (c->count <= 0)
+      return c->tmp;
+   return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len ) // measure-only callback: adds up chunk lengths and stores nothing
+{
+   stbsp__context * c = (stbsp__context*)user;
+   (void) sizeof(buf); // buf is not otherwise used; presumably here to silence an unused-parameter warning
+
+   c->length += len;
+   return c->tmp; // always hand back the scratch buffer; the formatted bytes are never kept
+}
+
+// Bounded formatted print.
+//   buf   - destination buffer (not touched when count <= 0)
+//   count - capacity of buf in bytes, including room for the terminating zero
+//   fmt   - format string
+//   va    - argument list
+// Returns the total length accumulated by the output callback, which can be
+// larger than count when the output was truncated.
+//
+// When count <= 0 there is no room for even the terminator, so the output is
+// only measured. Previously a zero count with a non-null buf (or any negative
+// count) reached the "zero-terminate" step with l == count - 1 < 0 and wrote
+// in front of the buffer.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   if ( count <= 0 )
+   {
+      // measure only: the counting callback never reads c.buf or c.count
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+      c.length = 0;
+
+      // the zero-length callback call picks the first chunk buffer (buf itself or c.tmp)
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1; // count >= 1 on this path, so l stays inside buf
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+// Variadic front end for the bounded formatter: packages the trailing
+// arguments into a va_list and returns whatever vsnprintf returns.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{
+   va_list args;
+   int total;
+
+   va_start(args, fmt);
+   total = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, args);
+   va_end(args);
+
+   return total;
+}
+
+// va_list form of sprintf: runs the core formatter with no callback and no
+// user pointer, writing straight into buf.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va)
+{
+   int written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+   return written;
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies the first 8 bytes of src into dest one char at a time, so the bit pattern moves between types without a strict-aliasing-unsafe pointer cast (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src)                   \
+   {                                               \
+      int cn;                                      \
+      for (cn = 0; cn < 8; cn++)                   \
+         ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+   }
+
+// get float info: *bits receives the low 52 bits of the double's bit pattern, *expo the next 11 bits minus 1023, and the return value is the top (sign) bit as 0 or 1
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+   double d;
+   stbsp__int64 b = 0;
+
+   // take a local copy of the value; its bytes are copied into b below (no rounding is done here)
+   d = value;
+
+   STBSP__COPYFP(b, d);
+
+   *bits = b & ((((stbsp__uint64)1) << 52) - 1);
+   *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023);
+
+   return (stbsp__int32)((stbsp__uint64) b >> 63);
+}
+
+static double const stbsp__bot[23] = { // 10^0 .. 10^22, indexed by the exponent
+   1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+   1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = { // 10^-1 .. 10^-22, so index i holds 10^-(i+1)
+   1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+   1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = { // low-order companion of stbsp__negbot (same indexing); NOTE(review): presumably the representation error of each entry - confirm
+   -5.551115123125783e-018,  -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+   4.5251888174113739e-024,  -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028,  2.0113352370744385e-029,
+   -3.0373745563400371e-030, 1.1806906454401013e-032,  -7.7705399876661076e-032, 2.0902213275965398e-033,  -7.1542424054621921e-034, -7.1542424054621926e-035,
+   2.4754073164739869e-036,  5.4846728545790429e-037,  9.2462547772103625e-038,  -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = { // 10^23, 10^46, ... 10^299: index i holds 10^(23*(i+1))
+   1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = { // 10^-23, 10^-46, ... 10^-299: index i holds 10^-(23*(i+1))
+   1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = { // low-order companion of stbsp__top (same indexing); NOTE(review): presumably the representation error of each entry - confirm
+   8388608,
+   6.8601809640529717e+028,
+   -7.253143638152921e+052,
+   -4.3377296974619174e+075,
+   -1.5559416129466825e+098,
+   -3.2841562489204913e+121,
+   -3.7745893248228135e+144,
+   -1.7356668416969134e+167,
+   -3.8893577551088374e+190,
+   -9.9566444326005119e+213,
+   6.3641293062232429e+236,
+   -5.2069140800249813e+259,
+   -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = { // low-order companion of stbsp__negtop (same indexing); NOTE(review): presumably the representation error of each entry - confirm
+   3.9565301985100693e-040,  -2.299904345391321e-063,  3.6506201437945798e-086,  1.1875228833981544e-109,
+   -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178,  -5.7778912386589953e-201,
+   7.4997100559334532e-224,  -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+   8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200) // old MSVC branch: same table written without the ULL literal suffix
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19, indexed by the exponent
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000,
+   100000000000,
+   1000000000000,
+   10000000000000,
+   100000000000000,
+   1000000000000000,
+   10000000000000000,
+   100000000000000000,
+   1000000000000000000,
+   10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) // numerically 10^18, i.e. the smallest 19-digit value
+#else
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19, indexed by the exponent
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000ULL,
+   100000000000ULL,
+   1000000000000ULL,
+   10000000000000ULL,
+   100000000000000ULL,
+   1000000000000000ULL,
+   10000000000000000ULL,
+   100000000000000000ULL,
+   1000000000000000000ULL,
+   10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL) // numerically 10^18, i.e. the smallest 19-digit value
+#endif
+
+#define stbsp__ddmulthi(oh, ol, xh, yh) /* oh = xh * yh; ol = (sum of the four partial products of the split operands) - oh */ \
+   {                                                               \
+      double ahi = 0, alo, bhi = 0, blo;                           \
+      stbsp__int64 bt;                                             \
+      oh = xh * yh;                                                \
+      STBSP__COPYFP(bt, xh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* clear the low 27 bits of xh's bit pattern */ \
+      STBSP__COPYFP(ahi, bt);                                      \
+      alo = xh - ahi;                                              \
+      STBSP__COPYFP(bt, yh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* same split for yh */   \
+      STBSP__COPYFP(bhi, bt);                                      \
+      blo = yh - bhi;                                              \
+      ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+   }
+
+#define stbsp__ddtoS64(ob, xh, xl) /* ob = integer conversion of the pair (xh, xl): truncate xh, then add the truncated leftover */ \
+   {                                        \
+      double ahi = 0, alo, vh, t;           \
+      ob = (stbsp__int64)xh;                \
+      vh = (double)ob;                      \
+      ahi = (xh - vh);                      \
+      t = (ahi - xh);                       \
+      alo = (xh - (ahi - t)) - (vh + t);    \
+      ob += (stbsp__int64)(ahi + alo + xl); \
+   }
+
+#define stbsp__ddrenorm(oh, ol) /* fold ol into oh: oh becomes oh + ol, ol keeps what that sum did not absorb */ \
+   {                            \
+      double s;                 \
+      s = oh + ol;              \
+      ol = ol - (s - oh);       \
+      oh = s;                   \
+   }
+
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); // adds both cross terms to ol; the oh argument is not used
+
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); // adds a single cross term to ol; the oh argument is not used
+
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // computes d * 10^power as a high/low pair of doubles in *ohi / *olo; power can be -323 to +350
+{
+   double ph, pl;
+   if ((power >= 0) && (power <= 22)) { // small non-negative power: one multiply by a stbsp__bot entry
+      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
+   } else {
+      stbsp__int32 e, et, eb;
+      double p2h, p2l;
+
+      e = power;
+      if (power < 0)
+         e = -e; // e = |power|
+      et = (e * 0x2c9) >> 14; /* approximately e / 23 (0x2c9 / 2^14 is close to 1/23); a quotient, not a remainder */
+      if (et > 13) // the top/negtop tables have 13 entries
+         et = 13;
+      eb = e - (et * 23); // what is left of e after et blocks of 23
+
+      ph = d;
+      pl = 0.0;
+      if (power < 0) {
+         if (eb) {
+            --eb; // stbsp__negbot[i] holds 10^-(i+1)
+            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__negtop[i] holds 10^-(23*(i+1))
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      } else {
+         if (eb) {
+            e = eb;
+            if (eb > 22) // stbsp__bot stops at 10^22, so a larger remainder is applied in two steps
+               eb = 22;
+            e -= eb; // e now holds the part of the remainder above 22 (0 if none)
+            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+            if (e) {
+               stbsp__ddrenorm(ph, pl);
+               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
+               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+               ph = p2h;
+               pl = p2l;
+            }
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__top[i] holds 10^(23*(i+1))
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      }
+   }
+   stbsp__ddrenorm(ph, pl);
+   *ohi = ph;
+   *olo = pl;
+}
+
+// Converts a double to a string of decimal digits.
+//   start       - receives a pointer to the first digit (inside out, or a literal for NaN/Inf)
+//   len         - receives the number of digits
+//   out         - scratch buffer; digits are built backwards from out + 64
+//   decimal_pos - receives the position of the decimal point relative to the
+//                 digits, or STBSP__SPECIAL for +/-INF and NaN
+//   value       - the number to convert
+//   frac_digits - number of digits after the decimal point to round to; when
+//                 bit 0x80000000 is set, the remaining 31 bits are instead a
+//                 count measured from the first significant digit (the form
+//                 used for %g and %e)
+// Returns 1 if the sign bit of value is set, 0 otherwise.
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047);
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63);
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get as much precision from the double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   // (mask is the full low 31 bits: only the 0x80000000 flag is stripped; the
+   //  previous 0x7ffffff was one nibble short and also dropped bits 27..30)
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7fffffff) + 1) : (tens + frac_digits);
+   if ((frac_digits < 24)) {
+      stbsp__uint32 dg = 1;
+      // count the decimal digits in bits, starting the search at 10 for large values
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) {
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens; // rounding carried into a new leading digit
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string
+   out += 64;
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      // emit two digits at a time from the digit-pair table
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         // the pairwise output can leave one leading '0'; drop it
+         if ((e) && (out[0] == '0')) {
+            ++out;
+            --e;
+         }
+         break;
+      }
+      // more chunks follow: zero-pad this chunk to its full 8 digits
+      while (out != o) {
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/

+ 0 - 46
vendor/stb/src/stb_truetype_wasm.c

@@ -1,46 +0,0 @@
-#include <stddef.h>
-
-void *stbtt_malloc(size_t size);
-void stbtt_free(void *ptr);
-
-void stbtt_qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*));
-
-double stbtt_floor(double x);
-double stbtt_ceil(double x);
-double stbtt_sqrt(double x);
-double stbtt_pow(double x, double y);
-double stbtt_fmod(double x, double y);
-double stbtt_cos(double x);
-double stbtt_acos(double x);
-double stbtt_fabs(double x);
-
-unsigned long stbtt_strlen(const char *str);
-
-void *memcpy(void *dst, const void *src, size_t count);
-void *memset(void *dst, int x, size_t count);
-
-#define STBRP_SORT stbtt_qsort
-#define STBRP_ASSERT(condition) ((void)0)
-
-#define STBTT_malloc(x,u)  ((void)(u),stbtt_malloc(x))
-#define STBTT_free(x,u)    ((void)(u),stbtt_free(x))
-
-#define STBTT_assert(condition) ((void)0)
-
-#define STBTT_ifloor(x)   ((int) stbtt_floor(x))
-#define STBTT_iceil(x)    ((int) stbtt_ceil(x))
-#define STBTT_sqrt(x)      stbtt_sqrt(x)
-#define STBTT_pow(x,y)     stbtt_pow(x,y)
-#define STBTT_fmod(x,y)    stbtt_fmod(x,y)
-#define STBTT_cos(x)       stbtt_cos(x)
-#define STBTT_acos(x)      stbtt_acos(x)
-#define STBTT_fabs(x)      stbtt_fabs(x)
-#define STBTT_strlen(x)    stbtt_strlen(x)
-#define STBTT_memcpy       memcpy
-#define STBTT_memset       memset
-
-#define STB_RECT_PACK_IMPLEMENTATION
-#include "stb_rect_pack.h"
-
-#define STB_TRUETYPE_IMPLEMENTATION
-#include "stb_truetype.h"

+ 5 - 2
vendor/stb/truetype/stb_truetype.odin

@@ -8,6 +8,7 @@ LIB :: (
 	     "../lib/stb_truetype.lib"      when ODIN_OS == .Windows
 	     "../lib/stb_truetype.lib"      when ODIN_OS == .Windows
 	else "../lib/stb_truetype.a"        when ODIN_OS == .Linux
 	else "../lib/stb_truetype.a"        when ODIN_OS == .Linux
 	else "../lib/darwin/stb_truetype.a" when ODIN_OS == .Darwin
 	else "../lib/darwin/stb_truetype.a" when ODIN_OS == .Darwin
+	else "../lib/stb_truetype_wasm.o"   when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 	else ""
 	else ""
 )
 )
 
 
@@ -15,10 +16,12 @@ when LIB != "" {
 	when !#exists(LIB) {
 	when !#exists(LIB) {
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 		#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`")
 	}
 	}
+}
 
 
-	foreign import stbtt { LIB }
-} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
 	foreign import stbtt "../lib/stb_truetype_wasm.o"
 	foreign import stbtt "../lib/stb_truetype_wasm.o"
+} else when LIB != "" {
+	foreign import stbtt { LIB }
 } else {
 } else {
 	foreign import stbtt "system:stb_truetype"
 	foreign import stbtt "system:stb_truetype"
 }
 }

+ 1 - 79
vendor/stb/truetype/stb_truetype_wasm.odin

@@ -1,82 +1,4 @@
 #+build wasm32, wasm64p32
 #+build wasm32, wasm64p32
 package stb_truetype
 package stb_truetype
 
 
-import "base:builtin"
-import "base:intrinsics"
-import "base:runtime"
-
-import "core:c"
-import "core:math"
-import "core:slice"
-import "core:sort"
-
-@(require, linkage="strong", link_name="stbtt_malloc")
-malloc :: proc "c" (size: uint) -> rawptr {
-	context = runtime.default_context()
-	ptr, _ := runtime.mem_alloc_non_zeroed(int(size))
-	return raw_data(ptr)
-}
-
-@(require, linkage="strong", link_name="stbtt_free")
-free :: proc "c" (ptr: rawptr) {
-	context = runtime.default_context()
-	builtin.free(ptr)
-}
-
-@(require, linkage="strong", link_name="stbtt_qsort")
-qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) {
-	context = runtime.default_context()
-
-	Inputs :: struct {
-		base: rawptr,
-		num:  uint,
-		size: uint,
-		cmp:  proc "c" (a, b: rawptr) -> i32,
-	}
-
-	sort.sort({
-		collection = &Inputs{base, num, size, cmp},
-		len = proc(it: sort.Interface) -> int {
-			inputs := (^Inputs)(it.collection)
-			return int(inputs.num)
-		},
-		less = proc(it: sort.Interface, i, j: int) -> bool {
-			inputs := (^Inputs)(it.collection)
-			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size)))
-			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size)))
-			return inputs.cmp(a, b) < 0
-		},
-		swap = proc(it: sort.Interface, i, j: int) {
-			inputs := (^Inputs)(it.collection)
-
-			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size)))
-			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size)))
-
-			slice.ptr_swap_non_overlapping(a, b, int(inputs.size))
-		},
-	})
-}
-
-@(require, linkage="strong", link_name="stbtt_floor")
-floor :: proc "c" (x: f64) -> f64 { return math.floor(x) }
-@(require, linkage="strong", link_name="stbtt_ceil")
-ceil :: proc "c" (x: f64) -> f64 { return math.ceil(x) }
-@(require, linkage="strong", link_name="stbtt_sqrt")
-sqrt :: proc "c" (x: f64) -> f64 { return math.sqrt(x) }
-@(require, linkage="strong", link_name="stbtt_pow")
-pow :: proc "c" (x, y: f64) -> f64 { return math.pow(x, y) }
-@(require, linkage="strong", link_name="stbtt_fmod")
-fmod :: proc "c" (x, y: f64) -> f64 { return math.mod(x, y) }
-@(require, linkage="strong", link_name="stbtt_cos")
-cos :: proc "c" (x: f64) -> f64 { return math.cos(x) }
-@(require, linkage="strong", link_name="stbtt_acos")
-acos :: proc "c" (x: f64) -> f64 { return math.acos(x) }
-@(require, linkage="strong", link_name="stbtt_fabs")
-fabs :: proc "c" (x: f64) -> f64 { return math.abs(x) }
-
-@(require, linkage="strong", link_name="stbtt_strlen")
-strlen :: proc "c" (str: cstring) -> c.ulong { return c.ulong(len(str)) }
-
-// NOTE: defined in runtime.
-// void *memcpy(void *dst, const void *src, size_t count);
-// void *memset(void *dst, int x, size_t count);
+@(require) import _ "vendor:libc"

Some files were not shown because too many files changed in this diff