Browse Source

Merge branch 'master' of https://github.com/odin-lang/Odin

gingerBill 4 years ago
parent
commit
fbbd43a6d8

+ 154 - 0
core/math/big/api.odin

@@ -0,0 +1,154 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file collects public proc maps and their aliases.
+/*
+
+*/
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+	                                    Basic arithmetic.
+	                                    See `public.odin`.
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+*/
+
+/*
+	High-level addition. Handles sign.
+*/
+add :: proc {
+	/*
+		int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error)
+	*/
+	int_add,
+	/*
+		Adds the unsigned `DIGIT` immediate to an `Int`, such that the
+		`DIGIT` doesn't have to be turned into an `Int` first.
+
+		int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error)
+	*/
+	int_add_digit,
+};
+
+/*
+	err = sub(dest, a, b);
+*/
+sub :: proc {
+	/*
+		int_sub :: proc(dest, a, b: ^Int) -> (err: Error)
+	*/
+	int_sub,
+	/*
+		int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error)
+	*/
+	int_sub_digit,
+};
+
+/*
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+	                                        Comparisons.
+	                                    See `compare.odin`.
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+*/
+
+is_initialized :: proc {
+	/*
+		int_is_initialized :: proc(a: ^Int) -> bool
+	*/
+	int_is_initialized,
+};
+
+is_zero :: proc {
+	/*
+		int_is_zero :: proc(a: ^Int) -> bool
+	*/
+	int_is_zero,
+};
+
+is_positive :: proc {
+	/*
+		int_is_positive :: proc(a: ^Int) -> bool
+	*/
+	int_is_positive,
+};
+is_pos :: is_positive;
+
+is_negative :: proc {
+	/*
+		int_is_negative :: proc(a: ^Int) -> bool
+	*/
+	int_is_negative,
+};
+is_neg :: is_negative;
+
+is_even :: proc {
+	/*
+		int_is_even :: proc(a: ^Int) -> bool
+	*/
+	int_is_even,
+};
+
+is_odd :: proc {
+	/*
+		int_is_odd :: proc(a: ^Int) -> bool
+	*/
+	int_is_odd,
+};
+
+is_power_of_two :: proc {
+	/*
+		platform_int_is_power_of_two :: proc(a: int) -> bool
+	*/
+	platform_int_is_power_of_two,
+	/*
+		int_is_power_of_two :: proc(a: ^Int) -> (res: bool)
+	*/
+	int_is_power_of_two,
+};
+
+compare :: proc {
+	/*
+		Compare two `Int`s, signed.
+
+		int_compare :: proc(a, b: ^Int) -> Comparison_Flag
+	*/
+	int_compare,
+	/*
+		Compare an `Int` to an unsigned number upto the size of the backing type.
+
+		int_compare_digit :: proc(a: ^Int, u: DIGIT) -> Comparison_Flag
+	*/
+	int_compare_digit,
+};
+cmp :: compare;
+
+compare_magnitude :: proc {
+	/*
+		Compare the magnitude of two `Int`s, unsigned.
+	*/
+	int_compare_magnitude,
+};
+cmp_mag :: compare_magnitude;
+
+/*
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+	                              Initialization and other helpers.
+	                                    See `helpers.odin`.
+	=== === === === === === === === === === === === === === === === === === === === === === === ===
+*/
+
+destroy :: proc {
+	/*
+		Clears one or more `Int`s and dellocates their backing memory.
+
+		int_destroy :: proc(integers: ..^Int)
+	*/
+	int_destroy,
+};
+
+

+ 8 - 0
core/math/big/build.bat

@@ -0,0 +1,8 @@
+@echo off
+odin run . -vet
+: -o:size
+:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check  && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:size  && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:speed && python test.py -fast-tests

+ 206 - 0
core/math/big/common.odin

@@ -0,0 +1,206 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+*/
+
+import "core:intrinsics"
+
+/*
+	TODO: Make the tunables runtime adjustable where practical.
+
+	This allows to benchmark and/or setting optimized values for a certain CPU without recompiling.
+*/
+
+/*
+	==========================    TUNABLES     ==========================
+
+	`initialize_constants` returns `#config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF)`
+	and we initialize this cutoff that way so that the procedure is used and called,
+	because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF.
+
+	`initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured.
+
+*/
+MUL_KARATSUBA_CUTOFF := initialize_constants();
+SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
+MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
+SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
+
+/*
+	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
+
+	TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors.
+
+	It would also be cool if we collected some data across various processor families.
+	This would let uss set reasonable defaults at runtime as this library initializes
+	itself by using `cpuid` or the ARM equivalent.
+
+	IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and
+	debugged where necessary.
+*/
+
+_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF,  80);
+_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120);
+_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MUL_TOOM_CUTOFF,      350);
+_DEFAULT_SQR_TOOM_CUTOFF      :: #config(SQR_TOOM_CUTOFF,      400);
+
+
+MAX_ITERATIONS_ROOT_N := 500;
+
+/*
+	Largest `N` for which we'll compute `N!`
+*/
+FACTORIAL_MAX_N       := 1_000_000;
+
+/*
+	Cutoff to switch to int_factorial_binary_split, and its max recursion level.
+*/
+FACTORIAL_BINARY_SPLIT_CUTOFF         := 6100;
+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100;
+
+
+/*
+	We don't allow these to be switched at runtime for two reasons:
+
+	1) 32-bit and 64-bit versions of procedures use different types for their storage,
+		so we'd have to double the number of procedures, and they couldn't interact.
+
+	2) Optimizations thanks to precomputed masks wouldn't work.
+*/
+MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false);
+MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false);
+when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
+
+_LOW_MEMORY           :: #config(BIGINT_SMALL_MEMORY, false);
+when _LOW_MEMORY {
+	_DEFAULT_DIGIT_COUNT :: 8;
+} else {
+	_DEFAULT_DIGIT_COUNT :: 32;
+}
+
+/*
+	=======================    END OF TUNABLES     =======================
+*/
+
+Sign :: enum u8 {
+	Zero_or_Positive = 0,
+	Negative         = 1,
+};
+
+Int :: struct {
+	used:  int,
+	digit: [dynamic]DIGIT,
+	sign:  Sign,
+	flags: Flags,
+};
+
+Flag :: enum u8 {
+	NaN,
+	Inf,
+	Immutable,
+};
+
+Flags :: bit_set[Flag; u8];
+
+/*
+	Errors are a strict superset of runtime.Allocation_Error.
+*/
+Error :: enum int {
+	Okay                    = 0,
+	Out_Of_Memory           = 1,
+	Invalid_Pointer         = 2,
+	Invalid_Argument        = 3,
+
+	Assignment_To_Immutable = 4,
+	Max_Iterations_Reached  = 5,
+	Buffer_Overflow         = 6,
+	Integer_Overflow        = 7,
+
+	Division_by_Zero        = 8,
+	Math_Domain_Error       = 9,
+
+	Unimplemented           = 127,
+};
+
+Error_String :: #partial [Error]string{
+	.Out_Of_Memory           = "Out of memory",
+	.Invalid_Pointer         = "Invalid pointer",
+	.Invalid_Argument        = "Invalid argument",
+
+	.Assignment_To_Immutable = "Assignment to immutable",
+	.Max_Iterations_Reached  = "Max iterations reached",
+	.Buffer_Overflow         = "Buffer overflow",
+	.Integer_Overflow        = "Integer overflow",
+
+	.Division_by_Zero        = "Division by zero",
+	.Math_Domain_Error       = "Math domain error",
+
+	.Unimplemented           = "Unimplemented",
+};
+
+Primality_Flag :: enum u8 {
+	Blum_Blum_Shub = 0,	/* BBS style prime */
+	Safe           = 1,	/* Safe prime (p-1)/2 == prime */
+	Second_MSB_On  = 3,  /* force 2nd MSB to 1 */
+};
+Primality_Flags :: bit_set[Primality_Flag; u8];
+
+/*
+	How do we store the Ints?
+
+	Minimum number of available digits in `Int`, `_DEFAULT_DIGIT_COUNT` >= `_MIN_DIGIT_COUNT`
+	- Must be at least 3 for `_div_school`.
+	- Must be large enough such that `init_integer` can store `u128` in the `Int` without growing.
+ */
+
+_MIN_DIGIT_COUNT :: max(3, ((size_of(u128) + _DIGIT_BITS) - 1) / _DIGIT_BITS);
+#assert(_DEFAULT_DIGIT_COUNT >= _MIN_DIGIT_COUNT);
+
+/*
+	Maximum number of digits.
+	- Must be small enough such that `_bit_count` does not overflow.
+ 	- Must be small enough such that `_radix_size` for base 2 does not overflow.
+	`_radix_size` needs two additional bytes for zero termination and sign.
+*/
+_MAX_BIT_COUNT   :: (max(int) - 2);
+_MAX_DIGIT_COUNT :: _MAX_BIT_COUNT / _DIGIT_BITS;
+
+when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
+	/*
+		We can use u128 as an intermediary.
+	*/
+	DIGIT        :: distinct u64;
+	_WORD        :: distinct u128;
+} else {
+	DIGIT        :: distinct u32;
+	_WORD        :: distinct u64;
+}
+#assert(size_of(_WORD) == 2 * size_of(DIGIT));
+
+_DIGIT_TYPE_BITS :: 8 * size_of(DIGIT);
+_WORD_TYPE_BITS  :: 8 * size_of(_WORD);
+
+_DIGIT_BITS      :: _DIGIT_TYPE_BITS - 4;
+_WORD_BITS       :: 2 * _DIGIT_BITS;
+
+_MASK            :: (DIGIT(1) << DIGIT(_DIGIT_BITS)) - DIGIT(1);
+_DIGIT_MAX       :: _MASK;
+_MAX_COMBA       :: 1 <<  (_WORD_TYPE_BITS - (2 * _DIGIT_BITS))     ;
+_WARRAY          :: 1 << ((_WORD_TYPE_BITS - (2 * _DIGIT_BITS)) + 1);
+
+Order :: enum i8 {
+	LSB_First = -1,
+	MSB_First =  1,
+};
+
+Endianness :: enum i8 {
+   Little   = -1,
+   Platform =  0,
+   Big      =  1,
+};

+ 246 - 0
core/math/big/example.odin

@@ -0,0 +1,246 @@
+//+ignore
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	A BigInt implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+*/
+
+import "core:fmt"
+import "core:mem"
+
+print_configation :: proc() {
+	fmt.printf(
+`
+Configuration:
+	_DIGIT_BITS                           %v
+	_MIN_DIGIT_COUNT                      %v
+	_MAX_DIGIT_COUNT                      %v
+	_DEFAULT_DIGIT_COUNT                  %v
+	_MAX_COMBA                            %v
+	_WARRAY                               %v
+Runtime tunable:
+	MUL_KARATSUBA_CUTOFF                  %v
+	SQR_KARATSUBA_CUTOFF                  %v
+	MUL_TOOM_CUTOFF                       %v
+	SQR_TOOM_CUTOFF                       %v
+	MAX_ITERATIONS_ROOT_N                 %v
+	FACTORIAL_MAX_N                       %v
+	FACTORIAL_BINARY_SPLIT_CUTOFF         %v
+	FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v
+
+`, _DIGIT_BITS,
+_MIN_DIGIT_COUNT,
+_MAX_DIGIT_COUNT,
+_DEFAULT_DIGIT_COUNT,
+_MAX_COMBA,
+_WARRAY,
+MUL_KARATSUBA_CUTOFF,
+SQR_KARATSUBA_CUTOFF,
+MUL_TOOM_CUTOFF,
+SQR_TOOM_CUTOFF,
+MAX_ITERATIONS_ROOT_N,
+FACTORIAL_MAX_N,
+FACTORIAL_BINARY_SPLIT_CUTOFF,
+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS,
+);
+
+}
+
+print :: proc(name: string, a: ^Int, base := i8(10), print_name := true, newline := true, print_extra_info := false) {
+	assert_if_nil(a);
+
+	as, err := itoa(a, base);
+	defer delete(as);
+
+	cb := internal_count_bits(a);
+	if print_name {
+		fmt.printf("%v", name);
+	}
+	if err != nil {
+		fmt.printf("%v (error: %v | %v)", name, err, a);
+	}
+	fmt.printf("%v", as);
+	if print_extra_info {
+		fmt.printf(" (base: %v, bits: %v (digits: %v), flags: %v)", base, cb, a.used, a.flags);
+	}
+	if newline {
+		fmt.println();
+	}
+}
+
+int_to_byte :: proc(v: ^Int) {
+	err: Error;
+	size: int;
+	print("v: ", v);
+	fmt.println();
+
+	t := &Int{};
+	defer destroy(t);
+
+	if size, err = int_to_bytes_size(v); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b1 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_big(v, b1);
+	int_from_bytes_big(t, b1);
+	fmt.printf("big: %v | err: %v\n", b1, err);
+
+	int_from_bytes_big(t, b1);
+	if internal_cmp_mag(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+
+	if size, err = int_to_bytes_size(v); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b2 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_big_python(v, b2);
+	fmt.printf("big python: %v | err: %v\n", b2, err);
+
+	if err == nil {
+		int_from_bytes_big_python(t, b2);
+		if internal_cmp_mag(t, v) != 0 {
+			print("\tError parsing t: ", t);
+		}
+	}
+
+	if size, err = int_to_bytes_size(v, true); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b3 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_big(v, b3, true);
+	fmt.printf("big signed: %v | err: %v\n", b3, err);
+
+	int_from_bytes_big(t, b3, true);
+	if internal_cmp(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+
+	if size, err = int_to_bytes_size(v, true); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b4 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_big_python(v, b4, true);
+	fmt.printf("big signed python: %v | err: %v\n", b4, err);
+
+	int_from_bytes_big_python(t, b4, true);
+	if internal_cmp(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+}
+
+int_to_byte_little :: proc(v: ^Int) {
+	err: Error;
+	size: int;
+	print("v: ", v);
+	fmt.println();
+
+	t := &Int{};
+	defer destroy(t);
+
+	if size, err = int_to_bytes_size(v); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b1 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_little(v, b1);
+	fmt.printf("little: %v | err: %v\n", b1, err);
+
+	int_from_bytes_little(t, b1);
+	if internal_cmp_mag(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+
+	if size, err = int_to_bytes_size(v); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b2 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_little_python(v, b2);
+	fmt.printf("little python: %v | err: %v\n", b2, err);
+
+	if err == nil {
+		int_from_bytes_little_python(t, b2);
+		if internal_cmp_mag(t, v) != 0 {
+			print("\tError parsing t: ", t);
+		}
+	}
+
+	if size, err = int_to_bytes_size(v, true); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b3 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_little(v, b3, true);
+	fmt.printf("little signed: %v | err: %v\n", b3, err);
+
+	int_from_bytes_little(t, b3, true);
+	if internal_cmp(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+
+	if size, err = int_to_bytes_size(v, true); err != nil {
+		fmt.printf("int_to_bytes_size returned: %v\n", err);
+		return;
+	}
+	b4 := make([]u8, size, context.temp_allocator);
+	err = int_to_bytes_little_python(v, b4, true);
+	fmt.printf("little signed python: %v | err: %v\n", b4, err);
+
+	int_from_bytes_little_python(t, b4, true);
+	if internal_cmp(t, v) != 0 {
+		print("\tError parsing t: ", t);
+	}
+}
+
+demo :: proc() {
+	a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer destroy(a, b, c, d, e, f);
+
+	set(a, 64336);
+	fmt.println("--- --- --- ---");
+	int_to_byte(a);
+	fmt.println("--- --- --- ---");
+	int_to_byte_little(a);
+	fmt.println("--- --- --- ---");
+
+	set(b, -64336);
+	fmt.println("--- --- --- ---");
+	int_to_byte(b);
+	fmt.println("--- --- --- ---");
+	int_to_byte_little(b);
+	fmt.println("--- --- --- ---");
+}
+
+main :: proc() {
+	ta := mem.Tracking_Allocator{};
+	mem.tracking_allocator_init(&ta, context.allocator);
+	context.allocator = mem.tracking_allocator(&ta);
+
+	demo();
+
+	print_configation();
+
+	print_timings();
+
+	if len(ta.allocation_map) > 0 {
+		for _, v in ta.allocation_map {
+			fmt.printf("Leaked %v bytes @ %v\n", v.size, v.location);
+		}
+	}
+	if len(ta.bad_free_array) > 0 {
+		fmt.println("Bad frees:");
+		for v in ta.bad_free_array {
+			fmt.println(v);
+		}
+	}
+}

+ 806 - 0
core/math/big/helpers.odin

@@ -0,0 +1,806 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+*/
+
+import "core:intrinsics"
+import rnd "core:math/rand"
+
+// import "core:fmt"
+
+/*
+	TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere.
+*/
+
+/*
+	Deallocates the backing memory of one or more `Int`s.
+*/
+int_destroy :: proc(integers: ..^Int) {
+	integers := integers;
+
+	for a in &integers {
+		assert_if_nil(a);
+	}
+	#force_inline internal_int_destroy(..integers);
+}
+
+/*
+	Helpers to set an `Int` to a specific value.
+*/
+int_set_from_integer :: proc(dest: ^Int, src: $T, minimize := false, allocator := context.allocator) -> (err: Error)
+	where intrinsics.type_is_integer(T) {
+	context.allocator = allocator;
+	src := src;
+
+	/*
+		Check that `src` is usable and `dest` isn't immutable.
+	*/
+	assert_if_nil(dest);
+	if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; }
+
+	return #force_inline internal_int_set_from_integer(dest, src, minimize);
+}
+
+set :: proc { int_set_from_integer, int_copy, int_atoi, };
+
+/*
+	Copy one `Int` to another.
+*/
+int_copy :: proc(dest, src: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		If dest == src, do nothing
+	*/
+	if (dest == src) { return nil; }
+
+	/*
+		Check that `src` is usable and `dest` isn't immutable.
+	*/
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; }
+	if err = #force_inline internal_error_if_immutable(dest);    err != nil { return err; }
+
+	return #force_inline internal_int_copy(dest, src, minimize);
+}
+copy :: proc { int_copy, };
+
+/*
+	In normal code, you can also write `a, b = b, a`.
+	However, that only swaps within the current scope.
+	This helper swaps completely.
+*/
+int_swap :: proc(a, b: ^Int) {
+	assert_if_nil(a, b);
+	#force_inline internal_swap(a, b);
+}
+swap :: proc { int_swap, };
+
+/*
+	Set `dest` to |`src`|.
+*/
+int_abs :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `src` is usable and `dest` isn't immutable.
+	*/
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; }
+	if err = #force_inline internal_error_if_immutable(dest);    err != nil { return err; }
+
+	return #force_inline internal_int_abs(dest, src);
+}
+
+platform_abs :: proc(n: $T) -> T where intrinsics.type_is_integer(T) {
+	return n if n >= 0 else -n;
+}
+abs :: proc{ int_abs, platform_abs, };
+
+/*
+	Set `dest` to `-src`.
+*/
+int_neg :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `src` is usable and `dest` isn't immutable.
+	*/
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; }
+	if err = #force_inline internal_error_if_immutable(dest);    err != nil { return err; }
+
+	return #force_inline internal_int_neg(dest, src);
+}
+neg :: proc { int_neg, };
+
+/*
+	Helpers to extract values from the `Int`.
+*/
+int_bitfield_extract_single :: proc(a: ^Int, offset: int, allocator := context.allocator) -> (bit: _WORD, err: Error) {
+	return #force_inline int_bitfield_extract(a, offset, 1, allocator);
+}
+
+int_bitfield_extract :: proc(a: ^Int, offset, count: int, allocator := context.allocator) -> (res: _WORD, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = #force_inline internal_clear_if_uninitialized(a); err != nil { return {}, err; }
+	return #force_inline   internal_int_bitfield_extract(a, offset, count);
+}
+
+/*
+	Resize backing store.
+*/
+shrink :: proc(a: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = #force_inline internal_clear_if_uninitialized(a); err != nil { return err; }
+	return #force_inline   internal_shrink(a);
+}
+
+int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_int_grow(a, digits, allow_shrink, allocator);
+}
+grow :: proc { int_grow, };
+
+/*
+	Clear `Int` and resize it to the default size.
+*/
+int_clear :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_int_clear(a, minimize, allocator);
+}
+clear :: proc { int_clear, };
+zero  :: clear;
+
+/*
+	Set the `Int` to 1 and optionally shrink it to the minimum backing size.
+*/
+int_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_one(a, minimize, allocator);
+}
+one :: proc { int_one, };
+
+/*
+	Set the `Int` to -1 and optionally shrink it to the minimum backing size.
+*/
+int_minus_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_minus_one(a, minimize, allocator);
+}
+minus_one :: proc { int_minus_one, };
+
+/*
+	Set the `Int` to Inf and optionally shrink it to the minimum backing size.
+*/
+int_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_inf(a, minimize, allocator);
+}
+inf :: proc { int_inf, };
+
+/*
+	Set the `Int` to -Inf and optionally shrink it to the minimum backing size.
+*/
+int_minus_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_minus_inf(a, minimize, allocator);
+}
+minus_inf :: proc { int_inf, };
+
+/*
+	Set the `Int` to NaN and optionally shrink it to the minimum backing size.
+*/
+int_nan :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_nan(a, minimize, allocator);
+}
+nan :: proc { int_nan, };
+
+power_of_two :: proc(a: ^Int, power: int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return #force_inline internal_int_power_of_two(a, power, allocator);
+}
+
+int_get_u128 :: proc(a: ^Int, allocator := context.allocator) -> (res: u128, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, u128, allocator);
+}
+get_u128 :: proc { int_get_u128, };
+
+int_get_i128 :: proc(a: ^Int, allocator := context.allocator) -> (res: i128, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, i128, allocator);
+}
+get_i128 :: proc { int_get_i128, };
+
+int_get_u64 :: proc(a: ^Int, allocator := context.allocator) -> (res: u64, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, u64, allocator);
+}
+get_u64 :: proc { int_get_u64, };
+
+int_get_i64 :: proc(a: ^Int, allocator := context.allocator) -> (res: i64, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, i64, allocator);
+}
+get_i64 :: proc { int_get_i64, };
+
+int_get_u32 :: proc(a: ^Int, allocator := context.allocator) -> (res: u32, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, u32, allocator);
+}
+get_u32 :: proc { int_get_u32, };
+
+int_get_i32 :: proc(a: ^Int, allocator := context.allocator) -> (res: i32, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	return int_get(a, i32, allocator);
+}
+get_i32 :: proc { int_get_i32, };
+
+/*
+	TODO: Think about using `count_bits` to check if the value could be returned completely,
+	and maybe return max(T), .Integer_Overflow if not?
+*/
+int_get :: proc(a: ^Int, $T: typeid, allocator := context.allocator) -> (res: T, err: Error) where intrinsics.type_is_integer(T) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return T{}, err; }
+	return #force_inline   internal_int_get(a, T);
+}
+get :: proc { int_get, };
+
+int_get_float :: proc(a: ^Int, allocator := context.allocator) -> (res: f64, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; }
+	return #force_inline   internal_int_get_float(a);
+}
+
+/*
+	Count bits in an `Int`.
+*/
+count_bits :: proc(a: ^Int, allocator := context.allocator) -> (count: int, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; }
+	return #force_inline   internal_count_bits(a), nil;
+}
+
+/*
+	Returns the number of trailing zeroes before the first one.
+	Differs from regular `ctz` in that 0 returns 0.
+*/
+int_count_lsb :: proc(a: ^Int, allocator := context.allocator) -> (count: int, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; }
+	return #force_inline   internal_int_count_lsb(a);
+}
+
+platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
+	where intrinsics.type_is_integer(T) && intrinsics.type_is_unsigned(T) {
+	return int(intrinsics.count_trailing_zeros(a)) if a > 0 else 0;
+}
+
+count_lsb :: proc { int_count_lsb, platform_count_lsb, };
+
+int_random_digit :: proc(r: ^rnd.Rand = nil) -> (res: DIGIT) {
+	when _DIGIT_BITS == 60 { // DIGIT = u64
+		return DIGIT(rnd.uint64(r)) & _MASK;
+	} else when _DIGIT_BITS == 28 { // DIGIT = u32
+		return DIGIT(rnd.uint32(r)) & _MASK;
+	} else {
+		panic("Unsupported DIGIT size.");
+	}
+
+	return 0; // We shouldn't get here.
+}
+
+int_rand :: proc(dest: ^Int, bits: int, r: ^rnd.Rand = nil, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	assert_if_nil(dest);
+	return #force_inline internal_int_rand(dest, bits, r, allocator);
+
+}
+rand :: proc { int_rand, };
+
+/*
+	Internal helpers.
+*/
+assert_initialized :: proc(a: ^Int, loc := #caller_location) {
+	assert_if_nil(a);
+	assert(is_initialized(a), "`Int` was not properly initialized.", loc);
+}
+
+zero_unused :: proc(dest: ^Int, old_used := -1) {
+	assert_if_nil(dest);
+	if ! #force_inline is_initialized(dest) { return; }
+
+	#force_inline internal_zero_unused(dest, old_used);
+}
+
+clear_if_uninitialized_single :: proc(arg: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(arg);
+	return #force_inline internal_clear_if_uninitialized_single(arg, allocator);
+}
+
+clear_if_uninitialized_multi :: proc(args: ..^Int, allocator := context.allocator) -> (err: Error) {
+	args := args;
+	assert_if_nil(..args);
+
+	for i in &args {
+		if err = #force_inline internal_clear_if_uninitialized_single(i, allocator); err != nil { return nil; }
+	}
+	return err;
+}
+clear_if_uninitialized :: proc {clear_if_uninitialized_single, clear_if_uninitialized_multi, };
+
+error_if_immutable_single :: proc(arg: ^Int) -> (err: Error) {
+	if arg != nil && .Immutable in arg.flags { return .Assignment_To_Immutable; }
+	return nil;
+}
+
+error_if_immutable_multi :: proc(args: ..^Int) -> (err: Error) {
+	for i in args {
+		if i != nil && .Immutable in i.flags { return .Assignment_To_Immutable; }
+	}
+	return nil;
+}
+error_if_immutable :: proc {error_if_immutable_single, error_if_immutable_multi, };
+
+/*
+	Allocates several `Int`s at once.
+*/
+int_init_multi :: proc(integers: ..^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(..integers);
+
+	integers := integers;
+	for a in &integers {
+		if err = #force_inline internal_clear(a, true, allocator); err != nil { return err; }
+	}
+	return nil;
+}
+
+init_multi :: proc { int_init_multi, };
+
+copy_digits :: proc(dest, src: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	digits := digits;
+	/*
+		Check that `src` is usable and `dest` isn't immutable.
+	*/
+	assert_if_nil(dest, src);
+	if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; }
+
+	digits = min(digits, len(src.digit), len(dest.digit));
+	return #force_inline internal_copy_digits(dest, src, digits);
+}
+
+/*
+	Trim unused digits.
+
+	This is used to ensure that leading zero digits are trimmed and the leading "used" digit will be non-zero.
+	Typically very fast.  Also fixes the sign if there are no more leading digits.
+*/
+clamp :: proc(a: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return err; }
+
+	for a.used > 0 && a.digit[a.used - 1] == 0 {
+		a.used -= 1;
+	}
+
+	if z, _ := is_zero(a); z {
+		a.sign = .Zero_or_Positive;
+	}
+	return nil;
+}
+
+
+/*
+	Size binary representation	
+*/
+int_to_bytes_size :: proc(a: ^Int, signed := false, allocator := context.allocator) -> (size_in_bytes: int, err: Error) {
+	assert_if_nil(a);
+	if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return {}, err; }
+
+	size_in_bits := internal_count_bits(a);
+
+	size_in_bytes  = (size_in_bits / 8);
+	size_in_bytes += 0 if size_in_bits % 8 == 0 else 1;
+	size_in_bytes += 1 if signed else 0;
+	return;
+}
+
+/*
+	Return Little Endian binary representation of `a`, either signed or unsigned.
+	If `a` is negative and we ask for the default unsigned representation, we return abs(a).
+*/
+int_to_bytes_little :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	size_in_bytes: int;
+
+	if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; }
+	l := len(buf);
+	if size_in_bytes > l { return .Buffer_Overflow; }
+
+	size_in_bits := internal_count_bits(a);
+	i := 0;
+	if signed {
+		buf[l - 1] = 1 if a.sign == .Negative else 0;
+	}
+	for offset := 0; offset < size_in_bits; offset += 8 {
+		bits, _ := internal_int_bitfield_extract(a, offset, 8);
+		buf[i] = u8(bits & 255); i += 1;
+	}
+	return;
+}
+
+/*
+	Return Big Endian binary representation of `a`, either signed or unsigned.
+	If `a` is negative and we ask for the default unsigned representation, we return abs(a).
+*/
+int_to_bytes_big :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	size_in_bytes: int;
+
+	if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; }
+	l := len(buf);
+	if size_in_bytes > l { return .Buffer_Overflow; }
+
+	size_in_bits := internal_count_bits(a);
+	i := l - 1;
+
+	if signed {
+		buf[0] = 1 if a.sign == .Negative else 0;
+	}
+	for offset := 0; offset < size_in_bits; offset += 8 {
+		bits, _ := internal_int_bitfield_extract(a, offset, 8);
+		buf[i] = u8(bits & 255); i -= 1;
+	}
+	return;
+}
+
+/*
+	Return Python 3.x compatible Little Endian binary representation of `a`, either signed or unsigned.
+	If `a` is negative when asking for an unsigned number, we return an error like Python does.
+*/
+int_to_bytes_little_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	size_in_bytes: int;
+
+	if !signed && a.sign == .Negative { return .Invalid_Argument; }
+
+	l := len(buf);
+	if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; }
+	if size_in_bytes > l              { return .Buffer_Overflow;  }
+
+	if a.sign == .Negative {
+		t := &Int{};
+		defer destroy(t);
+		if err = internal_complement(t, a, allocator); err != nil { return err; }
+
+		size_in_bits := internal_count_bits(t);
+		i := 0;
+		for offset := 0; offset < size_in_bits; offset += 8 {
+			bits, _ := internal_int_bitfield_extract(t, offset, 8);
+			buf[i] = 255 - u8(bits & 255); i += 1;
+		}
+		buf[l-1] = 255;
+	} else {
+		size_in_bits := internal_count_bits(a);
+		i := 0;
+		for offset := 0; offset < size_in_bits; offset += 8 {
+			bits, _ := internal_int_bitfield_extract(a, offset, 8);
+			buf[i] = u8(bits & 255); i += 1;
+		}
+	}
+	return;
+}
+
+/*
+	Return Python 3.x compatible Big Endian binary representation of `a`, either signed or unsigned.
+	If `a` is negative when asking for an unsigned number, we return an error like Python does.
+*/
+int_to_bytes_big_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	size_in_bytes: int;
+
+	if !signed && a.sign == .Negative { return .Invalid_Argument; }
+	if a.sign == .Zero_or_Positive    { return int_to_bytes_big(a, buf, signed, allocator); }
+
+	l := len(buf);
+	if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; }
+	if size_in_bytes > l              { return .Buffer_Overflow;  }
+
+	t := &Int{};
+	defer destroy(t);
+
+	if err = internal_complement(t, a, allocator); err != nil { return err; }
+
+	size_in_bits := internal_count_bits(t);
+	i := l - 1;
+	for offset := 0; offset < size_in_bits; offset += 8 {
+		bits, _ := internal_int_bitfield_extract(t, offset, 8);
+		buf[i] = 255 - u8(bits & 255); i -= 1;
+	}
+	buf[0] = 255;
+
+	return;
+}
+
+/*
+	Read `Int` from a Big Endian binary representation.
+	Sign is detected from the first byte if `signed` is true.
+*/
+int_from_bytes_big :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	buf := buf;
+	l := len(buf);
+	if l == 0 { return .Invalid_Argument; }
+
+	sign: Sign;
+	size_in_bits := l * 8;
+	if signed { 
+		/*
+			First byte denotes the sign.
+		*/
+		size_in_bits -= 8;
+	}
+	size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS;
+	size_in_digits += 0 if size_in_bits % 8 == 0 else 1;
+	if err = internal_zero(a, false, allocator); err != nil { return err; }
+	if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; }
+
+	if signed {
+		sign = .Zero_or_Positive if buf[0] == 0 else .Negative;
+		buf = buf[1:];
+	}
+
+	for v in buf {
+		if err = internal_shl(a, a, 8); err != nil { return err; }
+		a.digit[0] |= DIGIT(v);
+	}
+	a.sign = sign;
+	a.used = size_in_digits;
+	return internal_clamp(a);
+}
+
+/*
+	Read `Int` from a Big Endian Python binary representation.
+	Sign is detected from the first byte if `signed` is true.
+*/
+int_from_bytes_big_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	buf := buf;
+	l := len(buf);
+	if l == 0 { return .Invalid_Argument; }
+
+	sign: Sign;
+	size_in_bits := l * 8;
+	if signed { 
+		/*
+			First byte denotes the sign.
+		*/
+		size_in_bits -= 8;
+	}
+	size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS;
+	size_in_digits += 0 if size_in_bits % 8 == 0 else 1;
+	if err = internal_zero(a, false, allocator); err != nil { return err; }
+	if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; }
+
+	if signed {
+		sign = .Zero_or_Positive if buf[0] == 0 else .Negative;
+		buf = buf[1:];
+	}
+
+	for v in buf {
+		if err = internal_shl(a, a, 8); err != nil { return err; }
+		if signed && sign == .Negative {
+			a.digit[0] |= DIGIT(255 - v);	
+		} else {
+			a.digit[0] |= DIGIT(v);
+		}
+	}
+	a.sign = sign;
+	a.used = size_in_digits;
+	if err = internal_clamp(a); err != nil { return err; }
+
+	if signed && sign == .Negative {
+		return internal_sub(a, a, 1);
+	}
+	return nil;
+}
+
+/*
+	Read `Int` from a Little Endian binary representation.
+	Sign is detected from the last byte if `signed` is true.
+*/
+int_from_bytes_little :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	buf := buf;
+	l := len(buf);
+	if l == 0 { return .Invalid_Argument; }
+
+	sign: Sign;
+	size_in_bits   := l * 8;
+	if signed { 
+		/*
+			First byte denotes the sign.
+		*/
+		size_in_bits -= 8;
+	}
+	size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS;
+	size_in_digits += 0 if size_in_bits % 8 == 0 else 1;
+	if err = internal_zero(a, false, allocator); err != nil { return err; }
+	if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; }
+
+	if signed {
+		sign = .Zero_or_Positive if buf[l-1] == 0 else .Negative;
+		buf = buf[:l-1];
+		l -= 1;
+	}
+
+	for _, i in buf {
+		if err = internal_shl(a, a, 8); err != nil { return err; }
+		a.digit[0] |= DIGIT(buf[l-i-1]);
+	}
+	a.sign = sign;
+	a.used = size_in_digits;
+	return internal_clamp(a);
+}
+
+/*
+	Read `Int` from a Little Endian Python binary representation.
+	Sign is detected from the first byte if `signed` is true.
+*/
+int_from_bytes_little_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(a);
+	buf := buf;
+	l := len(buf);
+	if l == 0 { return .Invalid_Argument; }
+
+	sign: Sign;
+	size_in_bits := l * 8;
+	if signed { 
+		/*
+			First byte denotes the sign.
+		*/
+		size_in_bits -= 8;
+	}
+	size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS;
+	size_in_digits += 0 if size_in_bits % 8 == 0 else 1;
+	if err = internal_zero(a, false, allocator); err != nil { return err; }
+	if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; }
+
+	if signed {
+		sign = .Zero_or_Positive if buf[l-1] == 0 else .Negative;
+		buf = buf[:l-1];
+		l -= 1;
+	}
+
+	for _, i in buf {
+		if err = internal_shl(a, a, 8); err != nil { return err; }
+		if signed && sign == .Negative {
+			a.digit[0] |= DIGIT(255 - buf[l-i-1]);
+		} else {
+			a.digit[0] |= DIGIT(buf[l-i-1]);
+		}
+	}
+	a.sign = sign;
+	a.used = size_in_digits;
+	if err = internal_clamp(a); err != nil { return err; }
+
+	if signed && sign == .Negative {
+		return internal_sub(a, a, 1);
+	}
+	return nil;
+}
+
+/*
+	Initialize constants.
+*/
+INT_ONE, INT_ZERO, INT_MINUS_ONE, INT_INF, INT_MINUS_INF, INT_NAN := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+
+initialize_constants :: proc() -> (res: int) {
+	internal_set(     INT_ZERO,  0);      INT_ZERO.flags = {.Immutable};
+	internal_set(      INT_ONE,  1);       INT_ONE.flags = {.Immutable};
+	internal_set(INT_MINUS_ONE, -1); INT_MINUS_ONE.flags = {.Immutable};
+
+	/*
+		We set these special values to -1 or 1 so they don't get mistake for zero accidentally.
+		This allows for shortcut tests of is_zero as .used == 0.
+	*/
+	internal_set(      INT_NAN,  1);       INT_NAN.flags = {.Immutable, .NaN};
+	internal_set(      INT_INF,  1);       INT_INF.flags = {.Immutable, .Inf};
+	internal_set(      INT_INF, -1); INT_MINUS_INF.flags = {.Immutable, .Inf};
+
+	return _DEFAULT_MUL_KARATSUBA_CUTOFF;
+}
+
+/*
+	Destroy constants.
+	Optional for an EXE, as this would be called at the very end of a process.
+*/
+destroy_constants :: proc() {
+	internal_destroy(INT_ONE, INT_ZERO, INT_MINUS_ONE, INT_INF, INT_MINUS_INF, INT_NAN);
+}
+
+
+assert_if_nil :: #force_inline proc(integers: ..^Int, loc := #caller_location) {
+	integers := integers;
+
+	for i in &integers {
+		assert(i != nil, "(nil)", loc);
+	}
+}

+ 2606 - 0
core/math/big/internal.odin

@@ -0,0 +1,2606 @@
+//+ignore
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	A BigInt implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	==========================    Low-level routines    ==========================
+
+	IMPORTANT: `internal_*` procedures make certain assumptions about their input.
+
+	The public functions that call them are expected to satisfy their sanity check requirements.
+	This allows `internal_*` call `internal_*` without paying this overhead multiple times.
+
+	Where errors can occur, they are of course still checked and returned as appropriate.
+
+	When importing `math:core/big` to implement an involved algorithm of your own, you are welcome
+	to use these procedures instead of their public counterparts.
+
+	Most inputs and outputs are expected to be passed an initialized `Int`, for example.
+	Exceptions include `quotient` and `remainder`, which are allowed to be `nil` when the calling code doesn't need them.
+
+	Check the comments above each `internal_*` implementation to see what constraints it expects to have met.
+
+	We pass the custom allocator to procedures by default using the pattern `context.allocator = allocator`.
+	This way we don't have to add `, allocator` at the end of each call.
+
+	TODO: Handle +/- Infinity and NaN.
+*/
+
+import "core:mem"
+import "core:intrinsics"
+import rnd "core:math/rand"
+
+/*
+	Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
+
+	Assumptions:
+		`dest`, `a` and `b` != `nil` and have been initalized.
+*/
+internal_int_add_unsigned :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	dest := dest; x := a; y := b;
+	context.allocator = allocator;
+
+	old_used, min_used, max_used, i: int;
+
+	if x.used < y.used {
+		x, y = y, x;
+	}
+
+	min_used = y.used;
+	max_used = x.used;
+	old_used = dest.used;
+
+	if err = internal_grow(dest, max(max_used + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
+	dest.used = max_used + 1;
+	/*
+		All parameters have been initialized.
+	*/
+
+	/* Zero the carry */
+	carry := DIGIT(0);
+
+	#no_bounds_check for i = 0; i < min_used; i += 1 {
+		/*
+			Compute the sum one _DIGIT at a time.
+			dest[i] = a[i] + b[i] + carry;
+		*/
+		dest.digit[i] = x.digit[i] + y.digit[i] + carry;
+
+		/*
+			Compute carry
+		*/
+		carry = dest.digit[i] >> _DIGIT_BITS;
+		/*
+			Mask away carry from result digit.
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	if min_used != max_used {
+		/*
+			Now copy higher words, if any, in A+B.
+			If A or B has more digits, add those in.
+		*/
+		#no_bounds_check for ; i < max_used; i += 1 {
+			dest.digit[i] = x.digit[i] + carry;
+			/*
+				Compute carry
+			*/
+			carry = dest.digit[i] >> _DIGIT_BITS;
+			/*
+				Mask away carry from result digit.
+			*/
+			dest.digit[i] &= _MASK;
+		}
+	}
+	/*
+		Add remaining carry.
+	*/
+	dest.digit[i] = carry;
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return internal_clamp(dest);
+}
+internal_add_unsigned :: proc { internal_int_add_unsigned, };
+
+/*
+	Low-level addition, signed. Handbook of Applied Cryptography, algorithm 14.7.
+
+	Assumptions:
+		`dest`, `a` and `b` != `nil` and have been initalized.
+*/
+internal_int_add_signed :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	x := a; y := b;
+	context.allocator = allocator;
+	/*
+		Handle both negative or both positive.
+	*/
+	if x.sign == y.sign {
+		dest.sign = x.sign;
+		return #force_inline internal_int_add_unsigned(dest, x, y);
+	}
+
+	/*
+		One positive, the other negative.
+		Subtract the one with the greater magnitude from the other.
+		The result gets the sign of the one with the greater magnitude.
+	*/
+	if #force_inline internal_cmp_mag(a, b) == -1 {
+		x, y = y, x;
+	}
+
+	dest.sign = x.sign;
+	return #force_inline internal_int_sub_unsigned(dest, x, y);
+}
+internal_add_signed :: proc { internal_int_add_signed, };
+
+/*
+	Low-level addition Int+DIGIT, signed. Handbook of Applied Cryptography, algorithm 14.7.
+
+	Assumptions:
+		`dest` and `a` != `nil` and have been initalized.
+		`dest` is large enough (a.used + 1) to fit result.
+*/
+internal_int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if err = internal_grow(dest, a.used + 1); err != nil { return err; }
+	/*
+		Fast paths for destination and input Int being the same.
+	*/
+	if dest == a {
+		/*
+			Fast path for dest.digit[0] + digit fits in dest.digit[0] without overflow.
+		*/
+		if dest.sign == .Zero_or_Positive && (dest.digit[0] + digit < _DIGIT_MAX) {
+			dest.digit[0] += digit;
+			dest.used += 1;
+			return internal_clamp(dest);
+		}
+		/*
+			Can be subtracted from dest.digit[0] without underflow.
+		*/
+		if a.sign == .Negative && (dest.digit[0] > digit) {
+			dest.digit[0] -= digit;
+			dest.used += 1;
+			return internal_clamp(dest);
+		}
+	}
+
+	/*
+		If `a` is negative and `|a|` >= `digit`, call `dest = |a| - digit`
+	*/
+	if a.sign == .Negative && (a.used > 1 || a.digit[0] >= digit) {
+		/*
+			Temporarily fix `a`'s sign.
+		*/
+		a.sign = .Zero_or_Positive;
+		/*
+			dest = |a| - digit
+		*/
+		if err = #force_inline internal_int_add_digit(dest, a, digit); err != nil {
+			/*
+				Restore a's sign.
+			*/
+			a.sign = .Negative;
+			return err;
+		}
+		/*
+			Restore sign and set `dest` sign.
+		*/
+		a.sign    = .Negative;
+		dest.sign = .Negative;
+
+		return internal_clamp(dest);
+	}
+
+	/*
+		Remember the currently used number of digits in `dest`.
+	*/
+	old_used := dest.used;
+
+	/*
+		If `a` is positive
+	*/
+	if a.sign == .Zero_or_Positive {
+		/*
+			Add digits, use `carry`.
+		*/
+		i: int;
+		carry := digit;
+		#no_bounds_check for i = 0; i < a.used; i += 1 {
+			dest.digit[i] = a.digit[i] + carry;
+			carry = dest.digit[i] >> _DIGIT_BITS;
+			dest.digit[i] &= _MASK;
+		}
+		/*
+			Set final carry.
+		*/
+		dest.digit[i] = carry;
+		/*
+			Set `dest` size.
+		*/
+		dest.used = a.used + 1;
+	} else {
+		/*
+			`a` was negative and |a| < digit.
+		*/
+		dest.used = 1;
+		/*
+			The result is a single DIGIT.
+		*/
+		dest.digit[0] = digit - a.digit[0] if a.used == 1 else digit;
+	}
+	/*
+		Sign is always positive.
+	*/
+	dest.sign = .Zero_or_Positive;
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return internal_clamp(dest);	
+}
+internal_add :: proc { internal_int_add_signed, internal_int_add_digit, };
+
+/*
+	Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|.
+	Handbook of Applied Cryptography, algorithm 14.9.
+
+	Assumptions:
+		`dest`, `number` and `decrease` != `nil` and have been initalized.
+*/
+internal_int_sub_unsigned :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	dest := dest; x := number; y := decrease;
+	old_used := dest.used;
+	min_used := y.used;
+	max_used := x.used;
+	i: int;
+
+	if err = grow(dest, max(max_used, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
+	dest.used = max_used;
+	/*
+		All parameters have been initialized.
+	*/
+
+	borrow := DIGIT(0);
+
+	#no_bounds_check for i = 0; i < min_used; i += 1 {
+		dest.digit[i] = (x.digit[i] - y.digit[i] - borrow);
+		/*
+			borrow = carry bit of dest[i]
+			Note this saves performing an AND operation since if a carry does occur,
+			it will propagate all the way to the MSB.
+			As a result a single shift is enough to get the carry.
+		*/
+		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
+		/*
+			Clear borrow from dest[i].
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	/*
+		Now copy higher words if any, e.g. if A has more digits than B
+	*/
+	#no_bounds_check for ; i < max_used; i += 1 {
+		dest.digit[i] = x.digit[i] - borrow;
+		/*
+			borrow = carry bit of dest[i]
+			Note this saves performing an AND operation since if a carry does occur,
+			it will propagate all the way to the MSB.
+			As a result a single shift is enough to get the carry.
+		*/
+		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
+		/*
+			Clear borrow from dest[i].
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return internal_clamp(dest);
+}
+internal_sub_unsigned :: proc { internal_int_sub_unsigned, };
+
+/*
+	Low-level subtraction, signed. Handbook of Applied Cryptography, algorithm 14.9.
+	dest = number - decrease. Assumes |number| > |decrease|.
+
+	Assumptions:
+		`dest`, `number` and `decrease` != `nil` and have been initalized.
+*/
+internal_int_sub_signed :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	number := number; decrease := decrease;
+	if number.sign != decrease.sign {
+		/*
+			Subtract a negative from a positive, OR subtract a positive from a negative.
+			In either case, ADD their magnitudes and use the sign of the first number.
+		*/
+		dest.sign = number.sign;
+		return #force_inline internal_int_add_unsigned(dest, number, decrease);
+	}
+
+	/*
+		Subtract a positive from a positive, OR negative from a negative.
+		First, take the difference between their magnitudes, then...
+	*/
+	if #force_inline internal_cmp_mag(number, decrease) == -1 {
+		/*
+			The second has a larger magnitude.
+			The result has the *opposite* sign from the first number.
+		*/
+		dest.sign = .Negative if number.sign == .Zero_or_Positive else .Zero_or_Positive;
+		number, decrease = decrease, number;
+	} else {
+		/*
+			The first has a larger or equal magnitude.
+			Copy the sign from the first.
+		*/
+		dest.sign = number.sign;
+	}
+	return #force_inline internal_int_sub_unsigned(dest, number, decrease);
+}
+
+/*
+	Low-level subtraction, signed. Handbook of Applied Cryptography, algorithm 14.9.
+	dest = number - decrease. Assumes |number| > |decrease|.
+
+	Assumptions:
+		`dest`, `number` != `nil` and have been initalized.
+		`dest` is large enough (number.used + 1) to fit result.
+*/
+internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if err = internal_grow(dest, number.used + 1); err != nil { return err; }
+
+	dest := dest; digit := digit;
+	/*
+		All parameters have been initialized.
+
+		Fast paths for destination and input Int being the same.
+	*/
+	if dest == number {
+		/*
+			Fast path for `dest` is negative and unsigned addition doesn't overflow the lowest digit.
+		*/
+		if dest.sign == .Negative && (dest.digit[0] + digit < _DIGIT_MAX) {
+			dest.digit[0] += digit;
+			return nil;
+		}
+		/*
+			Can be subtracted from dest.digit[0] without underflow.
+		*/
+		if number.sign == .Zero_or_Positive && (dest.digit[0] > digit) {
+			dest.digit[0] -= digit;
+			return nil;
+		}
+	}
+
+	/*
+		If `a` is negative, just do an unsigned addition (with fudged signs).
+	*/
+	if number.sign == .Negative {
+		t := number;
+		t.sign = .Zero_or_Positive;
+
+		err =  #force_inline internal_int_add_digit(dest, t, digit);
+		dest.sign = .Negative;
+
+		internal_clamp(dest);
+		return err;
+	}
+
+	old_used := dest.used;
+
+	/*
+		if `a`<= digit, simply fix the single digit.
+	*/
+	if number.used == 1 && (number.digit[0] <= digit) || number.used == 0 {
+		dest.digit[0] = digit - number.digit[0] if number.used == 1 else digit;
+		dest.sign = .Negative;
+		dest.used = 1;
+	} else {
+		dest.sign = .Zero_or_Positive;
+		dest.used = number.used;
+
+		/*
+			Subtract with carry.
+		*/
+		carry := digit;
+
+		#no_bounds_check for i := 0; i < number.used; i += 1 {
+			dest.digit[i] = number.digit[i] - carry;
+			carry = dest.digit[i] >> (_DIGIT_TYPE_BITS - 1);
+			dest.digit[i] &= _MASK;
+		}
+	}
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return internal_clamp(dest);
+}
+
+internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, };
+
+/*
+	dest = src  / 2
+	dest = src >> 1
+
+	Assumes `dest` and `src` not to be `nil` and have been initialized.
+	We make no allocations here.
+*/
+internal_int_shr1 :: proc(dest, src: ^Int) -> (err: Error) {
+	old_used  := dest.used; dest.used = src.used;
+	/*
+		Carry
+	*/
+	fwd_carry := DIGIT(0);
+
+	#no_bounds_check for x := dest.used - 1; x >= 0; x -= 1 {
+		/*
+			Get the carry for the next iteration.
+		*/
+		src_digit := src.digit[x];
+		carry     := src_digit & 1;
+		/*
+			Shift the current digit, add in carry and store.
+		*/
+		dest.digit[x] = (src_digit >> 1) | (fwd_carry << (_DIGIT_BITS - 1));
+		/*
+			Forward carry to next iteration.
+		*/
+		fwd_carry = carry;
+	}
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	dest.sign = src.sign;
+	return internal_clamp(dest);	
+}
+
+/*
+	dest = src  * 2
+	dest = src << 1
+*/
+internal_int_shl1 :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if err = internal_copy(dest, src); err != nil { return err; }
+	/*
+		Grow `dest` to accommodate the additional bits.
+	*/
+	digits_needed := dest.used + 1;
+	if err = internal_grow(dest, digits_needed); err != nil { return err; }
+	dest.used = digits_needed;
+
+	mask  := (DIGIT(1) << uint(1)) - DIGIT(1);
+	shift := DIGIT(_DIGIT_BITS - 1);
+	carry := DIGIT(0);
+
+	#no_bounds_check for x:= 0; x < dest.used; x+= 1 {		
+		fwd_carry := (dest.digit[x] >> shift) & mask;
+		dest.digit[x] = (dest.digit[x] << uint(1) | carry) & _MASK;
+		carry = fwd_carry;
+	}
+	/*
+		Use final carry.
+	*/
+	if carry != 0 {
+		dest.digit[dest.used] = carry;
+		dest.used += 1;
+	}
+	return internal_clamp(dest);
+}
+
+/*
+	Multiply by a DIGIT.
+*/
+internal_int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	assert_if_nil(dest, src);
+
+	if multiplier == 0 {
+		return internal_zero(dest);
+	}
+	if multiplier == 1 {
+		return internal_copy(dest, src);
+	}
+
+	/*
+		Power of two?
+	*/
+	if multiplier == 2 {
+		return #force_inline internal_int_shl1(dest, src);
+	}
+	if #force_inline platform_int_is_power_of_two(int(multiplier)) {
+		ix: int;
+		if ix, err = internal_log(multiplier, 2); err != nil { return err; }
+		return internal_shl(dest, src, ix);
+	}
+
+	/*
+		Ensure `dest` is big enough to hold `src` * `multiplier`.
+	*/
+	if err = grow(dest, max(src.used + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
+
+	/*
+		Save the original used count.
+	*/
+	old_used := dest.used;
+	/*
+		Set the sign.
+	*/
+	dest.sign = src.sign;
+	/*
+		Set up carry.
+	*/
+	carry := _WORD(0);
+	/*
+		Compute columns.
+	*/
+	ix := 0;
+	#no_bounds_check for ; ix < src.used; ix += 1 {
+		/*
+			Compute product and carry sum for this term
+		*/
+		product := carry + _WORD(src.digit[ix]) * _WORD(multiplier);
+		/*
+			Mask off higher bits to get a single DIGIT.
+		*/
+		dest.digit[ix] = DIGIT(product & _WORD(_MASK));
+		/*
+			Send carry into next iteration
+		*/
+		carry = product >> _DIGIT_BITS;
+	}
+
+	/*
+		Store final carry [if any] and increment used.
+	*/
+	dest.digit[ix] = DIGIT(carry);
+	dest.used = src.used + 1;
+
+	/*
+		Zero remainder.
+	*/
+	internal_zero_unused(dest, old_used);
+
+	return internal_clamp(dest);
+}
+
+/*
+	High level multiplication (handles sign).
+*/
+internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	/*
+		Early out for `multiplier` is zero; Set `dest` to zero.
+	*/
+	if multiplier.used == 0 || src.used == 0 { return internal_zero(dest); }
+
+	if src == multiplier {
+		/*
+			Do we need to square?
+		*/
+		if src.used >= SQR_TOOM_CUTOFF {
+			/*
+				Use Toom-Cook?
+			*/
+			err = #force_inline _private_int_sqr_toom(dest, src);
+		} else if src.used >= SQR_KARATSUBA_CUTOFF {
+			/*
+				Karatsuba?
+			*/
+			err = #force_inline _private_int_sqr_karatsuba(dest, src);
+		} else if ((src.used * 2) + 1) < _WARRAY && src.used < (_MAX_COMBA / 2) {
+			/*
+				Fast comba?
+			*/
+			err = #force_inline _private_int_sqr_comba(dest, src);
+			//err = #force_inline _private_int_sqr(dest, src);
+		} else {
+			err = #force_inline _private_int_sqr(dest, src);
+		}
+	} else {
+		/*
+			Can we use the balance method? Check sizes.
+			* The smaller one needs to be larger than the Karatsuba cut-off.
+			* The bigger one needs to be at least about one `_MUL_KARATSUBA_CUTOFF` bigger
+			* to make some sense, but it depends on architecture, OS, position of the
+			* stars... so YMMV.
+			* Using it to cut the input into slices small enough for _mul_comba
+			* was actually slower on the author's machine, but YMMV.
+		*/
+
+		min_used := min(src.used, multiplier.used);
+		max_used := max(src.used, multiplier.used);
+		digits   := src.used + multiplier.used + 1;
+
+		if        false &&  min_used     >= MUL_KARATSUBA_CUTOFF &&
+						    max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
+			/*
+				Not much effect was observed below a ratio of 1:2, but again: YMMV.
+			*/
+							max_used     >= 2 * min_used {
+			// err = s_mp_mul_balance(a,b,c);
+		} else if false && min_used >= MUL_TOOM_CUTOFF {
+			// err = s_mp_mul_toom(a, b, c);
+		} else if false && min_used >= MUL_KARATSUBA_CUTOFF {
+			// err = s_mp_mul_karatsuba(a, b, c);
+		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
+			/*
+				Can we use the fast multiplier?
+				* The fast multiplier can be used if the output will
+				* have less than MP_WARRAY digits and the number of
+				* digits won't affect carry propagation
+			*/
+			err = #force_inline _private_int_mul_comba(dest, src, multiplier, digits);
+		} else {
+			err = #force_inline _private_int_mul(dest, src, multiplier, digits);
+		}
+	}
+	neg := src.sign != multiplier.sign;
+	dest.sign = .Negative if dest.used > 0 && neg else .Zero_or_Positive;
+	return err;
+}
+
+internal_mul :: proc { internal_int_mul, internal_int_mul_digit, };
+
+internal_sqr :: proc (dest, src: ^Int, allocator := context.allocator) -> (res: Error) {
+	/*
+		We call `internal_mul` and not e.g. `_private_int_sqr` because the former
+		will dispatch to the optimal implementation depending on the source.
+	*/
+	return #force_inline internal_mul(dest, src, src, allocator);
+}
+
+/*
+	divmod.
+	Both the quotient and remainder are optional and may be passed a nil.
+	`numerator` and `denominator` are expected not to be `nil` and have been initialized.
+*/
+internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if denominator.used == 0 { return .Division_by_Zero; }
+	/*
+		If numerator < denominator then quotient = 0, remainder = numerator.
+	*/
+	if #force_inline internal_cmp_mag(numerator, denominator) == -1 {
+		if remainder != nil {
+			if err = internal_copy(remainder, numerator); err != nil { return err; }
+		}
+		if quotient != nil {
+			internal_zero(quotient);
+		}
+		return nil;
+	}
+
+	if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
+		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
+	} else {
+		when true {
+			err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator);
+		} else {
+			/*
+				NOTE(Jeroen): We no longer need or use `_private_int_div_small`.
+				We'll keep it around for a bit until we're reasonably certain div_school is bug free.
+			*/
+			err = _private_int_div_small(quotient, remainder, numerator, denominator);
+		}
+	}
+	return;
+}
+
+/*
+	Single digit division (based on routine from MPI).
+	The quotient is optional and may be passed a nil.
+*/
+internal_int_divmod_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Cannot divide by zero.
+	*/
+	if denominator == 0 { return 0, .Division_by_Zero; }
+
+	/*
+		Quick outs.
+	*/
+	if denominator == 1 || numerator.used == 0 {
+		if quotient != nil {
+			return 0, internal_copy(quotient, numerator);
+		}
+		return 0, err;
+	}
+	/*
+		Power of two?
+	*/
+	if denominator == 2 {
+		if numerator.used > 0 && numerator.digit[0] & 1 != 0 {
+			// Remainder is 1 if numerator is odd.
+			remainder = 1;
+		}
+		if quotient == nil {
+			return remainder, nil;
+		}
+		return remainder, internal_shr(quotient, numerator, 1);
+	}
+
+	ix: int;
+	if platform_int_is_power_of_two(int(denominator)) {
+		ix = 1;
+		for ix < _DIGIT_BITS && denominator != (1 << uint(ix)) {
+			ix += 1;
+		}
+		remainder = numerator.digit[0] & ((1 << uint(ix)) - 1);
+		if quotient == nil {
+			return remainder, nil;
+		}
+
+		return remainder, internal_shr(quotient, numerator, int(ix));
+	}
+
+	/*
+		Three?
+	*/
+	if denominator == 3 {
+		return _private_int_div_3(quotient, numerator);
+	}
+
+	/*
+		No easy answer [c'est la vie].  Just division.
+	*/
+	q := &Int{};
+
+	if err = internal_grow(q, numerator.used); err != nil { return 0, err; }
+
+	q.used = numerator.used;
+	q.sign = numerator.sign;
+
+	w := _WORD(0);
+
+	for ix = numerator.used - 1; ix >= 0; ix -= 1 {
+		t := DIGIT(0);
+		w = (w << _WORD(_DIGIT_BITS) | _WORD(numerator.digit[ix]));
+		if w >= _WORD(denominator) {
+			t = DIGIT(w / _WORD(denominator));
+			w -= _WORD(t) * _WORD(denominator);
+		}
+		q.digit[ix] = t;
+	}
+	remainder = DIGIT(w);
+
+	if quotient != nil {
+		internal_clamp(q);
+		internal_swap(q, quotient);
+	}
+	internal_destroy(q);
+	return remainder, nil;
+}
+
+internal_divmod :: proc { internal_int_divmod, internal_int_divmod_digit, };
+
+/*
+	Asssumes quotient, numerator and denominator to have been initialized and not to be nil.
+*/
+internal_int_div :: proc(quotient, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline internal_int_divmod(quotient, nil, numerator, denominator, allocator);
+}
+internal_div :: proc { internal_int_div, };
+
+/*
+	remainder = numerator % denominator.
+	0 <= remainder < denominator if denominator > 0
+	denominator < remainder <= 0 if denominator < 0
+
+	Asssumes quotient, numerator and denominator to have been initialized and not to be nil.
+*/
+internal_int_mod :: proc(remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = #force_inline internal_int_divmod(nil, remainder, numerator, denominator, allocator); err != nil { return err; }
+
+	if remainder.used == 0 || denominator.sign == remainder.sign { return nil; }
+
+	return #force_inline internal_add(remainder, remainder, numerator, allocator);
+}
+internal_mod :: proc{ internal_int_mod, };
+
+/*
+	remainder = (number + addend) % modulus.
+*/
+internal_int_addmod :: proc(remainder, number, addend, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = #force_inline internal_add(remainder, number, addend, allocator); err != nil { return err; }
+	return #force_inline internal_mod(remainder, remainder, modulus, allocator);
+}
+internal_addmod :: proc { internal_int_addmod, };
+
+/*
+	remainder = (number - decrease) % modulus.
+*/
+internal_int_submod :: proc(remainder, number, decrease, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = #force_inline internal_sub(remainder, number, decrease, allocator); err != nil { return err; }
+	return #force_inline internal_mod(remainder, remainder, modulus, allocator);
+}
+internal_submod :: proc { internal_int_submod, };
+
+/*
+	remainder = (number * multiplicand) % modulus.
+*/
+internal_int_mulmod :: proc(remainder, number, multiplicand, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = #force_inline internal_mul(remainder, number, multiplicand, allocator); err != nil { return err; }
+	return #force_inline internal_mod(remainder, remainder, modulus, allocator);
+}
+internal_mulmod :: proc { internal_int_mulmod, };
+
+/*
+	remainder = (number * number) % modulus.
+*/
+internal_int_sqrmod :: proc(remainder, number, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = #force_inline internal_sqr(remainder, number, allocator); err != nil { return err; }
+	return #force_inline internal_mod(remainder, remainder, modulus, allocator);
+}
+internal_sqrmod :: proc { internal_int_sqrmod, };
+
+
+
+/*
+	TODO: Use Sterling's Approximation to estimate log2(N!) to size the result.
+	This way we'll have to reallocate less, possibly not at all.
+*/
+internal_int_factorial :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if n >= FACTORIAL_BINARY_SPLIT_CUTOFF {
+		return #force_inline _private_int_factorial_binary_split(res, n);
+	}
+
+	i := len(_factorial_table);
+	if n < i {
+		return #force_inline internal_set(res, _factorial_table[n]);
+	}
+
+	if err = #force_inline internal_set(res, _factorial_table[i - 1]); err != nil { return err; }
+	for {
+		if err = #force_inline internal_mul(res, res, DIGIT(i)); err != nil || i == n { return err; }
+		i += 1;
+	}
+
+	return nil;
+}
+
+/*
+	Returns GCD, LCM or both.
+
+	Assumes `a` and `b` to have been initialized.
+	`res_gcd` and `res_lcm` can be nil or ^Int depending on which results are desired.
+*/
+internal_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	if res_gcd == nil && res_lcm == nil { return nil; }
+
+	return #force_inline _private_int_gcd_lcm(res_gcd, res_lcm, a, b, allocator);
+}
+
+/*
+	remainder = numerator % (1 << bits)
+
+	Assumes `remainder` and `numerator` both not to be `nil` and `bits` to be >= 0.
+*/
+internal_int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Everything is divisible by 1 << 0 == 1, so this returns 0.
+	*/
+	if bits == 0 { return internal_zero(remainder); }
+
+	/*
+		If the modulus is larger than the value, return the value.
+	*/
+	err = internal_copy(remainder, numerator);
+	if bits >= (numerator.used * _DIGIT_BITS) || err != nil {
+		return;
+	}
+
+	/*
+		Zero digits above the last digit of the modulus.
+	*/
+	zero_count := (bits / _DIGIT_BITS);
+	zero_count += 0 if (bits % _DIGIT_BITS == 0) else 1;
+
+	/*
+		Zero remainder. Special case, can't use `internal_zero_unused`.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(remainder.digit[zero_count:]);
+	}
+
+	/*
+		Clear the digit that is not completely outside/inside the modulus.
+	*/
+	remainder.digit[bits / _DIGIT_BITS] &= DIGIT(1 << DIGIT(bits % _DIGIT_BITS)) - DIGIT(1);
+	return internal_clamp(remainder);
+}
+
+/*
+	=============================    Low-level helpers    =============================
+
+
+	`internal_*` helpers don't return an `Error` like their public counterparts do,
+	because they expect not to be passed `nil` or uninitialized inputs.
+
+	This makes them more suitable for `internal_*` functions and some of the
+	public ones that have already satisfied these constraints.
+*/
+
+/*
+	This procedure will return `true` if the `Int` is initialized, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_initialized :: #force_inline proc(a: ^Int) -> (initialized: bool) {
+	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
+	return raw.cap >= _MIN_DIGIT_COUNT;
+}
+internal_is_initialized :: proc { internal_int_is_initialized, };
+
+/*
+	This procedure will return `true` if the `Int` is zero, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_zero :: #force_inline proc(a: ^Int) -> (zero: bool) {
+	return a.used == 0;
+}
+internal_is_zero :: proc { internal_int_is_zero, };
+
+/*
+	This procedure will return `true` if the `Int` is positive, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_positive :: #force_inline proc(a: ^Int) -> (positive: bool) {
+	return a.sign == .Zero_or_Positive;
+}
+internal_is_positive :: proc { internal_int_is_positive, };
+
+/*
+	This procedure will return `true` if the `Int` is negative, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_negative :: #force_inline proc(a: ^Int) -> (negative: bool) {
+	return a.sign == .Negative;
+}
+internal_is_negative :: proc { internal_int_is_negative, };
+
+/*
+	This procedure will return `true` if the `Int` is even, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_even :: #force_inline proc(a: ^Int) -> (even: bool) {
+	if internal_is_zero(a) { return true; }
+
+	/*
+		`a.used` > 0 here, because the above handled `is_zero`.
+		We don't need to explicitly test it.
+	*/
+	return a.digit[0] & 1 == 0;
+}
+internal_is_even :: proc { internal_int_is_even, };
+
+/*
+	This procedure will return `true` if the `Int` is even, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_odd :: #force_inline proc(a: ^Int) -> (odd: bool) {
+	return !internal_int_is_even(a);
+}
+internal_is_odd :: proc { internal_int_is_odd, };
+
+
+/*
+	This procedure will return `true` if the `Int` is a power of two, `false` if not.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_is_power_of_two :: #force_inline proc(a: ^Int) -> (power_of_two: bool) {
+	/*
+		Early out for Int == 0.
+	*/
+	if #force_inline internal_is_zero(a) { return true; }
+
+	/*
+		For an `Int` to be a power of two, its bottom limb has to be a power of two.
+	*/
+	if ! #force_inline platform_int_is_power_of_two(int(a.digit[a.used - 1])) { return false; }
+
+	/*
+		We've established that the bottom limb is a power of two.
+		If it's the only limb, that makes the entire Int a power of two.
+	*/
+	if a.used == 1 { return true; }
+
+	/*
+		For an `Int` to be a power of two, all limbs except the top one have to be zero.
+	*/
+	for i := 1; i < a.used && a.digit[i - 1] != 0; i += 1 { return false; }
+
+	return true;
+}
+internal_is_power_of_two :: proc { internal_int_is_power_of_two, };
+
+/*
+	Compare two `Int`s, signed.
+	Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`.
+
+	Expects `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`.
+*/
+internal_int_compare :: #force_inline proc(a, b: ^Int) -> (comparison: int) {
+	a_is_negative := #force_inline internal_is_negative(a);
+
+	/*
+		Compare based on sign.
+	*/
+	if a.sign != b.sign { return -1 if a_is_negative else +1; }
+
+	/*
+		If `a` is negative, compare in the opposite direction */
+	if a_is_negative { return #force_inline internal_compare_magnitude(b, a); }
+
+	return #force_inline internal_compare_magnitude(a, b);
+}
+internal_compare :: proc { internal_int_compare, internal_int_compare_digit, };
+internal_cmp :: internal_compare;
+
+/*
+    Compare an `Int` to an unsigned number upto `DIGIT & _MASK`.
+    Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`.
+
+    Expects: `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`.
+*/
+internal_int_compare_digit :: #force_inline proc(a: ^Int, b: DIGIT) -> (comparison: int) {
+	a_is_negative := #force_inline internal_is_negative(a);
+
+	switch {
+	/*
+		Compare based on sign first.
+	*/
+	case a_is_negative:     return -1;
+	/*
+		Then compare on magnitude.
+	*/
+	case a.used > 1:        return +1;
+	/*
+		We have only one digit. Compare it against `b`.
+	*/
+	case a.digit[0] < b:    return -1;
+	case a.digit[0] == b:   return  0;
+	case a.digit[0] > b:    return +1;
+	/*
+		Unreachable.
+		Just here because Odin complains about a missing return value at the bottom of the proc otherwise.
+	*/
+	case:                   return;
+	}
+}
+internal_compare_digit :: proc { internal_int_compare_digit, };
+internal_cmp_digit :: internal_compare_digit;
+
+/*
+	Compare the magnitude of two `Int`s, unsigned.
+*/
+internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison: int) {
+	/*
+		Compare based on used digits.
+	*/
+	if a.used != b.used {
+		if a.used > b.used {
+			return +1;
+		}
+		return -1;
+	}
+
+	/*
+		Same number of used digits, compare based on their value.
+	*/
+	#no_bounds_check for n := a.used - 1; n >= 0; n -= 1 {
+		if a.digit[n] != b.digit[n] {
+			if a.digit[n] > b.digit[n] {
+				return +1;
+			}
+			return -1;
+		}
+	}
+
+   	return 0;
+}
+internal_compare_magnitude :: proc { internal_int_compare_magnitude, };
+internal_cmp_mag :: internal_compare_magnitude;
+
+
+/*
+	=========================    Logs, powers and roots    ============================
+*/
+
+/*
+	Returns log_base(a).
+	Assumes `a` to not be `nil` and have been iniialized.
+*/
+internal_int_log :: proc(a: ^Int, base: DIGIT) -> (res: int, err: Error) {
+	if base < 2 || DIGIT(base) > _DIGIT_MAX { return -1, .Invalid_Argument; }
+
+	if internal_is_negative(a) { return -1, .Math_Domain_Error; }
+	if internal_is_zero(a)     { return -1, .Math_Domain_Error; }
+
+	/*
+		Fast path for bases that are a power of two.
+	*/
+	if platform_int_is_power_of_two(int(base)) { return _private_log_power_of_two(a, base); }
+
+	/*
+		Fast path for `Int`s that fit within a single `DIGIT`.
+	*/
+	if a.used == 1 { return internal_log(a.digit[0], DIGIT(base)); }
+
+	return _private_int_log(a, base);
+
+}
+
+/*
+	Returns log_base(a), where `a` is a DIGIT.
+*/
+internal_digit_log :: proc(a: DIGIT, base: DIGIT) -> (log: int, err: Error) {
+	/*
+		If the number is smaller than the base, it fits within a fraction.
+		Therefore, we return 0.
+	*/
+	if a  < base { return 0, nil; }
+
+	/*
+		If a number equals the base, the log is 1.
+	*/
+	if a == base { return 1, nil; }
+
+	N := _WORD(a);
+	bracket_low  := _WORD(1);
+	bracket_high := _WORD(base);
+	high := 1;
+	low  := 0;
+
+	for bracket_high < N {
+		low = high;
+		bracket_low = bracket_high;
+		high <<= 1;
+		bracket_high *= bracket_high;
+	}
+
+	for high - low > 1 {
+		mid := (low + high) >> 1;
+		bracket_mid := bracket_low * #force_inline internal_small_pow(_WORD(base), _WORD(mid - low));
+
+		if N < bracket_mid {
+			high = mid;
+			bracket_high = bracket_mid;
+		}
+		if N > bracket_mid {
+			low = mid;
+			bracket_low = bracket_mid;
+		}
+		if N == bracket_mid {
+			return mid, nil;
+		}
+	}
+
+	if bracket_high == N {
+		return high, nil;
+	} else {
+		return low, nil;
+	}
+}
+internal_log :: proc { internal_int_log, internal_digit_log, };
+
+/*
+	Calculate dest = base^power using a square-multiply algorithm.
+	Assumes `dest` and `base` not to be `nil` and to have been initialized.
+*/
+internal_int_pow :: proc(dest, base: ^Int, power: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	power := power;
+	/*
+		Early outs.
+	*/
+	if #force_inline internal_is_zero(base) {
+		/*
+			A zero base is a special case.
+		*/
+		if power  < 0 {
+			if err = internal_zero(dest); err != nil { return err; }
+			return .Math_Domain_Error;
+		}
+		if power == 0 { return  internal_one(dest); }
+		if power  > 0 { return internal_zero(dest); }
+
+	}
+	if power < 0 {
+		/*
+			Fraction, so we'll return zero.
+		*/
+		return internal_zero(dest);
+	}
+	switch(power) {
+	case 0:
+		/*
+			Any base to the power zero is one.
+		*/
+		return #force_inline internal_one(dest);
+	case 1:
+		/*
+			Any base to the power one is itself.
+		*/
+		return copy(dest, base);
+	case 2:
+		return #force_inline internal_sqr(dest, base);
+	}
+
+	g := &Int{};
+	if err = internal_copy(g, base); err != nil { return err; }
+
+	/*
+		Set initial result.
+	*/
+	if err = internal_one(dest); err != nil { return err; }
+
+	loop: for power > 0 {
+		/*
+			If the bit is set, multiply.
+		*/
+		if power & 1 != 0 {
+			if err = internal_mul(dest, g, dest); err != nil {
+				break loop;
+			}
+		}
+		/*
+			Square.
+		*/
+		if power > 1 {
+			if err = #force_inline internal_sqr(g, g); err != nil {
+				break loop;
+			}
+		}
+
+		/* shift to next bit */
+		power >>= 1;
+	}
+
+	internal_destroy(g);
+	return err;
+}
+
+/*
+	Calculate `dest = base^power`.
+	Assumes `dest` not to be `nil` and to have been initialized.
+*/
+internal_int_pow_int :: proc(dest: ^Int, base, power: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	base_t := &Int{};
+	defer internal_destroy(base_t);
+
+	if err = internal_set(base_t, base); err != nil { return err; }
+
+	return #force_inline internal_int_pow(dest, base_t, power);
+}
+
+internal_pow :: proc { internal_int_pow, internal_int_pow_int, };
+internal_exp :: pow;
+
+/*
+
+*/
+internal_small_pow :: proc(base: _WORD, exponent: _WORD) -> (result: _WORD) {
+	exponent := exponent; base := base;
+	result = _WORD(1);
+
+	for exponent != 0 {
+		if exponent & 1 == 1 {
+			result *= base;
+		}
+		exponent >>= 1;
+		base *= base;
+	}
+	return result;
+}
+
+/*
+	This function is less generic than `root_n`, simpler and faster.
+	Assumes `dest` and `src` not to be `nil` and to have been initialized.
+*/
+internal_int_sqrt :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Must be positive.
+	*/
+	if #force_inline internal_is_negative(src)  { return .Invalid_Argument; }
+
+	/*
+		Easy out. If src is zero, so is dest.
+	*/
+	if #force_inline internal_is_zero(src)      { return internal_zero(dest); }
+
+	/*
+		Set up temporaries.
+	*/
+	x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(x, y, t1, t2);
+
+	count := #force_inline internal_count_bits(src);
+
+	a, b := count >> 1, count & 1;
+	if err = internal_int_power_of_two(x, a+b, allocator);   err != nil { return err; }
+
+	for {
+		/*
+			y = (x + n // x) // 2
+		*/
+		if err = internal_div(t1, src, x); err != nil { return err; }
+		if err = internal_add(t2, t1, x);  err != nil { return err; }
+		if err = internal_shr(y, t2, 1);   err != nil { return err; }
+
+		if c := internal_cmp(y, x); c == 0 || c == 1 {
+			internal_swap(dest, x);
+			return nil;
+		}
+		internal_swap(x, y);
+	}
+
+	internal_swap(dest, x);
+	return err;
+}
+internal_sqrt :: proc { internal_int_sqrt, };
+
+
+/*
+	Find the nth root of an Integer.
+	Result found such that `(dest)**n <= src` and `(dest+1)**n > src`
+
+	This algorithm uses Newton's approximation `x[i+1] = x[i] - f(x[i])/f'(x[i])`,
+	which will find the root in `log(n)` time where each step involves a fair bit.
+
+	Assumes `dest` and `src` not to be `nil` and have been initialized.
+*/
+internal_int_root_n :: proc(dest, src: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Fast path for n == 2
+	*/
+	if n == 2 { return #force_inline internal_sqrt(dest, src); }
+
+	if n < 0 || n > int(_DIGIT_MAX) { return .Invalid_Argument; }
+
+	if n & 1 == 0 && #force_inline internal_is_negative(src) { return .Invalid_Argument; }
+
+	/*
+		Set up temporaries.
+	*/
+	t1, t2, t3, a := &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(t1, t2, t3);
+
+	/*
+		If `src` is negative fudge the sign but keep track.
+	*/
+	a.sign  = .Zero_or_Positive;
+	a.used  = src.used;
+	a.digit = src.digit;
+
+	/*
+		If "n" is larger than INT_MAX it is also larger than
+		log_2(src) because the bit-length of the "src" is measured
+		with an int and hence the root is always < 2 (two).
+	*/
+	if n > max(int) / 2 {
+		err = set(dest, 1);
+		dest.sign = a.sign;
+		return err;
+	}
+
+	/*
+		Compute seed: 2^(log_2(src)/n + 2)
+	*/
+	ilog2 := internal_count_bits(src);
+
+	/*
+		"src" is smaller than max(int), we can cast safely.
+	*/
+	if ilog2 < n {
+		err = internal_one(dest);
+		dest.sign = a.sign;
+		return err;
+	}
+
+	ilog2 /= n;
+	if ilog2 == 0 {
+		err = internal_one(dest);
+		dest.sign = a.sign;
+		return err;
+	}
+
+	/*
+		Start value must be larger than root.
+	*/
+	ilog2 += 2;
+	if err = internal_int_power_of_two(t2, ilog2); err != nil { return err; }
+
+	c: int;
+	iterations := 0;
+	for {
+		/* t1 = t2 */
+		if err = internal_copy(t1, t2); err != nil { return err; }
+
+		/* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
+
+		/* t3 = t1**(b-1) */
+		if err = internal_pow(t3, t1, n-1); err != nil { return err; }
+
+		/* numerator */
+		/* t2 = t1**b */
+		if err = internal_mul(t2, t1, t3); err != nil { return err; }
+
+		/* t2 = t1**b - a */
+		if err = internal_sub(t2, t2, a); err != nil { return err; }
+
+		/* denominator */
+		/* t3 = t1**(b-1) * b  */
+		if err = internal_mul(t3, t3, DIGIT(n)); err != nil { return err; }
+
+		/* t3 = (t1**b - a)/(b * t1**(b-1)) */
+		if err = internal_div(t3, t2, t3); err != nil { return err; }
+		if err = internal_sub(t2, t1, t3); err != nil { return err; }
+
+		/*
+			 Number of rounds is at most log_2(root). If it is more it
+			 got stuck, so break out of the loop and do the rest manually.
+		*/
+		if ilog2 -= 1;    ilog2 == 0 { break; }
+		if internal_cmp(t1, t2) == 0 { break; }
+
+		iterations += 1;
+		if iterations == MAX_ITERATIONS_ROOT_N {
+			return .Max_Iterations_Reached;
+		}
+	}
+
+	/*						Result can be off by a few so check.					*/
+	/* Loop beneath can overshoot by one if found root is smaller than actual root. */
+
+	iterations = 0;
+	for {
+		if err = internal_pow(t2, t1, n); err != nil { return err; }
+
+		c = internal_cmp(t2, a);
+		if c == 0 {
+			swap(dest, t1);
+			return nil;
+		} else if c == -1 {
+			if err = internal_add(t1, t1, DIGIT(1)); err != nil { return err; }
+		} else {
+			break;
+		}
+
+		iterations += 1;
+		if iterations == MAX_ITERATIONS_ROOT_N {
+			return .Max_Iterations_Reached;
+		}
+	}
+
+	iterations = 0;
+	/*
+		Correct overshoot from above or from recurrence.
+		*/
+	for {
+		if err = internal_pow(t2, t1, n); err != nil { return err; }
+	
+		if internal_cmp(t2, a) != 1 { break; }
+		
+		if err = internal_sub(t1, t1, DIGIT(1)); err != nil { return err; }
+
+		iterations += 1;
+		if iterations == MAX_ITERATIONS_ROOT_N {
+			return .Max_Iterations_Reached;
+		}
+	}
+
+	/*
+		Set the result.
+	*/
+	internal_swap(dest, t1);
+
+	/*
+		Set the sign of the result.
+	*/
+	dest.sign = src.sign;
+
+	return err;
+}
+internal_root_n :: proc { internal_int_root_n, };
+
+/*
+	Other internal helpers
+*/
+
+/*
+	Deallocates the backing memory of one or more `Int`s.
+	Asssumes none of the `integers` to be a `nil`.
+*/
+internal_int_destroy :: proc(integers: ..^Int) {
+	integers := integers;
+
+	for a in &integers {
+		raw := transmute(mem.Raw_Dynamic_Array)a.digit;
+		if raw.cap > 0 {
+			mem.zero_slice(a.digit[:]);
+			free(&a.digit[0]);
+		}
+		a = &Int{};
+	}
+}
+internal_destroy :: proc{ internal_int_destroy, };
+
+/*
+	Helpers to set an `Int` to a specific value.
+*/
+internal_int_set_from_integer :: proc(dest: ^Int, src: $T, minimize := false, allocator := context.allocator) -> (err: Error)
+	where intrinsics.type_is_integer(T) {
+	context.allocator = allocator;
+
+	src := src;
+
+	if err = internal_error_if_immutable(dest); err != nil { return err; }
+	/*
+		Most internal procs asssume an Int to have already been initialize,
+		but as this is one of the procs that initializes, we have to check the following.
+	*/
+	if err = internal_clear_if_uninitialized_single(dest); err != nil { return err; }
+
+	dest.flags = {}; // We're not -Inf, Inf, NaN or Immutable.
+
+	dest.used  = 0;
+	dest.sign = .Zero_or_Positive if src >= 0 else .Negative;
+	src = internal_abs(src);
+
+	#no_bounds_check for src != 0 {
+		dest.digit[dest.used] = DIGIT(src) & _MASK;
+		dest.used += 1;
+		src >>= _DIGIT_BITS;
+	}
+	internal_zero_unused(dest);
+	return nil;
+}
+
+internal_set :: proc { internal_int_set_from_integer, internal_int_copy };
+
+internal_copy_digits :: #force_inline proc(dest, src: ^Int, digits: int) -> (err: Error) {
+	if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; }
+
+	/*
+		If dest == src, do nothing
+	*/
+	if (dest == src) { return nil; }
+
+	#force_inline mem.copy_non_overlapping(&dest.digit[0], &src.digit[0], size_of(DIGIT) * digits);
+	return nil;
+}
+
+/*
+	Copy one `Int` to another.
+*/
+internal_int_copy :: proc(dest, src: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		If dest == src, do nothing
+	*/
+	if (dest == src) { return nil; }
+
+	if err = internal_error_if_immutable(dest); err != nil { return err; }
+
+	/*
+		Grow `dest` to fit `src`.
+		If `dest` is not yet initialized, it will be using `allocator`.
+	*/
+	needed := src.used if minimize else max(src.used, _DEFAULT_DIGIT_COUNT);
+
+	if err = internal_grow(dest, needed, minimize); err != nil { return err; }
+
+	/*
+		Copy everything over and zero high digits.
+	*/
+	internal_copy_digits(dest, src, src.used);
+
+	dest.used  = src.used;
+	dest.sign  = src.sign;
+	dest.flags = src.flags &~ {.Immutable};
+
+	internal_zero_unused(dest);
+	return nil;
+}
+internal_copy :: proc { internal_int_copy, };
+
+/*
+	In normal code, you can also write `a, b = b, a`.
+	However, that only swaps within the current scope.
+	This helper swaps completely.
+*/
+internal_int_swap :: #force_inline proc(a, b: ^Int) {
+	a := a; b := b;
+
+	a.used,  b.used  = b.used,  a.used;
+	a.sign,  b.sign  = b.sign,  a.sign;
+	a.digit, b.digit = b.digit, a.digit;
+}
+internal_swap :: proc { internal_int_swap, };
+
+/*
+	Set `dest` to |`src`|.
+*/
+internal_int_abs :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		If `dest == src`, just fix `dest`'s sign.
+	*/
+	if (dest == src) {
+		dest.sign = .Zero_or_Positive;
+		return nil;
+	}
+
+	/*
+		Copy `src` to `dest`
+	*/
+	if err = internal_copy(dest, src); err != nil {
+		return err;
+	}
+
+	/*
+		Fix sign.
+	*/
+	dest.sign = .Zero_or_Positive;
+	return nil;
+}
+
+internal_platform_abs :: proc(n: $T) -> T where intrinsics.type_is_integer(T) {
+	return n if n >= 0 else -n;
+}
+internal_abs :: proc{ internal_int_abs, internal_platform_abs, };
+
+/*
+	Set `dest` to `-src`.
+*/
+internal_int_neg :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		If `dest == src`, just fix `dest`'s sign.
+	*/
+	sign := Sign.Negative;
+	if #force_inline internal_is_zero(src) || #force_inline internal_is_negative(src) {
+		sign = .Zero_or_Positive;
+	}
+	if dest == src {
+		dest.sign = sign;
+		return nil;
+	}
+	/*
+		Copy `src` to `dest`
+	*/
+	if err = internal_copy(dest, src); err != nil { return err; }
+
+	/*
+		Fix sign.
+	*/
+	dest.sign = sign;
+	return nil;
+}
+internal_neg :: proc { internal_int_neg, };
+
+
+/*
+	Helpers to extract values from the `Int`.
+*/
+internal_int_bitfield_extract_single :: proc(a: ^Int, offset: int) -> (bit: _WORD, err: Error) {
+	return #force_inline int_bitfield_extract(a, offset, 1);
+}
+
+internal_int_bitfield_extract :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) #no_bounds_check {
+	/*
+		Early out for single bit.
+	*/
+	if count == 1 {
+		limb := offset / _DIGIT_BITS;
+		if limb < 0 || limb >= a.used  { return 0, .Invalid_Argument; }
+		i := _WORD(1 << _WORD((offset % _DIGIT_BITS)));
+		return 1 if ((_WORD(a.digit[limb]) & i) != 0) else 0, nil;
+	}
+
+	if count > _WORD_BITS || count < 1 { return 0, .Invalid_Argument; }
+
+	/*
+		There are 3 possible cases.
+		-	[offset:][:count] covers 1 DIGIT,
+				e.g. offset:  0, count:  60 = bits 0..59
+		-	[offset:][:count] covers 2 DIGITS,
+				e.g. offset:  5, count:  60 = bits 5..59, 0..4
+				e.g. offset:  0, count: 120 = bits 0..59, 60..119
+		-	[offset:][:count] covers 3 DIGITS,
+				e.g. offset: 40, count: 100 = bits 40..59, 0..59, 0..19
+				e.g. offset: 40, count: 120 = bits 40..59, 0..59, 0..39
+	*/
+
+	limb        := offset / _DIGIT_BITS;
+	bits_left   := count;
+	bits_offset := offset % _DIGIT_BITS;
+
+	num_bits    := min(bits_left, _DIGIT_BITS - bits_offset);
+
+	shift       := offset % _DIGIT_BITS;
+	mask        := (_WORD(1) << uint(num_bits)) - 1;
+	res          = (_WORD(a.digit[limb]) >> uint(shift)) & mask;
+
+	bits_left -= num_bits;
+	if bits_left == 0 { return res, nil; }
+
+	res_shift := num_bits;
+	num_bits   = min(bits_left, _DIGIT_BITS);
+	mask       = (1 << uint(num_bits)) - 1;
+
+	res |= (_WORD(a.digit[limb + 1]) & mask) << uint(res_shift);
+
+	bits_left -= num_bits;
+	if bits_left == 0 { return res, nil; }
+
+	mask     = (1 << uint(bits_left)) - 1;
+	res_shift += _DIGIT_BITS;
+
+	res |= (_WORD(a.digit[limb + 2]) & mask) << uint(res_shift);
+
+	return res, nil;
+}
+
+/*
+	Resize backing store.
+	We don't need to pass the allocator, because the storage itself stores it.
+
+	Assumes `a` not to be `nil`, and to have already been initialized.
+*/
+internal_int_shrink :: proc(a: ^Int) -> (err: Error) {
+	needed := max(_MIN_DIGIT_COUNT, a.used);
+
+	if a.used != needed { return internal_grow(a, needed, true); }
+	return nil;
+}
+internal_shrink :: proc { internal_int_shrink, };
+
+internal_int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) {
+	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
+
+	/*
+		We need at least _MIN_DIGIT_COUNT or a.used digits, whichever is bigger.
+		The caller is asking for `digits`. Let's be accomodating.
+	*/
+	needed := max(_MIN_DIGIT_COUNT, a.used, digits);
+	if !allow_shrink {
+		needed = max(needed, raw.cap);
+	}
+
+	/*
+		If not yet iniialized, initialize the `digit` backing with the allocator we were passed.
+	*/
+	if raw.cap == 0 {
+		a.digit = mem.make_dynamic_array_len_cap([dynamic]DIGIT, needed, needed, allocator);
+	} else if raw.cap != needed {
+		/*
+			`[dynamic]DIGIT` already knows what allocator was used for it, so resize will do the right thing.
+		*/
+		resize(&a.digit, needed);
+	}
+	/*
+		Let's see if the allocation/resize worked as expected.
+	*/
+	if len(a.digit) != needed {
+		return .Out_Of_Memory;
+	}
+	return nil;
+}
+internal_grow :: proc { internal_int_grow, };
+
+/*
+	Clear `Int` and resize it to the default size.
+	Assumes `a` not to be `nil`.
+*/
+internal_int_clear :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
+	if raw.cap != 0 {
+		mem.zero_slice(a.digit[:a.used]);
+	}
+	a.sign = .Zero_or_Positive;
+	a.used = 0;
+
+	return #force_inline internal_grow(a, a.used, minimize, allocator);
+}
+internal_clear :: proc { internal_int_clear, };
+internal_zero  :: internal_clear;
+
+/*
+	Set the `Int` to 1 and optionally shrink it to the minimum backing size.
+*/
+internal_int_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	return internal_copy(a, INT_ONE, minimize, allocator);
+}
+internal_one :: proc { internal_int_one, };
+
+/*
+	Set the `Int` to -1 and optionally shrink it to the minimum backing size.
+*/
+internal_int_minus_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	return internal_copy(a, INT_MINUS_ONE, minimize, allocator);
+}
+internal_minus_one :: proc { internal_int_minus_one, };
+
+/*
+	Set the `Int` to Inf and optionally shrink it to the minimum backing size.
+*/
+internal_int_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	return internal_copy(a, INT_INF, minimize, allocator);
+}
+internal_inf :: proc { internal_int_inf, };
+
+/*
+	Set the `Int` to -Inf and optionally shrink it to the minimum backing size.
+*/
+internal_int_minus_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	return internal_copy(a, INT_MINUS_INF, minimize, allocator);
+}
+internal_minus_inf :: proc { internal_int_inf, };
+
+/*
+	Set the `Int` to NaN and optionally shrink it to the minimum backing size.
+*/
+internal_int_nan :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
+	return internal_copy(a, INT_NAN, minimize, allocator);
+}
+internal_nan :: proc { internal_int_nan, };
+
+internal_int_power_of_two :: proc(a: ^Int, power: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if power < 0 || power > _MAX_BIT_COUNT { return .Invalid_Argument; }
+
+	/*
+		Grow to accomodate the single bit.
+	*/
+	a.used = (power / _DIGIT_BITS) + 1;
+	if err = internal_grow(a, a.used); err != nil { return err; }
+	/*
+		Zero the entirety.
+	*/
+	mem.zero_slice(a.digit[:]);
+
+	/*
+		Set the bit.
+	*/
+	a.digit[power / _DIGIT_BITS] = 1 << uint((power % _DIGIT_BITS));
+	return nil;
+}
+
+internal_int_get_u128 :: proc(a: ^Int) -> (res: u128, err: Error) {
+	return internal_int_get(a, u128);
+}
+internal_get_u128 :: proc { internal_int_get_u128, };
+
+internal_int_get_i128 :: proc(a: ^Int) -> (res: i128, err: Error) {
+	return internal_int_get(a, i128);
+}
+internal_get_i128 :: proc { internal_int_get_i128, };
+
+internal_int_get_u64 :: proc(a: ^Int) -> (res: u64, err: Error) {
+	return internal_int_get(a, u64);
+}
+internal_get_u64 :: proc { internal_int_get_u64, };
+
+internal_int_get_i64 :: proc(a: ^Int) -> (res: i64, err: Error) {
+	return internal_int_get(a, i64);
+}
+internal_get_i64 :: proc { internal_int_get_i64, };
+
+internal_int_get_u32 :: proc(a: ^Int) -> (res: u32, err: Error) {
+	return internal_int_get(a, u32);
+}
+internal_get_u32 :: proc { internal_int_get_u32, };
+
+internal_int_get_i32 :: proc(a: ^Int) -> (res: i32, err: Error) {
+	return internal_int_get(a, i32);
+}
+internal_get_i32 :: proc { internal_int_get_i32, };
+
+/*
+	TODO: Think about using `count_bits` to check if the value could be returned completely,
+	and maybe return max(T), .Integer_Overflow if not?
+*/
+internal_int_get :: proc(a: ^Int, $T: typeid) -> (res: T, err: Error) where intrinsics.type_is_integer(T) {
+	size_in_bits := int(size_of(T) * 8);
+	i := int((size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS);
+	i  = min(int(a.used), i);
+
+	#no_bounds_check for ; i >= 0; i -= 1 {
+		res <<= uint(0) if size_in_bits <= _DIGIT_BITS else _DIGIT_BITS;
+		res |= T(a.digit[i]);
+		if size_in_bits <= _DIGIT_BITS {
+			break;
+		};
+	}
+
+	when !intrinsics.type_is_unsigned(T) {
+		/*
+			Mask off sign bit.
+		*/
+		res ~= 1 << uint(size_in_bits - 1);
+		/*
+			Set the sign.
+		*/
+		if a.sign == .Negative { res = -res; }
+	}
+	return;
+}
+internal_get :: proc { internal_int_get, };
+
+internal_int_get_float :: proc(a: ^Int) -> (res: f64, err: Error) {
+	l   := min(a.used, 17); // log2(max(f64)) is approximately 1020, or 17 legs.
+	fac := f64(1 << _DIGIT_BITS);
+	d   := 0.0;
+
+	#no_bounds_check for i := l; i >= 0; i -= 1 {
+		d = (d * fac) + f64(a.digit[i]);
+	}
+
+	res = -d if a.sign == .Negative else d;
+	return;
+}
+
+/*
+	The `and`, `or` and `xor` binops differ in two lines only.
+	We could handle those with a switch, but that adds overhead.
+
+	TODO: Implement versions that take a DIGIT immediate.
+*/
+
+/*
+	2's complement `and`, returns `dest = a & b;`
+*/
+internal_int_and :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	used := max(a.used, b.used) + 1;
+	/*
+		Grow the destination to accomodate the result.
+	*/
+	if err = internal_grow(dest, used); err != nil { return err; }
+
+	neg_a := #force_inline internal_is_negative(a);
+	neg_b := #force_inline internal_is_negative(b);
+	neg   := neg_a && neg_b;
+
+	ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1);
+
+	#no_bounds_check for i := 0; i < used; i += 1 {
+		x, y: DIGIT;
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_a {
+			ac += _MASK if i >= a.used else (~a.digit[i] & _MASK);
+			x = ac & _MASK;
+			ac >>= _DIGIT_BITS;
+		} else {
+			x = 0 if i >= a.used else a.digit[i];
+		}
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_b {
+			bc += _MASK if i >= b.used else (~b.digit[i] & _MASK);
+			y = bc & _MASK;
+			bc >>= _DIGIT_BITS;
+		} else {
+			y = 0 if i >= b.used else b.digit[i];
+		}
+
+		dest.digit[i] = x & y;
+
+		/*
+			Convert to to sign-magnitude if negative.
+		*/
+		if neg {
+			cc += ~dest.digit[i] & _MASK;
+			dest.digit[i] = cc & _MASK;
+			cc >>= _DIGIT_BITS;
+		}
+	}
+
+	dest.used = used;
+	dest.sign = .Negative if neg else .Zero_or_Positive;
+	return internal_clamp(dest);
+}
+internal_and :: proc { internal_int_and, };
+
+/*
+	2's complement `or`, returns `dest = a | b;`
+*/
+internal_int_or :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	used := max(a.used, b.used) + 1;
+	/*
+		Grow the destination to accomodate the result.
+	*/
+	if err = internal_grow(dest, used); err != nil { return err; }
+
+	neg_a := #force_inline internal_is_negative(a);
+	neg_b := #force_inline internal_is_negative(b);
+	neg   := neg_a || neg_b;
+
+	ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1);
+
+	#no_bounds_check for i := 0; i < used; i += 1 {
+		x, y: DIGIT;
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_a {
+			ac += _MASK if i >= a.used else (~a.digit[i] & _MASK);
+			x = ac & _MASK;
+			ac >>= _DIGIT_BITS;
+		} else {
+			x = 0 if i >= a.used else a.digit[i];
+		}
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_b {
+			bc += _MASK if i >= b.used else (~b.digit[i] & _MASK);
+			y = bc & _MASK;
+			bc >>= _DIGIT_BITS;
+		} else {
+			y = 0 if i >= b.used else b.digit[i];
+		}
+
+		dest.digit[i] = x | y;
+
+		/*
+			Convert to to sign-magnitude if negative.
+		*/
+		if neg {
+			cc += ~dest.digit[i] & _MASK;
+			dest.digit[i] = cc & _MASK;
+			cc >>= _DIGIT_BITS;
+		}
+	}
+
+	dest.used = used;
+	dest.sign = .Negative if neg else .Zero_or_Positive;
+	return internal_clamp(dest);
+}
+internal_or :: proc { internal_int_or, };
+
+/*
+	2's complement `xor`, returns `dest = a ~ b;`
+*/
+internal_int_xor :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	used := max(a.used, b.used) + 1;
+	/*
+		Grow the destination to accomodate the result.
+	*/
+	if err = internal_grow(dest, used); err != nil { return err; }
+
+	neg_a := #force_inline internal_is_negative(a);
+	neg_b := #force_inline internal_is_negative(b);
+	neg   := neg_a != neg_b;
+
+	ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1);
+
+	#no_bounds_check for i := 0; i < used; i += 1 {
+		x, y: DIGIT;
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_a {
+			ac += _MASK if i >= a.used else (~a.digit[i] & _MASK);
+			x = ac & _MASK;
+			ac >>= _DIGIT_BITS;
+		} else {
+			x = 0 if i >= a.used else a.digit[i];
+		}
+
+		/*
+			Convert to 2's complement if negative.
+		*/
+		if neg_b {
+			bc += _MASK if i >= b.used else (~b.digit[i] & _MASK);
+			y = bc & _MASK;
+			bc >>= _DIGIT_BITS;
+		} else {
+			y = 0 if i >= b.used else b.digit[i];
+		}
+
+		dest.digit[i] = x ~ y;
+
+		/*
+			Convert to to sign-magnitude if negative.
+		*/
+		if neg {
+			cc += ~dest.digit[i] & _MASK;
+			dest.digit[i] = cc & _MASK;
+			cc >>= _DIGIT_BITS;
+		}
+	}
+
+	dest.used = used;
+	dest.sign = .Negative if neg else .Zero_or_Positive;
+	return internal_clamp(dest);
+}
+internal_xor :: proc { internal_int_xor, };
+
+/*
+	dest = ~src
+*/
+internal_int_complement :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Temporarily fix sign.
+	*/
+	old_sign := src.sign;
+
+	neg := #force_inline internal_is_zero(src) || #force_inline internal_is_positive(src);
+
+	src.sign = .Negative if neg else .Zero_or_Positive;
+
+	err = #force_inline internal_sub(dest, src, 1);
+	/*
+		Restore sign.
+	*/
+	src.sign = old_sign;
+
+	return err;
+}
+internal_complement :: proc { internal_int_complement, };
+
+/*
+	quotient, remainder := numerator >> bits;
+	`remainder` is allowed to be passed a `nil`, in which case `mod` won't be computed.
+*/
+internal_int_shrmod :: proc(quotient, remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	bits := bits;
+	if bits < 0 { return .Invalid_Argument; }
+
+	if err = internal_copy(quotient, numerator); err != nil { return err; }
+
+	/*
+		Shift right by a certain bit count (store quotient and optional remainder.)
+	   `numerator` should not be used after this.
+	*/
+	if remainder != nil {
+		if err = internal_int_mod_bits(remainder, numerator, bits); err != nil { return err; }
+	}
+
+	/*
+		Shift by as many digits in the bit count.
+	*/
+	if bits >= _DIGIT_BITS {
+		if err = internal_shr_digit(quotient, bits / _DIGIT_BITS); err != nil { return err; }
+	}
+
+	/*
+		Shift any bit count < _DIGIT_BITS.
+	*/
+	bits %= _DIGIT_BITS;
+	if bits != 0 {
+		mask  := DIGIT(1 << uint(bits)) - 1;
+		shift := DIGIT(_DIGIT_BITS - bits);
+		carry := DIGIT(0);
+
+		#no_bounds_check for x := quotient.used - 1; x >= 0; x -= 1 {
+			/*
+				Get the lower bits of this word in a temp.
+			*/
+			fwd_carry := quotient.digit[x] & mask;
+
+			/*
+				Shift the current word and mix in the carry bits from the previous word.
+			*/
+	        quotient.digit[x] = (quotient.digit[x] >> uint(bits)) | (carry << shift);
+
+	        /*
+	        	Update carry from forward carry.
+	        */
+	        carry = fwd_carry;
+		}
+
+	}
+	return internal_clamp(numerator);
+}
+internal_shrmod :: proc { internal_int_shrmod, };
+
+internal_int_shr :: proc(dest, source: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline internal_shrmod(dest, nil, source, bits, allocator);
+}
+internal_shr :: proc { internal_int_shr, };
+
+/*
+	Shift right by `digits` * _DIGIT_BITS bits.
+*/
+internal_int_shr_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if digits <= 0 { return nil; }
+
+	/*
+		If digits > used simply zero and return.
+	*/
+	if digits > quotient.used { return internal_zero(quotient); }
+
+   	/*
+		Much like `int_shl_digit`, this is implemented using a sliding window,
+		except the window goes the other way around.
+
+		b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
+		            /\                   |      ---->
+		             \-------------------/      ---->
+    */
+
+	#no_bounds_check for x := 0; x < (quotient.used - digits); x += 1 {
+    	quotient.digit[x] = quotient.digit[x + digits];
+	}
+	quotient.used -= digits;
+	internal_zero_unused(quotient);
+	return internal_clamp(quotient);
+}
+internal_shr_digit :: proc { internal_int_shr_digit, };
+
+/*
+	Shift right by a certain bit count with sign extension.
+*/
+internal_int_shr_signed :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if src.sign == .Zero_or_Positive {
+		return internal_shr(dest, src, bits);
+	}
+	if err = internal_int_add_digit(dest, src, DIGIT(1)); err != nil { return err; }
+
+	if err = internal_shr(dest, dest, bits);              err != nil { return err; }
+	return internal_sub(dest, src, DIGIT(1));
+}
+
+internal_shr_signed :: proc { internal_int_shr_signed, };
+
+/*
+	Shift left by a certain bit count.
+*/
+internal_int_shl :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	bits := bits;
+
+	if bits < 0 { return .Invalid_Argument; }
+
+	if err = internal_copy(dest, src); err != nil { return err; }
+
+	/*
+		Grow `dest` to accommodate the additional bits.
+	*/
+	digits_needed := dest.used + (bits / _DIGIT_BITS) + 1;
+	if err = internal_grow(dest, digits_needed); err != nil { return err; }
+	dest.used = digits_needed;
+	/*
+		Shift by as many digits in the bit count as we have.
+	*/
+	if bits >= _DIGIT_BITS {
+		if err = internal_shl_digit(dest, bits / _DIGIT_BITS); err != nil { return err; }
+	}
+
+	/*
+		Shift any remaining bit count < _DIGIT_BITS
+	*/
+	bits %= _DIGIT_BITS;
+	if bits != 0 {
+		mask  := (DIGIT(1) << uint(bits)) - DIGIT(1);
+		shift := DIGIT(_DIGIT_BITS - bits);
+		carry := DIGIT(0);
+
+		#no_bounds_check for x:= 0; x < dest.used; x+= 1 {
+			fwd_carry := (dest.digit[x] >> shift) & mask;
+			dest.digit[x] = (dest.digit[x] << uint(bits) | carry) & _MASK;
+			carry = fwd_carry;
+		}
+
+		/*
+			Use final carry.
+		*/
+		if carry != 0 {
+			dest.digit[dest.used] = carry;
+			dest.used += 1;
+		}
+	}
+	return internal_clamp(dest);
+}
+internal_shl :: proc { internal_int_shl, };
+
+
+/*
+	Shift left by `digits` * _DIGIT_BITS bits.
+*/
+internal_int_shl_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if digits <= 0 { return nil; }
+
+	/*
+		No need to shift a zero.
+	*/
+	if #force_inline internal_is_zero(quotient) { return {}; }
+
+	/*
+		Resize `quotient` to accomodate extra digits.
+	*/
+	if err = #force_inline internal_grow(quotient, quotient.used + digits); err != nil { return err; }
+
+	/*
+		Increment the used by the shift amount then copy upwards.
+	*/
+
+	/*
+		Much like `int_shr_digit`, this is implemented using a sliding window,
+		except the window goes the other way around.
+    */
+    #no_bounds_check for x := quotient.used; x > 0; x -= 1 {
+    	quotient.digit[x+digits-1] = quotient.digit[x-1];
+    }
+
+   	quotient.used += digits;
+    mem.zero_slice(quotient.digit[:digits]);
+    return nil;
+}
+internal_shl_digit :: proc { internal_int_shl_digit, };
+
+/*
+	Count bits in an `Int`.
+	Assumes `a` not to be `nil` and to have been initialized.
+*/
+internal_count_bits :: proc(a: ^Int) -> (count: int) {
+	/*
+		Fast path for zero.
+	*/
+	if #force_inline internal_is_zero(a) { return {}; }
+	/*
+		Get the number of DIGITs and use it.
+	*/
+	count  = (a.used - 1) * _DIGIT_BITS;
+	/*
+		Take the last DIGIT and count the bits in it.
+	*/
+	clz   := int(intrinsics.count_leading_zeros(a.digit[a.used - 1]));
+	count += (_DIGIT_TYPE_BITS - clz);
+	return;
+}
+
+/*
+	Returns the number of trailing zeroes before the first one.
+	Differs from regular `ctz` in that 0 returns 0.
+
+	Assumes `a` not to be `nil` and have been initialized.
+*/
+internal_int_count_lsb :: proc(a: ^Int) -> (count: int, err: Error) {
+	/*
+		Easy out.
+	*/
+	if #force_inline internal_is_zero(a) { return {}, nil; }
+
+	/*
+		Scan lower digits until non-zero.
+	*/
+	x: int;
+	#no_bounds_check for x = 0; x < a.used && a.digit[x] == 0; x += 1 {}
+
+	q := a.digit[x];
+	x *= _DIGIT_BITS;
+	x += internal_count_lsb(q);
+	return x, nil;
+}
+
+internal_platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
+	where intrinsics.type_is_integer(T) && intrinsics.type_is_unsigned(T) {
+	return int(intrinsics.count_trailing_zeros(a)) if a > 0 else 0;
+}
+
+internal_count_lsb :: proc { internal_int_count_lsb, internal_platform_count_lsb, };
+
+internal_int_random_digit :: proc(r: ^rnd.Rand = nil) -> (res: DIGIT) {
+	when _DIGIT_BITS == 60 { // DIGIT = u64
+		return DIGIT(rnd.uint64(r)) & _MASK;
+	} else when _DIGIT_BITS == 28 { // DIGIT = u32
+		return DIGIT(rnd.uint32(r)) & _MASK;
+	} else {
+		panic("Unsupported DIGIT size.");
+	}
+
+	return 0; // We shouldn't get here.
+}
+
+internal_int_rand :: proc(dest: ^Int, bits: int, r: ^rnd.Rand = nil, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	bits := bits;
+
+	if bits <= 0 { return .Invalid_Argument; }
+
+	digits := bits / _DIGIT_BITS;
+	bits   %= _DIGIT_BITS;
+
+	if bits > 0 {
+		digits += 1;
+	}
+
+	if err = #force_inline internal_grow(dest, digits); err != nil { return err; }
+
+	for i := 0; i < digits; i += 1 {
+		dest.digit[i] = int_random_digit(r) & _MASK;
+	}
+	if bits > 0 {
+		dest.digit[digits - 1] &= ((1 << uint(bits)) - 1);
+	}
+	dest.used = digits;
+	return nil;
+}
+internal_rand :: proc { internal_int_rand, };
+
+/*
+	Internal helpers.
+*/
+internal_assert_initialized :: proc(a: ^Int, loc := #caller_location) {
+	assert(internal_is_initialized(a), "`Int` was not properly initialized.", loc);
+}
+
+internal_clear_if_uninitialized_single :: proc(arg: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if ! #force_inline internal_is_initialized(arg) {
+		return #force_inline internal_grow(arg, _DEFAULT_DIGIT_COUNT);
+	}
+	return err;
+}
+
+internal_clear_if_uninitialized_multi :: proc(args: ..^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	for i in args {
+		if ! #force_inline internal_is_initialized(i) {
+			e := #force_inline internal_grow(i, _DEFAULT_DIGIT_COUNT);
+			if e != nil { err = e; }
+		}
+	}
+	return err;
+}
+internal_clear_if_uninitialized :: proc {internal_clear_if_uninitialized_single, internal_clear_if_uninitialized_multi, };
+
+internal_error_if_immutable_single :: proc(arg: ^Int) -> (err: Error) {
+	if arg != nil && .Immutable in arg.flags { return .Assignment_To_Immutable; }
+	return nil;
+}
+
+internal_error_if_immutable_multi :: proc(args: ..^Int) -> (err: Error) {
+	for i in args {
+		if i != nil && .Immutable in i.flags { return .Assignment_To_Immutable; }
+	}
+	return nil;
+}
+internal_error_if_immutable :: proc {internal_error_if_immutable_single, internal_error_if_immutable_multi, };
+
+/*
+	Allocates several `Int`s at once.
+*/
+internal_int_init_multi :: proc(integers: ..^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	integers := integers;
+	for a in &integers {
+		if err = internal_clear(a); err != nil { return err; }
+	}
+	return nil;
+}
+
+internal_init_multi :: proc { internal_int_init_multi, };
+
+/*
+	Trim unused digits.
+
+	This is used to ensure that leading zero digits are trimmed and the leading "used" digit will be non-zero.
+	Typically very fast.  Also fixes the sign if there are no more leading digits.
+*/
+internal_clamp :: proc(a: ^Int) -> (err: Error) {
+	for a.used > 0 && a.digit[a.used - 1] == 0 { a.used -= 1; }
+
+	if #force_inline internal_is_zero(a) { a.sign = .Zero_or_Positive; }
+
+	return nil;
+}
+
+
+internal_int_zero_unused :: #force_inline proc(dest: ^Int, old_used := -1) {
+	/*
+		If we don't pass the number of previously used DIGITs, we zero all remaining ones.
+	*/
+	zero_count: int;
+	if old_used == -1 {
+		zero_count = len(dest.digit) - dest.used;
+	} else {
+		zero_count = old_used - dest.used;
+	}
+
+	/*
+		Zero remainder.
+	*/
+	if zero_count > 0 && dest.used < len(dest.digit) {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+}
+internal_zero_unused :: proc { internal_int_zero_unused, };
+
+/*
+	==========================    End of low-level routines   ==========================
+*/

+ 144 - 0
core/math/big/logical.odin

@@ -0,0 +1,144 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file contains logical operations like `and`, `or` and `xor`.
+*/
+
+/*
+	The `and`, `or` and `xor` binops differ in two lines only.
+	We could handle those with a switch, but that adds overhead.
+
+	TODO: Implement versions that take a DIGIT immediate.
+*/
+
+/*
+	2's complement `and`, returns `dest = a & b;`
+*/
+int_and :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil { return err; }
+	return #force_inline internal_int_and(dest, a, b);
+}
+and :: proc { int_and, };
+
+/*
+	2's complement `or`, returns `dest = a | b;`
+*/
+int_or :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil { return err; }
+	return #force_inline internal_int_or(dest, a, b);
+}
+or :: proc { int_or, };
+
+/*
+	2's complement `xor`, returns `dest = a ^ b;`
+*/
+int_xor :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil { return err; }
+	return #force_inline internal_int_xor(dest, a, b);
+}
+xor :: proc { int_xor, };
+
+/*
+	dest = ~src
+*/
+int_complement :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `src` and `dest` are usable.
+	*/
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; }
+	return #force_inline internal_int_complement(dest, src);
+}
+complement :: proc { int_complement, };
+
+/*
+	quotient, remainder := numerator >> bits;
+	`remainder` is allowed to be passed a `nil`, in which case `mod` won't be computed.
+*/
+int_shrmod :: proc(quotient, remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(quotient, numerator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(quotient, numerator);  err != nil { return err; }
+	return #force_inline internal_int_shrmod(quotient, remainder, numerator, bits);
+}
+shrmod :: proc { int_shrmod, };
+
+int_shr :: proc(dest, source: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline shrmod(dest, nil, source, bits, allocator);
+}
+shr :: proc { int_shr, };
+
+/*
+	Shift right by `digits` * _DIGIT_BITS bits.
+*/
+int_shr_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `quotient` is usable.
+	*/
+	assert_if_nil(quotient);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(quotient); err != nil { return err; }
+	return #force_inline internal_int_shr_digit(quotient, digits);
+}
+shr_digit :: proc { int_shr_digit, };
+
+/*
+	Shift right by a certain bit count with sign extension.
+*/
+int_shr_signed :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; }
+	return #force_inline internal_int_shr_signed(dest, src, bits);
+}
+
+shr_signed :: proc { int_shr_signed, };
+
+/*
+	Shift left by a certain bit count.
+*/
+int_shl :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; }
+	return #force_inline internal_int_shl(dest, src, bits);
+}
+shl :: proc { int_shl, };
+
+
+/*
+	Shift left by `digits` * _DIGIT_BITS bits.
+*/
+int_shl_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Check that `quotient` is usable.
+	*/
+	assert_if_nil(quotient);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(quotient); err != nil { return err; }
+	return #force_inline internal_int_shl_digit(quotient, digits);
+}
+shl_digit :: proc { int_shl_digit, };

+ 33 - 0
core/math/big/prime.odin

@@ -0,0 +1,33 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file contains prime finding operations.
+*/
+
+/*
+	Determines if an Integer is divisible by one of the _PRIME_TABLE primes.
+	Returns true if it is, false if not. 
+*/
+int_prime_is_divisible :: proc(a: ^Int, allocator := context.allocator) -> (res: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return {}, err; }
+
+	rem: DIGIT;
+	for prime in _private_prime_table {
+		if rem, err = #force_inline int_mod_digit(a, prime); err != nil { return false, err; }
+		if rem == 0 { return true, nil; }
+	}
+	/*
+		Default to not divisible.
+	*/
+	return false, nil;
+}

+ 1247 - 0
core/math/big/private.odin

@@ -0,0 +1,1247 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	=============================    Private procedures    =============================
+
+	Private procedures used by the above low-level routines follow.
+
+	Don't call these yourself unless you really know what you're doing.
+	They include implementations that are optimimal for certain ranges of input only.
+
+	These aren't exported for the same reasons.
+*/
+
+import "core:intrinsics"
+import "core:mem"
+
+/*
+	Multiplies |a| * |b| and only computes upto digs digits of result.
+	HAC pp. 595, Algorithm 14.12  Modified so you can control how
+	many digits of output are created.
+*/
+_private_int_mul :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Can we use the fast multiplier?
+	*/
+	if digits < _WARRAY && min(a.used, b.used) < _MAX_COMBA {
+		return #force_inline _private_int_mul_comba(dest, a, b, digits);
+	}
+
+	/*
+		Set up temporary output `Int`, which we'll swap for `dest` when done.
+	*/
+
+	t := &Int{};
+
+	if err = internal_grow(t, max(digits, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
+	t.used = digits;
+
+	/*
+		Compute the digits of the product directly.
+	*/
+	pa := a.used;
+	for ix := 0; ix < pa; ix += 1 {
+		/*
+			Limit ourselves to `digits` DIGITs of output.
+		*/
+		pb    := min(b.used, digits - ix);
+		carry := _WORD(0);
+		iy    := 0;
+
+		/*
+			Compute the column of the output and propagate the carry.
+		*/
+		#no_bounds_check for iy = 0; iy < pb; iy += 1 {
+			/*
+				Compute the column as a _WORD.
+			*/
+			column := _WORD(t.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + carry;
+
+			/*
+				The new column is the lower part of the result.
+			*/
+			t.digit[ix + iy] = DIGIT(column & _WORD(_MASK));
+
+			/*
+				Get the carry word from the result.
+			*/
+			carry = column >> _DIGIT_BITS;
+		}
+		/*
+			Set carry if it is placed below digits
+		*/
+		if ix + iy < digits {
+			t.digit[ix + pb] = DIGIT(carry);
+		}
+	}
+
+	internal_swap(dest, t);
+	internal_destroy(t);
+	return internal_clamp(dest);
+}
+
+/*
+	Fast (comba) multiplier
+
+	This is the fast column-array [comba] multiplier.  It is
+	designed to compute the columns of the product first
+	then handle the carries afterwards.  This has the effect
+	of making the nested loops that compute the columns very
+	simple and schedulable on super-scalar processors.
+
+	This has been modified to produce a variable number of
+	digits of output so if say only a half-product is required
+	you don't have to compute the upper half (a feature
+	required for fast Barrett reduction).
+
+	Based on Algorithm 14.12 on pp.595 of HAC.
+*/
+_private_int_mul_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Set up array.
+	*/
+	W: [_WARRAY]DIGIT = ---;
+
+	/*
+		Grow the destination as required.
+	*/
+	if err = internal_grow(dest, digits); err != nil { return err; }
+
+	/*
+		Number of output digits to produce.
+	*/
+	pa := min(digits, a.used + b.used);
+
+	/*
+		Clear the carry
+	*/
+	_W := _WORD(0);
+
+	ix: int;
+	for ix = 0; ix < pa; ix += 1 {
+		tx, ty, iy, iz: int;
+
+		/*
+			Get offsets into the two bignums.
+		*/
+		ty = min(b.used - 1, ix);
+		tx = ix - ty;
+
+		/*
+			This is the number of times the loop will iterate, essentially.
+			while (tx++ < a->used && ty-- >= 0) { ... }
+		*/
+		 
+		iy = min(a.used - tx, ty + 1);
+
+		/*
+			Execute loop.
+		*/
+		#no_bounds_check for iz = 0; iz < iy; iz += 1 {
+			_W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]);
+		}
+
+		/*
+			Store term.
+		*/
+		W[ix] = DIGIT(_W) & _MASK;
+
+		/*
+			Make next carry.
+		*/
+		_W = _W >> _WORD(_DIGIT_BITS);
+	}
+
+	/*
+		Setup dest.
+	*/
+	old_used := dest.used;
+	dest.used = pa;
+
+	/*
+		Now extract the previous digit [below the carry].
+	*/
+	copy_slice(dest.digit[0:], W[:pa]);	
+
+	/*
+		Clear unused digits [that existed in the old copy of dest].
+	*/
+	internal_zero_unused(dest, old_used);
+
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+
+	return internal_clamp(dest);
+}
+
+/*
+	Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16
+	Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized.
+*/
+_private_int_sqr :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	pa := src.used;
+
+	t := &Int{}; ix, iy: int;
+	/*
+		Grow `t` to maximum needed size, or `_DEFAULT_DIGIT_COUNT`, whichever is bigger.
+	*/
+	if err = internal_grow(t, max((2 * pa) + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
+	t.used = (2 * pa) + 1;
+
+	#no_bounds_check for ix = 0; ix < pa; ix += 1 {
+		carry := DIGIT(0);
+		/*
+			First calculate the digit at 2*ix; calculate double precision result.
+		*/
+		r := _WORD(t.digit[ix+ix]) + (_WORD(src.digit[ix]) * _WORD(src.digit[ix]));
+
+		/*
+			Store lower part in result.
+		*/
+		t.digit[ix+ix] = DIGIT(r & _WORD(_MASK));
+		/*
+			Get the carry.
+		*/
+		carry = DIGIT(r >> _DIGIT_BITS);
+
+		#no_bounds_check for iy = ix + 1; iy < pa; iy += 1 {
+			/*
+				First calculate the product.
+			*/
+			r = _WORD(src.digit[ix]) * _WORD(src.digit[iy]);
+
+			/* Now calculate the double precision result. Nóte we use
+			 * addition instead of *2 since it's easier to optimize
+			 */
+			r = _WORD(t.digit[ix+iy]) + r + r + _WORD(carry);
+
+			/*
+				Store lower part.
+			*/
+			t.digit[ix+iy] = DIGIT(r & _WORD(_MASK));
+
+			/*
+				Get carry.
+			*/
+			carry = DIGIT(r >> _DIGIT_BITS);
+		}
+		/*
+			Propagate upwards.
+		*/
+		#no_bounds_check for carry != 0 {
+			r     = _WORD(t.digit[ix+iy]) + _WORD(carry);
+			t.digit[ix+iy] = DIGIT(r & _WORD(_MASK));
+			carry = DIGIT(r >> _WORD(_DIGIT_BITS));
+			iy += 1;
+		}
+	}
+
+	err = internal_clamp(t);
+	internal_swap(dest, t);
+	internal_destroy(t);
+	return err;
+}
+
+/*
+	The jist of squaring...
+	You do like mult except the offset of the tmpx [one that starts closer to zero] can't equal the offset of tmpy.
+	So basically you set up iy like before then you min it with (ty-tx) so that it never happens.
+	You double all those you add in the inner loop. After that loop you do the squares and add them in.
+
+	Assumes `dest` and `src` not to be `nil` and `src` to have been initialized.	
+*/
+_private_int_sqr_comba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	W: [_WARRAY]DIGIT = ---;
+
+	/*
+		Grow the destination as required.
+	*/
+	pa := uint(src.used) + uint(src.used);
+	if err = internal_grow(dest, int(pa)); err != nil { return err; }
+
+	/*
+		Number of output digits to produce.
+	*/
+	W1 := _WORD(0);
+	_W  : _WORD = ---;
+	ix := uint(0);
+
+	#no_bounds_check for ; ix < pa; ix += 1 {
+		/*
+			Clear counter.
+		*/
+		_W = {};
+
+		/*
+			Get offsets into the two bignums.
+		*/
+		ty := min(uint(src.used) - 1, ix);
+		tx := ix - ty;
+
+		/*
+			This is the number of times the loop will iterate,
+			essentially while (tx++ < a->used && ty-- >= 0) { ... }
+		*/
+		iy := min(uint(src.used) - tx, ty + 1);
+
+		/*
+			Now for squaring, tx can never equal ty.
+			We halve the distance since they approach at a rate of 2x,
+			and we have to round because odd cases need to be executed.
+		*/
+		iy = min(iy, ((ty - tx) + 1) >> 1 );
+
+		/*
+			Execute loop.
+		*/
+		#no_bounds_check for iz := uint(0); iz < iy; iz += 1 {
+			_W += _WORD(src.digit[tx + iz]) * _WORD(src.digit[ty - iz]);
+		}
+
+		/*
+			Double the inner product and add carry.
+		*/
+		_W = _W + _W + W1;
+
+		/*
+			Even columns have the square term in them.
+		*/
+		if ix & 1 == 0 {
+			_W += _WORD(src.digit[ix >> 1]) * _WORD(src.digit[ix >> 1]);
+		}
+
+		/*
+			Store it.
+		*/
+		W[ix] = DIGIT(_W & _WORD(_MASK));
+
+		/*
+			Make next carry.
+		*/
+		W1 = _W >> _DIGIT_BITS;
+	}
+
+	/*
+		Setup dest.
+	*/
+	old_used := dest.used;
+	dest.used = src.used + src.used;
+
+	#no_bounds_check for ix = 0; ix < pa; ix += 1 {
+		dest.digit[ix] = W[ix] & _MASK;
+	}
+
+	/*
+		Clear unused digits [that existed in the old copy of dest].
+	*/
+	internal_zero_unused(dest, old_used);
+
+	return internal_clamp(dest);
+}
+
+/*
+	Karatsuba squaring, computes `dest` = `src` * `src` using three half-size squarings.
+ 
+ 	See comments of `_private_int_mul_karatsuba` for details.
+ 	It is essentially the same algorithm but merely tuned to perform recursive squarings.
+*/
+_private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	x0, x1, t1, t2, x0x0, x1x1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(x0, x1, t1, t2, x0x0, x1x1);
+
+	/*
+		Min # of digits, divided by two.
+	*/
+	B := src.used >> 1;
+
+	/*
+		Init temps.
+	*/
+	if err = internal_grow(x0,   B);                  err != nil { return err; }
+	if err = internal_grow(x1,   src.used - B);       err != nil { return err; }
+	if err = internal_grow(t1,   src.used * 2);       err != nil { return err; }
+	if err = internal_grow(t2,   src.used * 2);       err != nil { return err; }
+	if err = internal_grow(x0x0, B * 2       );       err != nil { return err; }
+	if err = internal_grow(x1x1, (src.used - B) * 2); err != nil { return err; }
+
+	/*
+		Now shift the digits.
+	*/
+	x0.used = B;
+	x1.used = src.used - B;
+
+	#force_inline internal_copy_digits(x0, src, x0.used);
+	#force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used);
+	#force_inline internal_clamp(x0);
+
+	/*
+		Now calc the products x0*x0 and x1*x1.
+	*/
+	if err = internal_sqr(x0x0, x0);                  err != nil { return err; }
+	if err = internal_sqr(x1x1, x1);                  err != nil { return err; }
+
+	/*
+		Now calc (x1+x0)^2
+	*/
+	if err = internal_add(t1, x0, x1);                err != nil { return err; }
+	if err = internal_sqr(t1, t1);                    err != nil { return err; }
+
+	/*
+		Add x0y0
+	*/
+	if err = internal_add(t2, x0x0, x1x1);            err != nil { return err; }
+	if err = internal_sub(t1, t1, t2);                err != nil { return err; }
+
+	/*
+		Shift by B.
+	*/
+	if err = internal_shl_digit(t1, B);               err != nil { return err; }
+	if err = internal_shl_digit(x1x1, B * 2);         err != nil { return err; }
+	if err = internal_add(t1, t1, x0x0);              err != nil { return err; }
+	if err = internal_add(dest, t1, x1x1);            err != nil { return err; }
+
+	return #force_inline internal_clamp(dest);
+}
+
+/*
+	Squaring using Toom-Cook 3-way algorithm.
+
+	Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
+	  18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
+*/
+_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
+	defer destroy(S0, a0, a1, a2);
+
+	/*
+		Init temps.
+	*/
+	if err = internal_zero(S0);                     err != nil { return err; }
+
+	/*
+		B
+	*/
+	B := src.used / 3;
+
+	/*
+		a = a2 * x^2 + a1 * x + a0;
+	*/
+	if err = internal_grow(a0, B);                  err != nil { return err; }
+	if err = internal_grow(a1, B);                  err != nil { return err; }
+	if err = internal_grow(a2, src.used - (2 * B)); err != nil { return err; }
+
+	a0.used = B;
+	a1.used = B;
+	a2.used = src.used - 2 * B;
+
+	#force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[    0], size_of(DIGIT) * a0.used);
+	#force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[    B], size_of(DIGIT) * a1.used);
+	#force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used);
+
+	internal_clamp(a0);
+	internal_clamp(a1);
+	internal_clamp(a2);
+
+	/** S0 = a0^2;  */
+	if err = internal_sqr(S0, a0);                  err != nil { return err; }
+
+	/** \\S1 = (a2 + a1 + a0)^2 */
+	/** \\S2 = (a2 - a1 + a0)^2  */
+	/** \\S1 = a0 + a2; */
+	/** a0 = a0 + a2; */
+	if err = internal_add(a0, a0, a2);              err != nil { return err; }
+	/** \\S2 = S1 - a1; */
+	/** b = a0 - a1; */
+	if err = internal_sub(dest, a0, a1);            err != nil { return err; }
+	/** \\S1 = S1 + a1; */
+	/** a0 = a0 + a1; */
+	if err = internal_add(a0, a0, a1);              err != nil { return err; }
+	/** \\S1 = S1^2;  */
+	/** a0 = a0^2; */
+	if err = internal_sqr(a0, a0);                  err != nil { return err; }
+	/** \\S2 = S2^2;  */
+	/** b = b^2; */
+	if err = internal_sqr(dest, dest);              err != nil { return err; }
+	/** \\ S3 = 2 * a1 * a2  */
+	/** \\S3 = a1 * a2;  */
+	/** a1 = a1 * a2; */
+	if err = internal_mul(a1, a1, a2);              err != nil { return err; }
+	/** \\S3 = S3 << 1;  */
+	/** a1 = a1 << 1; */
+	if err = internal_shl(a1, a1, 1);               err != nil { return err; }
+	/** \\S4 = a2^2;  */
+	/** a2 = a2^2; */
+	if err = internal_sqr(a2, a2);                  err != nil { return err; }
+	/** \\ tmp = (S1 + S2)/2  */
+	/** \\tmp = S1 + S2; */
+	/** b = a0 + b; */
+	if err = internal_add(dest, a0, dest);          err != nil { return err; }
+	/** \\tmp = tmp >> 1; */
+	/** b = b >> 1; */
+	if err = internal_shr(dest, dest, 1);           err != nil { return err; }
+	/** \\ S1 = S1 - tmp - S3  */
+	/** \\S1 = S1 - tmp; */
+	/** a0 = a0 - b; */
+	if err = internal_sub(a0, a0, dest);            err != nil { return err; }
+	/** \\S1 = S1 - S3;  */
+	/** a0 = a0 - a1; */
+	if err = internal_sub(a0, a0, a1);              err != nil { return err; }
+	/** \\S2 = tmp - S4 -S0  */
+	/** \\S2 = tmp - S4;  */
+	/** b = b - a2; */
+	if err = internal_sub(dest, dest, a2);          err != nil { return err; }
+	/** \\S2 = S2 - S0;  */
+	/** b = b - S0; */
+	if err = internal_sub(dest, dest, S0);          err != nil { return err; }
+	/** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */
+	/** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */
+	if err = internal_shl_digit(  a2, 4 * B);       err != nil { return err; }
+	if err = internal_shl_digit(  a1, 3 * B);       err != nil { return err; }
+	if err = internal_shl_digit(dest, 2 * B);       err != nil { return err; }
+	if err = internal_shl_digit(  a0, 1 * B);       err != nil { return err; }
+
+	if err = internal_add(a2, a2, a1);              err != nil { return err; }
+	if err = internal_add(dest, dest, a2);          err != nil { return err; }
+	if err = internal_add(dest, dest, a0);          err != nil { return err; }
+	if err = internal_add(dest, dest, S0);          err != nil { return err; }
+	/** a^2 - P  */
+
+	return #force_inline internal_clamp(dest);
+}
+
+/*
+	Divide by three (based on routine from MPI and the GMP manual).
+*/
+_private_int_div_3 :: proc(quotient, numerator: ^Int, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
+	context.allocator = allocator;
+
+	/*
+		b = 2^_DIGIT_BITS / 3
+	*/
+ 	b := _WORD(1) << _WORD(_DIGIT_BITS) / _WORD(3);
+
+	q := &Int{};
+	if err = internal_grow(q, numerator.used); err != nil { return 0, err; }
+	q.used = numerator.used;
+	q.sign = numerator.sign;
+
+	w, t: _WORD;
+	#no_bounds_check for ix := numerator.used; ix >= 0; ix -= 1 {
+		w = (w << _WORD(_DIGIT_BITS)) | _WORD(numerator.digit[ix]);
+		if w >= 3 {
+			/*
+				Multiply w by [1/3].
+			*/
+			t = (w * b) >> _WORD(_DIGIT_BITS);
+
+			/*
+				Now subtract 3 * [w/3] from w, to get the remainder.
+			*/
+			w -= t+t+t;
+
+			/*
+				Fixup the remainder as required since the optimization is not exact.
+			*/
+			for w >= 3 {
+				t += 1;
+				w -= 3;
+			}
+		} else {
+			t = 0;
+		}
+		q.digit[ix] = DIGIT(t);
+	}
+	remainder = DIGIT(w);
+
+	/*
+		[optional] store the quotient.
+	*/
+	if quotient != nil {
+		err = clamp(q);
+ 		internal_swap(q, quotient);
+ 	}
+	internal_destroy(q);
+	return remainder, nil;
+}
+
+/*
+	Signed Integer Division
+
+	c*b + d == a [i.e. a/b, c=quotient, d=remainder], HAC pp.598 Algorithm 14.20
+
+	Note that the description in HAC is horribly incomplete.
+	For example, it doesn't consider the case where digits are removed from 'x' in
+	the inner loop.
+
+	It also doesn't consider the case that y has fewer than three digits, etc.
+	The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases.
+*/
+_private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if err = error_if_immutable(quotient, remainder); err != nil { return err; }
+
+	q, x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(q, x, y, t1, t2);
+
+	if err = internal_grow(q, numerator.used + 2); err != nil { return err; }
+	q.used = numerator.used + 2;
+
+	if err = internal_init_multi(t1, t2);   err != nil { return err; }
+	if err = internal_copy(x, numerator);   err != nil { return err; }
+	if err = internal_copy(y, denominator); err != nil { return err; }
+
+	/*
+		Fix the sign.
+	*/
+	neg   := numerator.sign != denominator.sign;
+	x.sign = .Zero_or_Positive;
+	y.sign = .Zero_or_Positive;
+
+	/*
+		Normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT]
+	*/
+	norm := internal_count_bits(y) % _DIGIT_BITS;
+
+	if norm < _DIGIT_BITS - 1 {
+		norm = (_DIGIT_BITS - 1) - norm;
+		if err = internal_shl(x, x, norm); err != nil { return err; }
+		if err = internal_shl(y, y, norm); err != nil { return err; }
+	} else {
+		norm = 0;
+	}
+
+	/*
+		Note: HAC does 0 based, so if used==5 then it's 0,1,2,3,4, i.e. use 4
+	*/
+	n := x.used - 1;
+	t := y.used - 1;
+
+	/*
+		while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} }
+		y = y*b**{n-t}
+	*/
+
+	if err = internal_shl_digit(y, n - t); err != nil { return err; }
+
+	c := internal_cmp(x, y);
+	for c != -1 {
+		q.digit[n - t] += 1;
+		if err = internal_sub(x, x, y); err != nil { return err; }
+		c = internal_cmp(x, y);
+	}
+
+	/*
+		Reset y by shifting it back down.
+	*/
+	internal_shr_digit(y, n - t);
+
+	/*
+		Step 3. for i from n down to (t + 1).
+	*/
+	#no_bounds_check for i := n; i >= (t + 1); i -= 1 {
+		if (i > x.used) { continue; }
+
+		/*
+			step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt
+		*/
+		if x.digit[i] == y.digit[t] {
+			q.digit[(i - t) - 1] = 1 << (_DIGIT_BITS - 1);
+		} else {
+
+			tmp := _WORD(x.digit[i]) << _DIGIT_BITS;
+			tmp |= _WORD(x.digit[i - 1]);
+			tmp /= _WORD(y.digit[t]);
+			if tmp > _WORD(_MASK) {
+				tmp = _WORD(_MASK);
+			}
+			q.digit[(i - t) - 1] = DIGIT(tmp & _WORD(_MASK));
+		}
+
+		/* while (q{i-t-1} * (yt * b + y{t-1})) >
+					xi * b**2 + xi-1 * b + xi-2
+
+			do q{i-t-1} -= 1;
+		*/
+
+		iter := 0;
+
+		q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] + 1) & _MASK;
+		#no_bounds_check for {
+			q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK;
+
+			/*
+				Find left hand.
+			*/
+			internal_zero(t1);
+			t1.digit[0] = ((t - 1) < 0) ? 0 : y.digit[t - 1];
+			t1.digit[1] = y.digit[t];
+			t1.used = 2;
+			if err = internal_mul(t1, t1, q.digit[(i - t) - 1]); err != nil { return err; }
+
+			/*
+				Find right hand.
+			*/
+			t2.digit[0] = ((i - 2) < 0) ? 0 : x.digit[i - 2];
+			t2.digit[1] = x.digit[i - 1]; /* i >= 1 always holds */
+			t2.digit[2] = x.digit[i];
+			t2.used = 3;
+
+			if t1_t2 := internal_cmp_mag(t1, t2); t1_t2 != 1 {
+				break;
+			}
+			iter += 1; if iter > 100 { return .Max_Iterations_Reached; }
+		}
+
+		/*
+			Step 3.3 x = x - q{i-t-1} * y * b**{i-t-1}
+		*/
+		if err = int_mul_digit(t1, y, q.digit[(i - t) - 1]); err != nil { return err; }
+		if err = internal_shl_digit(t1, (i - t) - 1);        err != nil { return err; }
+		if err = internal_sub(x, x, t1);                     err != nil { return err; }
+
+		/*
+			if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; }
+		*/
+		if x.sign == .Negative {
+			if err = internal_copy(t1, y);                err != nil { return err; }
+			if err = internal_shl_digit(t1, (i - t) - 1); err != nil { return err; }
+			if err = internal_add(x, x, t1);              err != nil { return err; }
+
+			q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK;
+		}
+	}
+
+	/*
+		Now q is the quotient and x is the remainder, [which we have to normalize]
+		Get sign before writing to c.
+	*/
+	z, _ := is_zero(x);
+	x.sign = .Zero_or_Positive if z else numerator.sign;
+
+	if quotient != nil {
+		internal_clamp(q);
+		internal_swap(q, quotient);
+		quotient.sign = .Negative if neg else .Zero_or_Positive;
+	}
+
+	if remainder != nil {
+		if err = internal_shr(x, x, norm); err != nil { return err; }
+		internal_swap(x, remainder);
+	}
+
+	return nil;
+}
+
+/*
+	Slower bit-bang division... also smaller.
+*/
+@(deprecated="Use `_int_div_school`, it's 3.5x faster.")
+_private_int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (err: Error) {
+
+	ta, tb, tq, q := &Int{}, &Int{}, &Int{}, &Int{};
+	c: int;
+
+	goto_end: for {
+		if err = internal_one(tq);							err != nil { break goto_end; }
+
+		num_bits, _ := count_bits(numerator);
+		den_bits, _ := count_bits(denominator);
+		n := num_bits - den_bits;
+
+		if err = abs(ta, numerator);						err != nil { break goto_end; }
+		if err = abs(tb, denominator);						err != nil { break goto_end; }
+		if err = shl(tb, tb, n);							err != nil { break goto_end; }
+		if err = shl(tq, tq, n);							err != nil { break goto_end; }
+
+		for n >= 0 {
+			if c, _ = cmp_mag(ta, tb); c == 0 || c == 1 {
+				// ta -= tb
+				if err = sub(ta, ta, tb);					err != nil { break goto_end; }
+				//  q += tq
+				if err = add( q, q,  tq);					err != nil { break goto_end; }
+			}
+			if err = shr1(tb, tb);							err != nil { break goto_end; }
+			if err = shr1(tq, tq);							err != nil { break goto_end; }
+
+			n -= 1;
+		}
+
+		/*
+			Now q == quotient and ta == remainder.
+		*/
+		neg := numerator.sign != denominator.sign;
+		if quotient != nil {
+			swap(quotient, q);
+			z, _ := is_zero(quotient);
+			quotient.sign = .Negative if neg && !z else .Zero_or_Positive;
+		}
+		if remainder != nil {
+			swap(remainder, ta);
+			z, _ := is_zero(numerator);
+			remainder.sign = .Zero_or_Positive if z else numerator.sign;
+		}
+
+		break goto_end;
+	}
+	destroy(ta, tb, tq, q);
+	return err;
+}
+
+
+
+/*
+	Binary split factorial algo due to: http://www.luschny.de/math/factorial/binarysplitfact.html
+*/
+_private_int_factorial_binary_split :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
+
+	inner, outer, start, stop, temp := &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(inner, outer, start, stop, temp);
+
+	if err = internal_one(inner, false, allocator); err != nil { return err; }
+	if err = internal_one(outer, false, allocator); err != nil { return err; }
+
+	bits_used := int(_DIGIT_TYPE_BITS - intrinsics.count_leading_zeros(n));
+
+	for i := bits_used; i >= 0; i -= 1 {
+		start := (n >> (uint(i) + 1)) + 1 | 1;
+		stop  := (n >> uint(i)) + 1 | 1;
+		if err = _private_int_recursive_product(temp, start, stop, 0, allocator); err != nil { return err; }
+		if err = internal_mul(inner, inner, temp, allocator);                   err != nil { return err; }
+		if err = internal_mul(outer, outer, inner, allocator);                  err != nil { return err; }
+	}
+	shift := n - intrinsics.count_ones(n);
+
+	return internal_shl(res, outer, int(shift), allocator);
+}
+
+/*
+	Recursive product used by binary split factorial algorithm.
+*/
+_private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int(0), allocator := context.allocator) -> (err: Error) {
+	t1, t2 := &Int{}, &Int{};
+	defer internal_destroy(t1, t2);
+
+	if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
+
+	num_factors := (stop - start) >> 1;
+	if num_factors == 2 {
+		if err = internal_set(t1, start, false, allocator); err != nil { return err; }
+		when true {
+			if err = internal_grow(t2, t1.used + 1, false, allocator); err != nil { return err; }
+			if err = internal_add(t2, t1, 2, allocator); err != nil { return err; }
+		} else {
+			if err = add(t2, t1, 2); err != nil { return err; }
+		}
+		return internal_mul(res, t1, t2, allocator);
+	}
+
+	if num_factors > 1 {
+		mid := (start + num_factors) | 1;
+		if err = _private_int_recursive_product(t1, start,  mid, level + 1, allocator); err != nil { return err; }
+		if err = _private_int_recursive_product(t2,   mid, stop, level + 1, allocator); err != nil { return err; }
+		return internal_mul(res, t1, t2, allocator);
+	}
+
+	if num_factors == 1 { return #force_inline internal_set(res, start, true, allocator); }
+
+	return #force_inline internal_one(res, true, allocator);
+}
+
+/*
+	Internal function computing both GCD using the binary method,
+		and, if target isn't `nil`, also LCM.
+
+	Expects the `a` and `b` to have been initialized
+		and one or both of `res_gcd` or `res_lcm` not to be `nil`.
+
+	If both `a` and `b` are zero, return zero.
+	If either `a` or `b`, return the other one.
+
+	The `gcd` and `lcm` wrappers have already done this test,
+	but `gcd_lcm` wouldn't have, so we still need to perform it.
+
+	If neither result is wanted, we have nothing to do.
+*/
+_private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	if res_gcd == nil && res_lcm == nil { return nil; }
+
+	/*
+		We need a temporary because `res_gcd` is allowed to be `nil`.
+	*/
+	if a.used == 0 && b.used == 0 {
+		/*
+			GCD(0, 0) and LCM(0, 0) are both 0.
+		*/
+		if res_gcd != nil {
+			if err = internal_zero(res_gcd);	err != nil { return err; }
+		}
+		if res_lcm != nil {
+			if err = internal_zero(res_lcm);	err != nil { return err; }
+		}
+		return nil;
+	} else if a.used == 0 {
+		/*
+			We can early out with GCD = B and LCM = 0
+		*/
+		if res_gcd != nil {
+			if err = internal_abs(res_gcd, b); err != nil { return err; }
+		}
+		if res_lcm != nil {
+			if err = internal_zero(res_lcm); err != nil { return err; }
+		}
+		return nil;
+	} else if b.used == 0 {
+		/*
+			We can early out with GCD = A and LCM = 0
+		*/
+		if res_gcd != nil {
+			if err = internal_abs(res_gcd, a); err != nil { return err; }
+		}
+		if res_lcm != nil {
+			if err = internal_zero(res_lcm); err != nil { return err; }
+		}
+		return nil;
+	}
+
+	temp_gcd_res := &Int{};
+	defer internal_destroy(temp_gcd_res);
+
+	/*
+		If neither `a` or `b` was zero, we need to compute `gcd`.
+ 		Get copies of `a` and `b` we can modify.
+ 	*/
+	u, v := &Int{}, &Int{};
+	defer internal_destroy(u, v);
+	if err = internal_copy(u, a); err != nil { return err; }
+	if err = internal_copy(v, b); err != nil { return err; }
+
+ 	/*
+ 		Must be positive for the remainder of the algorithm.
+ 	*/
+	u.sign = .Zero_or_Positive; v.sign = .Zero_or_Positive;
+
+ 	/*
+ 		B1.  Find the common power of two for `u` and `v`.
+ 	*/
+ 	u_lsb, _ := internal_count_lsb(u);
+ 	v_lsb, _ := internal_count_lsb(v);
+ 	k        := min(u_lsb, v_lsb);
+
+	if k > 0 {
+		/*
+			Divide the power of two out.
+		*/
+		if err = internal_shr(u, u, k); err != nil { return err; }
+		if err = internal_shr(v, v, k); err != nil { return err; }
+	}
+
+	/*
+		Divide any remaining factors of two out.
+	*/
+	if u_lsb != k {
+		if err = internal_shr(u, u, u_lsb - k); err != nil { return err; }
+	}
+	if v_lsb != k {
+		if err = internal_shr(v, v, v_lsb - k); err != nil { return err; }
+	}
+
+	for v.used != 0 {
+		/*
+			Make sure `v` is the largest.
+		*/
+		if internal_cmp_mag(u, v) == 1 {
+			/*
+				Swap `u` and `v` to make sure `v` is >= `u`.
+			*/
+			internal_swap(u, v);
+		}
+
+		/*
+			Subtract smallest from largest.
+		*/
+		if err = internal_sub(v, v, u); err != nil { return err; }
+
+		/*
+			Divide out all factors of two.
+		*/
+		b, _ := internal_count_lsb(v);
+		if err = internal_shr(v, v, b); err != nil { return err; }
+	}
+
+ 	/*
+ 		Multiply by 2**k which we divided out at the beginning.
+ 	*/
+ 	if err = internal_shl(temp_gcd_res, u, k); err != nil { return err; }
+ 	temp_gcd_res.sign = .Zero_or_Positive;
+
+	/*
+		We've computed `gcd`, either the long way, or because one of the inputs was zero.
+		If we don't want `lcm`, we're done.
+	*/
+	if res_lcm == nil {
+		internal_swap(temp_gcd_res, res_gcd);
+		return nil;
+	}
+
+	/*
+		Computes least common multiple as `|a*b|/gcd(a,b)`
+		Divide the smallest by the GCD.
+	*/
+	if internal_cmp_mag(a, b) == -1 {
+		/*
+			Store quotient in `t2` such that `t2 * b` is the LCM.
+		*/
+		if err = internal_div(res_lcm, a, temp_gcd_res); err != nil { return err; }
+		err = internal_mul(res_lcm, res_lcm, b);
+	} else {
+		/*
+			Store quotient in `t2` such that `t2 * a` is the LCM.
+		*/
+		if err = internal_div(res_lcm, a, temp_gcd_res); err != nil { return err; }
+		err = internal_mul(res_lcm, res_lcm, b);
+	}
+
+	if res_gcd != nil {
+		internal_swap(temp_gcd_res, res_gcd);
+	}
+
+	/*
+		Fix the sign to positive and return.
+	*/
+	res_lcm.sign = .Zero_or_Positive;
+	return err;
+}
+
+/*
+	Internal implementation of log.
+	Assumes `a` not to be `nil` and to have been initialized.
+*/
+_private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) {
+	bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer destroy(bracket_low, bracket_high, bracket_mid, t, bi_base);
+
+	ic := #force_inline internal_cmp(a, base);
+	if ic == -1 || ic == 0 {
+		return 1 if ic == 0 else 0, nil;
+	}
+
+	if err = internal_set(bi_base, base, true, allocator);        err != nil { return -1, err; }
+	if err = internal_clear(bracket_mid, false, allocator);       err != nil { return -1, err; }
+	if err = internal_clear(t, false, allocator);                 err != nil { return -1, err; }
+	if err = internal_one(bracket_low, false, allocator);         err != nil { return -1, err; }
+	if err = internal_set(bracket_high, base, false, allocator);  err != nil { return -1, err; }
+
+	low := 0; high := 1;
+
+	/*
+		A kind of Giant-step/baby-step algorithm.
+		Idea shamelessly stolen from https://programmingpraxis.com/2010/05/07/integer-logarithms/2/
+		The effect is asymptotic, hence needs benchmarks to test if the Giant-step should be skipped
+		for small n.
+	*/
+
+	for {
+		/*
+			Iterate until `a` is bracketed between low + high.
+		*/
+		if #force_inline internal_cmp(bracket_high, a) != -1 { break; }
+
+		low = high;
+		if err = #force_inline internal_copy(bracket_low, bracket_high);                err != nil { return -1, err; }
+		high <<= 1;
+		if err = #force_inline internal_sqr(bracket_high, bracket_high);  err != nil { return -1, err; }
+	}
+
+	for (high - low) > 1 {
+		mid := (high + low) >> 1;
+
+		if err = #force_inline internal_pow(t, bi_base, mid - low);       err != nil { return -1, err; }
+
+		if err = #force_inline internal_mul(bracket_mid, bracket_low, t); err != nil { return -1, err; }
+
+		mc := #force_inline internal_cmp(a, bracket_mid);
+		switch mc {
+		case -1:
+			high = mid;
+			internal_swap(bracket_mid, bracket_high);
+		case  0:
+			return mid, nil;
+		case  1:
+			low = mid;
+			internal_swap(bracket_mid, bracket_low);
+		}
+	}
+
+	fc := #force_inline internal_cmp(bracket_high, a);
+	res = high if fc == 0 else low;
+
+	return;
+}
+
+/*
+	Returns the log2 of an `Int`.
+	Assumes `a` not to be `nil` and to have been initialized.
+	Also assumes `base` is a power of two.
+*/
+_private_log_power_of_two :: proc(a: ^Int, base: DIGIT) -> (log: int, err: Error) {
+	base := base;
+	y: int;
+	for y = 0; base & 1 == 0; {
+		y += 1;
+		base >>= 1;
+	}
+	log = internal_count_bits(a);
+	return (log - 1) / y, err;
+}
+
+/*
+	Copies DIGITs from `src` to `dest`.
+	Assumes `src` and `dest` to not be `nil` and have been initialized.
+*/
+_private_copy_digits :: proc(dest, src: ^Int, digits: int) -> (err: Error) {
+	digits := digits;
+	/*
+		If dest == src, do nothing
+	*/
+	if dest == src { return nil; }
+
+	digits = min(digits, len(src.digit), len(dest.digit));
+	mem.copy_non_overlapping(&dest.digit[0], &src.digit[0], size_of(DIGIT) * digits);
+	return nil;
+}
+
+/*	
+	========================    End of private procedures    =======================
+
+	===============================  Private tables  ===============================
+
+	Tables used by `internal_*` and `_*`.
+*/
+
+_private_prime_table := []DIGIT{
+	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+};
+
+when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
+	_factorial_table := [35]_WORD{
+/* f(00): */                                                     1,
+/* f(01): */                                                     1,
+/* f(02): */                                                     2,
+/* f(03): */                                                     6,
+/* f(04): */                                                    24,
+/* f(05): */                                                   120,
+/* f(06): */                                                   720,
+/* f(07): */                                                 5_040,
+/* f(08): */                                                40_320,
+/* f(09): */                                               362_880,
+/* f(10): */                                             3_628_800,
+/* f(11): */                                            39_916_800,
+/* f(12): */                                           479_001_600,
+/* f(13): */                                         6_227_020_800,
+/* f(14): */                                        87_178_291_200,
+/* f(15): */                                     1_307_674_368_000,
+/* f(16): */                                    20_922_789_888_000,
+/* f(17): */                                   355_687_428_096_000,
+/* f(18): */                                 6_402_373_705_728_000,
+/* f(19): */                               121_645_100_408_832_000,
+/* f(20): */                             2_432_902_008_176_640_000,
+/* f(21): */                            51_090_942_171_709_440_000,
+/* f(22): */                         1_124_000_727_777_607_680_000,
+/* f(23): */                        25_852_016_738_884_976_640_000,
+/* f(24): */                       620_448_401_733_239_439_360_000,
+/* f(25): */                    15_511_210_043_330_985_984_000_000,
+/* f(26): */                   403_291_461_126_605_635_584_000_000,
+/* f(27): */                10_888_869_450_418_352_160_768_000_000,
+/* f(28): */               304_888_344_611_713_860_501_504_000_000,
+/* f(29): */             8_841_761_993_739_701_954_543_616_000_000,
+/* f(30): */           265_252_859_812_191_058_636_308_480_000_000,
+/* f(31): */         8_222_838_654_177_922_817_725_562_880_000_000,
+/* f(32): */       263_130_836_933_693_530_167_218_012_160_000_000,
+/* f(33): */     8_683_317_618_811_886_495_518_194_401_280_000_000,
+/* f(34): */   295_232_799_039_604_140_847_618_609_643_520_000_000,
+	};
+} else {
+	_factorial_table := [21]_WORD{
+/* f(00): */                                                     1,
+/* f(01): */                                                     1,
+/* f(02): */                                                     2,
+/* f(03): */                                                     6,
+/* f(04): */                                                    24,
+/* f(05): */                                                   120,
+/* f(06): */                                                   720,
+/* f(07): */                                                 5_040,
+/* f(08): */                                                40_320,
+/* f(09): */                                               362_880,
+/* f(10): */                                             3_628_800,
+/* f(11): */                                            39_916_800,
+/* f(12): */                                           479_001_600,
+/* f(13): */                                         6_227_020_800,
+/* f(14): */                                        87_178_291_200,
+/* f(15): */                                     1_307_674_368_000,
+/* f(16): */                                    20_922_789_888_000,
+/* f(17): */                                   355_687_428_096_000,
+/* f(18): */                                 6_402_373_705_728_000,
+/* f(19): */                               121_645_100_408_832_000,
+/* f(20): */                             2_432_902_008_176_640_000,
+	};
+};
+
+/*
+	=========================  End of private tables  ========================
+*/

+ 559 - 0
core/math/big/public.odin

@@ -0,0 +1,559 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
+*/
+
+/*
+	===========================
+		User-level routines    
+	===========================
+*/
+
+/*
+	High-level addition. Handles sign.
+*/
+int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, a, b); err != nil { return err; }
+	/*
+		All parameters have been initialized.
+	*/
+	return #force_inline internal_int_add_signed(dest, a, b);
+}
+
+/*
+	Adds the unsigned `DIGIT` immediate to an `Int`,
+	such that the `DIGIT` doesn't have to be turned into an `Int` first.
+
+	dest = a + digit;
+*/
+int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return err; }
+	/*
+		Grow destination as required.
+	*/
+	if err = grow(dest, a.used + 1); err != nil { return err; }
+
+	/*
+		All parameters have been initialized.
+	*/
+	return #force_inline internal_int_add_digit(dest, a, digit);
+}
+
+/*
+	High-level subtraction, dest = number - decrease. Handles signs.
+*/
+int_sub :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, number, decrease);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, number, decrease); err != nil { return err; }
+	/*
+		All parameters have been initialized.
+	*/
+	return #force_inline internal_int_sub_signed(dest, number, decrease);
+}
+
+/*
+	Adds the unsigned `DIGIT` immediate to an `Int`,
+	such that the `DIGIT` doesn't have to be turned into an `Int` first.
+
+	dest = a - digit;
+*/
+int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return err; }
+	/*
+		Grow destination as required.
+	*/
+	if err = grow(dest, a.used + 1); err != nil { return err; }
+
+	/*
+		All parameters have been initialized.
+	*/
+	return #force_inline internal_int_sub_digit(dest, a, digit);
+}
+
+/*
+	dest = src  / 2
+	dest = src >> 1
+*/
+int_halve :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; }
+	/*
+		Grow destination as required.
+	*/
+	if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } }
+
+	return #force_inline internal_int_shr1(dest, src);
+}
+halve :: proc { int_halve, };
+shr1  :: halve;
+
+/*
+	dest = src  * 2
+	dest = src << 1
+*/
+int_double :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; }
+	/*
+		Grow destination as required.
+	*/
+	if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } }
+
+	return #force_inline internal_int_shl1(dest, src);
+}
+double :: proc { int_double, };
+shl1   :: double;
+
+/*
+	Multiply by a DIGIT.
+*/
+int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(src, dest); err != nil { return err; }
+
+	return #force_inline internal_int_mul_digit(dest, src, multiplier);
+}
+
+/*
+	High level multiplication (handles sign).
+*/
+int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src, multiplier);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src, multiplier); err != nil { return err; }
+
+	return #force_inline internal_int_mul(dest, src, multiplier);
+}
+
+mul :: proc { int_mul, int_mul_digit, };
+
+sqr :: proc(dest, src: ^Int) -> (err: Error) { return mul(dest, src, src); }
+
+/*
+	divmod.
+	Both the quotient and remainder are optional and may be passed a nil.
+*/
+int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Early out if neither of the results is wanted.
+	*/
+	if quotient == nil && remainder == nil { return nil; }
+	if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; }
+
+	return #force_inline internal_divmod(quotient, remainder, numerator, denominator);
+}
+
+int_divmod_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
+	assert_if_nil(quotient, numerator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(numerator); err != nil { return 0, err; }
+
+	return #force_inline internal_divmod(quotient, numerator, denominator);
+}
+divmod :: proc{ int_divmod, int_divmod_digit, };
+
+int_div :: proc(quotient, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(quotient, numerator, denominator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; }
+
+	return #force_inline internal_divmod(quotient, nil, numerator, denominator);
+}
+
+int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(quotient, numerator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(numerator); err != nil { return err; }
+
+	remainder: DIGIT;
+	remainder, err = #force_inline internal_divmod(quotient, numerator, denominator);
+	return err;
+}
+div :: proc { int_div, int_div_digit, };
+
+/*
+	remainder = numerator % denominator.
+	0 <= remainder < denominator if denominator > 0
+	denominator < remainder <= 0 if denominator < 0
+*/
+int_mod :: proc(remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, numerator, denominator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; }
+
+	return #force_inline internal_int_mod(remainder, numerator, denominator);
+}
+
+int_mod_digit :: proc(numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
+	return #force_inline internal_divmod(nil, numerator, denominator, allocator);
+}
+
+mod :: proc { int_mod, int_mod_digit, };
+
+/*
+	remainder = (number + addend) % modulus.
+*/
+int_addmod :: proc(remainder, number, addend, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, number, addend);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(number, addend, modulus); err != nil { return err; }
+
+	return #force_inline internal_addmod(remainder, number, addend, modulus);
+}
+addmod :: proc { int_addmod, };
+
+/*
+	remainder = (number - decrease) % modulus.
+*/
+int_submod :: proc(remainder, number, decrease, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, number, decrease);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(number, decrease, modulus); err != nil { return err; }
+
+	return #force_inline internal_submod(remainder, number, decrease, modulus);
+}
+submod :: proc { int_submod, };
+
+/*
+	remainder = (number * multiplicand) % modulus.
+*/
+int_mulmod :: proc(remainder, number, multiplicand, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, number, multiplicand);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(number, multiplicand, modulus); err != nil { return err; }
+
+	return #force_inline internal_mulmod(remainder, number, multiplicand, modulus);
+}
+mulmod :: proc { int_mulmod, };
+
+/*
+	remainder = (number * number) % modulus.
+*/
+int_sqrmod :: proc(remainder, number, modulus: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, number, modulus);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(number, modulus); err != nil { return err; }
+
+	return #force_inline internal_sqrmod(remainder, number, modulus);
+}
+sqrmod :: proc { int_sqrmod, };
+
+
+int_factorial :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }
+	assert_if_nil(res);
+
+	return #force_inline internal_int_factorial(res, n, allocator);
+}
+factorial :: proc { int_factorial, };
+
+
+/*
+	Number of ways to choose `k` items from `n` items.
+	Also known as the binomial coefficient.
+
+	TODO: Speed up.
+
+	Could be done faster by reusing code from factorial and reusing the common "prefix" results for n!, k! and n-k!
+	We know that n >= k, otherwise we early out with res = 0.
+
+	So:
+		n-k, keep result
+		n, start from previous result
+		k, start from previous result
+
+*/
+int_choose_digit :: proc(res: ^Int, n, k: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(res);
+	context.allocator = allocator;
+
+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }
+	if k > n { return internal_zero(res); }
+
+	/*
+		res = n! / (k! * (n - k)!)
+	*/
+	n_fac, k_fac, n_minus_k_fac := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(n_fac, k_fac, n_minus_k_fac);
+
+	if err = #force_inline internal_int_factorial(n_minus_k_fac, n - k);  err != nil { return err; }
+	if err = #force_inline internal_int_factorial(k_fac, k);              err != nil { return err; }
+	if err = #force_inline internal_mul(k_fac, k_fac, n_minus_k_fac);     err != nil { return err; }
+
+	if err = #force_inline internal_int_factorial(n_fac, n);              err != nil { return err; }
+	if err = #force_inline internal_div(res, n_fac, k_fac);               err != nil { return err; }
+
+	return err;	
+}
+choose :: proc { int_choose_digit, };
+
+/*
+	Function computing both GCD and (if target isn't `nil`) also LCM.
+*/
+int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	if res_gcd == nil && res_lcm == nil { return nil; }
+	assert_if_nil(a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil { return err; }
+	return #force_inline internal_int_gcd_lcm(res_gcd, res_lcm, a, b);
+}
+gcd_lcm :: proc { int_gcd_lcm, };
+
+/*
+	Greatest Common Divisor.
+*/
+int_gcd :: proc(res, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline int_gcd_lcm(res, nil, a, b, allocator);
+}
+gcd :: proc { int_gcd, };
+
+/*
+	Least Common Multiple.
+*/
+int_lcm :: proc(res, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline int_gcd_lcm(nil, res, a, b, allocator);
+}
+lcm :: proc { int_lcm, };
+
+/*
+	remainder = numerator % (1 << bits)
+*/
+int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(remainder, numerator);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(remainder, numerator); err != nil { return err; }
+	if bits  < 0 { return .Invalid_Argument; }
+
+	return #force_inline internal_int_mod_bits(remainder, numerator, bits);
+}
+
+mod_bits :: proc { int_mod_bits, };
+
+
+/*
+	Logs and roots and such.
+*/
+int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return 0, err; }
+
+	return #force_inline internal_int_log(a, base);
+}
+
+digit_log :: proc(a: DIGIT, base: DIGIT) -> (log: int, err: Error) {
+	return #force_inline internal_digit_log(a, base);
+}
+log :: proc { int_log, digit_log, };
+
+/*
+	Calculate `dest = base^power` using a square-multiply algorithm.
+*/
+int_pow :: proc(dest, base: ^Int, power: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, base);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, base); err != nil { return err; }
+
+	return #force_inline internal_int_pow(dest, base, power);
+}
+
+/*
+	Calculate `dest = base^power` using a square-multiply algorithm.
+*/
+int_pow_int :: proc(dest: ^Int, base, power: int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest);
+
+	return #force_inline internal_pow(dest, base, power, allocator);
+}
+
+pow :: proc { int_pow, int_pow_int, small_pow, };
+exp :: pow;
+
+small_pow :: proc(base: _WORD, exponent: _WORD) -> (result: _WORD) {
+	return #force_inline internal_small_pow(base, exponent);
+}
+
+/*
+	This function is less generic than `root_n`, simpler and faster.
+*/
+int_sqrt :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(dest, src);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(dest, src);	err != nil { return err; }
+
+	return #force_inline internal_int_sqrt(dest, src);
+}
+sqrt :: proc { int_sqrt, };
+
+
+/*
+	Find the nth root of an Integer.
+	Result found such that `(dest)**n <= src` and `(dest+1)**n > src`
+
+	This algorithm uses Newton's approximation `x[i+1] = x[i] - f(x[i])/f'(x[i])`,
+	which will find the root in `log(n)` time where each step involves a fair bit.
+*/
+int_root_n :: proc(dest, src: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	/*
+		Fast path for n == 2.
+	*/
+	if n == 2 { return sqrt(dest, src); }
+
+	assert_if_nil(dest, src);
+	/*
+		Initialize dest + src if needed.
+	*/
+	if err = internal_clear_if_uninitialized(dest, src);	err != nil { return err; }
+
+	return #force_inline internal_int_root_n(dest, src, n);
+}
+root_n :: proc { int_root_n, };
+
+/*
+	Comparison routines.
+*/
+
+int_is_initialized :: proc(a: ^Int) -> bool {
+	if a == nil { return false; }
+
+	return #force_inline internal_int_is_initialized(a);
+}
+
+int_is_zero :: proc(a: ^Int, allocator := context.allocator) -> (zero: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_zero(a), nil;
+}
+
+int_is_positive :: proc(a: ^Int, allocator := context.allocator) -> (positive: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_positive(a), nil;
+}
+
+int_is_negative :: proc(a: ^Int, allocator := context.allocator) -> (negative: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_negative(a), nil;
+}
+
+int_is_even :: proc(a: ^Int, allocator := context.allocator) -> (even: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_even(a), nil;
+}
+
+int_is_odd :: proc(a: ^Int, allocator := context.allocator) -> (odd: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_odd(a), nil;
+}
+
+platform_int_is_power_of_two :: #force_inline proc(a: int) -> bool {
+	return ((a) != 0) && (((a) & ((a) - 1)) == 0);
+}
+
+int_is_power_of_two :: proc(a: ^Int, allocator := context.allocator) -> (res: bool, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return false, err; }
+
+	return #force_inline internal_is_power_of_two(a), nil;
+}
+
+/*
+	Compare two `Int`s, signed.
+*/
+int_compare :: proc(a, b: ^Int, allocator := context.allocator) -> (comparison: int, err: Error) {
+	assert_if_nil(a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil {	return 0, err; }
+
+	return #force_inline internal_cmp(a, b), nil;
+}
+int_cmp :: int_compare;
+
+/*
+	Compare an `Int` to an unsigned number upto the size of the backing type.
+*/
+int_compare_digit :: proc(a: ^Int, b: DIGIT, allocator := context.allocator) -> (comparison: int, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a); err != nil { return 0, err; }
+
+	return #force_inline internal_cmp_digit(a, b), nil;
+}
+int_cmp_digit :: int_compare_digit;
+
+/*
+	Compare the magnitude of two `Int`s, unsigned.
+*/
+int_compare_magnitude :: proc(a, b: ^Int, allocator := context.allocator) -> (res: int, err: Error) {
+	assert_if_nil(a, b);
+	context.allocator = allocator;
+
+	if err = internal_clear_if_uninitialized(a, b); err != nil { return 0, err; }
+
+	return #force_inline internal_cmp_mag(a, b), nil;
+}

+ 485 - 0
core/math/big/radix.odin

@@ -0,0 +1,485 @@
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file contains radix conversions, `string_to_int` (atoi) and `int_to_string` (itoa).
+
+	TODO:
+		- Use Barrett reduction for non-powers-of-two.
+		- Also look at extracting and splatting several digits at once.
+*/
+
+import "core:intrinsics"
+import "core:mem"
+
+/*
+	This version of `itoa` allocates one behalf of the caller. The caller must free the string.
+*/
+int_itoa_string :: proc(a: ^Int, radix := i8(-1), zero_terminate := false, allocator := context.allocator) -> (res: string, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	a := a; radix := radix;
+	if err = clear_if_uninitialized(a); err != nil { return "", err; }
+	/*
+		Radix defaults to 10.
+	*/
+	radix = radix if radix > 0 else 10;
+
+	/*
+		TODO: If we want to write a prefix for some of the radixes, we can oversize the buffer.
+		Then after the digits are written and the string is reversed
+	*/
+
+	/*
+		Calculate the size of the buffer we need, and 
+	*/
+	size: int;
+	/*
+		Exit if calculating the size returned an error.
+	*/
+	if size, err = radix_size(a, radix, zero_terminate); err != nil {
+		return "", err;
+	}
+
+	/*
+		Allocate the buffer we need.
+	*/
+	buffer := make([]u8, size);
+
+	/*
+		Write the digits out into the buffer.
+	*/
+	written: int;
+	written, err = int_itoa_raw(a, radix, buffer, size, zero_terminate);
+
+	return string(buffer[:written]), err;
+}
+
+/*
+	This version of `itoa` allocates one behalf of the caller. The caller must free the string.
+*/
+int_itoa_cstring :: proc(a: ^Int, radix := i8(-1), allocator := context.allocator) -> (res: cstring, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	a := a; radix := radix;
+	if err = clear_if_uninitialized(a); err != nil { return "", err; }
+	/*
+		Radix defaults to 10.
+	*/
+	radix = radix if radix > 0 else 10;
+
+	s: string;
+	s, err = int_itoa_string(a, radix, true);
+	return cstring(raw_data(s)), err;
+}
+
+/*
+	A low-level `itoa` using a caller-provided buffer. `itoa_string` and `itoa_cstring` use this.
+	You can use also use it if you want to pre-allocate a buffer and optionally reuse it.
+
+	Use `radix_size` or `radix_size_estimate` to determine a buffer size big enough.
+
+	You can pass the output of `radix_size` to `size` if you've previously called it to size
+	the output buffer. If you haven't, this routine will call it. This way it knows if the buffer
+	is the appropriate size, and we can write directly in place without a reverse step at the end.
+
+					=== === === IMPORTANT === === ===
+
+	If you determined the buffer size using `radix_size_estimate`, or have a buffer
+	that you reuse that you know is large enough, don't pass this size unless you know what you are doing,
+	because we will always write backwards starting at last byte of the buffer.
+
+	Keep in mind that if you set `size` yourself and it's smaller than the buffer,
+	it'll result in buffer overflows, as we use it to avoid reversing at the end
+	and having to perform a buffer overflow check each character.
+*/
+int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_terminate := false) -> (written: int, err: Error) {
+	assert_if_nil(a);
+	a := a; radix := radix; size := size;
+	if err = clear_if_uninitialized(a); err != nil { return 0, err; }
+	/*
+		Radix defaults to 10.
+	*/
+	radix = radix if radix > 0 else 10;
+	if radix < 2 || radix > 64 {
+		return 0, .Invalid_Argument;
+	}
+
+	/*
+		We weren't given a size. Let's compute it.
+	*/
+	if size == -1 {
+		if size, err = radix_size(a, radix, zero_terminate); err != nil {
+			return 0, err;
+		}
+	}
+
+	/*
+		Early exit if the buffer we were given is too small.
+	*/
+	available := len(buffer);
+	if available < size {
+		return 0, .Buffer_Overflow;
+	}
+	/*
+		Fast path for when `Int` == 0 or the entire `Int` fits in a single radix digit.
+	*/
+	z, _ := is_zero(a);
+	if z || (a.used == 1 && a.digit[0] < DIGIT(radix)) {
+		if zero_terminate {
+			available -= 1;
+			buffer[available] = 0;
+		}
+		available -= 1;
+		buffer[available] = RADIX_TABLE[a.digit[0]];
+
+		if n, _ := is_neg(a); n {
+			available -= 1;
+			buffer[available] = '-';
+		}
+
+		/*
+			If we overestimated the size, we need to move the buffer left.
+		*/
+		written = len(buffer) - available;
+		if written < size {
+			diff := size - written;
+			mem.copy(&buffer[0], &buffer[diff], written);
+		}
+		return written, nil;
+	}
+
+	/*
+		Fast path for when `Int` fits within a `_WORD`.
+	*/
+	if a.used == 1 || a.used == 2 {
+		if zero_terminate {
+			available -= 1;
+			buffer[available] = 0;
+		}
+
+		val := _WORD(a.digit[1]) << _DIGIT_BITS + _WORD(a.digit[0]);
+		for val > 0 {
+			q := val / _WORD(radix);
+			available -= 1;
+			buffer[available] = RADIX_TABLE[val - (q * _WORD(radix))];
+
+			val = q;
+		}
+		if n, _ := is_neg(a); n {
+			available -= 1;
+			buffer[available] = '-';
+		}
+
+		/*
+			If we overestimated the size, we need to move the buffer left.
+		*/
+		written = len(buffer) - available;
+		if written < size {
+			diff := size - written;
+			mem.copy(&buffer[0], &buffer[diff], written);
+		}
+		return written, nil;
+	}
+
+	/*
+		Fast path for radixes that are a power of two.
+	*/
+	if is_power_of_two(int(radix)) {
+		if zero_terminate {
+			available -= 1;
+			buffer[available] = 0;
+		}
+
+		shift, count: int;
+		// mask  := _WORD(radix - 1);
+		shift, err = log(DIGIT(radix), 2);
+		count, err = count_bits(a);
+		digit: _WORD;
+
+		for offset := 0; offset < count; offset += shift {
+			bits_to_get := int(min(count - offset, shift));
+
+			digit, err = int_bitfield_extract(a, offset, bits_to_get);
+			if err != nil {
+				return len(buffer) - available, .Invalid_Argument;
+			}
+			available -= 1;
+			buffer[available] = RADIX_TABLE[digit];
+		}
+
+		if n, _ := is_neg(a); n {
+			available -= 1;
+			buffer[available] = '-';
+		}
+
+		/*
+			If we overestimated the size, we need to move the buffer left.
+		*/
+		written = len(buffer) - available;
+		if written < size {
+			diff := size - written;
+			mem.copy(&buffer[0], &buffer[diff], written);
+		}
+		return written, nil;
+	}
+
+	return _itoa_raw_full(a, radix, buffer, zero_terminate);
+}
+
+itoa :: proc{int_itoa_string, int_itoa_raw};
+int_to_string  :: int_itoa_string;
+int_to_cstring :: int_itoa_cstring;
+
+/*
+	Read a string [ASCII] in a given radix.
+*/
+int_atoi :: proc(res: ^Int, input: string, radix: i8, allocator := context.allocator) -> (err: Error) {
+	assert_if_nil(res);
+	input := input;
+	context.allocator = allocator;
+
+	/*
+		Make sure the radix is ok.
+	*/
+
+	if radix < 2 || radix > 64 { return .Invalid_Argument; }
+
+	/*
+		Set the integer to the default of zero.
+	*/
+	if err = internal_zero(res); err != nil { return err; }
+
+	/*
+		We'll interpret an empty string as zero.
+	*/
+	if len(input) == 0 { return nil; }
+
+	/*
+		If the leading digit is a minus set the sign to negative.
+		Given the above early out, the length should be at least 1.
+	*/
+	sign := Sign.Zero_or_Positive;
+	if input[0] == '-' {
+		input = input[1:];
+		sign = .Negative;
+	}
+
+	/*
+		Process each digit of the string.
+	*/
+	ch: rune;
+	for len(input) > 0 {
+		/* if the radix <= 36 the conversion is case insensitive
+		 * this allows numbers like 1AB and 1ab to represent the same value
+		 * [e.g. in hex]
+		*/
+
+		ch = rune(input[0]);
+		if radix <= 36 && ch >= 'a' && ch <= 'z' {
+			ch -= 32; // 'a' - 'A'
+		}
+
+		pos := ch - '+';
+		if RADIX_TABLE_REVERSE_SIZE <= pos {
+			break;
+		}
+		y := RADIX_TABLE_REVERSE[pos];
+		/* if the char was found in the map
+		 * and is less than the given radix add it
+		 * to the number, otherwise exit the loop.
+		 */
+		if y >= u8(radix) {
+			break;
+		}
+
+		if err = internal_mul(res, res, DIGIT(radix)); err != nil { return err; }
+		if err = internal_add(res, res, DIGIT(y));     err != nil { return err; }
+
+		input = input[1:];
+	}
+	/*
+		If an illegal character was found, fail.
+	*/
+	if len(input) > 0 && ch != 0 && ch != '\r' && ch != '\n' {
+		return .Invalid_Argument;
+	}
+	/*
+		Set the sign only if res != 0.
+	*/
+	if res.used > 0 {
+		res.sign = sign;
+	}
+
+	return nil;
+}
+
+
+atoi :: proc { int_atoi, };
+
+/*
+	We size for `string` by default.
+*/
+radix_size :: proc(a: ^Int, radix: i8, zero_terminate := false, allocator := context.allocator) -> (size: int, err: Error) {
+	a := a;
+	assert_if_nil(a);
+
+	if radix < 2 || radix > 64                     { return -1, .Invalid_Argument; }
+	if err = clear_if_uninitialized(a); err != nil { return {}, err; }
+
+	if internal_is_zero(a) {
+		if zero_terminate {
+			return 2, nil;
+		}
+		return 1, nil;
+	}
+
+	if internal_is_power_of_two(a) {
+		/*
+			Calculate `log` on a temporary "copy" with its sign set to positive.
+		*/
+		t := &Int{
+			used      = a.used,
+			sign      = .Zero_or_Positive,
+			digit     = a.digit,
+		};
+
+		if size, err = internal_log(t, DIGIT(radix));   err != nil { return {}, err; }
+	} else {
+		la, k := &Int{}, &Int{};
+		defer internal_destroy(la, k);
+
+		/* la = floor(log_2(a)) + 1 */
+		bit_count := internal_count_bits(a);
+		if err = internal_set(la, bit_count);           err != nil { return {}, err; }
+
+		/* k = floor(2^29/log_2(radix)) + 1 */
+		lb := _log_bases;
+		if err = internal_set(k, lb[radix]);            err != nil { return {}, err; }
+
+		/* n = floor((la *  k) / 2^29) + 1 */
+		if err = internal_mul(k, la, k);                err != nil { return 0, err; }
+		if err = internal_shr(k, k, _RADIX_SIZE_SCALE); err != nil { return {}, err; }
+
+		/* The "+1" here is the "+1" in "floor((la *  k) / 2^29) + 1" */
+		/* n = n + 1 + EOS + sign */
+		size_, _ := internal_get(k, u128);
+		size = int(size_);
+	}
+
+	/*
+		log truncates to zero, so we need to add one more, and one for `-` if negative.
+	*/
+	size += 2 if a.sign == .Negative else 1;
+	size += 1 if zero_terminate else 0;
+	return size, nil;
+}
+
+/*
+	Overestimate the size needed for the bigint to string conversion by a very small amount.
+	The error is about 10^-8; it will overestimate the result by at most 11 elements for
+	a number of the size 2^(2^31)-1 which is currently the largest possible in this library.
+	Some short tests gave no results larger than 5 (plus 2 for sign and EOS).
+ */
+
+/*
+	Table of {0, INT(log_2([1..64])*2^p)+1 } where p is the scale
+	factor defined in MP_RADIX_SIZE_SCALE and INT() extracts the integer part (truncating).
+	Good for 32 bit "int". Set MP_RADIX_SIZE_SCALE = 61 and recompute values
+	for 64 bit "int".
+ */
+
+_RADIX_SIZE_SCALE :: 29;
+_log_bases :: [65]u32{
+			0,         0, 0x20000001, 0x14309399, 0x10000001,
+	0xdc81a35, 0xc611924,  0xb660c9e,  0xaaaaaab,  0xa1849cd,
+	0x9a209a9, 0x94004e1,  0x8ed19c2,  0x8a5ca7d,  0x867a000,
+	0x830cee3, 0x8000001,  0x7d42d60,  0x7ac8b32,  0x7887847,
+	0x7677349, 0x749131f,  0x72d0163,  0x712f657,  0x6fab5db,
+	0x6e40d1b, 0x6ced0d0,  0x6badbde,  0x6a80e3b,  0x6964c19,
+	0x6857d31, 0x6758c38,  0x6666667,  0x657fb21,  0x64a3b9f,
+	0x63d1ab4, 0x6308c92,  0x624869e,  0x618ff47,  0x60dedea,
+	0x6034ab0, 0x5f90e7b,  0x5ef32cb,  0x5e5b1b2,  0x5dc85c3,
+	0x5d3aa02, 0x5cb19d9,  0x5c2d10f,  0x5bacbbf,  0x5b3064f,
+	0x5ab7d68, 0x5a42df0,  0x59d1506,  0x5962ffe,  0x58f7c57,
+	0x588f7bc, 0x582a000,  0x57c7319,  0x5766f1d,  0x5709243,
+	0x56adad9, 0x565474d,  0x55fd61f,  0x55a85e8,  0x5555556,
+};
+
+/*
+	Characters used in radix conversions.
+*/
+RADIX_TABLE := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+RADIX_TABLE_REVERSE := [RADIX_TABLE_REVERSE_SIZE]u8{
+   0x3e, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x01, 0x02, 0x03, 0x04, /* +,-./01234 */
+   0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, /* 56789:;<=> */
+   0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, /* ?@ABCDEFGH */
+   0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, /* IJKLMNOPQR */
+   0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0xff, 0xff, /* STUVWXYZ[\ */
+   0xff, 0xff, 0xff, 0xff, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, /* ]^_`abcdef */
+   0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */
+   0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* qrstuvwxyz */
+};
+RADIX_TABLE_REVERSE_SIZE :: 80;
+
+/*
+	Stores a bignum as a ASCII string in a given radix (2..64)
+	The buffer must be appropriately sized. This routine doesn't check.
+*/
+_itoa_raw_full :: proc(a: ^Int, radix: i8, buffer: []u8, zero_terminate := false, allocator := context.allocator) -> (written: int, err: Error) {
+	assert_if_nil(a);
+	context.allocator = allocator;
+
+	temp, denominator := &Int{}, &Int{};
+
+	if err = internal_copy(temp, a);           err != nil { return 0, err; }
+	if err = internal_set(denominator, radix); err != nil { return 0, err; }
+
+	available := len(buffer);
+	if zero_terminate {
+		available -= 1;
+		buffer[available] = 0;
+	}
+
+	if a.sign == .Negative {
+		temp.sign = .Zero_or_Positive;
+	}
+
+	remainder: DIGIT;
+	for {
+		if remainder, err = #force_inline internal_divmod(temp, temp, DIGIT(radix)); err != nil {
+			internal_destroy(temp, denominator);
+			return len(buffer) - available, err;
+		}
+		available -= 1;
+		buffer[available] = RADIX_TABLE[remainder];
+		if temp.used == 0 {
+			break;
+		}
+	}
+
+	if a.sign == .Negative {
+		available -= 1;
+		buffer[available] = '-';
+	}
+
+	internal_destroy(temp, denominator);
+
+	/*
+		If we overestimated the size, we need to move the buffer left.
+	*/
+	written = len(buffer) - available;
+	if written < len(buffer) {
+		diff := len(buffer) - written;
+		mem.copy(&buffer[0], &buffer[diff], written);
+	}
+	return written, nil;
+}

+ 369 - 0
core/math/big/test.odin

@@ -0,0 +1,369 @@
+//+ignore
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file exports procedures for use with the test.py test suite.
+*/
+
+/*
+	TODO: Write tests for `internal_*` and test reusing parameters with the public implementations.
+*/
+
+import "core:runtime"
+import "core:strings"
+
+PyRes :: struct {
+	res: cstring,
+	err: Error,
+}
+
+@export test_initialize_constants :: proc "c" () -> (res: u64) {
+	context = runtime.default_context();
+	return u64(initialize_constants());
+}
+
+@export test_error_string :: proc "c" (err: Error) -> (res: cstring) {
+	context = runtime.default_context();
+	es := Error_String;
+	return strings.clone_to_cstring(es[err], context.temp_allocator);
+}
+
+@export test_add :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	aa, bb, sum := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(aa, bb, sum);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":add:atoi(a):", err=err}; }
+	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":add:atoi(b):", err=err}; }
+	if bb.used == 1 {
+		if err = #force_inline internal_add(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; }	
+	} else {
+		if err = #force_inline internal_add(sum, aa, bb);          err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; }
+	}
+
+	r: cstring;
+	r, err = int_itoa_cstring(sum, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":add:itoa(sum):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+@export test_sub :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	aa, bb, sum := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(aa, bb, sum);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":sub:atoi(a):", err=err}; }
+	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":sub:atoi(b):", err=err}; }
+	if bb.used == 1 {
+		if err = #force_inline internal_sub(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; }
+	} else {
+		if err = #force_inline internal_sub(sum, aa, bb);          err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; }
+	}
+
+	r: cstring;
+	r, err = int_itoa_cstring(sum, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":sub:itoa(sum):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+@export test_mul :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	aa, bb, product := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(aa, bb, product);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":mul:atoi(a):", err=err}; }
+	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":mul:atoi(b):", err=err}; }
+	if err = #force_inline internal_mul(product, aa, bb);    err != nil { return PyRes{res=":mul:mul(product,a,b):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(product, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":mul:itoa(product):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+@export test_sqr :: proc "c" (a: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	aa, square := &Int{}, &Int{};
+	defer internal_destroy(aa, square);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":sqr:atoi(a):", err=err}; }
+	if err = #force_inline internal_sqr(square, aa);        err != nil { return PyRes{res=":sqr:sqr(square,a):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(square, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":sqr:itoa(square):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	NOTE(Jeroen): For simplicity, we don't return the quotient and the remainder, just the quotient.
+*/
+@export test_div :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	aa, bb, quotient := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(aa, bb, quotient);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":div:atoi(a):", err=err}; }
+	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":div:atoi(b):", err=err}; }
+	if err = #force_inline internal_div(quotient, aa, bb);   err != nil { return PyRes{res=":div:div(quotient,a,b):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(quotient, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":div:itoa(quotient):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+
+/*
+	res = log(a, base)
+*/
+@export test_log :: proc "c" (a: cstring, base := DIGIT(2)) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+	l: int;
+
+	aa := &Int{};
+	defer internal_destroy(aa);
+
+	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":log:atoi(a):", err=err}; }
+	if l, err = #force_inline internal_log(aa, base);        err != nil { return PyRes{res=":log:log(a, base):", err=err}; }
+
+	#force_inline internal_zero(aa);
+	aa.digit[0] = DIGIT(l)  & _MASK;
+	aa.digit[1] = DIGIT(l) >> _DIGIT_BITS;
+	aa.used = 2;
+	clamp(aa);
+
+	r: cstring;
+	r, err = int_itoa_cstring(aa, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":log:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = base^power
+*/
+@export test_pow :: proc "c" (base: cstring, power := int(2)) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	dest, bb := &Int{}, &Int{};
+	defer internal_destroy(dest, bb);
+
+	if err = atoi(bb, string(base), 16); err != nil { return PyRes{res=":pow:atoi(base):", err=err}; }
+	if err = #force_inline internal_pow(dest, bb, power);       err != nil { return PyRes{res=":pow:pow(dest, base, power):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(dest, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":log:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = sqrt(src)
+*/
+@export test_sqrt :: proc "c" (source: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":sqrt:atoi(src):", err=err}; }
+	if err = #force_inline internal_sqrt(src, src);                err != nil { return PyRes{res=":sqrt:sqrt(src):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":log:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = root_n(src, power)
+*/
+@export test_root_n :: proc "c" (source: cstring, power: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":root_n:atoi(src):", err=err}; }
+	if err = #force_inline internal_root_n(src, src, power);       err != nil { return PyRes{res=":root_n:root_n(src):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":root_n:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = shr_digit(src, digits)
+*/
+@export test_shr_digit :: proc "c" (source: cstring, digits: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr_digit:atoi(src):", err=err}; }
+	if err = #force_inline internal_shr_digit(src, digits);        err != nil { return PyRes{res=":shr_digit:shr_digit(src):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":shr_digit:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = shl_digit(src, digits)
+*/
+@export test_shl_digit :: proc "c" (source: cstring, digits: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shl_digit:atoi(src):", err=err}; }
+	if err = #force_inline internal_shl_digit(src, digits);        err != nil { return PyRes{res=":shl_digit:shr_digit(src):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":shl_digit:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = shr(src, bits)
+*/
+@export test_shr :: proc "c" (source: cstring, bits: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr:atoi(src):", err=err}; }
+	if err = #force_inline internal_shr(src, src, bits);           err != nil { return PyRes{res=":shr:shr(src, bits):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":shr:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = shr_signed(src, bits)
+*/
+@export test_shr_signed :: proc "c" (source: cstring, bits: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr_signed:atoi(src):", err=err}; }
+	if err = #force_inline internal_shr_signed(src, src, bits);    err != nil { return PyRes{res=":shr_signed:shr_signed(src, bits):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":shr_signed:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = shl(src, bits)
+*/
+@export test_shl :: proc "c" (source: cstring, bits: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	src := &Int{};
+	defer internal_destroy(src);
+
+	if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shl:atoi(src):", err=err}; }
+	if err = #force_inline internal_shl(src, src, bits);           err != nil { return PyRes{res=":shl:shl(src, bits):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(src, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":shl:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = factorial(n)
+*/
+@export test_factorial :: proc "c" (n: int) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	dest := &Int{};
+	defer internal_destroy(dest);
+
+	if err = #force_inline internal_int_factorial(dest, n); err != nil { return PyRes{res=":factorial:factorial(n):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(dest, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":factorial:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = gcd(a, b)
+*/
+@export test_gcd :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	ai, bi, dest := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(ai, bi, dest);
+
+	if err = atoi(ai, string(a), 16); err != nil { return PyRes{res=":gcd:atoi(a):", err=err}; }
+	if err = atoi(bi, string(b), 16); err != nil { return PyRes{res=":gcd:atoi(b):", err=err}; }
+	if err = #force_inline internal_int_gcd_lcm(dest, nil, ai, bi); err != nil { return PyRes{res=":gcd:gcd(a, b):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(dest, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":gcd:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+
+/*
+	dest = lcm(a, b)
+*/
+@export test_lcm :: proc "c" (a, b: cstring) -> (res: PyRes) {
+	context = runtime.default_context();
+	err: Error;
+
+	ai, bi, dest := &Int{}, &Int{}, &Int{};
+	defer internal_destroy(ai, bi, dest);
+
+	if err = atoi(ai, string(a), 16); err != nil { return PyRes{res=":lcm:atoi(a):", err=err}; }
+	if err = atoi(bi, string(b), 16); err != nil { return PyRes{res=":lcm:atoi(b):", err=err}; }
+	if err = #force_inline internal_int_gcd_lcm(nil, dest, ai, bi); err != nil { return PyRes{res=":lcm:lcm(a, b):", err=err}; }
+
+	r: cstring;
+	r, err = int_itoa_cstring(dest, 16, context.temp_allocator);
+	if err != nil { return PyRes{res=":lcm:itoa(res):", err=err}; }
+	return PyRes{res = r, err = nil};
+}
+

+ 668 - 0
core/math/big/test.py

@@ -0,0 +1,668 @@
+from ctypes import *
+from random import *
+import math
+import os
+import platform
+import time
+import gc
+from enum import Enum
+import argparse
+
+
+parser = argparse.ArgumentParser(
+	description     = "Odin core:math/big test suite",
+	epilog          = "By default we run regression and random tests with preset parameters.",
+    formatter_class = argparse.ArgumentDefaultsHelpFormatter,
+)
+#
+# Normally, we report the number of passes and fails. With this option set, we exit at first fail.
+#
+parser.add_argument(
+	"-exit-on-fail",
+	help    = "Exit when a test fails",
+	action  = "store_true",
+)
+#
+# We skip randomized tests altogether if this is set.
+#
+
+no_random = parser.add_mutually_exclusive_group()
+
+no_random.add_argument(
+	"-no-random",
+	help    = "No random tests",
+	action  = "store_true",
+)
+#
+# Normally we run a given number of cycles on each test.
+# Timed tests budget 1 second per 20_000 bits instead.
+#
+# For timed tests we budget a second per `n` bits and iterate until we hit that time.
+#
+
+timed_or_fast = no_random.add_mutually_exclusive_group()
+
+timed_or_fast.add_argument(
+	"-timed",
+	type    = bool,
+	default = False,
+	help    = "Timed tests instead of a preset number of iterations.",
+)
+parser.add_argument(
+	"-timed-bits",
+	type    = int,
+	metavar = "BITS",
+	default = 20_000,
+	help    = "Timed tests. Every `BITS` worth of input is given a second of running time.",
+)
+#
+# For normal tests (non-timed), `-fast-tests` cuts down on the number of iterations.
+#
+timed_or_fast.add_argument(
+	"-fast-tests",
+	help    = "Cut down on the number of iterations of each test",
+	action  = "store_true",
+)
+
+args = parser.parse_args()
+
+#
+# How many iterations of each random test do we want to run?
+#
+BITS_AND_ITERATIONS = [
+	(   120, 10_000),
+	( 1_200,  1_000),
+	( 4_096,    100),
+	(12_000,     10),
+]
+
+if args.fast_tests:
+	for k in range(len(BITS_AND_ITERATIONS)):
+		b, i = BITS_AND_ITERATIONS[k]
+		BITS_AND_ITERATIONS[k] = (b, i // 10 if i >= 100 else 5)
+
+if args.no_random:
+	BITS_AND_ITERATIONS = []
+
+#
+# Where is the DLL? If missing, build using: `odin build . -build-mode:shared`
+#
+if platform.system() == "Windows":
+	LIB_PATH = os.getcwd() + os.sep + "big.dll"
+elif platform.system() == "Linux":
+	LIB_PATH = os.getcwd() + os.sep + "big.so"
+elif platform.system() == "Darwin":
+	LIB_PATH = os.getcwd() + os.sep + "big.dylib"
+else:
+	print("Platform is unsupported.")
+	exit(1)
+
+
+TOTAL_TIME  = 0
+UNTIL_TIME  = 0
+UNTIL_ITERS = 0
+
+def we_iterate():
+	if args.timed:
+		return TOTAL_TIME < UNTIL_TIME
+	else:
+		global UNTIL_ITERS
+		UNTIL_ITERS -= 1
+		return UNTIL_ITERS != -1
+
+#
+# Error enum values
+#
+class Error(Enum):
+	Okay                   = 0
+	Out_Of_Memory          = 1
+	Invalid_Pointer        = 2
+	Invalid_Argument       = 3
+	Unknown_Error          = 4
+	Max_Iterations_Reached = 5
+	Buffer_Overflow        = 6
+	Integer_Overflow       = 7
+	Division_by_Zero       = 8
+	Math_Domain_Error      = 9
+	Unimplemented          = 127
+
+#
+# Disable garbage collection
+#
+gc.disable()
+
+#
+# Set up exported procedures
+#
+
+try:
+	l = cdll.LoadLibrary(LIB_PATH)
+except:
+ 	print("Couldn't find or load " + LIB_PATH + ".")
+ 	exit(1)
+
+def load(export_name, args, res):
+	export_name.argtypes = args
+	export_name.restype  = res
+	return export_name
+
+#
+# Result values will be passed in a struct { res: cstring, err: Error }
+#
+class Res(Structure):
+	_fields_ = [("res", c_char_p), ("err", c_uint64)]
+
+initialize_constants = load(l.test_initialize_constants, [], c_uint64)
+initialize_constants()
+
+error_string = load(l.test_error_string, [c_byte], c_char_p)
+
+add        = load(l.test_add,    [c_char_p, c_char_p],   Res)
+sub        = load(l.test_sub,    [c_char_p, c_char_p],   Res)
+mul        = load(l.test_mul,    [c_char_p, c_char_p],   Res)
+sqr        = load(l.test_sqr,    [c_char_p          ],   Res)
+div        = load(l.test_div,    [c_char_p, c_char_p],   Res)
+
+# Powers and such
+int_log    = load(l.test_log,    [c_char_p, c_longlong], Res)
+int_pow    = load(l.test_pow,    [c_char_p, c_longlong], Res)
+int_sqrt   = load(l.test_sqrt,   [c_char_p            ], Res)
+int_root_n = load(l.test_root_n, [c_char_p, c_longlong], Res)
+
+# Logical operations
+
+int_shl_digit  = load(l.test_shl_digit, [c_char_p, c_longlong], Res)
+int_shr_digit  = load(l.test_shr_digit, [c_char_p, c_longlong], Res)
+int_shl        = load(l.test_shl, [c_char_p, c_longlong], Res)
+int_shr        = load(l.test_shr, [c_char_p, c_longlong], Res)
+int_shr_signed = load(l.test_shr_signed, [c_char_p, c_longlong], Res)
+
+int_factorial  = load(l.test_factorial, [c_uint64], Res)
+int_gcd        = load(l.test_gcd, [c_char_p, c_char_p], Res)
+int_lcm        = load(l.test_lcm, [c_char_p, c_char_p], Res)
+
+def test(test_name: "", res: Res, param=[], expected_error = Error.Okay, expected_result = "", radix=16):
+	passed = True
+	r = None
+	err = Error(res.err)
+
+	if err != expected_error:
+		error_loc  = res.res.decode('utf-8')
+		error = "{}: {} in '{}'".format(test_name, err, error_loc)
+
+		if len(param):
+			error += " with params {}".format(param)
+
+		print(error, flush=True)
+		passed = False
+	elif err == Error.Okay:
+		r = None
+		try:
+			r = res.res.decode('utf-8')
+			r = int(res.res, radix)
+		except:
+			pass
+
+		if r != expected_result:
+			error = "{}: Result was '{}', expected '{}'".format(test_name, r, expected_result)
+			if len(param):
+				error += " with params {}".format(param)
+
+			print(error, flush=True)
+			passed = False
+
+	if args.exit_on_fail and not passed: exit(res.err)
+
+	return passed
+
+def arg_to_odin(a):
+	if a >= 0:
+		s = hex(a)[2:]
+	else:
+		s = '-' + hex(a)[3:]
+	return s.encode('utf-8')
+
+def test_add(a = 0, b = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a), arg_to_odin(b)]
+	res  = add(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a + b
+	return test("test_add", res, [a, b], expected_error, expected_result)
+
+def test_sub(a = 0, b = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a), arg_to_odin(b)]
+	res  = sub(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a - b
+	return test("test_sub", res, [a, b], expected_error, expected_result)
+
+def test_mul(a = 0, b = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a), arg_to_odin(b)]
+	try:
+		res  = mul(*args)
+	except OSError as e:
+		print("{} while trying to multiply {} x {}.".format(e, a, b))
+		if EXIT_ON_FAIL: exit(3)
+		return False
+
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a * b
+	return test("test_mul", res, [a, b], expected_error, expected_result)
+
+def test_sqr(a = 0, b = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a)]
+	try:
+		res  = sqr(*args)
+	except OSError as e:
+		print("{} while trying to square {} x {}.".format(e, a))
+		if EXIT_ON_FAIL: exit(3)
+		return False
+
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a * a
+	return test("test_sqr", res, [a], expected_error, expected_result)
+
+def test_div(a = 0, b = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a), arg_to_odin(b)]
+	res  = div(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		#
+		# We don't round the division results, so if one component is negative, we're off by one.
+		#
+		if a < 0 and b > 0:
+			expected_result = int(-(abs(a) // b))
+		elif b < 0 and a > 0:
+			expected_result = int(-(a // abs((b))))
+		else:
+			expected_result = a // b if b != 0 else None
+	return test("test_div", res, [a, b], expected_error, expected_result)
+
+
+def test_log(a = 0, base = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(a), base]
+	res  = int_log(*args)
+
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = int(math.log(a, base))
+	return test("test_log", res, [a, base], expected_error, expected_result)
+
+def test_pow(base = 0, power = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(base), power]
+	res  = int_pow(*args)
+
+	expected_result = None
+	if expected_error == Error.Okay:
+		if power < 0:
+			expected_result = 0
+		else:
+			# NOTE(Jeroen): Don't use `math.pow`, it's a floating point approximation.
+			#               Use built-in `pow` or `a**b` instead.
+			expected_result = pow(base, power)
+	return test("test_pow", res, [base, power], expected_error, expected_result)
+
+def test_sqrt(number = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(number)]
+	try:
+		res = int_sqrt(*args)
+	except OSError as e:
+		print("{} while trying to sqrt {}.".format(e, number))
+		if EXIT_ON_FAIL: exit(3)
+		return False
+
+	expected_result = None
+	if expected_error == Error.Okay:
+		if number < 0:
+			expected_result = 0
+		else:
+			expected_result = int(math.isqrt(number))
+	return test("test_sqrt", res, [number], expected_error, expected_result)
+
+def root_n(number, root):
+	u, s = number, number + 1
+	while u < s:
+		s = u
+		t = (root-1) * s + number // pow(s, root - 1)
+		u = t // root
+	return s
+
+def test_root_n(number = 0, root = 0, expected_error = Error.Okay):
+	args = [arg_to_odin(number), root]
+	res  = int_root_n(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		if number < 0:
+			expected_result = 0
+		else:
+			expected_result = root_n(number, root)
+
+	return test("test_root_n", res, [number, root], expected_error, expected_result)
+
+def test_shl_digit(a = 0, digits = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), digits]
+	res   = int_shl_digit(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a << (digits * 60)
+	return test("test_shl_digit", res, [a, digits], expected_error, expected_result)
+
+def test_shr_digit(a = 0, digits = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), digits]
+	res   = int_shr_digit(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		if a < 0:
+			# Don't pass negative numbers. We have a shr_signed.
+			return False
+		else:
+			expected_result = a >> (digits * 60)
+		
+	return test("test_shr_digit", res, [a, digits], expected_error, expected_result)
+
+def test_shl(a = 0, bits = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), bits]
+	res   = int_shl(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a << bits
+	return test("test_shl", res, [a, bits], expected_error, expected_result)
+
+def test_shr(a = 0, bits = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), bits]
+	res   = int_shr(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		if a < 0:
+			# Don't pass negative numbers. We have a shr_signed.
+			return False
+		else:
+			expected_result = a >> bits
+		
+	return test("test_shr", res, [a, bits], expected_error, expected_result)
+
+def test_shr_signed(a = 0, bits = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), bits]
+	res   = int_shr_signed(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = a >> bits
+		
+	return test("test_shr_signed", res, [a, bits], expected_error, expected_result)
+
+def test_factorial(n = 0, expected_error = Error.Okay):
+	args  = [n]
+	res   = int_factorial(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = math.factorial(n)
+		
+	return test("test_factorial", res, [n], expected_error, expected_result)
+
+def test_gcd(a = 0, b = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), arg_to_odin(b)]
+	res   = int_gcd(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = math.gcd(a, b)
+		
+	return test("test_gcd", res, [a, b], expected_error, expected_result)
+
+def test_lcm(a = 0, b = 0, expected_error = Error.Okay):
+	args  = [arg_to_odin(a), arg_to_odin(b)]
+	res   = int_lcm(*args)
+	expected_result = None
+	if expected_error == Error.Okay:
+		expected_result = math.lcm(a, b)
+		
+	return test("test_lcm", res, [a, b], expected_error, expected_result)
+
+# TODO(Jeroen): Make sure tests cover edge cases, fast paths, and so on.
+#
+# The last two arguments in tests are the expected error and expected result.
+#
+# The expected error defaults to None.
+# By default the Odin implementation will be tested against the Python one.
+# You can override that by supplying an expected result as the last argument instead.
+
+TESTS = {
+	test_add: [
+		[ 1234,   5432],
+	],
+	test_sub: [
+		[ 1234,   5432],
+	],
+	test_mul: [
+		[ 1234,   5432],
+		[ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7 ],
+	],
+	test_sqr: [
+		[ 5432],
+		[ 0xd3b4e926aaba3040e1c12b5ea553b5 ],
+	],
+	test_div: [
+		[ 54321,	12345],
+		[ 55431,		0, Error.Division_by_Zero],
+		[ 12980742146337069150589594264770969721, 4611686018427387904 ],
+		[   831956404029821402159719858789932422, 243087903122332132 ],
+	],
+	test_log: [
+		[ 3192,			1, Error.Invalid_Argument],
+		[ -1234,		2, Error.Math_Domain_Error],
+		[ 0,			2, Error.Math_Domain_Error],
+		[ 1024,			2],
+	],
+	test_pow: [
+		[ 0,  -1, Error.Math_Domain_Error ], # Math
+		[ 0,   0 ], # 1
+		[ 0,   2 ], # 0
+		[ 42, -1,], # 0
+		[ 42,  1 ], # 1
+		[ 42,  0 ], # 42
+		[ 42,  2 ], # 42*42
+	],
+	test_sqrt: [
+		[  -1, Error.Invalid_Argument, ],
+		[  42, Error.Okay, ],
+		[  12345678901234567890, Error.Okay, ],
+		[  1298074214633706907132624082305024, Error.Okay, ],
+		[  686885735734829009541949746871140768343076607029752932751182108475420900392874228486622313727012705619148037570309621219533087263900443932890792804879473795673302686046941536636874184361869252299636701671980034458333859202703255467709267777184095435235980845369829397344182319113372092844648570818726316581751114346501124871729572474923695509057166373026411194094493240101036672016770945150422252961487398124677567028263059046193391737576836378376192651849283925197438927999526058932679219572030021792914065825542626400207956134072247020690107136531852625253942429167557531123651471221455967386267137846791963149859804549891438562641323068751514370656287452006867713758971418043865298618635213551059471668293725548570452377976322899027050925842868079489675596835389444833567439058609775325447891875359487104691935576723532407937236505941186660707032433807075470656782452889754501872408562496805517394619388777930253411467941214807849472083814447498068636264021405175653742244368865090604940094889189800007448083930490871954101880815781177612910234741529950538835837693870921008635195545246771593130784786737543736434086434015200264933536294884482218945403958647118802574342840790536176272341586020230110889699633073513016344826709214, Error.Okay, ],
+	],
+	test_root_n: [
+		[  1298074214633706907132624082305024, 2, Error.Okay, ],	
+	],
+	test_shl_digit: [
+		[ 3192,			1 ],
+		[ 1298074214633706907132624082305024, 2 ],
+		[ 1024,			3 ],
+	],
+	test_shr_digit: [
+		[ 3680125442705055547392, 1 ],
+		[ 1725436586697640946858688965569256363112777243042596638790631055949824, 2 ],
+		[ 219504133884436710204395031992179571, 2 ],
+	],
+	test_shl: [
+		[ 3192,			1 ],
+		[ 1298074214633706907132624082305024, 2 ],
+		[ 1024,			3 ],
+	],
+	test_shr: [
+		[ 3680125442705055547392, 1 ],
+		[ 1725436586697640946858688965569256363112777243042596638790631055949824, 2 ],
+		[ 219504133884436710204395031992179571, 2 ],
+	],
+	test_shr_signed: [
+		[ -611105530635358368578155082258244262, 12 ],
+		[ -149195686190273039203651143129455, 12 ],
+		[ 611105530635358368578155082258244262, 12 ],
+		[ 149195686190273039203651143129455, 12 ],
+	],
+	test_factorial: [
+		[  6_000 ],   # Regular factorial, see cutoff in common.odin.
+		[ 12_345 ],   # Binary split factorial
+	],
+	test_gcd: [
+		[  23, 25, ],
+		[ 125, 25, ],
+		[ 125, 0,  ],
+		[   0, 0,  ],
+		[   0, 125,],
+	],
+	test_lcm: [
+		[  23,  25,],
+		[ 125, 25, ],
+		[ 125, 0,  ],
+		[   0, 0,  ],
+		[   0, 125,],
+	],
+}
+
+if not args.fast_tests:
+	TESTS[test_factorial].append(
+		# This one on its own takes around 800ms, so we exclude it for FAST_TESTS
+		[ 100_000 ],
+	)
+
+total_passes   = 0
+total_failures = 0
+
+#
+# test_shr_signed also tests shr, so we're not going to test shr randomly.
+#
+RANDOM_TESTS = [
+	test_add, test_sub, test_mul, test_sqr, test_div,
+	test_log, test_pow, test_sqrt, test_root_n,
+	test_shl_digit, test_shr_digit, test_shl, test_shr_signed,
+	test_gcd, test_lcm,
+]
+SKIP_LARGE   = [
+	test_pow, test_root_n, # test_gcd,
+]
+SKIP_LARGEST = []
+
+# Untimed warmup.
+for test_proc in TESTS:
+	for t in TESTS[test_proc]:
+		res   = test_proc(*t)
+
+if __name__ == '__main__':
+	print("\n---- math/big tests ----")
+	print()
+
+	for test_proc in TESTS:
+		count_pass = 0
+		count_fail = 0
+		TIMINGS    = {}
+		for t in TESTS[test_proc]:
+			start = time.perf_counter()
+			res   = test_proc(*t)
+			diff  = time.perf_counter() - start
+			TOTAL_TIME += diff
+
+			if test_proc not in TIMINGS:
+				TIMINGS[test_proc] = diff
+			else:
+				TIMINGS[test_proc] += diff
+
+			if res:
+				count_pass     += 1
+				total_passes   += 1
+			else:
+				count_fail     += 1
+				total_failures += 1
+
+		print("{name}: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(name=test_proc.__name__, count_pass=count_pass, count_fail=count_fail, timing=TIMINGS[test_proc] * 1_000))
+
+	for BITS, ITERATIONS in BITS_AND_ITERATIONS:
+		print()		
+		print("---- math/big with two random {bits:,} bit numbers ----".format(bits=BITS))
+		print()
+
+		#
+		# We've already tested up to the 10th root.
+		#
+		TEST_ROOT_N_PARAMS = [2, 3, 4, 5, 6]
+
+		for test_proc in RANDOM_TESTS:
+			if BITS >  1_200 and test_proc in SKIP_LARGE: continue
+			if BITS >  4_096 and test_proc in SKIP_LARGEST: continue
+
+			count_pass = 0
+			count_fail = 0
+			TIMINGS    = {}
+
+			UNTIL_ITERS = ITERATIONS
+			if test_proc == test_root_n and BITS == 1_200:
+				UNTIL_ITERS /= 10
+
+			UNTIL_TIME  = TOTAL_TIME + BITS / args.timed_bits
+			# We run each test for a second per 20k bits
+
+			index = 0
+
+			while we_iterate():
+				a = randint(-(1 << BITS), 1 << BITS)
+				b = randint(-(1 << BITS), 1 << BITS)
+
+				if test_proc == test_div:
+					# We've already tested division by zero above.
+					bits = int(BITS * 0.6)
+					b = randint(-(1 << bits), 1 << bits)
+					if b == 0:
+						b == 42
+				elif test_proc == test_log:
+					# We've already tested log's domain errors.
+					a = randint(1, 1 << BITS)
+					b = randint(2, 1 << 60)
+				elif test_proc == test_pow:
+					b = randint(1, 10)
+				elif test_proc == test_sqrt:
+					a = randint(1, 1 << BITS)
+					b = Error.Okay
+				elif test_proc == test_root_n:
+					a = randint(1, 1 << BITS)
+					b = TEST_ROOT_N_PARAMS[index]
+					index = (index + 1) % len(TEST_ROOT_N_PARAMS)
+				elif test_proc == test_shl_digit:
+					b = randint(0, 10);
+				elif test_proc == test_shr_digit:
+					a = abs(a)
+					b = randint(0, 10);
+				elif test_proc == test_shl:
+					b = randint(0, min(BITS, 120));
+				elif test_proc == test_shr_signed:
+					b = randint(0, min(BITS, 120));
+				else:
+					b = randint(0, 1 << BITS)					
+
+				res = None
+
+				start = time.perf_counter()
+				res   = test_proc(a, b)
+				diff  = time.perf_counter() - start
+
+				TOTAL_TIME += diff
+
+				if test_proc not in TIMINGS:
+					TIMINGS[test_proc] = diff
+				else:
+					TIMINGS[test_proc] += diff
+
+				if res:
+					count_pass     += 1; total_passes   += 1
+				else:
+					count_fail     += 1; total_failures += 1
+
+			print("{name}: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(name=test_proc.__name__, count_pass=count_pass, count_fail=count_fail, timing=TIMINGS[test_proc] * 1_000))
+
+	print()		
+	print("---- THE END ----")
+	print()
+	print("total: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(count_pass=total_passes, count_fail=total_failures, timing=TOTAL_TIME * 1_000))
+
+	if total_failures:
+		exit(1)

+ 80 - 0
core/math/big/tune.odin

@@ -0,0 +1,80 @@
+//+ignore
+package math_big
+
+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	A BigInt implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+*/
+
+import "core:fmt"
+import "core:time"
+
+Category :: enum {
+	itoa,
+	atoi,
+	factorial,
+	factorial_bin,
+	choose,
+	lsb,
+	ctz,
+	sqr,
+	bitfield_extract,
+};
+
+Event :: struct {
+	ticks:  time.Duration,
+	count:  int,
+	cycles: u64,
+}
+Timings := [Category]Event{};
+
+print_timings :: proc() {
+	duration :: proc(d: time.Duration) -> (res: string) {
+		switch {
+		case d < time.Microsecond:
+			return fmt.tprintf("%v ns", time.duration_nanoseconds(d));
+		case d < time.Millisecond:
+			return fmt.tprintf("%v µs", time.duration_microseconds(d));
+		case:
+			return fmt.tprintf("%v ms", time.duration_milliseconds(d));
+		}
+	}
+
+	for v in Timings {
+		if v.count > 0 {
+			fmt.println("\nTimings:");
+			break;
+		}
+	}
+
+	for v, i in Timings {
+		if v.count > 0 {
+			avg_ticks  := time.Duration(f64(v.ticks) / f64(v.count));
+			avg_cycles := f64(v.cycles) / f64(v.count);
+
+			fmt.printf("\t%v: %s / %v cycles (avg), %s / %v cycles (total, %v calls)\n", i, duration(avg_ticks), avg_cycles, duration(v.ticks), v.cycles, v.count);
+		}
+	}
+}
+
+@(deferred_in_out=_SCOPE_END)
+SCOPED_TIMING :: #force_inline proc(c: Category) -> (ticks: time.Tick, cycles: u64) {
+	cycles = time.read_cycle_counter();
+	ticks  = time.tick_now();
+	return;
+}
+_SCOPE_END :: #force_inline proc(c: Category, ticks: time.Tick, cycles: u64) {
+	cycles_now := time.read_cycle_counter();
+	ticks_now  := time.tick_now();
+
+	Timings[c].ticks  = time.tick_diff(ticks, ticks_now);
+	Timings[c].cycles = cycles_now - cycles;
+	Timings[c].count += 1;
+}
+SCOPED_COUNT_ADD :: #force_inline proc(c: Category, count: int) {
+	Timings[c].count += count;
+}

+ 1 - 1
core/math/rand/rand.odin

@@ -95,7 +95,7 @@ int63_max :: proc(n: i64, r: ^Rand = nil) -> i64 {
 
 
 int127_max :: proc(n: i128, r: ^Rand = nil) -> i128 {
 int127_max :: proc(n: i128, r: ^Rand = nil) -> i128 {
 	if n <= 0 {
 	if n <= 0 {
-		panic("Invalid argument to int63_max");
+		panic("Invalid argument to int127_max");
 	}
 	}
 	if n&(n-1) == 0 {
 	if n&(n-1) == 0 {
 		return int127(r) & (n-1);
 		return int127(r) & (n-1);