4 years ago · 777e17d80f
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -11,8 +11,6 @@ package big
 
															 	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...

														
 
															 */

														
 
															-import "core:mem"

														
 
															-

														
 
															 /*

														
 
															 	===========================

														
 
															 		User-level routines    

														
@@ -244,7 +242,7 @@ sqrmod :: proc { int_sqrmod, };
 
															 int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {

														
 
															-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }

														
 
															+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }

														
 
															 	if res == nil { return .Invalid_Pointer; }

														
 
															 	return #force_inline internal_int_factorial(res, n);

														
@@ -269,7 +267,7 @@ factorial :: proc { int_factorial, };
 
															 */

														
 
															 int_choose_digit :: proc(res: ^Int, n, k: int) -> (err: Error) {

														
 
															 	if res == nil  { return .Invalid_Pointer; }

														
 
															-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }

														
 
															+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }

														
 
															 	if k > n { return zero(res); }

														
--- a/core/math/big/build.bat
+++ b/core/math/big/build.bat
@@ -1,10 +1,10 @@
 
															 @echo off

														
 
															-:odin run . -vet

														
 
															+odin run . -vet

														
 
															 : -o:size

														
 
															 :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check

														
 
															 :odin build . -build-mode:shared -show-timings -o:size -no-bounds-check

														
 
															 :odin build . -build-mode:shared -show-timings -o:size

														
 
															-odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check

														
 
															+:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check

														
 
															 :odin build . -build-mode:shared -show-timings -o:speed

														
 
															-python test.py
														
 
															+:python test.py
														
--- a/core/math/big/common.odin
+++ b/core/math/big/common.odin
@@ -13,56 +13,77 @@ import "core:intrinsics"
 
															 /*
														
 
															 	TODO: Make the tunables runtime adjustable where practical.
														
 
															+
														
 
															 	This allows to benchmark and/or setting optimized values for a certain CPU without recompiling.
														
 
															 */
														
 
															 /*
														
 
															-	Tunables
														
 
															-*/
														
 
															-
														
 
															-MATH_BIG_FORCE_64_BIT :: false;
														
 
															-MATH_BIG_FORCE_32_BIT :: false;
														
 
															-when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
														
 
															-
														
 
															+	==========================    TUNABLES     ==========================
														
 
															-_LOW_MEMORY          :: #config(BIGINT_SMALL_MEMORY, false);
														
 
															-when _LOW_MEMORY {
														
 
															-	_DEFAULT_DIGIT_COUNT :: 8;
														
 
															-} else {
														
 
															-	_DEFAULT_DIGIT_COUNT :: 32;
														
 
															-}
														
 
															-
														
 
															-/*
														
 
															 	`initialize_constants` returns `#config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF)`
														
 
															 	and we initialize this cutoff that way so that the procedure is used and called,
														
 
															 	because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF.
														
 
															+
														
 
															+	`initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured.
														
 
															+
														
 
															 */
														
 
															-_MUL_KARATSUBA_CUTOFF := initialize_constants();
														
 
															-_SQR_KARATSUBA_CUTOFF := #config(SQR_KARATSUBA_CUTOFF, _DEFAULT_SQR_KARATSUBA_CUTOFF);
														
 
															-_MUL_TOOM_CUTOFF      := #config(MUL_TOOM_CUTOFF,      _DEFAULT_MUL_TOOM_CUTOFF);
														
 
															-_SQR_TOOM_CUTOFF      := #config(SQR_TOOM_CUTOFF,      _DEFAULT_SQR_TOOM_CUTOFF);
														
 
															+MUL_KARATSUBA_CUTOFF := initialize_constants();
														
 
															+SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
														
 
															+MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
														
 
															+SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
														
 
															 /*
														
 
															 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
														
 
															+
														
 
															 	TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors.
														
 
															+
														
 
															+	It would also be cool if we collected some data across various processor families.
														
 
															+	This would let us set reasonable defaults at runtime as this library initializes
														
 
															+	itself by using `cpuid` or the ARM equivalent.
														
 
															 */
														
 
															-_DEFAULT_MUL_KARATSUBA_CUTOFF ::  80;
														
 
															-_DEFAULT_SQR_KARATSUBA_CUTOFF :: 120;
														
 
															-_DEFAULT_MUL_TOOM_CUTOFF      :: 350;
														
 
															-_DEFAULT_SQR_TOOM_CUTOFF      :: 400;
														
 
															-_MAX_ITERATIONS_ROOT_N        :: 500;
														
 
															+_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF,  80);
														
 
															+_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120);
														
 
															+_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MUL_TOOM_CUTOFF,      350);
														
 
															+_DEFAULT_SQR_TOOM_CUTOFF      :: #config(SQR_TOOM_CUTOFF,      400);
														
 
															+
														
 
															+
														
 
															+MAX_ITERATIONS_ROOT_N := 500;
														
 
															 /*
														
 
															 	Largest `N` for which we'll compute `N!`
														
 
															 */
														
 
															-_FACTORIAL_MAX_N              :: 1_000_000;
														
 
															+FACTORIAL_MAX_N       := 1_000_000;
														
 
															 /*
														
 
															 	Cutoff to switch to int_factorial_binary_split, and its max recursion level.
														
 
															 */
														
 
															-_FACTORIAL_BINARY_SPLIT_CUTOFF         :: 6100;
														
 
															-_FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS :: 100;
														
 
															+FACTORIAL_BINARY_SPLIT_CUTOFF         := 6100;
														
 
															+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100;
														
 
															+
														
 
															+
														
 
															+/*
														
 
															+	We don't allow these to be switched at runtime for two reasons:
														
 
															+
														
 
															+	1) 32-bit and 64-bit versions of procedures use different types for their storage,
														
 
															+		so we'd have to double the number of procedures, and they couldn't interact.
														
 
															+
														
 
															+	2) Optimizations thanks to precomputed masks wouldn't work.
														
 
															+*/
														
 
															+MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false);
														
 
															+MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false);
														
 
															+when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
														
 
															+
														
 
															+_LOW_MEMORY           :: #config(BIGINT_SMALL_MEMORY, false);
														
 
															+when _LOW_MEMORY {
														
 
															+	_DEFAULT_DIGIT_COUNT :: 8;
														
 
															+} else {
														
 
															+	_DEFAULT_DIGIT_COUNT :: 32;
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+	=======================    END OF TUNABLES     =======================
														
 
															+*/
														
 
															 Sign :: enum u8 {
														
 
															 	Zero_or_Positive = 0,
														
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -15,17 +15,23 @@ import "core:mem"
 
															 print_configation :: proc() {
														
 
															 	fmt.printf(
														
 
															-`Configuration:
														
 
															-	DIGIT_BITS           %v
														
 
															-	MIN_DIGIT_COUNT      %v
														
 
															-	MAX_DIGIT_COUNT      %v
														
 
															-	DEFAULT_DIGIT_COUNT  %v
														
 
															-	MAX_COMBA            %v
														
 
															-	WARRAY               %v
														
 
															-	MUL_KARATSUBA_CUTOFF %v
														
 
															-	SQR_KARATSUBA_CUTOFF %v
														
 
															-	MUL_TOOM_CUTOFF      %v
														
 
															-	SQR_TOOM_CUTOFF      %v
														
 
															+`
														
 
															+Configuration:
														
 
															+	_DIGIT_BITS                           %v
														
 
															+	_MIN_DIGIT_COUNT                      %v
														
 
															+	_MAX_DIGIT_COUNT                      %v
														
 
															+	_DEFAULT_DIGIT_COUNT                  %v
														
 
															+	_MAX_COMBA                            %v
														
 
															+	_WARRAY                               %v
														
 
															+Runtime tunable:
														
 
															+	MUL_KARATSUBA_CUTOFF                  %v
														
 
															+	SQR_KARATSUBA_CUTOFF                  %v
														
 
															+	MUL_TOOM_CUTOFF                       %v
														
 
															+	SQR_TOOM_CUTOFF                       %v
														
 
															+	MAX_ITERATIONS_ROOT_N                 %v
														
 
															+	FACTORIAL_MAX_N                       %v
														
 
															+	FACTORIAL_BINARY_SPLIT_CUTOFF         %v
														
 
															+	FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v
														
 
															 `, _DIGIT_BITS,
														
 
															 _MIN_DIGIT_COUNT,
														
@@ -33,10 +39,14 @@ _MAX_DIGIT_COUNT,
 
															 _DEFAULT_DIGIT_COUNT,
														
 
															 _MAX_COMBA,
														
 
															 _WARRAY,
														
 
															-_MUL_KARATSUBA_CUTOFF,
														
 
															-_SQR_KARATSUBA_CUTOFF,
														
 
															-_MUL_TOOM_CUTOFF,
														
 
															-_SQR_TOOM_CUTOFF,
														
 
															+MUL_KARATSUBA_CUTOFF,
														
 
															+SQR_KARATSUBA_CUTOFF,
														
 
															+MUL_TOOM_CUTOFF,
														
 
															+SQR_TOOM_CUTOFF,
														
 
															+MAX_ITERATIONS_ROOT_N,
														
 
															+FACTORIAL_MAX_N,
														
 
															+FACTORIAL_BINARY_SPLIT_CUTOFF,
														
 
															+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS,
														
 
															 );
														
 
															 }
														
@@ -84,6 +94,8 @@ main :: proc() {
 
															 	demo();
														
 
															+	print_configation();
														
 
															+
														
 
															 	print_timings();
														
 
															 	if len(ta.allocation_map) > 0 {
														
--- a/core/math/big/exp_log.odin
+++ b/core/math/big/exp_log.odin
@@ -360,7 +360,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
															 		}

														
 
															 		if c, err = cmp(t1, t2); c == 0 { break; }

														
 
															 		iterations += 1;

														
 
															-		if iterations == _MAX_ITERATIONS_ROOT_N {

														
 
															+		if iterations == MAX_ITERATIONS_ROOT_N {

														
 
															 			return .Max_Iterations_Reached;

														
 
															 		}

														
 
															 	}

														
@@ -383,7 +383,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
															 		}

														
 
															 		iterations += 1;

														
 
															-		if iterations == _MAX_ITERATIONS_ROOT_N {

														
 
															+		if iterations == MAX_ITERATIONS_ROOT_N {

														
 
															 			return .Max_Iterations_Reached;

														
 
															 		}

														
 
															 	}

														
@@ -401,7 +401,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
															 		}

														
 
															 		iterations += 1;

														
 
															-		if iterations == _MAX_ITERATIONS_ROOT_N {

														
 
															+		if iterations == MAX_ITERATIONS_ROOT_N {

														
 
															 			return .Max_Iterations_Reached;

														
 
															 		}

														
 
															 	}

														
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -691,7 +691,7 @@ initialize_constants :: proc() -> (res: int) {
 
															 	set(      INF,  1);       INF.flags = {.Immutable, .Inf};
														
 
															 	set(      INF, -1); MINUS_INF.flags = {.Immutable, .Inf};
														
 
															-	return #config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF);
														
 
															+	return _DEFAULT_MUL_KARATSUBA_CUTOFF;
														
 
															 }
														
 
															 destroy_constants :: proc() {
														
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -597,10 +597,10 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 
															 		/*
														
 
															 			Do we need to square?
														
 
															 		*/
														
 
															-		if        false && src.used >= _SQR_TOOM_CUTOFF {
														
 
															+		if        false && src.used >= SQR_TOOM_CUTOFF {
														
 
															 			/* Use Toom-Cook? */
														
 
															 			// err = s_mp_sqr_toom(a, c);
														
 
															-		} else if false && src.used >= _SQR_KARATSUBA_CUTOFF {
														
 
															+		} else if false && src.used >= SQR_KARATSUBA_CUTOFF {
														
 
															 			/* Karatsuba? */
														
 
															 			// err = s_mp_sqr_karatsuba(a, c);
														
 
															 		} else if false && ((src.used * 2) + 1) < _WARRAY &&
														
@@ -625,16 +625,16 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 
															 		max_used := max(src.used, multiplier.used);
														
 
															 		digits   := src.used + multiplier.used + 1;
														
 
															-		if        false &&  min_used     >= _MUL_KARATSUBA_CUTOFF &&
														
 
															-						    max_used / 2 >= _MUL_KARATSUBA_CUTOFF &&
														
 
															+		if        false &&  min_used     >= MUL_KARATSUBA_CUTOFF &&
														
 
															+						    max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
														
 
															 			/*
														
 
															 				Not much effect was observed below a ratio of 1:2, but again: YMMV.
														
 
															 			*/
														
 
															 							max_used     >= 2 * min_used {
														
 
															 			// err = s_mp_mul_balance(a,b,c);
														
 
															-		} else if false && min_used >= _MUL_TOOM_CUTOFF {
														
 
															+		} else if false && min_used >= MUL_TOOM_CUTOFF {
														
 
															 			// err = s_mp_mul_toom(a, b, c);
														
 
															-		} else if false && min_used >= _MUL_KARATSUBA_CUTOFF {
														
 
															+		} else if false && min_used >= MUL_KARATSUBA_CUTOFF {
														
 
															 			// err = s_mp_mul_karatsuba(a, b, c);
														
 
															 		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
														
 
															 			/*
														
@@ -676,7 +676,7 @@ internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, a
 
															 		return nil;
														
 
															 	}
														
 
															-	if false && (denominator.used > 2 * _MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
														
 
															+	if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
														
 
															 		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
														
 
															 	} else {
														
 
															 		when true {
														
@@ -846,7 +846,7 @@ internal_sqrmod :: proc { internal_int_sqrmod, };
 
															 	This way we'll have to reallocate less, possibly not at all.
														
 
															 */
														
 
															 internal_int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
														
 
															-	if n >= _FACTORIAL_BINARY_SPLIT_CUTOFF {
														
 
															+	if n >= FACTORIAL_BINARY_SPLIT_CUTOFF {
														
 
															 		return #force_inline _private_int_factorial_binary_split(res, n);
														
 
															 	}
														
@@ -1490,7 +1490,7 @@ _private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int
 
															 	t1, t2 := &Int{}, &Int{};
														
 
															 	defer destroy(t1, t2);
														
 
															-	if level > _FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
														
 
															+	if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
														
 
															 	num_factors := (stop - start) >> 1;
														
 
															 	if num_factors == 2 {