4 роки тому · 777e17d80f
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -11,8 +11,6 @@ package big
 
				 	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...

			
 
				 */

			
 
				 

			
 
				-import "core:mem"

			
 
				-

			
 
				 /*

			
 
				 	===========================

			
 
				 		User-level routines    

			
@@ -244,7 +242,7 @@ sqrmod :: proc { int_sqrmod, };
 
				 

			
 
				 

			
 
				 int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {

			
 
				-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }

			
 
				+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }

			
 
				 	if res == nil { return .Invalid_Pointer; }

			
 
				 

			
 
				 	return #force_inline internal_int_factorial(res, n);

			
@@ -269,7 +267,7 @@ factorial :: proc { int_factorial, };
 
				 */

			
 
				 int_choose_digit :: proc(res: ^Int, n, k: int) -> (err: Error) {

			
 
				 	if res == nil  { return .Invalid_Pointer; }

			
 
				-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }

			
 
				+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }

			
 
				 

			
 
				 	if k > n { return zero(res); }

			
 
				 

			
--- a/core/math/big/build.bat
+++ b/core/math/big/build.bat
@@ -1,10 +1,10 @@
 
				 @echo off

			
 
				-:odin run . -vet

			
 
				+odin run . -vet

			
 
				 : -o:size

			
 
				 :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check

			
 
				 :odin build . -build-mode:shared -show-timings -o:size -no-bounds-check

			
 
				 :odin build . -build-mode:shared -show-timings -o:size

			
 
				-odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check

			
 
				+:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check

			
 
				 :odin build . -build-mode:shared -show-timings -o:speed

			
 
				 

			
 
				-python test.py
			
 
				+:python test.py
			
--- a/core/math/big/common.odin
+++ b/core/math/big/common.odin
@@ -13,56 +13,77 @@ import "core:intrinsics"
 
				 
			
 
				 /*
			
 
				 	TODO: Make the tunables runtime adjustable where practical.
			
 
				+
			
 
				 	This allows benchmarking and/or setting optimized values for a certain CPU without recompiling.
			
 
				 */
			
 
				 
			
 
				 /*
			
 
				-	Tunables
			
 
				-*/
			
 
				-
			
 
				-MATH_BIG_FORCE_64_BIT :: false;
			
 
				-MATH_BIG_FORCE_32_BIT :: false;
			
 
				-when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
			
 
				-
			
 
				+	==========================    TUNABLES     ==========================
			
 
				 
			
 
				-_LOW_MEMORY          :: #config(BIGINT_SMALL_MEMORY, false);
			
 
				-when _LOW_MEMORY {
			
 
				-	_DEFAULT_DIGIT_COUNT :: 8;
			
 
				-} else {
			
 
				-	_DEFAULT_DIGIT_COUNT :: 32;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				 	`initialize_constants` returns `#config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF)`
			
 
				 	and we initialize this cutoff that way so that the procedure is used and called,
			
 
				 	because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF.
			
 
				+
			
 
				+	`initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured.
			
 
				+
			
 
				 */
			
 
				-_MUL_KARATSUBA_CUTOFF := initialize_constants();
			
 
				-_SQR_KARATSUBA_CUTOFF := #config(SQR_KARATSUBA_CUTOFF, _DEFAULT_SQR_KARATSUBA_CUTOFF);
			
 
				-_MUL_TOOM_CUTOFF      := #config(MUL_TOOM_CUTOFF,      _DEFAULT_MUL_TOOM_CUTOFF);
			
 
				-_SQR_TOOM_CUTOFF      := #config(SQR_TOOM_CUTOFF,      _DEFAULT_SQR_TOOM_CUTOFF);
			
 
				+MUL_KARATSUBA_CUTOFF := initialize_constants();
			
 
				+SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
			
 
				+MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
			
 
				+SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
			
 
				 
			
 
				 /*
			
 
				 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
			
 
				+
			
 
				 	TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors.
			
 
				+
			
 
				+	It would also be cool if we collected some data across various processor families.
			
 
				+	This would let us set reasonable defaults at runtime as this library initializes
			
 
				+	itself by using `cpuid` or the ARM equivalent.
			
 
				 */
			
 
				-_DEFAULT_MUL_KARATSUBA_CUTOFF ::  80;
			
 
				-_DEFAULT_SQR_KARATSUBA_CUTOFF :: 120;
			
 
				-_DEFAULT_MUL_TOOM_CUTOFF      :: 350;
			
 
				-_DEFAULT_SQR_TOOM_CUTOFF      :: 400;
			
 
				 
			
 
				-_MAX_ITERATIONS_ROOT_N        :: 500;
			
 
				+_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF,  80);
			
 
				+_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120);
			
 
				+_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MUL_TOOM_CUTOFF,      350);
			
 
				+_DEFAULT_SQR_TOOM_CUTOFF      :: #config(SQR_TOOM_CUTOFF,      400);
			
 
				+
			
 
				+
			
 
				+MAX_ITERATIONS_ROOT_N := 500;
			
 
				 
			
 
				 /*
			
 
				 	Largest `N` for which we'll compute `N!`
			
 
				 */
			
 
				-_FACTORIAL_MAX_N              :: 1_000_000;
			
 
				+FACTORIAL_MAX_N       := 1_000_000;
			
 
				 
			
 
				 /*
			
 
				 	Cutoff to switch to int_factorial_binary_split, and its max recursion level.
			
 
				 */
			
 
				-_FACTORIAL_BINARY_SPLIT_CUTOFF         :: 6100;
			
 
				-_FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS :: 100;
			
 
				+FACTORIAL_BINARY_SPLIT_CUTOFF         := 6100;
			
 
				+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100;
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+	We don't allow these to be switched at runtime for two reasons:
			
 
				+
			
 
				+	1) 32-bit and 64-bit versions of procedures use different types for their storage,
			
 
				+		so we'd have to double the number of procedures, and they couldn't interact.
			
 
				+
			
 
				+	2) Optimizations thanks to precomputed masks wouldn't work.
			
 
				+*/
			
 
				+MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false);
			
 
				+MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false);
			
 
				+when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
			
 
				+
			
 
				+_LOW_MEMORY           :: #config(BIGINT_SMALL_MEMORY, false);
			
 
				+when _LOW_MEMORY {
			
 
				+	_DEFAULT_DIGIT_COUNT :: 8;
			
 
				+} else {
			
 
				+	_DEFAULT_DIGIT_COUNT :: 32;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+	=======================    END OF TUNABLES     =======================
			
 
				+*/
			
 
				 
			
 
				 Sign :: enum u8 {
			
 
				 	Zero_or_Positive = 0,
			
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -15,17 +15,23 @@ import "core:mem"
 
				 
			
 
				 print_configation :: proc() {
			
 
				 	fmt.printf(
			
 
				-`Configuration:
			
 
				-	DIGIT_BITS           %v
			
 
				-	MIN_DIGIT_COUNT      %v
			
 
				-	MAX_DIGIT_COUNT      %v
			
 
				-	DEFAULT_DIGIT_COUNT  %v
			
 
				-	MAX_COMBA            %v
			
 
				-	WARRAY               %v
			
 
				-	MUL_KARATSUBA_CUTOFF %v
			
 
				-	SQR_KARATSUBA_CUTOFF %v
			
 
				-	MUL_TOOM_CUTOFF      %v
			
 
				-	SQR_TOOM_CUTOFF      %v
			
 
				+`
			
 
				+Configuration:
			
 
				+	_DIGIT_BITS                           %v
			
 
				+	_MIN_DIGIT_COUNT                      %v
			
 
				+	_MAX_DIGIT_COUNT                      %v
			
 
				+	_DEFAULT_DIGIT_COUNT                  %v
			
 
				+	_MAX_COMBA                            %v
			
 
				+	_WARRAY                               %v
			
 
				+Runtime tunable:
			
 
				+	MUL_KARATSUBA_CUTOFF                  %v
			
 
				+	SQR_KARATSUBA_CUTOFF                  %v
			
 
				+	MUL_TOOM_CUTOFF                       %v
			
 
				+	SQR_TOOM_CUTOFF                       %v
			
 
				+	MAX_ITERATIONS_ROOT_N                 %v
			
 
				+	FACTORIAL_MAX_N                       %v
			
 
				+	FACTORIAL_BINARY_SPLIT_CUTOFF         %v
			
 
				+	FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v
			
 
				 
			
 
				 `, _DIGIT_BITS,
			
 
				 _MIN_DIGIT_COUNT,
			
@@ -33,10 +39,14 @@ _MAX_DIGIT_COUNT,
 
				 _DEFAULT_DIGIT_COUNT,
			
 
				 _MAX_COMBA,
			
 
				 _WARRAY,
			
 
				-_MUL_KARATSUBA_CUTOFF,
			
 
				-_SQR_KARATSUBA_CUTOFF,
			
 
				-_MUL_TOOM_CUTOFF,
			
 
				-_SQR_TOOM_CUTOFF,
			
 
				+MUL_KARATSUBA_CUTOFF,
			
 
				+SQR_KARATSUBA_CUTOFF,
			
 
				+MUL_TOOM_CUTOFF,
			
 
				+SQR_TOOM_CUTOFF,
			
 
				+MAX_ITERATIONS_ROOT_N,
			
 
				+FACTORIAL_MAX_N,
			
 
				+FACTORIAL_BINARY_SPLIT_CUTOFF,
			
 
				+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS,
			
 
				 );
			
 
				 
			
 
				 }
			
@@ -84,6 +94,8 @@ main :: proc() {
 
				 
			
 
				 	demo();
			
 
				 
			
 
				+	print_configation();
			
 
				+
			
 
				 	print_timings();
			
 
				 
			
 
				 	if len(ta.allocation_map) > 0 {
			
--- a/core/math/big/exp_log.odin
+++ b/core/math/big/exp_log.odin
@@ -360,7 +360,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
				 		}

			
 
				 		if c, err = cmp(t1, t2); c == 0 { break; }

			
 
				 		iterations += 1;

			
 
				-		if iterations == _MAX_ITERATIONS_ROOT_N {

			
 
				+		if iterations == MAX_ITERATIONS_ROOT_N {

			
 
				 			return .Max_Iterations_Reached;

			
 
				 		}

			
 
				 	}

			
@@ -383,7 +383,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
				 		}

			
 
				 

			
 
				 		iterations += 1;

			
 
				-		if iterations == _MAX_ITERATIONS_ROOT_N {

			
 
				+		if iterations == MAX_ITERATIONS_ROOT_N {

			
 
				 			return .Max_Iterations_Reached;

			
 
				 		}

			
 
				 	}

			
@@ -401,7 +401,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 
				 		}

			
 
				 

			
 
				 		iterations += 1;

			
 
				-		if iterations == _MAX_ITERATIONS_ROOT_N {

			
 
				+		if iterations == MAX_ITERATIONS_ROOT_N {

			
 
				 			return .Max_Iterations_Reached;

			
 
				 		}

			
 
				 	}

			
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -691,7 +691,7 @@ initialize_constants :: proc() -> (res: int) {
 
				 	set(      INF,  1);       INF.flags = {.Immutable, .Inf};
			
 
				 	set(      INF, -1); MINUS_INF.flags = {.Immutable, .Inf};
			
 
				 
			
 
				-	return #config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF);
			
 
				+	return _DEFAULT_MUL_KARATSUBA_CUTOFF;
			
 
				 }
			
 
				 
			
 
				 destroy_constants :: proc() {
			
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -597,10 +597,10 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 
				 		/*
			
 
				 			Do we need to square?
			
 
				 		*/
			
 
				-		if        false && src.used >= _SQR_TOOM_CUTOFF {
			
 
				+		if        false && src.used >= SQR_TOOM_CUTOFF {
			
 
				 			/* Use Toom-Cook? */
			
 
				 			// err = s_mp_sqr_toom(a, c);
			
 
				-		} else if false && src.used >= _SQR_KARATSUBA_CUTOFF {
			
 
				+		} else if false && src.used >= SQR_KARATSUBA_CUTOFF {
			
 
				 			/* Karatsuba? */
			
 
				 			// err = s_mp_sqr_karatsuba(a, c);
			
 
				 		} else if false && ((src.used * 2) + 1) < _WARRAY &&
			
@@ -625,16 +625,16 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 
				 		max_used := max(src.used, multiplier.used);
			
 
				 		digits   := src.used + multiplier.used + 1;
			
 
				 
			
 
				-		if        false &&  min_used     >= _MUL_KARATSUBA_CUTOFF &&
			
 
				-						    max_used / 2 >= _MUL_KARATSUBA_CUTOFF &&
			
 
				+		if        false &&  min_used     >= MUL_KARATSUBA_CUTOFF &&
			
 
				+						    max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
			
 
				 			/*
			
 
				 				Not much effect was observed below a ratio of 1:2, but again: YMMV.
			
 
				 			*/
			
 
				 							max_used     >= 2 * min_used {
			
 
				 			// err = s_mp_mul_balance(a,b,c);
			
 
				-		} else if false && min_used >= _MUL_TOOM_CUTOFF {
			
 
				+		} else if false && min_used >= MUL_TOOM_CUTOFF {
			
 
				 			// err = s_mp_mul_toom(a, b, c);
			
 
				-		} else if false && min_used >= _MUL_KARATSUBA_CUTOFF {
			
 
				+		} else if false && min_used >= MUL_KARATSUBA_CUTOFF {
			
 
				 			// err = s_mp_mul_karatsuba(a, b, c);
			
 
				 		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
			
 
				 			/*
			
@@ -676,7 +676,7 @@ internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, a
 
				 		return nil;
			
 
				 	}
			
 
				 
			
 
				-	if false && (denominator.used > 2 * _MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
			
 
				+	if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
			
 
				 		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
			
 
				 	} else {
			
 
				 		when true {
			
@@ -846,7 +846,7 @@ internal_sqrmod :: proc { internal_int_sqrmod, };
 
				 	This way we'll have to reallocate less, possibly not at all.
			
 
				 */
			
 
				 internal_int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
			
 
				-	if n >= _FACTORIAL_BINARY_SPLIT_CUTOFF {
			
 
				+	if n >= FACTORIAL_BINARY_SPLIT_CUTOFF {
			
 
				 		return #force_inline _private_int_factorial_binary_split(res, n);
			
 
				 	}
			
 
				 
			
@@ -1490,7 +1490,7 @@ _private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int
 
				 	t1, t2 := &Int{}, &Int{};
			
 
				 	defer destroy(t1, t2);
			
 
				 
			
 
				-	if level > _FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
			
 
				+	if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
			
 
				 
			
 
				 	num_factors := (stop - start) >> 1;
			
 
				 	if num_factors == 2 {