Browse Source

big: Improve tunables.

Jeroen van Rijn 4 years ago
parent
commit
777e17d80f

+ 2 - 4
core/math/big/basic.odin

@@ -11,8 +11,6 @@ package big
 	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
 	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
 */
 */
 
 
-import "core:mem"
-
 /*
 /*
 	===========================
 	===========================
 		User-level routines    
 		User-level routines    
@@ -244,7 +242,7 @@ sqrmod :: proc { int_sqrmod, };
 
 
 
 
 int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
 int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }
+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }
 	if res == nil { return .Invalid_Pointer; }
 	if res == nil { return .Invalid_Pointer; }
 
 
 	return #force_inline internal_int_factorial(res, n);
 	return #force_inline internal_int_factorial(res, n);
@@ -269,7 +267,7 @@ factorial :: proc { int_factorial, };
 */
 */
 int_choose_digit :: proc(res: ^Int, n, k: int) -> (err: Error) {
 int_choose_digit :: proc(res: ^Int, n, k: int) -> (err: Error) {
 	if res == nil  { return .Invalid_Pointer; }
 	if res == nil  { return .Invalid_Pointer; }
-	if n < 0 || n > _FACTORIAL_MAX_N { return .Invalid_Argument; }
+	if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; }
 
 
 	if k > n { return zero(res); }
 	if k > n { return zero(res); }
 
 

+ 3 - 3
core/math/big/build.bat

@@ -1,10 +1,10 @@
 @echo off
 @echo off
-:odin run . -vet
+odin run . -vet
 : -o:size
 : -o:size
 :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check
 :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check
 :odin build . -build-mode:shared -show-timings -o:size -no-bounds-check
 :odin build . -build-mode:shared -show-timings -o:size -no-bounds-check
 :odin build . -build-mode:shared -show-timings -o:size
 :odin build . -build-mode:shared -show-timings -o:size
-odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check
+:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check
 :odin build . -build-mode:shared -show-timings -o:speed
 :odin build . -build-mode:shared -show-timings -o:speed
 
 
-python test.py
+:python test.py

+ 48 - 27
core/math/big/common.odin

@@ -13,56 +13,77 @@ import "core:intrinsics"
 
 
 /*
 /*
 	TODO: Make the tunables runtime adjustable where practical.
 	TODO: Make the tunables runtime adjustable where practical.
+
 	This allows benchmarking and/or setting optimized values for a certain CPU without recompiling.
 	This allows benchmarking and/or setting optimized values for a certain CPU without recompiling.
 */
 */
 
 
 /*
 /*
-	Tunables
-*/
-
-MATH_BIG_FORCE_64_BIT :: false;
-MATH_BIG_FORCE_32_BIT :: false;
-when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
-
+	==========================    TUNABLES     ==========================
 
 
-_LOW_MEMORY          :: #config(BIGINT_SMALL_MEMORY, false);
-when _LOW_MEMORY {
-	_DEFAULT_DIGIT_COUNT :: 8;
-} else {
-	_DEFAULT_DIGIT_COUNT :: 32;
-}
-
-/*
 	`initialize_constants` returns `_DEFAULT_MUL_KARATSUBA_CUTOFF` (itself set via `#config(MUL_KARATSUBA_CUTOFF, 80)`)
 	`initialize_constants` returns `_DEFAULT_MUL_KARATSUBA_CUTOFF` (itself set via `#config(MUL_KARATSUBA_CUTOFF, 80)`)
 	and we initialize this cutoff that way so that the procedure is used and called,
 	and we initialize this cutoff that way so that the procedure is used and called,
 	because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF.
 	because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF.
+
+	The `_DEFAULT_*` cutoffs are themselves defined via `#config`, so custom compile-time values replace the built-in defaults if so `#config`ured.
+
 */
 */
-_MUL_KARATSUBA_CUTOFF := initialize_constants();
-_SQR_KARATSUBA_CUTOFF := #config(SQR_KARATSUBA_CUTOFF, _DEFAULT_SQR_KARATSUBA_CUTOFF);
-_MUL_TOOM_CUTOFF      := #config(MUL_TOOM_CUTOFF,      _DEFAULT_MUL_TOOM_CUTOFF);
-_SQR_TOOM_CUTOFF      := #config(SQR_TOOM_CUTOFF,      _DEFAULT_SQR_TOOM_CUTOFF);
+MUL_KARATSUBA_CUTOFF := initialize_constants();
+SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
+MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
+SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
 
 
 /*
 /*
 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
+
 	TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors.
 	TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors.
+
+	It would also be cool if we collected some data across various processor families.
+	This would let us set reasonable defaults at runtime as this library initializes
+	itself by using `cpuid` or the ARM equivalent.
 */
 */
-_DEFAULT_MUL_KARATSUBA_CUTOFF ::  80;
-_DEFAULT_SQR_KARATSUBA_CUTOFF :: 120;
-_DEFAULT_MUL_TOOM_CUTOFF      :: 350;
-_DEFAULT_SQR_TOOM_CUTOFF      :: 400;
 
 
-_MAX_ITERATIONS_ROOT_N        :: 500;
+_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF,  80);
+_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120);
+_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MUL_TOOM_CUTOFF,      350);
+_DEFAULT_SQR_TOOM_CUTOFF      :: #config(SQR_TOOM_CUTOFF,      400);
+
+
+MAX_ITERATIONS_ROOT_N := 500;
 
 
 /*
 /*
 	Largest `N` for which we'll compute `N!`
 	Largest `N` for which we'll compute `N!`
 */
 */
-_FACTORIAL_MAX_N              :: 1_000_000;
+FACTORIAL_MAX_N       := 1_000_000;
 
 
 /*
 /*
 	Cutoff to switch to int_factorial_binary_split, and its max recursion level.
 	Cutoff to switch to int_factorial_binary_split, and its max recursion level.
 */
 */
-_FACTORIAL_BINARY_SPLIT_CUTOFF         :: 6100;
-_FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS :: 100;
+FACTORIAL_BINARY_SPLIT_CUTOFF         := 6100;
+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100;
+
+
+/*
+	We don't allow these to be switched at runtime for two reasons:
+
+	1) 32-bit and 64-bit versions of procedures use different types for their storage,
+		so we'd have to double the number of procedures, and they couldn't interact.
+
+	2) Optimizations thanks to precomputed masks wouldn't work.
+*/
+MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false);
+MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false);
+when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
+
+_LOW_MEMORY           :: #config(BIGINT_SMALL_MEMORY, false);
+when _LOW_MEMORY {
+	_DEFAULT_DIGIT_COUNT :: 8;
+} else {
+	_DEFAULT_DIGIT_COUNT :: 32;
+}
+
+/*
+	=======================    END OF TUNABLES     =======================
+*/
 
 
 Sign :: enum u8 {
 Sign :: enum u8 {
 	Zero_or_Positive = 0,
 	Zero_or_Positive = 0,

+ 27 - 15
core/math/big/example.odin

@@ -15,17 +15,23 @@ import "core:mem"
 
 
 print_configation :: proc() {
 print_configation :: proc() {
 	fmt.printf(
 	fmt.printf(
-`Configuration:
-	DIGIT_BITS           %v
-	MIN_DIGIT_COUNT      %v
-	MAX_DIGIT_COUNT      %v
-	DEFAULT_DIGIT_COUNT  %v
-	MAX_COMBA            %v
-	WARRAY               %v
-	MUL_KARATSUBA_CUTOFF %v
-	SQR_KARATSUBA_CUTOFF %v
-	MUL_TOOM_CUTOFF      %v
-	SQR_TOOM_CUTOFF      %v
+`
+Configuration:
+	_DIGIT_BITS                           %v
+	_MIN_DIGIT_COUNT                      %v
+	_MAX_DIGIT_COUNT                      %v
+	_DEFAULT_DIGIT_COUNT                  %v
+	_MAX_COMBA                            %v
+	_WARRAY                               %v
+Runtime tunable:
+	MUL_KARATSUBA_CUTOFF                  %v
+	SQR_KARATSUBA_CUTOFF                  %v
+	MUL_TOOM_CUTOFF                       %v
+	SQR_TOOM_CUTOFF                       %v
+	MAX_ITERATIONS_ROOT_N                 %v
+	FACTORIAL_MAX_N                       %v
+	FACTORIAL_BINARY_SPLIT_CUTOFF         %v
+	FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v
 
 
 `, _DIGIT_BITS,
 `, _DIGIT_BITS,
 _MIN_DIGIT_COUNT,
 _MIN_DIGIT_COUNT,
@@ -33,10 +39,14 @@ _MAX_DIGIT_COUNT,
 _DEFAULT_DIGIT_COUNT,
 _DEFAULT_DIGIT_COUNT,
 _MAX_COMBA,
 _MAX_COMBA,
 _WARRAY,
 _WARRAY,
-_MUL_KARATSUBA_CUTOFF,
-_SQR_KARATSUBA_CUTOFF,
-_MUL_TOOM_CUTOFF,
-_SQR_TOOM_CUTOFF,
+MUL_KARATSUBA_CUTOFF,
+SQR_KARATSUBA_CUTOFF,
+MUL_TOOM_CUTOFF,
+SQR_TOOM_CUTOFF,
+MAX_ITERATIONS_ROOT_N,
+FACTORIAL_MAX_N,
+FACTORIAL_BINARY_SPLIT_CUTOFF,
+FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS,
 );
 );
 
 
 }
 }
@@ -84,6 +94,8 @@ main :: proc() {
 
 
 	demo();
 	demo();
 
 
+	print_configation();
+
 	print_timings();
 	print_timings();
 
 
 	if len(ta.allocation_map) > 0 {
 	if len(ta.allocation_map) > 0 {

+ 3 - 3
core/math/big/exp_log.odin

@@ -360,7 +360,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 		}
 		}
 		if c, err = cmp(t1, t2); c == 0 { break; }
 		if c, err = cmp(t1, t2); c == 0 { break; }
 		iterations += 1;
 		iterations += 1;
-		if iterations == _MAX_ITERATIONS_ROOT_N {
+		if iterations == MAX_ITERATIONS_ROOT_N {
 			return .Max_Iterations_Reached;
 			return .Max_Iterations_Reached;
 		}
 		}
 	}
 	}
@@ -383,7 +383,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 		}
 		}
 
 
 		iterations += 1;
 		iterations += 1;
-		if iterations == _MAX_ITERATIONS_ROOT_N {
+		if iterations == MAX_ITERATIONS_ROOT_N {
 			return .Max_Iterations_Reached;
 			return .Max_Iterations_Reached;
 		}
 		}
 	}
 	}
@@ -401,7 +401,7 @@ int_root_n :: proc(dest, src: ^Int, n: int) -> (err: Error) {
 		}
 		}
 
 
 		iterations += 1;
 		iterations += 1;
-		if iterations == _MAX_ITERATIONS_ROOT_N {
+		if iterations == MAX_ITERATIONS_ROOT_N {
 			return .Max_Iterations_Reached;
 			return .Max_Iterations_Reached;
 		}
 		}
 	}
 	}

+ 1 - 1
core/math/big/helpers.odin

@@ -691,7 +691,7 @@ initialize_constants :: proc() -> (res: int) {
 	set(      INF,  1);       INF.flags = {.Immutable, .Inf};
 	set(      INF,  1);       INF.flags = {.Immutable, .Inf};
 	set(      INF, -1); MINUS_INF.flags = {.Immutable, .Inf};
 	set(      INF, -1); MINUS_INF.flags = {.Immutable, .Inf};
 
 
-	return #config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF);
+	return _DEFAULT_MUL_KARATSUBA_CUTOFF;
 }
 }
 
 
 destroy_constants :: proc() {
 destroy_constants :: proc() {

+ 9 - 9
core/math/big/internal.odin

@@ -597,10 +597,10 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 		/*
 		/*
 			Do we need to square?
 			Do we need to square?
 		*/
 		*/
-		if        false && src.used >= _SQR_TOOM_CUTOFF {
+		if        false && src.used >= SQR_TOOM_CUTOFF {
 			/* Use Toom-Cook? */
 			/* Use Toom-Cook? */
 			// err = s_mp_sqr_toom(a, c);
 			// err = s_mp_sqr_toom(a, c);
-		} else if false && src.used >= _SQR_KARATSUBA_CUTOFF {
+		} else if false && src.used >= SQR_KARATSUBA_CUTOFF {
 			/* Karatsuba? */
 			/* Karatsuba? */
 			// err = s_mp_sqr_karatsuba(a, c);
 			// err = s_mp_sqr_karatsuba(a, c);
 		} else if false && ((src.used * 2) + 1) < _WARRAY &&
 		} else if false && ((src.used * 2) + 1) < _WARRAY &&
@@ -625,16 +625,16 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 		max_used := max(src.used, multiplier.used);
 		max_used := max(src.used, multiplier.used);
 		digits   := src.used + multiplier.used + 1;
 		digits   := src.used + multiplier.used + 1;
 
 
-		if        false &&  min_used     >= _MUL_KARATSUBA_CUTOFF &&
-						    max_used / 2 >= _MUL_KARATSUBA_CUTOFF &&
+		if        false &&  min_used     >= MUL_KARATSUBA_CUTOFF &&
+						    max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
 			/*
 			/*
 				Not much effect was observed below a ratio of 1:2, but again: YMMV.
 				Not much effect was observed below a ratio of 1:2, but again: YMMV.
 			*/
 			*/
 							max_used     >= 2 * min_used {
 							max_used     >= 2 * min_used {
 			// err = s_mp_mul_balance(a,b,c);
 			// err = s_mp_mul_balance(a,b,c);
-		} else if false && min_used >= _MUL_TOOM_CUTOFF {
+		} else if false && min_used >= MUL_TOOM_CUTOFF {
 			// err = s_mp_mul_toom(a, b, c);
 			// err = s_mp_mul_toom(a, b, c);
-		} else if false && min_used >= _MUL_KARATSUBA_CUTOFF {
+		} else if false && min_used >= MUL_KARATSUBA_CUTOFF {
 			// err = s_mp_mul_karatsuba(a, b, c);
 			// err = s_mp_mul_karatsuba(a, b, c);
 		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
 		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
 			/*
 			/*
@@ -676,7 +676,7 @@ internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, a
 		return nil;
 		return nil;
 	}
 	}
 
 
-	if false && (denominator.used > 2 * _MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
+	if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
 		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
 		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
 	} else {
 	} else {
 		when true {
 		when true {
@@ -846,7 +846,7 @@ internal_sqrmod :: proc { internal_int_sqrmod, };
 	This way we'll have to reallocate less, possibly not at all.
 	This way we'll have to reallocate less, possibly not at all.
 */
 */
 internal_int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
 internal_int_factorial :: proc(res: ^Int, n: int) -> (err: Error) {
-	if n >= _FACTORIAL_BINARY_SPLIT_CUTOFF {
+	if n >= FACTORIAL_BINARY_SPLIT_CUTOFF {
 		return #force_inline _private_int_factorial_binary_split(res, n);
 		return #force_inline _private_int_factorial_binary_split(res, n);
 	}
 	}
 
 
@@ -1490,7 +1490,7 @@ _private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int
 	t1, t2 := &Int{}, &Int{};
 	t1, t2 := &Int{}, &Int{};
 	defer destroy(t1, t2);
 	defer destroy(t1, t2);
 
 
-	if level > _FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
+	if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; }
 
 
 	num_factors := (stop - start) >> 1;
 	num_factors := (stop - start) >> 1;
 	if num_factors == 2 {
 	if num_factors == 2 {