Browse Source

Merge branch 'master' of https://github.com/odin-lang/Odin

gingerBill 4 years ago
parent
commit
9fb486b2ad

+ 0 - 2
core/math/big/api.odin

@@ -9,9 +9,7 @@ package math_big
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 
 
 	This file collects public proc maps and their aliases.
 	This file collects public proc maps and their aliases.
-/*
 
 
-*/
 	=== === === === === === === === === === === === === === === === === === === === === === === ===
 	=== === === === === === === === === === === === === === === === === === === === === === === ===
 	                                    Basic arithmetic.
 	                                    Basic arithmetic.
 	                                    See `public.odin`.
 	                                    See `public.odin`.

+ 6 - 6
core/math/big/build.bat

@@ -1,8 +1,8 @@
 @echo off
 @echo off
-odin run . -vet
+:odin run . -vet
 : -o:size
 : -o:size
-:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check && python test.py -fast-tests
-:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check  && python test.py -fast-tests
-:odin build . -build-mode:shared -show-timings -o:size  && python test.py -fast-tests
-:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check && python test.py -fast-tests
-:odin build . -build-mode:shared -show-timings -o:speed && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py -fast-tests
+odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py -fast-tests
+:odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests

+ 16 - 4
core/math/big/common.odin

@@ -27,10 +27,22 @@ import "core:intrinsics"
 	`initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured.
 	`initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured.
 
 
 */
 */
-MUL_KARATSUBA_CUTOFF := initialize_constants();
-SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
-MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
-SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
+
+/*
+	There is a bug with DLL globals. They don't get set.
+	To allow tests to run we add `-define:MATH_BIG_EXE=false` to hardcode the cutoffs for now.
+*/
+when #config(MATH_BIG_EXE, true) {
+	MUL_KARATSUBA_CUTOFF := initialize_constants();
+	SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
+	MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
+	SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;
+} else {
+	MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF;
+	SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF;
+	MUL_TOOM_CUTOFF      := _DEFAULT_MUL_TOOM_CUTOFF;
+	SQR_TOOM_CUTOFF      := _DEFAULT_SQR_TOOM_CUTOFF;	
+}
 
 
 /*
 /*
 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
 	These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.

+ 10 - 13
core/math/big/example.odin

@@ -206,19 +206,16 @@ demo :: proc() {
 	a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
 	a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
 	defer destroy(a, b, c, d, e, f);
 	defer destroy(a, b, c, d, e, f);
 
 
-	set(a, 64336);
-	fmt.println("--- --- --- ---");
-	int_to_byte(a);
-	fmt.println("--- --- --- ---");
-	int_to_byte_little(a);
-	fmt.println("--- --- --- ---");
-
-	set(b, -64336);
-	fmt.println("--- --- --- ---");
-	int_to_byte(b);
-	fmt.println("--- --- --- ---");
-	int_to_byte_little(b);
-	fmt.println("--- --- --- ---");
+	atoi(a, "12980742146337069150589594264770969721", 10);
+	print("a: ", a, 10, true, true, true);
+	atoi(b, "4611686018427387904", 10);
+	print("b: ", b, 10, true, true, true);
+
+	if err := internal_divmod(c, d, a, b); err != nil {
+		fmt.printf("Error: %v\n", err);
+	}
+	print("c: ", c);
+	print("c: ", d);
 }
 }
 
 
 main :: proc() {
 main :: proc() {

+ 45 - 4
core/math/big/internal.odin

@@ -36,6 +36,8 @@ import "core:mem"
 import "core:intrinsics"
 import "core:intrinsics"
 import rnd "core:math/rand"
 import rnd "core:math/rand"
 
 
+import "core:fmt"
+
 /*
 /*
 	Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
 	Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
 
 
@@ -260,6 +262,12 @@ internal_int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context
 }
 }
 internal_add :: proc { internal_int_add_signed, internal_int_add_digit, };
 internal_add :: proc { internal_int_add_signed, internal_int_add_digit, };
 
 
+
+internal_int_incr :: proc(dest: ^Int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline internal_add(dest, dest, 1);
+}
+internal_incr :: proc { internal_int_incr, };
+
 /*
 /*
 	Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|.
 	Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|.
 	Handbook of Applied Cryptography, algorithm 14.9.
 	Handbook of Applied Cryptography, algorithm 14.9.
@@ -458,6 +466,11 @@ internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT, allocator := co
 
 
 internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, };
 internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, };
 
 
+internal_int_decr :: proc(dest: ^Int, allocator := context.allocator) -> (err: Error) {
+	return #force_inline internal_sub(dest, dest, 1);
+}
+internal_decr :: proc { internal_int_decr, };
+
 /*
 /*
 	dest = src  / 2
 	dest = src  / 2
 	dest = src >> 1
 	dest = src >> 1
@@ -703,7 +716,6 @@ internal_sqr :: proc (dest, src: ^Int, allocator := context.allocator) -> (res:
 */
 */
 internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
 internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
 	context.allocator = allocator;
 	context.allocator = allocator;
-
 	if denominator.used == 0 { return .Division_by_Zero; }
 	if denominator.used == 0 { return .Division_by_Zero; }
 	/*
 	/*
 		If numerator < denominator then quotient = 0, remainder = numerator.
 		If numerator < denominator then quotient = 0, remainder = numerator.
@@ -718,8 +730,10 @@ internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, a
 		return nil;
 		return nil;
 	}
 	}
 
 
-	if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) {
-		// err = _int_div_recursive(quotient, remainder, numerator, denominator);
+	if (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used / 3) * 2) {
+		assert(denominator.used >= 160 && numerator.used >= 240, "MUL_KARATSUBA_CUTOFF global not properly set.");
+		err = _private_int_div_recursive(quotient, remainder, numerator, denominator);
+		// err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator);
 	} else {
 	} else {
 		when true {
 		when true {
 			err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator);
 			err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator);
@@ -1740,6 +1754,29 @@ internal_int_neg :: proc(dest, src: ^Int, allocator := context.allocator) -> (er
 }
 }
 internal_neg :: proc { internal_int_neg, };
 internal_neg :: proc { internal_int_neg, };
 
 
+/*
+	hac 14.61, pp608.
+*/
+internal_int_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	/*
+		For all n in N and n > 0, n = 0 mod 1.
+	*/
+	if internal_is_positive(a) && internal_cmp(b, 1) == 0 { return internal_zero(dest);	}
+
+	/*
+		`b` cannot be negative and has to be > 1
+	*/
+	if internal_is_negative(b) && internal_cmp(b, 1) != 1 { return .Invalid_Argument; }
+
+	/*
+		If the modulus is odd we can use a faster routine instead.
+	*/
+	if internal_is_odd(b) { return _private_inverse_modulo_odd(dest, a, b); }
+
+	return _private_inverse_modulo(dest, a, b);
+}
+internal_invmod :: proc{ internal_int_inverse_modulo, };
 
 
 /*
 /*
 	Helpers to extract values from the `Int`.
 	Helpers to extract values from the `Int`.
@@ -1991,7 +2028,11 @@ internal_int_get :: proc(a: ^Int, $T: typeid) -> (res: T, err: Error) where intr
 internal_get :: proc { internal_int_get, };
 internal_get :: proc { internal_int_get, };
 
 
 internal_int_get_float :: proc(a: ^Int) -> (res: f64, err: Error) {
 internal_int_get_float :: proc(a: ^Int) -> (res: f64, err: Error) {
-	l   := min(a.used, 17); // log2(max(f64)) is approximately 1020, or 17 legs.
+	/*
+		log2(max(f64)) is approximately 1020, or 17 legs with the 64-bit storage.
+	*/
+	legs :: 1020 / _DIGIT_BITS;
+	l   := min(a.used, legs);
 	fac := f64(1 << _DIGIT_BITS);
 	fac := f64(1 << _DIGIT_BITS);
 	d   := 0.0;
 	d   := 0.0;
 
 

+ 45 - 0
core/math/big/prime.odin

@@ -31,3 +31,48 @@ int_prime_is_divisible :: proc(a: ^Int, allocator := context.allocator) -> (res:
 	*/
 	*/
 	return false, nil;
 	return false, nil;
 }
 }
+
+number_of_rabin_miller_trials :: proc(bit_size: int) -> (number_of_trials: int) {
+	switch {
+	case bit_size <=    80:
+		return - 1;		/* Use deterministic algorithm for size <= 80 bits */
+	case bit_size >=    81 && bit_size <     96:
+		return 37;		/* max. error = 2^(-96)  */
+	case bit_size >=    96 && bit_size <    128:
+		return 32;		/* max. error = 2^(-96)  */
+	case bit_size >=   128 && bit_size <    160:
+		return 40;		/* max. error = 2^(-112) */
+	case bit_size >=   160 && bit_size <    256:
+		return 35;		/* max. error = 2^(-112) */
+	case bit_size >=   256 && bit_size <    384:
+		return 27;		/* max. error = 2^(-128) */
+	case bit_size >=   384 && bit_size <    512:
+		return 16;		/* max. error = 2^(-128) */
+	case bit_size >=   512 && bit_size <    768:
+		return 18;		/* max. error = 2^(-160) */
+	case bit_size >=   768 && bit_size <    896:
+		return 11;		/* max. error = 2^(-160) */
+	case bit_size >=   896 && bit_size <  1_024:
+		return 10;		/* max. error = 2^(-160) */
+	case bit_size >= 1_024 && bit_size <  1_536:
+		return 12;		/* max. error = 2^(-192) */
+	case bit_size >= 1_536 && bit_size <  2_048:
+		return  8;		/* max. error = 2^(-192) */
+	case bit_size >= 2_048 && bit_size <  3_072:
+		return  6;		/* max. error = 2^(-192) */
+	case bit_size >= 3_072 && bit_size <  4_096:
+		return  4;		/* max. error = 2^(-192) */
+	case bit_size >= 4_096 && bit_size <  5_120:
+		return  5;		/* max. error = 2^(-256) */
+	case bit_size >= 5_120 && bit_size <  6_144:
+		return  4;		/* max. error = 2^(-256) */
+	case bit_size >= 6_144 && bit_size <  8_192:
+		return  4;		/* max. error = 2^(-256) */
+	case bit_size >= 8_192 && bit_size <  9_216:
+		return  3;		/* max. error = 2^(-256) */
+	case bit_size >= 9_216 && bit_size < 10_240:
+		return  3;		/* max. error = 2^(-256) */
+	case:
+		return  2;		/* For keysizes bigger than 10_240 use always at least 2 Rounds */
+	}
+}

+ 478 - 2
core/math/big/private.odin

@@ -430,7 +430,7 @@ _private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -
 	context.allocator = allocator;
 	context.allocator = allocator;
 
 
 	S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
 	S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
-	defer destroy(S0, a0, a1, a2);
+	defer internal_destroy(S0, a0, a1, a2);
 
 
 	/*
 	/*
 		Init temps.
 		Init temps.
@@ -752,6 +752,188 @@ _private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^In
 	return nil;
 	return nil;
 }
 }
 
 
+/*
+	Direct implementation of algorithms 1.8 "RecursiveDivRem" and 1.9 "UnbalancedDivision" from:
+
+		Brent, Richard P., and Paul Zimmermann. "Modern computer arithmetic"
+		Vol. 18. Cambridge University Press, 2010
+		Available online at https://arxiv.org/pdf/1004.4710
+
+	pages 19ff. in the above online document.
+*/
+_private_div_recursion :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	A1, A2, B1, B0, Q1, Q0, R1, R0, t := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(A1, A2, B1, B0, Q1, Q0, R1, R0, t);
+
+	m := a.used - b.used;
+	k := m / 2;
+
+	if m < MUL_KARATSUBA_CUTOFF { return _private_int_div_school(quotient, remainder, a, b); }
+
+	if err = internal_init_multi(A1, A2, B1, B0, Q1, Q0, R1, R0, t); err != nil { return err; }
+
+	/*
+		`B1` = `b` / `beta`^`k`, `B0` = `b` % `beta`^`k`
+	*/
+	if err = internal_shrmod(B1, B0, b, k * _DIGIT_BITS);            err != nil { return err; }
+
+	/*
+		(Q1, R1) =  RecursiveDivRem(A / beta^(2k), B1)
+	*/
+	if err = internal_shrmod(A1, t, a, 2 * k * _DIGIT_BITS);         err != nil { return err; }
+	if err = _private_div_recursion(Q1, R1, A1, B1);                 err != nil { return err; }
+
+	/*
+		A1 = (R1 * beta^(2k)) + (A % beta^(2k)) - (Q1 * B0 * beta^k)
+	*/
+	if err = internal_shl_digit(R1, 2 * k);                          err != nil { return err; }
+	if err = internal_add(A1, R1, t);                                err != nil { return err; }
+	if err = internal_mul(t, Q1, B0);                                err != nil { return err; }
+
+	/*
+		While A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B)
+	*/
+	if internal_cmp(A1, 0) == -1 {
+		if internal_shl(t, b, k * _DIGIT_BITS);                      err != nil { return err; }
+
+		for {
+			if err = internal_decr(Q1);                              err != nil { return err; }
+			if err = internal_add(A1, A1, t);                        err != nil { return err; }
+			if internal_cmp(A1, 0) != -1 { break; }
+		}
+	}
+
+	/*
+		(Q0, R0) =  RecursiveDivRem(A1 / beta^(k), B1)
+	*/
+	if internal_shrmod(A1, t, A1, k * _DIGIT_BITS);                  err != nil { return err; }
+	if _private_div_recursion(Q0, R0, A1, B1);                       err != nil { return err; }
+
+	/*
+		A2 = (R0*beta^k) +  (A1 % beta^k) - (Q0*B0)
+	*/
+	if err = internal_shl_digit(R0, k);                              err != nil { return err; }
+	if err = internal_add(A2, R0, t);                                err != nil { return err; } 
+	if err = internal_mul(t, Q0, B0);                                err != nil { return err; }
+	if err = internal_sub(A2, A2, t);                                err != nil { return err; }
+
+	/*
+		While A2 < 0 do Q0 = Q0 - 1, A2 = A2 + B.
+	*/
+	for internal_cmp(A2, 0) == -1 {
+		if err = internal_decr(Q0);                                  err != nil { return err; }
+		if err = internal_add(A2, A2, b);                            err != nil { return err; }
+	}
+
+	/*
+		Return q = (Q1*beta^k) + Q0, r = A2.
+	*/
+	if err = internal_shl_digit(Q1, k);                              err != nil { return err; }
+	if err = internal_add(quotient, Q1, Q0);                         err != nil { return err; }
+
+	return internal_copy(remainder, A2);
+}
+
+_private_int_div_recursive :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	A, B, Q, Q1, R, A_div, A_mod := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(A, B, Q, Q1, R, A_div, A_mod);
+
+	if err = internal_init_multi(A, B, Q, Q1, R, A_div, A_mod);      err != nil { return err; }
+
+	/*
+		Most significant bit of a limb.
+		Assumes  _DIGIT_MAX < (sizeof(DIGIT) * sizeof(u8)).
+	*/
+	msb := (_DIGIT_MAX + DIGIT(1)) >> 1;
+	sigma := 0;
+	msb_b := b.digit[b.used - 1];
+	for msb_b < msb {
+		sigma += 1;
+		msb_b <<= 1;
+	}
+
+	/*
+		Use that sigma to normalize B.
+	*/
+	if err = internal_shl(B, b, sigma);                              err != nil { return err; }
+	if err = internal_shl(A, a, sigma);                              err != nil { return err; }
+
+	/*
+		Fix the sign.
+	*/
+	neg := a.sign != b.sign;
+	A.sign = .Zero_or_Positive; B.sign = .Zero_or_Positive;
+
+	/*
+		If the magnitude of "A" is not more more than twice that of "B" we can work
+		on them directly, otherwise we need to work at "A" in chunks.
+	*/
+	n := B.used;
+	m := A.used - B.used;
+
+	/*
+		Q = 0. We already ensured that when we called `internal_init_multi`.
+	*/
+	for m > n {
+		/*
+			(q, r) = RecursiveDivRem(A / (beta^(m-n)), B)
+		*/
+		j := (m - n) * _DIGIT_BITS;
+		if err = internal_shrmod(A_div, A_mod, A, j);                err != nil { return err; }
+		if err = _private_div_recursion(Q1, R, A_div, B);            err != nil { return err; }
+
+		/*
+			Q = (Q*beta!(n)) + q
+		*/
+		if err = internal_shl(Q, Q, n * _DIGIT_BITS);                err != nil { return err; }
+		if err = internal_add(Q, Q, Q1);                             err != nil { return err; }
+
+		/*
+			A = (r * beta^(m-n)) + (A % beta^(m-n))
+		*/
+		if err = internal_shl(R, R, (m - n) * _DIGIT_BITS);          err != nil { return err; }
+		if err = internal_add(A, R, A_mod);                          err != nil { return err; }
+
+		/*
+			m = m - n
+		*/
+		m -= n;
+	}
+
+	/*
+		(q, r) = RecursiveDivRem(A, B)
+	*/
+	if err = _private_div_recursion(Q1, R, A, B);                    err != nil { return err; }
+
+	/*
+		Q = (Q * beta^m) + q, R = r
+	*/
+	if err = internal_shl(Q, Q, m * _DIGIT_BITS);                    err != nil { return err; }
+	if err = internal_add(Q, Q, Q1);                                 err != nil { return err; }
+
+	/*
+		Get sign before writing to dest.
+	*/
+	R.sign = .Zero_or_Positive if internal_is_zero(Q) else a.sign;
+
+	if quotient != nil {
+		swap(quotient, Q);
+		quotient.sign = .Negative if neg else .Zero_or_Positive;
+	}
+	if remainder != nil {
+		/*
+			De-normalize the remainder.
+		*/
+		if err = internal_shrmod(R, nil, R, sigma);                  err != nil { return err; }
+		swap(remainder, R);
+	}
+	return nil;
+}
+
 /*
 /*
 	Slower bit-bang division... also smaller.
 	Slower bit-bang division... also smaller.
 */
 */
@@ -1040,7 +1222,7 @@ _private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.
 */
 */
 _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) {
 _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) {
 	bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
 	bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
-	defer destroy(bracket_low, bracket_high, bracket_mid, t, bi_base);
+	defer internal_destroy(bracket_low, bracket_high, bracket_mid, t, bi_base);
 
 
 	ic := #force_inline internal_cmp(a, base);
 	ic := #force_inline internal_cmp(a, base);
 	if ic == -1 || ic == 0 {
 	if ic == -1 || ic == 0 {
@@ -1100,6 +1282,300 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -
 	return;
 	return;
 }
 }
 
 
+
+/*
+	hac 14.61, pp608
+*/
+_private_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	x, y, u, v, A, B, C, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(x, y, u, v, A, B, C, D);
+
+	/*
+		`b` cannot be negative.
+	*/
+	if b.sign == .Negative || internal_is_zero(b)                    { return .Invalid_Argument; }
+
+	/*
+		init temps.
+	*/
+	if err = internal_init_multi(x, y, u, v, A, B, C, D); err != nil { return err; }
+
+	/*
+		`x` = `a` % `b`, `y` = `b`
+	*/
+	if err = internal_mod(x, a, b);                       err != nil { return err; }
+	if err = internal_copy(y, b);                         err != nil { return err; }
+
+	/*
+		2. [modified] if x,y are both even then return an error!
+	*/
+	if internal_is_even(x) && internal_is_even(y)                    { return .Invalid_Argument; }
+
+	/*
+		3. u=x, v=y, A=1, B=0, C=0, D=1
+	*/
+	if err = internal_copy(u, x);                         err != nil { return err; }
+	if err = internal_copy(v, y);                         err != nil { return err; }
+	if err = internal_one(A);                             err != nil { return err; }
+	if err = internal_one(D);                             err != nil { return err; }
+
+	for {
+		/*
+			4.  while `u` is even do:
+		*/
+		for internal_is_even(u) {
+			/*
+				4.1 `u` = `u` / 2
+			*/
+			if err = internal_int_shr1(u, u);             err != nil { return err; }
+
+			/*
+				4.2 if `A` or `B` is odd then:
+			*/
+			if internal_is_odd(A) || internal_is_odd(B) {
+				/*
+					`A` = (`A`+`y`) / 2, `B` = (`B`-`x`) / 2
+				*/
+				if err = internal_add(A, A, y);           err != nil { return err; }
+				if err = internal_add(B, B, x);           err != nil { return err; }
+			}
+			/*
+				`A` = `A` / 2, `B` = `B` / 2
+			*/
+			if err = internal_int_shr1(A, A);             err != nil { return err; }
+			if err = internal_int_shr1(B, B);             err != nil { return err; }
+		}
+
+		/*
+			5.  while `v` is even do:
+		*/
+		for internal_is_even(v) {
+			/*
+				5.1 `v` = `v` / 2
+			*/
+			if err = internal_int_shr1(v, v);             err != nil { return err; }
+
+			/*
+				5.2 if `C` or `D` is odd then:
+			*/
+			if internal_is_odd(C) || internal_is_odd(D) {
+				/*
+					`C` = (`C`+`y`) / 2, `D` = (`D`-`x`) / 2
+				*/
+				if err = internal_add(C, C, y);           err != nil { return err; }
+				if err = internal_add(D, D, x);           err != nil { return err; }
+			}
+			/*
+				`C` = `C` / 2, `D` = `D` / 2
+			*/
+			if err = internal_int_shr1(C, C);             err != nil { return err; }
+			if err = internal_int_shr1(D, D);             err != nil { return err; }
+		}
+
+		/*
+			6.  if `u` >= `v` then:
+		*/
+		if internal_cmp(u, v) != -1 {
+			/*
+				`u` = `u` - `v`, `A` = `A` - `C`, `B` = `B` - `D`
+			*/
+			if err = internal_sub(u, u, v);               err != nil { return err; }
+			if err = internal_sub(A, A, C);               err != nil { return err; }
+			if err = internal_sub(B, B, D);               err != nil { return err; }
+		} else {
+			/* v - v - u, C = C - A, D = D - B */
+			if err = internal_sub(v, v, u);               err != nil { return err; }
+			if err = internal_sub(C, C, A);               err != nil { return err; }
+			if err = internal_sub(D, D, B);               err != nil { return err; }
+		}
+
+		/*
+			If not zero goto step 4
+		*/
+		if internal_is_zero(u)                                       { break; }
+	}
+
+	/*
+		Now `a` = `C`, `b` = `D`, `gcd` == `g`*`v`
+	*/
+
+	/*
+		If `v` != `1` then there is no inverse.
+	*/
+	if internal_cmp(v, 1) !=  0                                      { return .Invalid_Argument; }
+
+	/*
+		If its too low.
+	*/
+	if internal_cmp(C, 0) == -1 {
+		if err = internal_add(C, C, b); err != nil                   { return err; }
+	}
+
+	/*
+		Too big.
+	*/
+	if internal_cmp(C, 0) != -1 {
+		if err = internal_sub(C, C, b); err != nil                   { return err; }
+	}
+
+	/*
+		`C` is now the inverse.
+	*/
+	swap(dest, C);
+
+	return;
+}
+
+/*
+	Computes the modular inverse via binary extended Euclidean algorithm, that is `dest` = 1 / `a` mod `b`.
+
+	Based on slow invmod except this is optimized for the case where `b` is odd,
+	as per HAC Note 14.64 on pp. 610.
+*/
+_private_inverse_modulo_odd :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+	x, y, u, v, B, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer internal_destroy(x, y, u, v, B, D);
+
+	sign: Sign;
+
+	/*
+		2. [modified] `b` must be odd.
+	*/
+	if internal_is_even(b)                                           { return .Invalid_Argument; }
+
+	/*
+		Init all our temps.
+	*/
+	if err = internal_init_multi(x, y, u, v, B, D); err != nil       { return err; }
+
+	/*
+		`x` == modulus, `y` == value to invert.
+	*/
+	if err = internal_copy(x, b);                   err != nil       { return err; }
+
+	/*
+		We need `y` = `|a|`.
+	*/
+	if err = internal_mod(y, a, b);                 err != nil       { return err; }
+
+	/*
+		If one of `x`, `y` is zero return an error!
+	*/
+	if internal_is_zero(x) || internal_is_zero(y)                    { return .Invalid_Argument; }
+
+	/*
+		3. `u` = `x`, `v` = `y`, `A` = 1, `B` = 0, `C` = 0, `D` = 1
+	*/
+	if err = internal_copy(u, x);                   err != nil       { return err; }
+	if err = internal_copy(v, y);                   err != nil       { return err; }
+
+	if err = internal_one(D);                       err != nil       { return err; }
+
+	for {
+		/*
+			4.  while `u` is even do.
+		*/
+		for internal_is_even(u) {
+			/*
+				4.1 `u` = `u` / 2
+			*/
+			if err = internal_int_shr1(u, u);       err != nil       { return err; }
+
+			/*
+				4.2 if `B` is odd then:
+			*/
+			if internal_is_odd(B) {
+				/*
+					`B` = (`B` - `x`) / 2
+				*/
+				if err = internal_sub(B, B, x);     err != nil       { return err; }
+			}
+
+			/*
+				`B` = `B` / 2
+			*/
+			if err = internal_int_shr1(B, B);       err != nil       { return err; }
+		}
+
+		/*
+			5.  while `v` is even do:
+		*/
+		for internal_is_even(v) {
+			/*
+				5.1 `v` = `v` / 2
+			*/
+			if err = internal_int_shr1(v, v);       err != nil       { return err; }
+
+			/*
+				5.2 if `D` is odd then:
+			*/
+			if internal_is_odd(D) {
+				/*
+					`D` = (`D` - `x`) / 2
+				*/
+				if err = internal_sub(D, D, x);     err != nil       { return err; }
+			}
+			/*
+				`D` = `D` / 2
+			*/
+			if err = internal_int_shr1(D, D);       err != nil       { return err; }
+		}
+
+		/*
+			6.  if `u` >= `v` then:
+		*/
+		if internal_cmp(u, v) != -1 {
+			/*
+				`u` = `u` - `v`, `B` = `B` - `D`
+			*/
+			if err = internal_sub(u, u, v);         err != nil       { return err; }
+			if err = internal_sub(B, B, D);         err != nil       { return err; }
+		} else {
+			/*
+				`v` - `v` - `u`, `D` = `D` - `B`
+			*/
+			if err = internal_sub(v, v, u);         err != nil       { return err; }
+			if err = internal_sub(D, D, B);         err != nil       { return err; }
+		}
+
+		/*
+			If not zero goto step 4.
+		*/
+		if internal_is_zero(u)                                       { break; }
+	}
+
+	/*
+		Now `a` = C, `b` = D, gcd == g*v
+	*/
+
+	/*
+		if `v` != 1 then there is no inverse
+	*/
+	if internal_cmp(v, 1) != 0                                       { return .Invalid_Argument; }
+
+	/*
+		`b` is now the inverse.
+	*/
+	sign = a.sign;
+	for internal_int_is_negative(D) {
+		if err = internal_add(D, D, b);             err != nil       { return err; }
+	}
+
+	/*
+		Too big.
+	*/
+	for internal_cmp_mag(D, b) != -1 {
+		if err = internal_sub(D, D, b);             err != nil       { return err; }
+	}
+
+	swap(dest, D);
+	dest.sign = sign;
+	return nil;
+}
+
+
 /*
 /*
 	Returns the log2 of an `Int`.
 	Returns the log2 of an `Int`.
 	Assumes `a` not to be `nil` and to have been initialized.
 	Assumes `a` not to be `nil` and to have been initialized.

+ 3 - 1
core/math/big/test.odin

@@ -26,7 +26,9 @@ PyRes :: struct {
 
 
 @export test_initialize_constants :: proc "c" () -> (res: u64) {
 @export test_initialize_constants :: proc "c" () -> (res: u64) {
 	context = runtime.default_context();
 	context = runtime.default_context();
-	return u64(initialize_constants());
+	res = u64(initialize_constants());
+	//assert(MUL_KARATSUBA_CUTOFF >= 40);
+	return res;
 }
 }
 
 
 @export test_error_string :: proc "c" (err: Error) -> (res: cstring) {
 @export test_error_string :: proc "c" (err: Error) -> (res: cstring) {

+ 11 - 4
core/math/big/test.py

@@ -66,6 +66,8 @@ timed_or_fast.add_argument(
 
 
 args = parser.parse_args()
 args = parser.parse_args()
 
 
+EXIT_ON_FAIL = args.exit_on_fail
+
 #
 #
 # How many iterations of each random test do we want to run?
 # How many iterations of each random test do we want to run?
 #
 #
@@ -153,7 +155,7 @@ class Res(Structure):
 	_fields_ = [("res", c_char_p), ("err", c_uint64)]
 	_fields_ = [("res", c_char_p), ("err", c_uint64)]
 
 
 initialize_constants = load(l.test_initialize_constants, [], c_uint64)
 initialize_constants = load(l.test_initialize_constants, [], c_uint64)
-initialize_constants()
+print("initialize_constants: ", initialize_constants())
 
 
 error_string = load(l.test_error_string, [c_byte], c_char_p)
 error_string = load(l.test_error_string, [c_byte], c_char_p)
 
 
@@ -211,7 +213,7 @@ def test(test_name: "", res: Res, param=[], expected_error = Error.Okay, expecte
 			print(error, flush=True)
 			print(error, flush=True)
 			passed = False
 			passed = False
 
 
-	if args.exit_on_fail and not passed: exit(res.err)
+	if EXIT_ON_FAIL and not passed: exit(res.err)
 
 
 	return passed
 	return passed
 
 
@@ -257,7 +259,7 @@ def test_sqr(a = 0, b = 0, expected_error = Error.Okay):
 	try:
 	try:
 		res  = sqr(*args)
 		res  = sqr(*args)
 	except OSError as e:
 	except OSError as e:
-		print("{} while trying to square {} x {}.".format(e, a))
+		print("{} while trying to square {}.".format(e, a))
 		if EXIT_ON_FAIL: exit(3)
 		if EXIT_ON_FAIL: exit(3)
 		return False
 		return False
 
 
@@ -268,7 +270,12 @@ def test_sqr(a = 0, b = 0, expected_error = Error.Okay):
 
 
 def test_div(a = 0, b = 0, expected_error = Error.Okay):
 def test_div(a = 0, b = 0, expected_error = Error.Okay):
 	args = [arg_to_odin(a), arg_to_odin(b)]
 	args = [arg_to_odin(a), arg_to_odin(b)]
-	res  = div(*args)
+	try:
+		res  = div(*args)
+	except OSError as e:
+		print("{} while trying divide to {} / {}.".format(e, a, b))
+		if EXIT_ON_FAIL: exit(3)
+		return False
 	expected_result = None
 	expected_result = None
 	if expected_error == Error.Okay:
 	if expected_error == Error.Okay:
 		#
 		#

+ 1 - 0
core/math/big/tune.odin

@@ -23,6 +23,7 @@ Category :: enum {
 	ctz,
 	ctz,
 	sqr,
 	sqr,
 	bitfield_extract,
 	bitfield_extract,
+	rm_trials,
 };
 };
 
 
 Event :: struct {
 Event :: struct {