4 years ago · 586641d77f
--- a/core/math/big/build.bat
+++ b/core/math/big/build.bat
@@ -1,9 +1,9 @@
 
															 @echo off

														
 
															-odin run . -vet -o:size

														
 
															-: -o:size

														
 
															-:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py -fast-tests

														
 
															-:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py -fast-tests

														
 
															-:odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py -fast-tests

														
 
															-:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py

														
 
															-: -fast-tests

														
 
															-:odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests
														
 
															+:odin run . -vet

														
 
															+

														
 
															+set TEST_ARGS=-fast-tests

														
 
															+:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

														
 
															+odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

														
 
															+:odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

														
 
															+:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

														
 
															+:odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests %TEST_ARGS%
														
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -670,7 +670,7 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
 
															 		digits   := src.used + multiplier.used + 1;
														
 
															 		if        false &&  min_used     >= MUL_KARATSUBA_CUTOFF &&
														
 
															-						    max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
														
 
															+							max_used / 2 >= MUL_KARATSUBA_CUTOFF &&
														
 
															 			/*
														
 
															 				Not much effect was observed below a ratio of 1:2, but again: YMMV.
														
 
															 			*/
														
@@ -861,7 +861,12 @@ internal_int_mod :: proc(remainder, numerator, denominator: ^Int, allocator := c
 
															 	return #force_inline internal_add(remainder, remainder, numerator, allocator);
														
 
															 }
														
 
															-internal_mod :: proc{ internal_int_mod, };
														
 
															+
														
 
															+internal_int_mod_digit :: proc(numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
														
 
															+	return internal_int_divmod_digit(nil, numerator, denominator, allocator);
														
 
															+}
														
 
															+
														
 
															+internal_mod :: proc{ internal_int_mod, internal_int_mod_digit};
														
 
															 /*
														
 
															 	remainder = (number + addend) % modulus.
														
@@ -1106,10 +1111,10 @@ internal_compare :: proc { internal_int_compare, internal_int_compare_digit, };
 
															 internal_cmp :: internal_compare;
														
 
															 /*
														
 
															-    Compare an `Int` to an unsigned number upto `DIGIT & _MASK`.
														
 
															-    Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`.
														
 
															+	Compare an `Int` to an unsigned number upto `DIGIT & _MASK`.
														
 
															+	Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`.
														
 
															-    Expects: `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`.
														
 
															+	Expects: `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`.
														
 
															 */
														
 
															 internal_int_compare_digit :: #force_inline proc(a: ^Int, b: DIGIT) -> (comparison: int) {
														
 
															 	a_is_negative := #force_inline internal_is_negative(a);
														
@@ -1165,11 +1170,72 @@ internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison:
 
															 		}
														
 
															 	}
														
 
															-   	return 0;
														
 
															+	return 0;
														
 
															 }
														
 
															 internal_compare_magnitude :: proc { internal_int_compare_magnitude, };
														
 
															 internal_cmp_mag :: internal_compare_magnitude;
														
 
															+/*
														
 
															+	Check if remainders are possible squares - fast exclude non-squares.
														
 
															+
														
 
															+	Returns `true` if `a` is a square, `false` if not.
														
 
															+	Assumes `a` not to be `nil` and to have been initialized.
														
 
															+*/
														
 
															+internal_int_is_square :: proc(a: ^Int, allocator := context.allocator) -> (square: bool, err: Error) {
														
 
															+	context.allocator = allocator;
														
 
															+
														
 
															+	/*
														
 
															+		Default to Non-square :)
														
 
															+	*/
														
 
															+	square = false;
														
 
															+
														
 
															+	if internal_is_negative(a)                                       { return; }
														
 
															+	if internal_is_zero(a)                                           { return; }
														
 
															+
														
 
															+	/*
														
 
															+		First check mod 128 (suppose that _DIGIT_BITS is at least 7).
														
 
															+	*/
														
 
															+	if _private_int_rem_128[127 & a.digit[0]] == 1                   { return; }
														
 
															+
														
 
															+	/*
														
 
															+		Next check mod 105 (3*5*7).
														
 
															+	*/
														
 
															+	c: DIGIT;
														
 
															+	c, err = internal_mod(a, 105);
														
 
															+	if _private_int_rem_105[c] == 1                                  { return; }
														
 
															+
														
 
															+	t := &Int{};
														
 
															+	defer destroy(t);
														
 
															+
														
 
															+	set(t, 11 * 13 * 17 * 19 * 23 * 29 * 31) or_return;
														
 
															+	internal_mod(t, a, t) or_return;
														
 
															+
														
 
															+	r: u64;
														
 
															+	r, err = internal_int_get(t, u64);
														
 
															+
														
 
															+	/*
														
 
															+		Check for other prime modules, note it's not an ERROR but we must
														
 
															+		free "t" so the easiest way is to goto LBL_ERR.  We know that err
														
 
															+		is already equal to MP_OKAY from the mp_mod call
														
 
															+	*/
														
 
															+	if (1 << (r % 11) &      0x5C4) != 0                             { return; }
														
 
															+	if (1 << (r % 13) &      0x9E4) != 0                             { return; }
														
 
															+	if (1 << (r % 17) &     0x5CE8) != 0                             { return; }
														
 
															+	if (1 << (r % 19) &    0x4F50C) != 0                             { return; }
														
 
															+	if (1 << (r % 23) &   0x7ACCA0) != 0                             { return; }
														
 
															+	if (1 << (r % 29) &  0xC2EDD0C) != 0                             { return; }
														
 
															+	if (1 << (r % 31) & 0x6DE2B848) != 0                             { return; }
														
 
															+
														
 
															+	/*
														
 
															+		Final check - is sqr(sqrt(arg)) == arg?
														
 
															+	*/
														
 
															+	sqrt(t, a) or_return;
														
 
															+	sqr(t, t)  or_return;
														
 
															+
														
 
															+	square = internal_cmp_mag(t, a) == 0;
														
 
															+
														
 
															+	return;
														
 
															+}
														
 
															 /*
														
 
															 	=========================    Logs, powers and roots    ============================
														
@@ -2300,12 +2366,12 @@ internal_int_shrmod :: proc(quotient, remainder, numerator: ^Int, bits: int, all
 
															 			/*
														
 
															 				Shift the current word and mix in the carry bits from the previous word.
														
 
															 			*/
														
 
															-	        quotient.digit[x] = (quotient.digit[x] >> uint(bits)) | (carry << shift);
														
 
															+			quotient.digit[x] = (quotient.digit[x] >> uint(bits)) | (carry << shift);
														
 
															-	        /*
														
 
															-	        	Update carry from forward carry.
														
 
															-	        */
														
 
															-	        carry = fwd_carry;
														
 
															+			/*
														
 
															+				Update carry from forward carry.
														
 
															+			*/
														
 
															+			carry = fwd_carry;
														
 
															 		}
														
 
															 	}
														
@@ -2331,17 +2397,17 @@ internal_int_shr_digit :: proc(quotient: ^Int, digits: int, allocator := context
 
															 	*/
														
 
															 	if digits > quotient.used { return internal_zero(quotient); }
														
 
															-   	/*
														
 
															+	/*
														
 
															 		Much like `int_shl_digit`, this is implemented using a sliding window,
														
 
															 		except the window goes the other way around.
														
 
															 		b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
														
 
															-		            /\                   |      ---->
														
 
															-		             \-------------------/      ---->
														
 
															-    */
														
 
															+					/\                   |      ---->
														
 
															+					 \-------------------/      ---->
														
 
															+	*/
														
 
															 	#no_bounds_check for x := 0; x < (quotient.used - digits); x += 1 {
														
 
															-    	quotient.digit[x] = quotient.digit[x + digits];
														
 
															+		quotient.digit[x] = quotient.digit[x + digits];
														
 
															 	}
														
 
															 	quotient.used -= digits;
														
 
															 	internal_zero_unused(quotient);
														
@@ -2445,14 +2511,14 @@ internal_int_shl_digit :: proc(quotient: ^Int, digits: int, allocator := context
 
															 	/*
														
 
															 		Much like `int_shr_digit`, this is implemented using a sliding window,
														
 
															 		except the window goes the other way around.
														
 
															-    */
														
 
															-    #no_bounds_check for x := quotient.used; x > 0; x -= 1 {
														
 
															-    	quotient.digit[x+digits-1] = quotient.digit[x-1];
														
 
															-    }
														
 
															-
														
 
															-   	quotient.used += digits;
														
 
															-    mem.zero_slice(quotient.digit[:digits]);
														
 
															-    return nil;
														
 
															+	*/
														
 
															+	#no_bounds_check for x := quotient.used; x > 0; x -= 1 {
														
 
															+		quotient.digit[x+digits-1] = quotient.digit[x-1];
														
 
															+	}
														
 
															+
														
 
															+	quotient.used += digits;
														
 
															+	mem.zero_slice(quotient.digit[:digits]);
														
 
															+	return nil;
														
 
															 }
														
 
															 internal_shl_digit :: proc { internal_int_shl_digit, };
														
--- a/core/math/big/prime.odin
+++ b/core/math/big/prime.odin
@@ -33,6 +33,183 @@ int_prime_is_divisible :: proc(a: ^Int, allocator := context.allocator) -> (res:
 
															 	return false, nil;
														
 
															 }
														
 
															+/*
														
 
															+	Computes xR**-1 == x (mod N) via Montgomery Reduction.
														
 
															+*/
														
 
															+internal_int_montgomery_reduce :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) {
														
 
															+	context.allocator = allocator;
														
 
															+	/*
														
 
															+		Can the fast reduction [comba] method be used?
														
 
															+		Note that unlike in mul, you're safely allowed *less* than the available columns [255 per default],
														
 
															+		since carries are fixed up in the inner loop.
														
 
															+	*/
														
 
															+	digs := (n.used * 2) + 1;
														
 
															+	if digs < _WARRAY && x.used <= _WARRAY && n.used < _MAX_COMBA {
														
 
															+		return _private_montgomery_reduce_comba(x, n, rho);
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Grow the input as required
														
 
															+	*/
														
 
															+	internal_grow(x, digs)                                           or_return;
														
 
															+	x.used = digs;
														
 
															+
														
 
															+	for ix := 0; ix < n.used; ix += 1 {
														
 
															+		/*
														
 
															+			`mu = ai * rho mod b`
														
 
															+			The value of rho must be precalculated via `int_montgomery_setup()`,
														
 
															+			such that it equals -1/n0 mod b this allows the following inner loop
														
 
															+			to reduce the input one digit at a time.
														
 
															+		*/
														
 
															+
														
 
															+		mu := DIGIT((_WORD(x.digit[ix]) * _WORD(rho)) & _WORD(_MASK));
														
 
															+
														
 
															+		/*
														
 
															+			a = a + mu * m * b**i
														
 
															+			Multiply and add in place.
														
 
															+		*/
														
 
															+		u  := DIGIT(0);
														
 
															+		iy := int(0);
														
 
															+		for ; iy < n.used; iy += 1 {
														
 
															+			/*
														
 
															+				Compute product and sum.
														
 
															+			*/
														
 
															+			r := (_WORD(mu) * _WORD(n.digit[iy]) + _WORD(u) + _WORD(x.digit[ix + iy]));
														
 
															+
														
 
															+			/*
														
 
															+				Get carry.
														
 
															+			*/
														
 
															+			u = DIGIT(r >> _DIGIT_BITS);
														
 
															+
														
 
															+			/*
														
 
															+				Fix digit.
														
 
															+			*/
														
 
															+			x.digit[ix + iy] = DIGIT(r & _WORD(_MASK));
														
 
															+		}
														
 
															+
														
 
															+		/*
														
 
															+			At this point the ix'th digit of x should be zero.
														
 
															+			Propagate carries upwards as required.
														
 
															+		*/
														
 
															+		for u != 0 {
														
 
															+			x.digit[ix + iy] += u;
														
 
															+			u = x.digit[ix + iy] >> _DIGIT_BITS;
														
 
															+			x.digit[ix + iy] &= _MASK;
														
 
															+			iy += 1;
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		At this point the n.used'th least significant digits of x are all zero,
														
 
															+		which means we can shift x to the right by n.used digits and the
														
 
															+		residue is unchanged.
														
 
															+
														
 
															+		x = x/b**n.used.
														
 
															+	*/
														
 
															+	internal_clamp(x);
														
 
															+	internal_shr_digit(x, n.used);
														
 
															+
														
 
															+	/*
														
 
															+		if x >= n then x = x - n
														
 
															+	*/
														
 
															+	if internal_cmp_mag(x, n) != -1 {
														
 
															+		return internal_sub(x, x, n);
														
 
															+	}
														
 
															+
														
 
															+	return nil;
														
 
															+}
														
 
															+
														
 
															+int_montgomery_reduce :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) {
														
 
															+	assert_if_nil(x, n);
														
 
															+	context.allocator = allocator;
														
 
															+
														
 
															+	internal_clear_if_uninitialized(x, n) or_return;
														
 
															+
														
 
															+	return #force_inline internal_int_montgomery_reduce(x, n, rho);
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+	Shifts with subtractions when the result is greater than b.
														
 
															+
														
 
															+	The method is slightly modified to shift B unconditionally upto just under
														
 
															+	the leading bit of b.  This saves alot of multiple precision shifting.
														
 
															+*/
														
 
															+internal_int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := context.allocator) -> (err: Error) {
														
 
															+	context.allocator = allocator;
														
 
															+	/*
														
 
															+		How many bits of last digit does b use.
														
 
															+	*/
														
 
															+	bits := internal_count_bits(b) % _DIGIT_BITS;
														
 
															+
														
 
															+	if b.used > 1 {
														
 
															+		power := ((b.used - 1) * _DIGIT_BITS) + bits - 1;
														
 
															+		internal_int_power_of_two(a, power)                          or_return;
														
 
															+	} else {
														
 
															+		internal_one(a);
														
 
															+		bits = 1;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Now compute C = A * B mod b.
														
 
															+	*/
														
 
															+	for x := bits - 1; x < _DIGIT_BITS; x += 1 {
														
 
															+		internal_int_shl1(a, a)                                      or_return;
														
 
															+		if internal_cmp_mag(a, b) != -1 {
														
 
															+			internal_sub(a, a, b)                                    or_return;
														
 
															+		}
														
 
															+	}
														
 
															+	return nil;
														
 
															+}
														
 
															+
														
 
															+int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := context.allocator) -> (err: Error) {
														
 
															+	assert_if_nil(a, b);
														
 
															+	context.allocator = allocator;
														
 
															+
														
 
															+	internal_clear_if_uninitialized(a, b) or_return;
														
 
															+
														
 
															+	return #force_inline internal_int_montgomery_calc_normalization(a, b);
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+	Sets up the Montgomery reduction stuff.
														
 
															+*/
														
 
															+internal_int_montgomery_setup :: proc(n: ^Int) -> (rho: DIGIT, err: Error) {
														
 
															+	/*
														
 
															+		Fast inversion mod 2**k
														
 
															+		Based on the fact that:
														
 
															+
														
 
															+		XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
														
 
															+		                  =>  2*X*A - X*X*A*A = 1
														
 
															+		                  =>  2*(1) - (1)     = 1
														
 
															+	*/
														
 
															+	b := n.digit[0];
														
 
															+	if b & 1 == 0 { return 0, .Invalid_Argument; }
														
 
															+
														
 
															+	x := (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
														
 
															+	x *= 2 - (b * x);              /* here x*a==1 mod 2**8 */
														
 
															+	x *= 2 - (b * x);              /* here x*a==1 mod 2**16 */
														
 
															+	when _WORD_TYPE_BITS == 64 {
														
 
															+		x *= 2 - (b * x);              /* here x*a==1 mod 2**32 */
														
 
															+		x *= 2 - (b * x);              /* here x*a==1 mod 2**64 */
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		rho = -1/m mod b
														
 
															+	*/
														
 
															+	rho = DIGIT(((_WORD(1) << _WORD(_DIGIT_BITS)) - _WORD(x)) & _WORD(_MASK));
														
 
															+	return rho, nil;
														
 
															+}
														
 
															+
														
 
															+int_montgomery_setup :: proc(n: ^Int, allocator := context.allocator) -> (rho: DIGIT, err: Error) {
														
 
															+	assert_if_nil(n);
														
 
															+	internal_clear_if_uninitialized(n, allocator) or_return;
														
 
															+
														
 
															+	return #force_inline internal_int_montgomery_setup(n);
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+	Returns the number of Rabin-Miller trials needed for a given bit size.
														
 
															+*/
														
 
															 number_of_rabin_miller_trials :: proc(bit_size: int) -> (number_of_trials: int) {
														
 
															 	switch {
														
 
															 	case bit_size <=    80:
														
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -1542,6 +1542,126 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -
 
															+/*

														
 
															+	Computes xR**-1 == x (mod N) via Montgomery Reduction.

														
 
															+	This is an optimized implementation of `internal_montgomery_reduce`

														
 
															+	which uses the comba method to quickly calculate the columns of the reduction.

														
 
															+	Based on Algorithm 14.32 on pp.601 of HAC.

														
 
															+*/

														
 
															+_private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) {

														
 
															+	context.allocator = allocator;

														
 
															+	W: [_WARRAY]_WORD = ---;

														
 
															+

														
 
															+	if x.used > _WARRAY { return .Invalid_Argument; }

														
 
															+

														
 
															+	/*

														
 
															+		Get old used count.

														
 
															+	*/

														
 
															+	old_used := x.used;

														
 
															+

														
 
															+	/*

														
 
															+		Grow `x` as required.

														
 
															+	*/

														
 
															+	internal_grow(x, n.used + 1) or_return;

														
 
															+

														
 
															+	/*

														
 
															+		First we have to get the digits of the input into an array of double precision words W[...]

														
 
															+		Copy the digits of `x` into W[0..`x.used` - 1]

														
 
															+	*/

														
 
															+	ix: int;

														
 
															+	for ix = 0; ix < x.used; ix += 1 {

														
 
															+		W[ix] = _WORD(x.digit[ix]);

														
 
															+	}

														
 
															+

														
 
															+	/*

														
 
															+		Zero the high words of W[a->used..m->used*2].

														
 
															+	*/

														
 
															+	zero_upper := (n.used * 2) + 1;

														
 
															+	if ix < zero_upper {

														
 
															+		for ix = x.used; ix < zero_upper; ix += 1 {

														
 
															+			W[ix] = {};

														
 
															+		}

														
 
															+	}

														
 
															+

														
 
															+	/*

														
 
															+		Now we proceed to zero successive digits from the least significant upwards.

														
 
															+	*/

														
 
															+	for ix = 0; ix < n.used; ix += 1 {

														
 
															+		/*

														
 
															+			`mu = ai * m' mod b`

														
 
															+

														
 
															+			We avoid a double precision multiplication (which isn't required)

														
 
															+			by casting the value down to a DIGIT.  Note this requires

														
 
															+			that W[ix-1] have the carry cleared (see after the inner loop)

														
 
															+		*/

														
 
															+		mu := ((W[ix] & _WORD(_MASK)) * _WORD(rho)) & _WORD(_MASK);

														
 
															+

														
 
															+		/*

														
 
															+			`a = a + mu * m * b**i`

														
 
															+		

														
 
															+			This is computed in place and on the fly.  The multiplication

														
 
															+		 	by b**i is handled by offseting which columns the results

														
 
															+		 	are added to.

														
 
															+		

														
 
															+			Note the comba method normally doesn't handle carries in the

														
 
															+			inner loop In this case we fix the carry from the previous

														
 
															+			column since the Montgomery reduction requires digits of the

														
 
															+			result (so far) [see above] to work.

														
 
															+

														
 
															+			This is	handled by fixing up one carry after the inner loop.

														
 
															+			The carry fixups are done in order so after these loops the

														
 
															+			first m->used words of W[] have the carries fixed.

														
 
															+		*/

														
 
															+		for iy := 0; iy < n.used; iy += 1 {

														
 
															+			W[ix + iy] += mu * _WORD(n.digit[iy]);

														
 
															+		}

														
 
															+

														
 
															+		/*

														
 
															+			Now fix carry for next digit, W[ix+1].

														
 
															+		*/

														
 
															+		W[ix + 1] += (W[ix] >> _DIGIT_BITS);

														
 
															+	}

														
 
															+

														
 
															+	/*

														
 
															+		Now we have to propagate the carries and shift the words downward

														
 
															+		[all those least significant digits we zeroed].

														
 
															+	*/

														
 
															+

														
 
															+	for ; ix < n.used * 2; ix += 1 {

														
 
															+		W[ix + 1] += (W[ix] >> _DIGIT_BITS);

														
 
															+	}

														
 
															+

														
 
															+	/* copy out, A = A/b**n

														
 
															+	 *

														
 
															+	 * The result is A/b**n but instead of converting from an

														
 
															+	 * array of mp_word to mp_digit than calling mp_rshd

														
 
															+	 * we just copy them in the right order

														
 
															+	 */

														
 
															+

														
 
															+	for ix = 0; ix < (n.used + 1); ix += 1 {

														
 
															+		x.digit[ix] = DIGIT(W[n.used + ix] & _WORD(_MASK));

														
 
															+	}

														
 
															+

														
 
															+	/*

														
 
															+		Set the max used.

														
 
															+	*/

														
 
															+	x.used = n.used + 1;

														
 
															+

														
 
															+	/*

														
 
															+		Zero old_used digits, if the input a was larger than m->used+1 we'll have to clear the digits.

														
 
															+	*/

														
 
															+	internal_zero_unused(x, old_used);

														
 
															+	internal_clamp(x);

														
 
															+

														
 
															+	/*

														
 
															+		if A >= m then A = A - m

														
 
															+	*/

														
 
															+	if internal_cmp_mag(x, n) != -1 {

														
 
															+		return internal_sub(x, x, n);

														
 
															+	}

														
 
															+	return nil;

														
 
															+}

														
 
															+

														
 
															 /*

														
 
															 	hac 14.61, pp608

														
 
															 */

														
@@ -1887,7 +2007,30 @@ _private_copy_digits :: proc(dest, src: ^Int, digits: int, offset := int(0)) ->
 
															 	Tables used by `internal_*` and `_*`.

														
 
															 */

														
 
															-_private_prime_table := []DIGIT{

														
 
															+_private_int_rem_128 := [?]DIGIT{

														
 
															+	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,

														
 
															+};

														
 
															+#assert(128 * size_of(DIGIT) == size_of(_private_int_rem_128));

														
 
															+

														
 
															+_private_int_rem_105 := [?]DIGIT{

														
 
															+	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,

														
 
															+	0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,

														
 
															+	0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,

														
 
															+	0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,

														
 
															+	1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,

														
 
															+	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,

														
 
															+};

														
 
															+#assert(105 * size_of(DIGIT) == size_of(_private_int_rem_105));

														
 
															+

														
 
															+_private_prime_table := [?]DIGIT{

														
 
															 	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,

														
 
															 	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,

														
 
															 	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,

														
@@ -1924,6 +2067,7 @@ _private_prime_table := []DIGIT{
 
															 	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,

														
 
															 	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,

														
 
															 };

														
 
															+#assert(256 * size_of(DIGIT) == size_of(_private_prime_table));

														
 
															 when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {

														
 
															 	_factorial_table := [35]_WORD{

														
--- a/core/math/big/public.odin
+++ b/core/math/big/public.odin
@@ -555,4 +555,19 @@ int_compare_magnitude :: proc(a, b: ^Int, allocator := context.allocator) -> (re
 
															 	internal_clear_if_uninitialized(a, b) or_return;

														
 
															 	return #force_inline internal_cmp_mag(a, b), nil;

														
 
															+}

														
 
															+

														
 
															+/*

														
 
															+	Check if remainders are possible squares - fast exclude non-squares.

														
 
															+

														
 
															+	Returns `true` if `a` is a square, `false` if not.

														
 
															+	Assumes `a` not to be `nil` and to have been initialized.

														
 
															+*/

														
 
															+int_is_square :: proc(a: ^Int, allocator := context.allocator) -> (square: bool, err: Error) {

														
 
															+	assert_if_nil(a);

														
 
															+	context.allocator = allocator;

														
 
															+

														
 
															+	internal_clear_if_uninitialized(a) or_return;

														
 
															+

														
 
															+	return #force_inline internal_int_is_square(a);

														
 
															 }
														
--- a/core/math/big/radix.odin
+++ b/core/math/big/radix.odin
@@ -235,7 +235,7 @@ int_to_cstring :: int_itoa_cstring;
 
															 /*
														
 
															 	Read a string [ASCII] in a given radix.
														
 
															 */
														
 
															-int_atoi :: proc(res: ^Int, input: string, radix: i8, allocator := context.allocator) -> (err: Error) {
														
 
															+int_atoi :: proc(res: ^Int, input: string, radix := i8(10), allocator := context.allocator) -> (err: Error) {
														
 
															 	assert_if_nil(res);
														
 
															 	input := input;
														
 
															 	context.allocator = allocator;
														
--- a/core/math/big/test.odin
+++ b/core/math/big/test.odin
@@ -369,3 +369,22 @@ PyRes :: struct {
 
															 	return PyRes{res = r, err = nil};
														
 
															 }
														
 
															+/*
														
 
															+	dest = lcm(a, b)
														
 
															+*/
														
 
															+@export test_is_square :: proc "c" (a: cstring) -> (res: PyRes) {
														
 
															+	context = runtime.default_context();
														
 
															+	err:    Error;
														
 
															+	square: bool;
														
 
															+
														
 
															+	ai := &Int{};
														
 
															+	defer internal_destroy(ai);
														
 
															+
														
 
															+	if err = atoi(ai, string(a), 16); err != nil { return PyRes{res=":is_square:atoi(a):", err=err}; }
														
 
															+	if square, err = #force_inline internal_int_is_square(ai); err != nil { return PyRes{res=":is_square:is_square(a):", err=err}; }
														
 
															+
														
 
															+	if square {
														
 
															+		return PyRes{"True", nil};
														
 
															+	}
														
 
															+	return PyRes{"False", nil};
														
 
															+}
														
--- a/core/math/big/test.py
+++ b/core/math/big/test.py
@@ -160,11 +160,11 @@ print("initialize_constants: ", initialize_constants())
 
															 error_string = load(l.test_error_string, [c_byte], c_char_p)

														
 
															-add        =     load(l.test_add,        [c_char_p, c_char_p],   Res)

														
 
															-sub        =     load(l.test_sub,        [c_char_p, c_char_p],   Res)

														
 
															-mul        =     load(l.test_mul,        [c_char_p, c_char_p],   Res)

														
 
															-sqr        =     load(l.test_sqr,        [c_char_p          ],   Res)

														
 
															-div        =     load(l.test_div,        [c_char_p, c_char_p],   Res)

														
 
															+add        =     load(l.test_add,        [c_char_p, c_char_p  ], Res)

														
 
															+sub        =     load(l.test_sub,        [c_char_p, c_char_p  ], Res)

														
 
															+mul        =     load(l.test_mul,        [c_char_p, c_char_p  ], Res)

														
 
															+sqr        =     load(l.test_sqr,        [c_char_p            ], Res)

														
 
															+div        =     load(l.test_div,        [c_char_p, c_char_p  ], Res)

														
 
															 # Powers and such

														
 
															 int_log    =     load(l.test_log,        [c_char_p, c_longlong], Res)

														
@@ -179,9 +179,11 @@ int_shl        = load(l.test_shl,        [c_char_p, c_longlong], Res)
 
															 int_shr        = load(l.test_shr,        [c_char_p, c_longlong], Res)

														
 
															 int_shr_signed = load(l.test_shr_signed, [c_char_p, c_longlong], Res)

														
 
															-int_factorial  = load(l.test_factorial,  [c_uint64], Res)

														
 
															-int_gcd        = load(l.test_gcd,        [c_char_p, c_char_p], Res)

														
 
															-int_lcm        = load(l.test_lcm,        [c_char_p, c_char_p], Res)

														
 
															+int_factorial  = load(l.test_factorial,  [c_uint64            ], Res)

														
 
															+int_gcd        = load(l.test_gcd,        [c_char_p, c_char_p  ], Res)

														
 
															+int_lcm        = load(l.test_lcm,        [c_char_p, c_char_p  ], Res)

														
 
															+

														
 
															+is_square      = load(l.test_is_square,  [c_char_p            ], Res)

														
 
															 def test(test_name: "", res: Res, param=[], expected_error = Error.Okay, expected_result = "", radix=16):

														
 
															 	passed = True

														
@@ -428,6 +430,15 @@ def test_lcm(a = 0, b = 0, expected_error = Error.Okay):
 
															 	return test("test_lcm", res, [a, b], expected_error, expected_result)

														
 
															+def test_is_square(a = 0, b = 0, expected_error = Error.Okay):

														
 
															+	args  = [arg_to_odin(a)]

														
 
															+	res   = is_square(*args)

														
 
															+	expected_result = None

														
 
															+	if expected_error == Error.Okay:

														
 
															+		expected_result = str(math.isqrt(a) ** 2 == a) if a > 0 else "False"

														
 
															+		

														
 
															+	return test("test_is_square", res, [a], expected_error, expected_result)

														
 
															+

														
 
															 # TODO(Jeroen): Make sure tests cover edge cases, fast paths, and so on.

														
 
															 #

														
 
															 # The last two arguments in tests are the expected error and expected result.

														
@@ -527,6 +538,10 @@ TESTS = {
 
															 		[   0, 0,  ],

														
 
															 		[   0, 125,],

														
 
															 	],

														
 
															+	test_is_square: [

														
 
															+		[ 12, ],

														
 
															+		[ 92232459121502451677697058974826760244863271517919321608054113675118660929276431348516553336313179167211015633639725554914519355444316239500734169769447134357534241879421978647995614218985202290368055757891124109355450669008628757662409138767505519391883751112010824030579849970582074544353971308266211776494228299586414907715854328360867232691292422194412634523666770452490676515117702116926803826546868467146319938818238521874072436856528051486567230096290549225463582766830777324099589751817442141036031904145041055454639783559905920619197290800070679733841430619962318433709503256637256772215111521321630777950145713049902839937043785039344243357384899099910837463164007565230287809026956254332260375327814271845678201, ]

														
 
															+	],

														
 
															 }

														
 
															 if not args.fast_tests:

														
@@ -545,7 +560,7 @@ RANDOM_TESTS = [
 
															 	test_add, test_sub, test_mul, test_sqr, test_div,

														
 
															 	test_log, test_pow, test_sqrt, test_root_n,

														
 
															 	test_shl_digit, test_shr_digit, test_shl, test_shr_signed,

														
 
															-	test_gcd, test_lcm,

														
 
															+	test_gcd, test_lcm, test_is_square,

														
 
															 ]

														
 
															 SKIP_LARGE   = [

														
 
															 	test_pow, test_root_n, # test_gcd,

														
@@ -648,11 +663,13 @@ if __name__ == '__main__':
 
															 					a = abs(a)

														
 
															 					b = randint(0, 10);

														
 
															 				elif test_proc == test_shl:

														
 
															-					b = randint(0, min(BITS, 120));

														
 
															+					b = randint(0, min(BITS, 120))

														
 
															 				elif test_proc == test_shr_signed:

														
 
															-					b = randint(0, min(BITS, 120));

														
 
															+					b = randint(0, min(BITS, 120))

														
 
															+				elif test_proc == test_is_square:

														
 
															+					a = randint(0, 1 << BITS)

														
 
															 				else:

														
 
															-					b = randint(0, 1 << BITS)					

														
 
															+					b = randint(0, 1 << BITS)

														
 
															 				res = None