4 years ago · 2110778040
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -208,15 +208,17 @@ int_to_byte_little :: proc(v: ^Int) {
 
															 	}
														
 
															 }
														
 
															+// printf :: fmt.printf;
														
 
															+
														
 
															 demo :: proc() {
														
 
															 	a, b, c, d, e, f, res := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
														
 
															 	defer destroy(a, b, c, d, e, f, res);
														
 
															 	set(a, 42);
														
 
															 	set(b, 6);
														
 
															-	set(c, 5);
														
 
															+	set(c, 131);
														
 
															-	if err := internal_int_exponent_mod(res, a, b, c, 0); err != nil {
														
 
															+	if err := internal_int_exponent_mod_fast(res, a, b, c, 0); err != nil {
														
 
															 		fmt.printf("Error: %v\n", err);
														
 
															 	}
														
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -991,13 +991,21 @@ internal_int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator :
 
															 	public ones that have already satisfied these constraints.
														
 
															 */
														
 
															+/*
														
 
															+	This procedure returns the allocated capacity of an Int.
														
 
															+	Assumes `a` not to be `nil`.
														
 
															+*/
														
 
															+internal_int_allocated_cap :: #force_inline proc(a: ^Int) -> (cap: int) {
														
 
															+	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
														
 
															+	return raw.cap;
														
 
															+}
														
 
															+
														
 
															 /*
														
 
															 	This procedure will return `true` if the `Int` is initialized, `false` if not.
														
 
															 	Assumes `a` not to be `nil`.
														
 
															 */
														
 
															 internal_int_is_initialized :: #force_inline proc(a: ^Int) -> (initialized: bool) {
														
 
															-	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
														
 
															-	return raw.cap >= _MIN_DIGIT_COUNT;
														
 
															+	return internal_int_allocated_cap(a) >= _MIN_DIGIT_COUNT;
														
 
															 }
														
 
															 internal_is_initialized :: proc { internal_int_is_initialized, };
														
@@ -1650,8 +1658,7 @@ internal_int_destroy :: proc(integers: ..^Int) {
 
															 	integers := integers;
														
 
															 	for a in &integers {
														
 
															-		raw := transmute(mem.Raw_Dynamic_Array)a.digit;
														
 
															-		if raw.cap > 0 {
														
 
															+		if internal_int_allocated_cap(a) > 0 {
														
 
															 			mem.zero_slice(a.digit[:]);
														
 
															 			free(&a.digit[0]);
														
 
															 		}
														
@@ -1913,23 +1920,23 @@ internal_int_shrink :: proc(a: ^Int) -> (err: Error) {
 
															 internal_shrink :: proc { internal_int_shrink, };
														
 
															 internal_int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) {
														
 
															-	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
														
 
															-
														
 
															 	/*
														
 
															 		We need at least _MIN_DIGIT_COUNT or a.used digits, whichever is bigger.
														
 
															 		The caller is asking for `digits`. Let's be accommodating.
														
 
															 	*/
														
 
															+	cap := internal_int_allocated_cap(a);
														
 
															+
														
 
															 	needed := max(_MIN_DIGIT_COUNT, a.used, digits);
														
 
															 	if !allow_shrink {
														
 
															-		needed = max(needed, raw.cap);
														
 
															+		needed = max(needed, cap);
														
 
															 	}
														
 
															 	/*
														
 
															 		If not yet initialized, initialize the `digit` backing with the allocator we were passed.
														
 
															 	*/
														
 
															-	if raw.cap == 0 {
														
 
															+	if cap == 0 {
														
 
															 		a.digit = make([dynamic]DIGIT, needed, allocator);
														
 
															-	} else if raw.cap != needed {
														
 
															+	} else if cap != needed {
														
 
															 		/*
														
 
															 			`[dynamic]DIGIT` already knows what allocator was used for it, so resize will do the right thing.
														
 
															 		*/
														
--- a/core/math/big/prime.odin
+++ b/core/math/big/prime.odin
@@ -144,7 +144,7 @@ internal_int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := cont
 
															 		power := ((b.used - 1) * _DIGIT_BITS) + bits - 1;
														
 
															 		internal_int_power_of_two(a, power)                          or_return;
														
 
															 	} else {
														
 
															-		internal_one(a);
														
 
															+		internal_one(a)                                              or_return;
														
 
															 		bits = 1;
														
 
															 	}
														
@@ -187,7 +187,8 @@ internal_int_montgomery_setup :: proc(n: ^Int) -> (rho: DIGIT, err: Error) {
 
															 	x := (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
														
 
															 	x *= 2 - (b * x);              /* here x*a==1 mod 2**8 */
														
 
															 	x *= 2 - (b * x);              /* here x*a==1 mod 2**16 */
														
 
															-	when _WORD_TYPE_BITS == 64 {
														
 
															+
														
 
															+	when _DIGIT_TYPE_BITS == 64 {
														
 
															 		x *= 2 - (b * x);              /* here x*a==1 mod 2**32 */
														
 
															 		x *= 2 - (b * x);              /* here x*a==1 mod 2**64 */
														
 
															 	}
														
@@ -473,6 +474,10 @@ internal_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator :=
 
															 	M := [_TAB_SIZE]Int{};
														
 
															 	winsize: uint;
														
 
															+	/*
														
 
															+		Use a pointer to the reduction algorithm.
														
 
															+		This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere.
														
 
															+	*/
														
 
															 	redux: #type proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error);
														
 
															 	defer {
														
@@ -686,6 +691,280 @@ internal_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator :=
 
															 	return err;
														
 
															 }
														
 
															+/*
														
 
															+	Computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
														
 
															+
														
 
															+	Uses a left-to-right `k`-ary sliding window to compute the modular exponentiation.
														
 
															+	The value of `k` changes based on the size of the exponent.
														
 
															+
														
 
															+	Uses Montgomery or Diminished Radix reduction [whichever appropriate]
														
 
															+
														
 
															+	Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized.
														
 
															+*/
														
 
															+internal_int_exponent_mod_fast :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) {
														
 
															+	context.allocator = allocator;
														
 
															+
														
 
															+	M := [_TAB_SIZE]Int{};
														
 
															+	winsize: uint;
														
 
															+
														
 
															+	/*
														
 
															+		Use a pointer to the reduction algorithm.
														
 
															+		This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere.
														
 
															+	*/
														
 
															+	redux: #type proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error);
														
 
															+
														
 
															+	defer {
														
 
															+		internal_destroy(&M[1]);
														
 
															+		for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 {
														
 
															+			internal_destroy(&M[x]);
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Find window size.
														
 
															+	*/
														
 
															+	x := internal_count_bits(X);
														
 
															+	switch {
														
 
															+	case x <= 7:
														
 
															+		winsize = 2;
														
 
															+	case x <= 36:
														
 
															+		winsize = 3;
														
 
															+	case x <= 140:
														
 
															+		winsize = 4;
														
 
															+	case x <= 450:
														
 
															+		winsize = 5;
														
 
															+	case x <= 1303:
														
 
															+		winsize = 6;
														
 
															+	case x <= 3529:
														
 
															+		winsize = 7;
														
 
															+	case:
														
 
															+		winsize = 8;
														
 
															+	}
														
 
															+
														
 
															+	winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize;
														
 
															+
														
 
															+	/*
														
 
															+		Init M array
														
 
															+		Init first cell.
														
 
															+	*/
														
 
															+	cap := internal_int_allocated_cap(P);
														
 
															+	internal_grow(&M[1], cap)                                        or_return;
														
 
															+
														
 
															+	/*
														
 
															+		Now init the second half of the array.
														
 
															+	*/
														
 
															+	for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 {
														
 
															+		internal_grow(&M[x], cap)                                    or_return;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Determine and setup reduction code.
														
 
															+	*/
														
 
															+	rho: DIGIT;
														
 
															+
														
 
															+	if redmode == 0 {
														
 
															+		/*
														
 
															+			Now setup Montgomery.
														
 
															+		*/
														
 
															+		rho = internal_int_montgomery_setup(P)                       or_return;
														
 
															+
														
 
															+		/*
														
 
															+			Automatically pick the comba one if available (saves quite a few calls/ifs).
														
 
															+		*/
														
 
															+		if ((P.used * 2) + 1) < _WARRAY && P.used < _MAX_COMBA {
														
 
															+			redux = _private_montgomery_reduce_comba;
														
 
															+		} else {
														
 
															+			/*
														
 
															+				Use slower baseline Montgomery method.
														
 
															+			*/
														
 
															+			redux = internal_int_montgomery_reduce;
														
 
															+		}
														
 
															+	} else if redmode == 1 {
														
 
															+		/*
														
 
															+		if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
														
 
															+			/* setup DR reduction for moduli of the form B**k - b */
														
 
															+			mp_dr_setup(P, &mp);
														
 
															+			redux = mp_dr_reduce;
														
 
															+		} else {
														
 
															+			err = MP_VAL;
														
 
															+			goto LBL_M;
														
 
															+		}
														
 
															+		*/
														
 
															+		return .Unimplemented;
														
 
															+	} else {
														
 
															+		/*
														
 
															+			Setup DR reduction for moduli of the form 2**k - b.
														
 
															+		*/
														
 
															+		rho = internal_int_reduce_2k_setup(P)                        or_return;
														
 
															+		redux = internal_int_reduce_2k;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Setup result.
														
 
															+	*/
														
 
															+	internal_grow(res, cap)                                          or_return;
														
 
															+
														
 
															+	/*
														
 
															+		Create M table
														
 
															+		The first half of the table is not computed, though, except for M[0] and M[1]
														
 
															+	*/
														
 
															+
														
 
															+	if redmode == 0 {
														
 
															+		/*
														
 
															+			Now we need R mod m.
														
 
															+		*/
														
 
															+		internal_int_montgomery_calc_normalization(res, P)           or_return;
														
 
															+
														
 
															+		/*
														
 
															+			Now set M[1] to G * R mod m.
														
 
															+		*/
														
 
															+		internal_mulmod(&M[1], G, res, P)                            or_return;
														
 
															+	} else {
														
 
															+		internal_one(res)                                            or_return;
														
 
															+		internal_mod(&M[1], G, P)                                    or_return;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times.
														
 
															+	*/
														
 
															+	slot := 1 << (winsize - 1);
														
 
															+	internal_copy(&M[slot], &M[1])                                   or_return;
														
 
															+
														
 
															+	for x = 0; x < int(winsize - 1); x += 1 {
														
 
															+		internal_sqr(&M[slot], &M[slot])                             or_return;
														
 
															+   		print("slot: ", &M[slot]);
														
 
															+		redux(&M[slot], P, rho)                                      or_return;
														
 
															+		print("slot redux: ", &M[slot]);
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Create upper table.
														
 
															+	*/
														
 
															+	for x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x += 1 {
														
 
															+		internal_mul(&M[x], &M[x - 1], &M[1])                        or_return;
														
 
															+		redux(&M[x], P, rho)                                         or_return;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		Set initial mode and bit cnt.
														
 
															+	*/
														
 
															+	mode   := 0;
														
 
															+	bitcnt := 1;
														
 
															+	buf    := DIGIT(0);
														
 
															+	digidx := X.used - 1;
														
 
															+	bitcpy := 0;
														
 
															+	bitbuf := DIGIT(0);
														
 
															+
														
 
															+	for {
														
 
															+		/*
														
 
															+			Grab next digit as required.
														
 
															+		*/
														
 
															+		bitcnt -= 1;
														
 
															+		if bitcnt == 0 {
														
 
															+			/*
														
 
															+				If digidx == -1 we are out of digits so break.
														
 
															+			*/
														
 
															+			if digidx == -1 { break; }
														
 
															+
														
 
															+			/*
														
 
															+				Read next digit and reset the bitcnt.
														
 
															+			*/
														
 
															+			buf    = X.digit[digidx];
														
 
															+			digidx -= 1;
														
 
															+			bitcnt = _DIGIT_BITS;
														
 
															+		}
														
 
															+
														
 
															+		/*
														
 
															+			Grab the next msb from the exponent.
														
 
															+		*/
														
 
															+		y := (buf >> (_DIGIT_BITS - 1)) & 1;
														
 
															+		buf <<= 1;
														
 
															+
														
 
															+		/*
														
 
															+			If the bit is zero and mode == 0 then we ignore it.
														
 
															+			These represent the leading zero bits before the first 1 bit in the exponent.
														
 
															+			Technically this opt is not required but it does lower the # of trivial squaring/reductions used.
														
 
															+		*/
														
 
															+		if mode == 0 && y == 0 { continue; }
														
 
															+
														
 
															+		/*
														
 
															+			If the bit is zero and mode == 1 then we square.
														
 
															+		*/
														
 
															+		if mode == 1 && y == 0 {
														
 
															+			internal_sqr(res, res)                                   or_return;
														
 
															+			redux(res, P, rho)                                       or_return;
														
 
															+			continue;
														
 
															+		}
														
 
															+
														
 
															+		/*
														
 
															+			Else we add it to the window.
														
 
															+		*/
														
 
															+		bitcpy += 1;
														
 
															+		bitbuf |= (y << (winsize - uint(bitcpy)));
														
 
															+		mode    = 2;
														
 
															+
														
 
															+		if bitcpy == int(winsize) {
														
 
															+			/*
														
 
															+				Window is filled so square as required and multiply
														
 
															+				Square first.
														
 
															+			*/
														
 
															+			for x = 0; x < int(winsize); x += 1 {
														
 
															+				internal_sqr(res, res)                               or_return;
														
 
															+				redux(res, P, rho)                                   or_return;
														
 
															+			}
														
 
															+
														
 
															+			/*
														
 
															+				Then multiply.
														
 
															+			*/
														
 
															+			internal_mul(res, res, &M[bitbuf])                       or_return;
														
 
															+			redux(res, P, rho)                                       or_return;
														
 
															+
														
 
															+			/*
														
 
															+				Empty window and reset.
														
 
															+			*/
														
 
															+			bitcpy = 0;
														
 
															+			bitbuf = 0;
														
 
															+			mode   = 1;
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+		If bits remain then square/multiply.
														
 
															+	*/
														
 
															+	if mode == 2 && bitcpy > 0 {
														
 
															+		/*
														
 
															+			Square then multiply if the bit is set.
														
 
															+		*/
														
 
															+		for x = 0; x < bitcpy; x += 1 {
														
 
															+			internal_sqr(res, res)                                   or_return;
														
 
															+			redux(res, P, rho)                                       or_return;
														
 
															+
														
 
															+			/*
														
 
															+				Get next bit of the window.
														
 
															+			*/
														
 
															+			bitbuf <<= 1;
														
 
															+			if bitbuf & (1 << winsize) != 0 {
														
 
															+				/*
														
 
															+					Then multiply.
														
 
															+				*/
														
 
															+				internal_mul(res, res, &M[1])                        or_return;
														
 
															+				redux(res, P, rho)                                   or_return;
														
 
															+			}
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	if redmode == 0 {
														
 
															+		/*
														
 
															+			Fixup result if Montgomery reduction is used.
														
 
															+			Recall that any value in a Montgomery system is actually multiplied by R mod n.
														
 
															+			So we have to reduce one more time to cancel out the factor of R.
														
 
															+		*/
														
 
															+		redux(res, P, rho)                                           or_return;
														
 
															+	}
														
 
															+
														
 
															+	return nil;
														
 
															+}
														
 
															+
														
 
															 /*
														
 
															 	Returns the number of Rabin-Miller trials needed for a given bit size.
														
 
															 */
														
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -1730,9 +1730,6 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -
 
															 	return;

														
 
															 }

														
 
															-

														
 
															-

														
 
															-

														
 
															 /*

														
 
															 	Computes xR**-1 == x (mod N) via Montgomery Reduction.

														
 
															 	This is an optimized implementation of `internal_montgomery_reduce`

														
@@ -1753,7 +1750,7 @@ _private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT, allocator := co
 
															 	/*

														
 
															 		Grow `x` as required.

														
 
															 	*/

														
 
															-	internal_grow(x, n.used + 1) or_return;

														
 
															+	internal_grow(x, n.used + 1)                                     or_return;

														
 
															 	/*

														
 
															 		First we have to get the digits of the input into an array of double precision words W[...]