4 years ago · 2cfd6b7024
--- a/core/math/big/build.bat
+++ b/core/math/big/build.bat
@@ -1,9 +1,9 @@
 
				 @echo off

			
 
				-:odin run . -vet

			
 
				+odin run . -vet

			
 
				 

			
 
				 set TEST_ARGS=-fast-tests

			
 
				 :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

			
 
				-odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

			
 
				+:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

			
 
				 :odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

			
 
				 :odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS%

			
 
				 :odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests %TEST_ARGS%
			
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -426,6 +426,129 @@ _private_int_mul_comba :: proc(dest, a, b: ^Int, digits: int, allocator := conte
 
				 	return internal_clamp(dest);

			
 
				 }

			
 
				 

			
 
				+/*

			
 
				+	Multiplies |a| * |b| and does not compute the lower digs digits

			
 
				+	[meant to get the higher part of the product]

			
 
				+*/

			
 
				+_private_int_mul_high :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {

			
 
				+	context.allocator = allocator;

			
 
				+

			
 
				+	/*

			
 
				+		Can we use the fast multiplier?

			
 
				+	*/

			
 
				+	if a.used + b.used + 1 < _WARRAY && min(a.used, b.used) < _MAX_COMBA {

			
 
				+		return _private_int_mul_high_comba(dest, a, b, digits);

			
 
				+	}

			
 
				+

			
 
				+	internal_grow(dest, a.used + b.used + 1) or_return;

			
 
				+	dest.used = a.used + b.used + 1;

			
 
				+

			
 
				+	pa := a.used;

			
 
				+	pb := b.used;

			
 
				+	for ix := 0; ix < pa; ix += 1 {

			
 
				+		carry := DIGIT(0);

			
 
				+

			
 
				+		for iy := digits - ix; iy < pb; iy += 1 {

			
 
				+			/*

			
 
				+				Calculate the double precision result.

			
 
				+			*/

			
 
				+			r := _WORD(dest.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + _WORD(carry);

			
 
				+

			
 
				+			/*

			
 
				+				Get the lower part.

			
 
				+			*/

			
 
				+			dest.digit[ix + iy] = DIGIT(r & _WORD(_MASK));

			
 
				+

			
 
				+			/*

			
 
				+				Carry the carry.

			
 
				+			*/

			
 
				+			carry = DIGIT(r >> _WORD(_DIGIT_BITS));

			
 
				+		}

			
 
				+		dest.digit[ix + pb] = carry;

			
 
				+	}

			
 
				+	return internal_clamp(dest);

			
 
				+}

			
 
				+

			
 
				+/*

			
 
				+	This is a modified version of `_private_int_mul_comba` that only produces output digits *above* `digits`.

			
 
				+	See the comments for `_private_int_mul_comba` to see how it works.

			
 
				+

			
 
				+	This is used in the Barrett reduction since for one of the multiplications

			
 
				+	only the higher digits were needed.  This essentially halves the work.

			
 
				+

			
 
				+	Based on Algorithm 14.12 on pp.595 of HAC.

			
 
				+*/

			
 
				+_private_int_mul_high_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {

			
 
				+	context.allocator = allocator;

			
 
				+

			
 
				+	W: [_WARRAY]DIGIT = ---;

			
 
				+	_W: _WORD = 0;

			
 
				+

			
 
				+	/*

			
 
				+		Number of output digits to produce. Grow the destination as required.

			
 
				+	*/

			
 
				+	pa := a.used + b.used;

			
 
				+	internal_grow(dest, pa) or_return;

			
 
				+

			
 
				+	ix: int;

			
 
				+	for ix = digits; ix < pa; ix += 1 {

			
 
				+		/*

			
 
				+			Get offsets into the two bignums.

			
 
				+		*/

			
 
				+		ty := min(b.used - 1, ix);

			
 
				+		tx := ix - ty;

			
 
				+

			
 
				+		/*

			
 
				+			This is the number of times the loop will iterrate, essentially it's

			
 
				+			while (tx++ < a->used && ty-- >= 0) { ... }

			
 
				+		*/

			
 
				+		iy := min(a.used - tx, ty + 1);

			
 
				+

			
 
				+		/*

			
 
				+			Execute loop.

			
 
				+		*/

			
 
				+		for iz := 0; iz < iy; iz += 1 {

			
 
				+			_W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]);

			
 
				+		}

			
 
				+

			
 
				+		/*

			
 
				+			Store term.

			
 
				+		*/

			
 
				+		W[ix] = DIGIT(_W) & DIGIT(_MASK);

			
 
				+

			
 
				+		/*

			
 
				+			Make next carry.

			
 
				+		*/

			
 
				+		_W = _W >> _WORD(_DIGIT_BITS);

			
 
				+	}

			
 
				+

			
 
				+	/*

			
 
				+		Setup dest

			
 
				+	*/

			
 
				+	old_used := dest.used;

			
 
				+	dest.used = pa;

			
 
				+

			
 
				+	for ix = digits; ix < pa; ix += 1 {

			
 
				+		/*

			
 
				+			Now extract the previous digit [below the carry].

			
 
				+		*/

			
 
				+		dest.digit[ix] = W[ix];

			
 
				+	}

			
 
				+

			
 
				+	/*

			
 
				+		Zero remainder.

			
 
				+	*/

			
 
				+	internal_zero_unused(dest, old_used);

			
 
				+

			
 
				+	/*

			
 
				+		Adjust dest.used based on leading zeroes.

			
 
				+	*/

			
 
				+	return internal_clamp(dest);

			
 
				+}

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				 /*

			
 
				 	Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16

			
 
				 	Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized.