Parcourir la source

added libtomcrypt-0.81

Tom St Denis il y a 22 ans
Parent
commit
0f999a4e9e
27 fichiers modifiés avec 7611 ajouts et 9478 suppressions
  1. 9 2
      authors
  2. 13 0
      changes
  3. 162 162
      config.pl
  4. BIN
      crypt.pdf
  5. 664 461
      demos/test.c
  6. 1427 0
      demos/test.c~
  7. 87 30
      ecc.c
  8. 15 7
      ecc_sys.c
  9. 35 0
      examples/ch2-01.c
  10. 80 0
      legal.txt
  11. 19 193
      makefile
  12. 24 0
      makefile.out
  13. 0 311
      makefile.ps2
  14. 0 274
      makefile.vc
  15. 0 87
      mpi-config.h
  16. 0 16
      mpi-types.h
  17. 4621 3464
      mpi.c
  18. 0 227
      mpi.h
  19. 0 4216
      mpi.old
  20. 3 5
      mycrypt.h
  21. 0 17
      mycrypt_cfg.h
  22. 76 0
      mycrypt_custom.h
  23. 17 1
      mycrypt_pk.h
  24. 5 1
      mycrypt_prng.h
  25. 2 3
      prime.c
  26. 0 1
      rsa.c
  27. 352 0
      tommath.h

+ 9 - 2
authors

@@ -26,6 +26,9 @@ [email protected].
 6) Clay Culver
 
    Submitted a fix for "rsa.c" which cleaned up some code.  Submitted some other fixes too.  :-)
+   Clay has helped find bugs in various pieces of code including the registry functions, base64 routines 
+   and the make process.  He is also now the primary author of the libtomcrypt reference manual and has plans
+   to make an HTML version.
 
 7) Jason Klapste
 
@@ -39,10 +42,14 @@ yarrow code can now default to any cipher/hash that is left after you remove the
 
 9) Wayne Scott ([email protected])
   
-   Submitted base64 that complies with the RFC standards.
+   Submitted base64 that complies with the RFC standards.  Submitted some ideas to improve the RSA key generation
+   as well.
    
 10) Sky Schulz ([email protected])
 
    Has submitted a set of ideas to improve the library and make it more attractive for professional users.
    
-   
+11) Mike Frysinger 
+
+   Together with Clay came up with a more "unix friendly" makefile.  Mike Frysinger has been keeping copies of 
+   the library for the Gentoo linux distribution.

+ 13 - 0
changes

@@ -1,3 +1,16 @@
+Jan 16th, 2003
+v0.81  -- Merged in new makefile from Clay Culver and Mike Frysinger
+       -- Sped up the ECC mulmod() routine by making the word size adapt to the input.  Saves a whopping 9 point
+          operations on 521-bit keys now (translates to about 8ms on my Athlon XP).  I also now use barrett reduction
+          as much as possible.  This sped the routine up quite a bit.
+       -- Fixed a huge flaw in ecc_verify_hash() where it would return CRYPT_OK on error... Now fixed.
+       -- Fixed up config.pl by fixing an invalid query and the file is saved in non-windows [e.g. not CR/LF] format
+          (fix due to Mika Boström)
+       -- Merged in LibTomMath for kicks
+       -- Changed the build process so that by default "mycrypt_custom.h" is included and provided.
+          The makefile doesn't include any build options anymore
+       -- Removed the PS2 and VC makefiles.
+       
 Dec 16th, 2002
 v0.80  -- Found a change I made to the MPI that is questionable.  Not quite a bug but definitely not desired.  Had to do
           with the digit shifting.  In v0.79 I simply truncated without zeroing.  It didn't cause problems during my

+ 162 - 162
config.pl

@@ -1,162 +1,162 @@
-#!/usr/bin/perl
-#
-# Generates a makefile based on user input
-#
-# Tom St Denis, [email protected], http://tom.iahu.ca
-
-@settings = (
-   "CC,Compiler,gcc",
-   "AR,Archiver,ar",
-   "LD,Linker,ld",
-   "CFLAGS,Optimizations,-Os",
-   "CFLAGS,Warnings,-Wall -Wsign-compare -W -Wno-unused -Werror",
-   "CFLAGS,Include Paths,-I./",
-   "CFLAGS,Other compiler options,",
-   "CFLAGS,XMALLOC,-DXMALLOC=malloc",
-   "CFLAGS,XREALLOC,-DXREALLOC=realloc",
-   "CFLAGS,XCALLOC,-DXCALLOC=calloc",
-   "CFLAGS,XFREE,-DXFREE=free",
-   "CFLAGS,XCLOCK,-DXCLOCK=clock",
-   "CFLAGS,XCLOCKS_PER_SEC,-DXCLOCKS_PER_SEC=CLOCKS_PER_SEC",
-);
-
-@opts = (
-   "SMALL_CODE,Use small code where possible (slower code),y",
-   "NO_FILE,Avoid file I/O calls,n",
-   "CLEAN_STACK,Clean the stack within functions,n",
-
-   "BLOWFISH,Include Blowfish block cipher,y",
-   "RC2,Include RC2 block cipher,y",
-   "RC5,Include RC5 block cipher,y",
-   "RC6,Include RC6 block cipher,y",
-   "SERPENT,Include Serpent block cipher,y",
-   "SAFERP,Include Safer+ block cipher,y",
-   "SAFER,Include Safer-64 block ciphers,y",
-   "RIJNDAEL,Include Rijndael (AES) block cipher,y",
-   "XTEA,Include XTEA block cipher,y",
-   "TWOFISH,Include Twofish block cipher,y",
-   "TWOFISH_SMALL,Include Use a low ram variant of Twofish,n",
-   "TWOFISH_TABLES,Include Use precomputed tables to speed up the low-ram variant,n",
-   "DES,Include DES and 3DES block ciphers,y",
-   "CAST5,Include CAST5 (aka CAST-128) block cipher,y",
-   "NOEKEON,Include Noekeon block cipher,y",
-
-   "CFB,Include CFB block mode of operation,y",
-   "OFB,Include OFB block mode of operation,y",
-   "ECB,Include ECB block mode of operation,y",
-   "CBC,Include CBC block mode of operation,y",
-   "CTR,Include CTR block mode of operation,y",
-
-   "SHA512,Include SHA512 one-way hash,y",
-   "SHA384,Include SHA384 one-way hash (requires SHA512),y",
-   "SHA256,Include SHA256 one-way hash,y",
-   "TIGER,Include TIGER one-way hash,y",
-   "SHA1,Include SHA1 one-way hash,y",
-   "MD5,Include MD5 one-way hash,y",
-   "MD4,Include MD4 one-way hash,y",
-   "MD2,Include MD2 one-way hash,y",
-   "HMAC,Include Hash based Message Authentication Support,y",
-
-   "BASE64,Include Base64 encoding support,y",
-
-   "YARROW,Include Yarrow PRNG,y",
-   "SPRNG,Include Secure PRNG base on RNG code,y",
-   "RC4,Include RC4 PRNG,y",
-   "DEVRANDOM,Use /dev/random or /dev/urandom if available?,y",
-   "TRY_URANDOM_FIRST,Try /dev/urandom before /dev/random?,n",
-
-   "MRSA,Include RSA public key support,y",
-   "MDH,Include Diffie-Hellman (over Z/pZ) public key support,y",
-   "MECC,Include Eliptic Curve public key crypto support,y",
-   "KR,Include Keyring support (groups all three PK systems),y",
-   
-   "DH768,768-bit DH key support,y",
-   "DH1024,1024-bit DH key support,y",
-   "DH1280,1280-bit DH key support,y",
-   "DH1536,1280-bit DH key support,y",
-   "DH1792,1792-bit DH key support,y",
-   "DH2048,2048-bit DH key support,y",
-   "DH2560,2560-bit DH key support,y",
-   "DH3072,3072-bit DH key support,y",
-   "DH4096,4096-bit DH key support,y",
-   
-   "ECC160,160-bit ECC key support,y",
-   "ECC192,192-bit ECC key support,y",
-   "ECC224,224-bit ECC key support,y",
-   "ECC256,256-bit ECC key support,y",
-   "ECC384,384-bit ECC key support,y",
-   "ECC521,521-bit ECC key support,y",
-   
-   "GF,Include GF(2^w) math support (not used internally),n",
-   
-   "MPI,Include MPI big integer math support (required by the public key code),y",
-   "MPI_FASTEXPT,Use the faster exponentiation code (uses some heap but is faster),y",
-   "MPI_FASTEXPT_LOWMEM,Use the low ram variant of the fast code\nRequires the fast code to enabled,n", 
-);
-
-# scan for switches and make variables
-for (@settings) {
-   @m = split(",", $_);
-   print "@m[1]: [@m[2]] ";
-   $r = <>; $r = @m[2] if ($r eq "\n");
-   chomp($r);
-   @vars{@m[0]} = @vars{@m[0]} . $r . " ";
-}
-
-# scan for build flags
-for (@opts) {
-   @m = split(",", $_);
-   print "@m[1]: [@m[2]]";
-   $r = <>;  @vars{'CFLAGS'} = @vars{'CFLAGS'} . "-D" . $m[0] . " " if (($r eq "y\n") || ($r eq "\n" && @m[2] eq "y"));
-}   
-
-# write header 
-
-open(OUT,">mycrypt_custom.h");
-print OUT "/* This header is meant to be included before mycrypt.h in projects where\n";
-print OUT " * you don't want to throw all the defines in a makefile. \n";
-print OUT " */\n\n#ifndef MYCRYPT_CUSTOM_H_\n#define MYCRYPT_CUSTOM_H_\n\n#ifdef CRYPT\n\t#error mycrypt_custom.h should be included before mycrypt.h\n#endif\n\n";
-
-@m = split(" ", @vars{'CFLAGS'});
-for (@m) {
-    if ($_ =~ /^-D/) {
-       $_ =~ s/-D//;
-       $_ =~ s/=/" "/ge;
-       print OUT "#define $_\n";
-    }
-}
-
-print OUT "\n\n#include <mycrypt.h>\n\n#endif\n\n";
-close OUT;
-       
-print "\n\nmycrypt_custom.h generated.\n";
-
-open(OUT,">makefile.out");
-print OUT "#makefile generated with config.pl\n#\n#Tom St Denis (tomstdenis\@yahoo.com, http://tom.iahu.ca) \n\n";
-
-# output unique vars first
-for (@settings) {
-   @m = split(",", $_);
-   print OUT "@m[0] = @vars{@m[0]}\n"   if (@vars{@m[0]} ne "" && @m[0] ne "CFLAGS");
-   print OUT "CFLAGS += @vars{@m[0]}\n" if (@vars{@m[0]} ne "" && @m[0] eq "CFLAGS");
-   @vars{@m[0]} = "";
-}
-
-# output objects
-print OUT "\ndefault: library\n\n";
-print OUT "OBJECTS = keyring.o gf.o mem.o sprng.o ecc.o base64.o dh.o rsa.o bits.o yarrow.o cfb.o ofb.o ecb.o ctr.o cbc.o hash.o tiger.o sha1.o md5.o md4.o md2.o sha256.o sha512.o xtea.o aes.o serpent.o des.o safer_tab.o safer.o safer+.o rc4.o rc2.o rc6.o rc5.o cast5.o noekeon.o blowfish.o crypt.o ampi.o mpi.o prime.o twofish.o packet.o hmac.o strings.o\n\n";
-
-# some depends
-print OUT "rsa.o: rsa_sys.c\ndh.o: dh_sys.c\necc.o: ecc_sys.c\n\n";
-
-# targets 
-print OUT "library: \$(OBJECTS)\n\t \$(AR) r libtomcrypt.a \$(OBJECTS)\n\t ranlib libtomcrypt.a\n\n";
-print OUT "clean:\n\trm -f \$(OBJECTS) libtomcrypt.a \n\n";
-
-close OUT;
-
-print "makefile.out generated.\n";
-
-print "\nNow use makefile.out to build the library, e.g. `make -f makefile.out'\n";
-print "In your project just include mycrypt_custom.h (you don't have to include mycrypt.h \n";
-print "but if you do make sure mycrypt_custom.h appears first) your settings should be intact.\n";
+#!/usr/bin/perl
+#
+# Generates a makefile based on user input
+#
+# Tom St Denis, [email protected], http://tom.iahu.ca
+
+# Build-tool settings the user is prompted for.
+# Each entry is "VARIABLE,prompt text,default answer"; the answers given for the
+# same VARIABLE (e.g. CFLAGS) are concatenated, separated by spaces.
+@settings = (
+   "CC,Compiler,gcc",
+   "AR,Archiver,ar",
+   "LD,Linker,ld",
+   "CFLAGS,Optimizations,-Os",
+   "CFLAGS,Warnings,-Wall -Wsign-compare -W -Wno-unused -Werror",
+   "CFLAGS,Include Paths,-I./",
+   "CFLAGS,Other compiler options,",
+   "CFLAGS,XMALLOC,-DXMALLOC=malloc",
+   "CFLAGS,XREALLOC,-DXREALLOC=realloc",
+   "CFLAGS,XCALLOC,-DXCALLOC=calloc",
+   "CFLAGS,XFREE,-DXFREE=free",
+   "CFLAGS,XCLOCK,-DXCLOCK=clock",
+   "CFLAGS,XCLOCKS_PER_SEC,-DXCLOCKS_PER_SEC=CLOCKS_PER_SEC",
+);
+
+@opts = (
+   "SMALL_CODE,Use small code where possible (slower code),y",
+   "NO_FILE,Avoid file I/O calls,n",
+   "CLEAN_STACK,Clean the stack within functions,n",
+
+   "BLOWFISH,Include Blowfish block cipher,y",
+   "RC2,Include RC2 block cipher,y",
+   "RC5,Include RC5 block cipher,y",
+   "RC6,Include RC6 block cipher,y",
+   "SERPENT,Include Serpent block cipher,y",
+   "SAFERP,Include Safer+ block cipher,y",
+   "SAFER,Include Safer-64 block ciphers,y",
+   "RIJNDAEL,Include Rijndael (AES) block cipher,y",
+   "XTEA,Include XTEA block cipher,y",
+   "TWOFISH,Include Twofish block cipher,y",
+   "TWOFISH_SMALL,Include Use a low ram variant of Twofish,n",
+   "TWOFISH_TABLES,Include Use precomputed tables to speed up the low-ram variant,n",
+   "DES,Include DES and 3DES block ciphers,y",
+   "CAST5,Include CAST5 (aka CAST-128) block cipher,y",
+   "NOEKEON,Include Noekeon block cipher,y",
+
+   "CFB,Include CFB block mode of operation,y",
+   "OFB,Include OFB block mode of operation,y",
+   "ECB,Include ECB block mode of operation,y",
+   "CBC,Include CBC block mode of operation,y",
+   "CTR,Include CTR block mode of operation,y",
+
+   "SHA512,Include SHA512 one-way hash,y",
+   "SHA384,Include SHA384 one-way hash (requires SHA512),y",
+   "SHA256,Include SHA256 one-way hash,y",
+   "TIGER,Include TIGER one-way hash,y",
+   "SHA1,Include SHA1 one-way hash,y",
+   "MD5,Include MD5 one-way hash,y",
+   "MD4,Include MD4 one-way hash,y",
+   "MD2,Include MD2 one-way hash,y",
+   "HMAC,Include Hash based Message Authentication Support,y",
+
+   "BASE64,Include Base64 encoding support,y",
+
+   "YARROW,Include Yarrow PRNG,y",
+   "SPRNG,Include Secure PRNG base on RNG code,y",
+   "RC4,Include RC4 PRNG,y",
+   "DEVRANDOM,Use /dev/random or /dev/urandom if available?,y",
+   "TRY_URANDOM_FIRST,Try /dev/urandom before /dev/random?,n",
+
+   "MRSA,Include RSA public key support,y",
+   "MDH,Include Diffie-Hellman (over Z/pZ) public key support,y",
+   "MECC,Include Eliptic Curve public key crypto support,y",
+   "KR,Include Keyring support (groups all three PK systems),y",
+   
+   "DH768,768-bit DH key support,y",
+   "DH1024,1024-bit DH key support,y",
+   "DH1280,1280-bit DH key support,y",
+   "DH1536,1536-bit DH key support,y",
+   "DH1792,1792-bit DH key support,y",
+   "DH2048,2048-bit DH key support,y",
+   "DH2560,2560-bit DH key support,y",
+   "DH3072,3072-bit DH key support,y",
+   "DH4096,4096-bit DH key support,y",
+   
+   "ECC160,160-bit ECC key support,y",
+   "ECC192,192-bit ECC key support,y",
+   "ECC224,224-bit ECC key support,y",
+   "ECC256,256-bit ECC key support,y",
+   "ECC384,384-bit ECC key support,y",
+   "ECC521,521-bit ECC key support,y",
+   
+   "GF,Include GF(2^w) math support (not used internally),n",
+   
+   "MPI,Include MPI big integer math support (required by the public key code),y",
+);
+
+# scan for switches and make variables
+# For every @settings entry: show "prompt: [default]", read one line of input,
+# fall back to the default when the user just presses return, and append the
+# answer (plus a trailing space) to the variable named in the first field.
+for (@settings) {
+   @m = split(",", $_);
+   print "@m[1]: [@m[2]] ";
+   # an empty line (only "\n") selects the default
+   $r = <>; $r = @m[2] if ($r eq "\n");
+   chomp($r);
+   @vars{@m[0]} = @vars{@m[0]} . $r . " ";
+}
+
+# scan for build flags
+# For every @opts entry ("NAME,prompt,default y/n"): ask the question and append
+# "-DNAME " to CFLAGS when the answer is "y", or when the answer is empty and
+# the default is "y".  Any other answer leaves the option disabled.
+for (@opts) {
+   @m = split(",", $_);
+   print "@m[1]: [@m[2]]";
+   $r = <>;  @vars{'CFLAGS'} = @vars{'CFLAGS'} . "-D" . $m[0] . " " if (($r eq "y\n") || ($r eq "\n" && @m[2] eq "y"));
+}   
+
+# write header
+
+# Write mycrypt_custom.h: every -D switch collected in CFLAGS becomes a #define.
+# Abort when the file cannot be created or flushed, so the script never prints
+# "mycrypt_custom.h generated." without having produced the header.
+open(OUT,">mycrypt_custom.h") or die "config.pl: cannot open mycrypt_custom.h for writing: $!\n";
+print OUT "/* This header is meant to be included before mycrypt.h in projects where\n";
+print OUT " * you don't want to throw all the defines in a makefile. \n";
+print OUT " */\n\n#ifndef MYCRYPT_CUSTOM_H_\n#define MYCRYPT_CUSTOM_H_\n\n#ifdef CRYPT\n\t#error mycrypt_custom.h should be included before mycrypt.h\n#endif\n\n";
+
+@m = split(" ", @vars{'CFLAGS'});
+for (@m) {
+    if ($_ =~ /^-D/) {
+       $_ =~ s/-D//;        # drop the switch prefix
+       $_ =~ s/=/" "/ge;    # "NAME=value" becomes "NAME value"
+       print OUT "#define $_\n";
+    }
+}
+
+print OUT "\n\n#include <mycrypt.h>\n\n#endif\n\n";
+close(OUT) or die "config.pl: error while writing mycrypt_custom.h: $!\n";
+       
+print "\n\nmycrypt_custom.h generated.\n";
+
+# Start makefile.out; stop right away if it cannot be created instead of
+# silently printing into a closed handle.
+open(OUT,">makefile.out") or die "config.pl: cannot open makefile.out for writing: $!\n";
+print OUT "#makefile generated with config.pl\n#\n#Tom St Denis (tomstdenis\@yahoo.com, http://tom.iahu.ca) \n\n";
+
+# output unique vars first
+# The -D switches already went into mycrypt_custom.h, so strip them from CFLAGS
+# before it is written to the makefile.  The pattern is greedy: it deletes
+# everything from the first "-D" up to and including the last space in CFLAGS.
+@vars{'CFLAGS'} =~ s/-D.+ /""/ge;
+
+# Emit each variable once: plain "NAME = value" for the tools, "CFLAGS += value"
+# for CFLAGS.  The value is cleared after printing so that later @settings
+# entries naming the same variable print nothing.
+for (@settings) {
+   @m = split(",", $_);
+   print OUT "@m[0] = @vars{@m[0]}\n"   if (@vars{@m[0]} ne "" && @m[0] ne "CFLAGS");
+   print OUT "CFLAGS += @vars{@m[0]}\n" if (@vars{@m[0]} ne "" && @m[0] eq "CFLAGS");
+   @vars{@m[0]} = "";
+}
+
+# output objects
+print OUT "\ndefault: library\n\n";
+print OUT "OBJECTS = keyring.o gf.o mem.o sprng.o ecc.o base64.o dh.o rsa.o bits.o yarrow.o cfb.o ofb.o ecb.o ctr.o cbc.o hash.o tiger.o sha1.o md5.o md4.o md2.o sha256.o sha512.o xtea.o aes.o serpent.o des.o safer_tab.o safer.o safer+.o rc4.o rc2.o rc6.o rc5.o cast5.o noekeon.o blowfish.o crypt.o ampi.o mpi.o prime.o twofish.o packet.o hmac.o strings.o\n\n";
+
+# some depends
+print OUT "rsa.o: rsa_sys.c\ndh.o: dh_sys.c\necc.o: ecc_sys.c\n\n";
+
+# targets
+print OUT "library: \$(OBJECTS)\n\t \$(AR) r libtomcrypt.a \$(OBJECTS)\n\t ranlib libtomcrypt.a\n\n";
+print OUT "clean:\n\trm -f \$(OBJECTS) libtomcrypt.a \n\n";
+
+close OUT;
+
+print "makefile.out generated.\n";
+
+print "\nNow use makefile.out to build the library, e.g. `make -f makefile.out'\n";
+print "In your project just include mycrypt_custom.h (you don't have to include mycrypt.h \n";
+print "but if you do make sure mycrypt_custom.h appears first) your settings should be intact.\n";

BIN
crypt.pdf


Fichier diff supprimé car celui-ci est trop grand
+ 664 - 461
demos/test.c


+ 1427 - 0
demos/test.c~

@@ -0,0 +1,1427 @@
+/* This is the worst code you have ever seen written on purpose.... this code is just a big hack to test
+out the functionality of the library */
+
+#ifdef SONY_PS2
+#include <eetypes.h>
+#include <eeregs.h>
+#include "timer.h"
+#endif
+
+#include "../mycrypt.h"
+
+/* Scratch variable for the most recent library return code; the tests assign
+   call results to it and pass it to error_to_string().
+   NOTE(review): the name clashes with the C library's errno if <errno.h> is
+   pulled in through any header -- confirm that it is not. */
+int errno;
+
+
+/* Key-setup stub for the "memcpy()" test cipher: ignores all arguments and
+   always returns CRYPT_OK. */
+int null_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
+{
+   return CRYPT_OK;
+}
+
+/* "Encrypts" one block by copying 8 bytes from pt to ct unchanged; key is unused. */
+void null_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *key)
+{
+   memcpy(ct, pt, 8);
+}
+
+/* "Decrypts" one block by copying 8 bytes from ct to pt unchanged; key is unused. */
+void null_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *key)
+{
+   memcpy(pt, ct, 8);
+}
+
+/* Self-test stub for the "memcpy()" cipher: nothing to verify, always CRYPT_OK. */
+int null_test(void)
+{
+   return CRYPT_OK;
+}
+
+/* Key-size stub for the "memcpy()" cipher: leaves *desired_keysize untouched
+   and always returns CRYPT_OK. */
+int null_keysize(int *desired_keysize)
+{
+   return CRYPT_OK;
+} 
+
+/* Descriptor of the pass-through "memcpy()" cipher built from the null_* stubs.
+   cbc_tests() and ctr_tests() look it up by name to check the chaining logic
+   independently of any real cipher.
+   NOTE(review): the numeric fields are presumably ID (255), minimum key length,
+   maximum key length, block length (8 each) and default rounds (1) -- confirm
+   the order against struct _cipher_descriptor. */
+const struct _cipher_descriptor null_desc =
+{
+    "memcpy()",
+    255,
+    8, 8, 8, 1,
+    &null_setup,
+    &null_ecb_encrypt,
+    &null_ecb_decrypt,
+    &null_test,
+    &null_keysize
+};
+
+
+/* PRNG state shared by the tests; pad_test() and rsa_test() pass &prng to the
+   rsa_* routines together with find_prng("yarrow"). */
+prng_state prng;
+
+/* Round-trip checks for the LOAD/STORE byte-order macros: store a known 32-bit
+   and 64-bit constant into buf, clear the variable, load it back and compare.
+   Done once with the ...L ("Little") and once with the ...H ("High") variants.
+   Any mismatch prints a message and terminates the process with exit(-1). */
+void store_tests(void) 
+{
+ unsigned char buf[8];
+ unsigned long L;
+ ulong64 LL;
+
+ printf("LOAD32/STORE32 tests\n");
+ /* ...L variants */
+ L = 0x12345678UL;
+ STORE32L(L, &buf[0]);
+ L = 0;
+ LOAD32L(L, &buf[0]);
+ if (L != 0x12345678UL)  { printf("LOAD/STORE32 Little don't work\n"); exit(-1); }
+ LL = CONST64(0x01020304050607);
+ STORE64L(LL, &buf[0]);
+ LL = 0;
+ /* NOTE(review): no semicolon after LOAD64L; this only compiles if the macro
+    expands to a complete statement -- confirm against its definition. */
+ LOAD64L(LL, &buf[0])
+ if (LL != CONST64(0x01020304050607)) { printf("LOAD/STORE64 Little don't work\n"); exit(-1); }
+
+ /* ...H variants */
+ L = 0x12345678UL;
+ STORE32H(L, &buf[0]);
+ L = 0;
+ LOAD32H(L, &buf[0]);
+ if (L != 0x12345678UL) { printf("LOAD/STORE32 High don't work\n"); exit(-1); }
+ LL = CONST64(0x01020304050607);
+ STORE64H(LL, &buf[0]);
+ LL = 0;
+ /* NOTE(review): same missing semicolon as for LOAD64L above. */
+ LOAD64H(LL, &buf[0])
+ if (LL != CONST64(0x01020304050607)) { printf("LOAD/STORE64 High don't work\n"); exit(-1); }
+}
+
+/* Lists every entry of cipher_descriptor[] (up to the first NULL name) with its
+   ID, key-size range and block size in bits, and default round count. */
+void cipher_tests(void) {
+   int idx = 0;
+
+   printf("Ciphers compiled in\n");
+   while (cipher_descriptor[idx].name != NULL) {
+      printf(" %12s (%2d) Key Size: %4d to %4d, Block Size: %3d, Default # of rounds: %2d\n",
+             cipher_descriptor[idx].name,
+             cipher_descriptor[idx].ID,
+             cipher_descriptor[idx].min_key_length*8,
+             cipher_descriptor[idx].max_key_length*8,
+             cipher_descriptor[idx].block_length*8,
+             cipher_descriptor[idx].default_rounds);
+      idx++;
+   }
+}
+
+/* Runs the built-in self-test of every registered cipher.  The first failing
+   cipher prints the reason and terminates the process with exit(-1). */
+void ecb_tests(void)
+{
+ int idx;
+
+ printf("ECB tests\n");
+ for (idx = 0; cipher_descriptor[idx].name != NULL; idx++) {
+     printf(" %12s: ", cipher_descriptor[idx].name);
+
+     errno = cipher_descriptor[idx].test();
+     if (errno == CRYPT_OK) {
+        printf("passed\n");
+        continue;
+     }
+
+     printf(" **failed** Reason: %s\n", error_to_string(errno));
+     exit(-1);
+ }
+}
+
+#ifdef CBC
+/* CBC mode self-test.
+ *
+ * 1. Round trip: encrypts four 8-byte blocks with Blowfish in CBC mode,
+ *    decrypts them in a fresh session started with the same IV and key, and
+ *    checks that the bytes 00..1F come back.
+ * 2. Known answer: repeats the encryption with the pass-through "memcpy()"
+ *    cipher, so the ciphertext exposes the raw chaining and can be compared
+ *    with a fixed vector.
+ *
+ * Any failure prints a diagnostic and terminates the process with exit(-1).
+ */
+void cbc_tests(void)
+{
+ symmetric_CBC cbc;
+ int x, y;
+ unsigned char blk[32], ct[32], key[32], IV[32];
+ const unsigned char test[] = { 0XFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ printf("CBC tests\n");
+ /* ---- CBC ENCODING ---- */
+ /* make up a block, IV and key (the key used to be left uninitialized) */
+ for (x = 0; x < 32; x++) blk[x] = IV[x] = key[x] = x;
+
+ /* now lets start a cbc session */
+ if ((errno = cbc_start(find_cipher("blowfish"), IV, key, 16, 0, &cbc)) != CRYPT_OK) {
+    printf("CBC Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets encode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = cbc_encrypt(blk+8*x, ct+8*x, &cbc)) != CRYPT_OK) {
+       printf("CBC encrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ zeromem(blk, sizeof(blk));
+
+ /* ---- CBC DECODING ---- */
+ /* make up a IV */
+ for (x = 0; x < 32; x++) IV[x] = x;
+
+ /* now lets start a cbc session */
+ if ((errno = cbc_start(find_cipher("blowfish"), IV, key, 16, 0, &cbc)) != CRYPT_OK) {
+     printf("CBC Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets decode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = cbc_decrypt(ct+8*x, blk+8*x, &cbc)) != CRYPT_OK) {
+       printf("CBC decrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ /* print output; a mismatch is fatal, as in the OFB/CFB/CTR tests */
+ for (x = y = 0; x < 32; x++) if (blk[x] != x) y = 1;
+ printf("  %s\n", y?"failed":"passed");
+ if (y) exit(-1);
+
+ /* lets actually check the bytes */
+ memset(IV, 0, 8); IV[0] = 0xFF;              /* IV  = FF 00 00 00 00 00 00 00 */
+ memset(blk, 0, 32); blk[8] = 0xFF;           /* BLK = 00 00 00 00 00 00 00 00 FF 00 00 00 00 00 00 00 */
+ if ((errno = cbc_start(find_cipher("memcpy()"), IV, key, 8, 0, &cbc)) != CRYPT_OK) {
+    printf("CBC Setup: %s\n", error_to_string(errno)); exit(-1); }
+ /* expect: FF 00 00 00 00 00 00 00 */
+ if ((errno = cbc_encrypt(blk, ct, &cbc)) != CRYPT_OK) {
+    printf("CBC encrypt: %s\n", error_to_string(errno)); exit(-1); }
+ /* expect: 00 00 00 00 00 00 00 00 */
+ if ((errno = cbc_encrypt(blk+8, ct+8, &cbc)) != CRYPT_OK) {
+    printf("CBC encrypt: %s\n", error_to_string(errno)); exit(-1); }
+ if (memcmp(ct, test, 16)) {
+    printf("CBC failed logical testing.\n");
+    for (x = 0; x < 16; x++) printf("%02x ", ct[x]);
+    printf("\n");
+    exit(-1);
+ } else {
+    printf("CBC passed logical testing.\n");
+ }
+}
+#else
+void cbc_tests(void) { printf("CBC not compiled in\n"); }
+#endif
+
+#ifdef OFB
+/* OFB mode round-trip test: encrypts four 8-byte chunks with CAST5 in OFB mode,
+ * decrypts them in a fresh session started with the same IV and key, and checks
+ * that the bytes 00..1F come back.  Any failure prints a diagnostic and
+ * terminates the process with exit(-1).
+ */
+void ofb_tests(void)
+{
+ symmetric_OFB ofb;
+ int x, y;
+ unsigned char blk[32], ct[32], key[32], IV[32];
+
+ printf("OFB tests\n");
+ /* ---- ofb ENCODING ---- */
+ /* make up a block, IV and key (the key used to be left uninitialized) */
+ for (x = 0; x < 32; x++) blk[x] = IV[x] = key[x] = x;
+
+ /* now lets start a ofb session */
+ if ((errno = ofb_start(find_cipher("cast5"), IV, key, 16, 0, &ofb)) != CRYPT_OK) {
+    printf("OFB Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets encode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = ofb_encrypt(blk+8*x, ct+8*x, 8, &ofb)) != CRYPT_OK) {
+       printf("OFB encrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ zeromem(blk, sizeof(blk));
+
+ /* ---- ofb DECODING ---- */
+ /* make up a IV */
+ for (x = 0; x < 32; x++) IV[x] = x;
+
+ /* now lets start a ofb session */
+ if ((errno = ofb_start(find_cipher("cast5"), IV, key, 16, 0, &ofb)) != CRYPT_OK) {
+    printf("OFB setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets decode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = ofb_decrypt(ct+8*x, blk+8*x, 8, &ofb)) != CRYPT_OK) {
+       printf("OFB decrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ /* print output */
+ for (x = y = 0; x < 32; x++) if (blk[x] != x) y = 1;
+ printf("  %s\n", y?"failed":"passed");
+ if (y) exit(-1);
+}
+#else
+void ofb_tests(void) { printf("OFB not compiled in\n"); }
+#endif
+
+#ifdef CFB
+/* CFB mode round-trip test: encrypts four 8-byte chunks with Blowfish in CFB
+ * mode, decrypts them in a fresh session started with the same IV and key, and
+ * checks that the bytes 00..1F come back.  Any failure prints a diagnostic and
+ * terminates the process with exit(-1).
+ */
+void cfb_tests(void)
+{
+ symmetric_CFB cfb;
+ int x, y;
+ unsigned char blk[32], ct[32], key[32], IV[32];
+
+ printf("CFB tests\n");
+ /* ---- cfb ENCODING ---- */
+ /* make up a block, IV and key (the key used to be left uninitialized) */
+ for (x = 0; x < 32; x++) blk[x] = IV[x] = key[x] = x;
+
+ /* now lets start a cfb session */
+ if ((errno = cfb_start(find_cipher("blowfish"), IV, key, 16, 0, &cfb)) != CRYPT_OK) {
+    printf("CFB setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets encode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = cfb_encrypt(blk+8*x, ct+8*x, 8, &cfb)) != CRYPT_OK) {
+       printf("CFB encrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ zeromem(blk, sizeof(blk));
+
+ /* ---- cfb DECODING ---- */
+ /* make up a IV */
+ for (x = 0; x < 32; x++) IV[x] = x;
+
+ /* now lets start a cfb session */
+ if ((errno = cfb_start(find_cipher("blowfish"), IV, key, 16, 0, &cfb)) != CRYPT_OK) {
+    printf("CFB Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets decode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = cfb_decrypt(ct+8*x, blk+8*x, 8, &cfb)) != CRYPT_OK) {
+       printf("CFB decrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ /* print output */
+ for (x = y = 0; x < 32; x++) if (blk[x] != x) y = 1;
+ printf("  %s\n", y?"failed":"passed");
+ if (y) exit(-1);
+}
+#else
+void cfb_tests(void) { printf("CFB not compiled in\n"); }
+#endif
+
+#ifdef CTR
+/* CTR mode self-test.
+ *
+ * 1. Round trip: encrypts four 8-byte chunks with XTEA in CTR mode, decrypts
+ *    them in a fresh session started with the same counter and key, and checks
+ *    that the bytes 00..1F come back.
+ * 2. Known answer: repeats the encryption with the pass-through "memcpy()"
+ *    cipher, so the ciphertext exposes the raw counter stream and can be
+ *    compared with a fixed vector.
+ *
+ * Any failure prints a diagnostic and terminates the process with exit(-1).
+ */
+void ctr_tests(void)
+{
+ symmetric_CTR ctr;
+ int x, y;
+ unsigned char blk[32], ct[32], key[32], count[32];
+ const unsigned char test[] = { 0xFF, 0, 0, 0, 0, 0, 0, 0,  0, 3, 0, 0, 0, 0, 0, 0 };
+
+ printf("CTR tests\n");
+ /* ---- CTR ENCODING ---- */
+ /* make up a block, counter and key (the key used to be left uninitialized) */
+ for (x = 0; x < 32; x++) blk[x] = count[x] = key[x] = x;
+
+ /* now lets start a ctr session */
+ if ((errno = ctr_start(find_cipher("xtea"), count, key, 16, 0, &ctr)) != CRYPT_OK) {
+    printf("CTR Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets encode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = ctr_encrypt(blk+8*x, ct+8*x, 8, &ctr)) != CRYPT_OK) {
+       printf("CTR encrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ zeromem(blk, sizeof(blk));
+
+ /* ---- CTR DECODING ---- */
+ /* make up a IV */
+ for (x = 0; x < 32; x++) count[x] = x;
+
+ /* now lets start a ctr session */
+ if ((errno = ctr_start(find_cipher("xtea"), count, key, 16, 0, &ctr)) != CRYPT_OK) {
+    printf("CTR Setup: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* now lets decode 32 bytes */
+ for (x = 0; x < 4; x++) {
+    if ((errno = ctr_decrypt(ct+8*x, blk+8*x, 8, &ctr)) != CRYPT_OK) {
+       printf("CTR decrypt: %s\n", error_to_string(errno)); exit(-1);
+    }
+ }
+
+ /* print output */
+ for (x = y = 0; x < 32; x++) if (blk[x] != x) y = 1;
+ printf("  %s\n", y?"failed":"passed");
+ if (y) exit(-1);
+
+ /* lets actually check the bytes */
+ memset(count, 0, 8); count[0] = 0xFF;        /* IV  = FF 00 00 00 00 00 00 00 */
+ memset(blk, 0, 32); blk[9] = 2;              /* BLK = 00 00 00 00 00 00 00 00 00 02 00 00 00 00 00 00 */
+ if ((errno = ctr_start(find_cipher("memcpy()"), count, key, 8, 0, &ctr)) != CRYPT_OK) {
+    printf("CTR Setup: %s\n", error_to_string(errno)); exit(-1); }
+ /* expect: FF 00 00 00 00 00 00 00 */
+ if ((errno = ctr_encrypt(blk, ct, 8, &ctr)) != CRYPT_OK) {
+    printf("CTR encrypt: %s\n", error_to_string(errno)); exit(-1); }
+ /* expect: 00 03 00 00 00 00 00 00 */
+ if ((errno = ctr_encrypt(blk+8, ct+8, 8, &ctr)) != CRYPT_OK) {
+    printf("CTR encrypt: %s\n", error_to_string(errno)); exit(-1); }
+ if (memcmp(ct, test, 16)) {
+    printf("CTR failed logical testing.\n");
+    for (x = 0; x < 16; x++) printf("%02x ", ct[x]);
+    printf("\n");
+    exit(-1);                                 /* fatal, as in the CBC known-answer test */
+ } else {
+    printf("CTR passed logical testing.\n");
+ }
+
+}
+#else
+void ctr_tests(void) { printf("CTR not compiled in\n"); }
+#endif
+
+/* Runs the built-in self-test of every registered hash and prints the result.
+   A failing hash is reported but does not stop the run. */
+void hash_tests(void)
+{
+ int idx;
+
+ printf("Hash tests\n");
+ for (idx = 0; hash_descriptor[idx].name != NULL; idx++) {
+     printf(" %10s (%2d) ", hash_descriptor[idx].name, hash_descriptor[idx].ID);
+
+     errno = hash_descriptor[idx].test();
+     if (errno == CRYPT_OK) {
+        printf("passed\n");
+     } else {
+        printf("**failed** Reason: %s\n", error_to_string(errno));
+     }
+ }
+}
+
+#ifdef MRSA
+/* Round-trip test for rsa_pad()/rsa_depad(): pads a 16-byte message 00..0F,
+   removes the padding again and checks that both length and contents survive.
+   Any failure prints a diagnostic and terminates the process with exit(-1). */
+void pad_test(void)
+{
+ unsigned char msg[100], padded[100];
+ unsigned long i, msglen, padlen;
+
+ /* make a dummy message */
+ for (i = 0; i < 16; i++) msg[i] = (unsigned char)i;
+
+ /* pad the message so that random filler is placed before and after it */
+ padlen = 100;
+ errno = rsa_pad(msg, 16, padded, &padlen, find_prng("yarrow"), &prng);
+ if (errno != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+
+ /* depad the message to get the original content */
+ memset(msg, 0, sizeof(msg));
+ msglen = 100;
+ errno = rsa_depad(padded, padlen, msg, &msglen);
+ if (errno != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+
+ /* check outcome */
+ printf("rsa_pad: ");
+ if (msglen != 16) {
+    printf("Failed.  Wrong size.\n");
+    exit(-1);
+ }
+ for (i = 0; i < 16; i++) {
+    if (msg[i] != i) {
+       printf("Failed.  Expected %02lx and got %02x.\n", i, msg[i]);
+       exit(-1);
+    }
+ }
+ printf("passed.\n");
+}
+
+/* RSA functional tests followed by timing runs.
+ *
+ * Functional part (1024-bit key, e = 65537):
+ *   - raw rsa_exptmod() public/private round trip of an 8-byte message,
+ *   - rsa_encrypt_key()/rsa_decrypt_key() round trip of 16 bytes,
+ *   - rsa_sign_hash()/rsa_verify_hash() on a good and on a corrupted hash.
+ * Timing part: for key sizes from 1024 bits up to `limit` in steps of 512,
+ * prints the average time to generate a key and to do a public and a private
+ * rsa_exptmod().
+ *
+ * Any library error or mismatch prints a message and terminates the process
+ * with exit(-1).
+ */
+void rsa_test(void)
+{
+ unsigned char in[4096], out[4096];
+ unsigned long x, y, z, limit;
+ int stat;
+ rsa_key key;
+ clock_t t;
+
+ /* ---- SINGLE ENCRYPT ---- */
+ /* encrypt a short 8 byte string */
+ if ((errno = rsa_make_key(&prng, find_prng("yarrow"), 1024/8, 65537, &key)) != CRYPT_OK) { 
+ 	printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+ for (x = 0; x < 8; x++) in[x] = (unsigned char)(x+1);
+ y = sizeof(in);
+ if ((errno = rsa_exptmod(in, 8, out, &y, PK_PUBLIC, &key)) != CRYPT_OK) { 
+ 	printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* decrypt it */
+ zeromem(in, sizeof(in));
+ /* NOTE(review): the output buffer of this call is `in`, yet its capacity is
+    taken from sizeof(out); both arrays are 4096 bytes so the value is the same. */
+ x = sizeof(out);
+ if ((errno = rsa_exptmod(out, y, in, &x, PK_PRIVATE, &key)) != CRYPT_OK) { 
+ 	printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+
+ /* compare */
+ printf("RSA    : ");
+ for (x = 0; x < 8; x++) if (in[x] != (x+1)) { printf("Failed.  x==%02lx, in[%ld]==%02x\n", x, x, in[x]); exit(-1); }
+ printf("passed.\n");
+
+ /* test the rsa_encrypt_key functions */
+ for (x = 0; x < 16; x++) in[x] = x;
+ y = sizeof(out);
+ if ((errno = rsa_encrypt_key(in, 16, out, &y, &prng, find_prng("yarrow"), &key)) != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+ zeromem(in, sizeof(in));
+ x = sizeof(in);
+ if ((errno = rsa_decrypt_key(out, y, in, &x, &key)) != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+ printf("RSA en/de crypt key routines: ");
+ if (x != 16) { printf("Failed (length)\n"); exit(-1); }
+ for (x = 0; x < 16; x++) if (in[x] != x) { printf("Failed (contents)\n"); exit(-1); }
+ printf("Passed\n");
+
+ /* test sign_hash functions */
+ for (x = 0; x < 16; x++) in[x] = x;
+ x = sizeof(in);
+ if ((errno = rsa_sign_hash(in, 16, out, &x, &key)) != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+ printf("RSA signed hash: %lu bytes\n", x);
+ if ((errno = rsa_verify_hash(out, x, in, &stat, &key)) != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+ printf("Verify hash: %s, ", stat?"passed":"failed");
+ /* flip one bit of the hash: verification must now report a mismatch (stat == 0) */
+ in[0] ^= 1;
+ if ((errno = rsa_verify_hash(out, x, in, &stat, &key)) != CRYPT_OK) {
+    printf("Error: %s\n", error_to_string(errno));
+    exit(-1);
+ }
+ printf("%s\n", (!stat)?"passed":"failed");
+ if (stat) exit(-1);
+ rsa_free(&key);
+
+ /* make a RSA key */
+ /* upper bound (in bits) of the key sizes that are timed below */
+#ifdef SONY_PS2_NOPE
+   limit = 1024;
+#else
+   limit = 2048;
+#endif
+
+ { int tt;
+ 
+ for (z = 1024; z <= limit; z += 512) {
+    /* generate three keys to average the time; only the last one is kept
+       (the first two are freed) and is used for the timings below */
+    t = XCLOCK();
+    for (tt = 0; tt < 3; tt++) {
+    if ((errno = rsa_make_key(&prng, find_prng("yarrow"), z/8, 65537, &key)) != CRYPT_OK) { printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+       if (tt < 2) rsa_free(&key);
+    }       
+    t = XCLOCK() - t;
+    printf("Took %.0f ms to make a %ld-bit RSA key.\n", 1000.0 * (((double)t / 3.0) / (double)XCLOCKS_PER_SEC), z);
+
+    /* time encryption */
+    t = XCLOCK();
+    
+    /* 100 public-key operations on the first 8 bytes of `in` */
+    for (tt = 0; tt < 100; tt++) {
+       y = sizeof(in);
+       if ((errno = rsa_exptmod(in, 8, out, &y, PK_PUBLIC, &key)) != CRYPT_OK) { printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+    }       
+    t = XCLOCK() - t;
+    printf("Took %.0f ms to encrypt with a %ld-bit RSA key.\n", 1000.0 * (((double)t / 100.0)/ (double)XCLOCKS_PER_SEC), z);
+
+    /* time decryption */
+    t = XCLOCK();
+    /* 100 private-key operations on the ciphertext produced above (length y) */
+    for (tt = 0; tt < 100; tt++) {
+       x = sizeof(out);
+       if ((errno = rsa_exptmod(out, y, in, &x, PK_PRIVATE, &key)) != CRYPT_OK) { printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+    }       
+    t = XCLOCK() - t;
+    printf("Took %.0f ms to decrypt with a %ld-bit RSA key.\n", 1000.0 * (((double)t / 100.0) / (double)XCLOCKS_PER_SEC), z);
+    rsa_free(&key);
+ }
+ }
+ 
+
+
+}
+#else
+void pad_test(void) { printf("MRSA not compiled in\n"); }
+void rsa_test(void) { printf("MRSA not compiled in\n"); }
+#endif
+
+#ifdef BASE64
+/* Base64 round-trip test: encodes the 16 bytes 00..0F, decodes the result and
+ * checks that the original bytes come back.  Any failure prints a diagnostic
+ * and terminates the process with exit(-1).
+ */
+void base64_test(void)
+{
+   unsigned char buf[2][100];
+   unsigned long x, y;
+
+   printf("Base64 tests\n");
+   /* zero both buffers so the encoded text printed with %s is NUL-terminated */
+   zeromem(buf, sizeof(buf));
+   for (x = 0; x < 16; x++) buf[0][x] = (unsigned char)x;
+
+   x = 100;
+   /* keep the return code in errno: the error message below is built from it */
+   if ((errno = base64_encode(buf[0], 16, buf[1], &x)) != CRYPT_OK) {
+      printf("  error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("  encoded 16 bytes to %ld bytes...[%s]\n", x, buf[1]);
+   memset(buf[0], 0, 100);
+   y = 100;
+   if ((errno = base64_decode(buf[1], x, buf[0], &y)) != CRYPT_OK) {
+      printf("  error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("  decoded %ld bytes to %ld bytes\n", x, y);
+   for (x = 0; x < 16; x++) if (buf[0][x] != x) {
+      printf(" **failed**\n");
+      exit(-1);
+   }
+   printf("  passed\n");
+}
+#else
+void base64_test(void) { printf("Base64 not compiled in\n"); }
+#endif
+
+/* Throughput benchmark for every registered hash: hashes a 4 KB buffer
+ * repeatedly for about three seconds of XCLOCK() time and prints the rate in
+ * Mbit/sec.  Return codes of hash_memory() are not checked here; hash_tests()
+ * runs each hash's own self-test.
+ */
+void time_hash(void)
+{
+    clock_t t1;
+    int x, y;
+    unsigned long z;
+    unsigned char input[4096], out[MAXBLOCKSIZE];
+
+    /* give the buffer defined contents; it used to be hashed uninitialized */
+    for (x = 0; x < (int)sizeof(input); x++) input[x] = (unsigned char)x;
+
+    printf("Hash Time Trials (4KB blocks):\n");
+    for (x = 0; hash_descriptor[x].name != NULL; x++) {
+        t1 = XCLOCK();
+        z = sizeof(out);
+        y = 0;
+        /* 16 calls per clock check keep the XCLOCK() overhead out of the figure */
+        while (XCLOCK() - t1 < (3 * XCLOCKS_PER_SEC)) {
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z);
+            hash_memory(x, input, 4096, out, &z); y += 16;
+        }
+        t1 = XCLOCK() - t1;
+        printf("%-20s: Hash at %5.2f Mbit/sec\n", hash_descriptor[x].name,
+               ((8.0 * 4096.0) * ((double)y / ((double)t1 / (double)XCLOCKS_PER_SEC))) / 1000000.0);
+    }
+}
+
+/* Benchmark ECB encryption and decryption of every registered cipher.
+ *
+ * Each cipher is keyed with its minimum key length and then run over a single
+ * block in place for roughly three seconds per direction; throughput is
+ * printed in Mbit/sec.  A key schedule failure aborts with exit(-1).
+ */
+void time_ecb(void)
+{
+    clock_t t1, t2;
+    long x, y1, y2;
+    unsigned char pt[32], key[32];
+    symmetric_key skey;
+    void (*func)(const unsigned char *, unsigned char *, symmetric_key *);
+
+    /* use a defined key and plaintext rather than indeterminate stack contents */
+    zeromem(key, sizeof(key));
+    zeromem(pt, sizeof(pt));
+
+    printf("ECB Time Trials for the Symmetric Ciphers:\n");
+    for (x = 0; cipher_descriptor[x].name != NULL; x++) {
+        /* timing an unscheduled key would be meaningless, so check the setup */
+        if ((errno = cipher_descriptor[x].setup(key, cipher_descriptor[x].min_key_length, 0, &skey)) != CRYPT_OK) {
+            printf("Error: %s\n", error_to_string(errno));
+            exit(-1);
+        }
+
+/* DO256 expands to 256 back-to-back calls so the clock is polled rarely */
+#define DO1   func(pt,pt,&skey);
+#define DO2   DO1 DO1
+#define DO4   DO2 DO2
+#define DO8   DO4 DO4
+#define DO16  DO8 DO8
+#define DO32  DO16 DO16
+#define DO64  DO32 DO32
+#define DO128 DO64 DO64
+#define DO256 DO128 DO128 
+
+        func = cipher_descriptor[x].ecb_encrypt;
+        y1 = 0;
+        t1 = XCLOCK();
+        while (XCLOCK() - t1 < 3*XCLOCKS_PER_SEC) {
+            DO256; y1 += 256;
+        }
+        t1 = XCLOCK() - t1;
+
+        func = cipher_descriptor[x].ecb_decrypt;
+        y2 = 0;
+        t2 = XCLOCK();
+        while (XCLOCK() - t2 < 3*XCLOCKS_PER_SEC) {
+            DO256; y2 += 256;
+        }
+        t2 = XCLOCK() - t2;
+        printf("%-20s: Encrypt at %5.2f Mbit/sec and Decrypt at %5.2f Mbit/sec\n",
+               cipher_descriptor[x].name,
+               ((8.0 * (double)cipher_descriptor[x].block_length) * ((double)y1 / ((double)t1 / (double)XCLOCKS_PER_SEC))) / 1000000.0,
+               ((8.0 * (double)cipher_descriptor[x].block_length) * ((double)y2 / ((double)t2 / (double)XCLOCKS_PER_SEC))) / 1000000.0);
+
+#undef DO256
+#undef DO128
+#undef DO64
+#undef DO32
+#undef DO16
+#undef DO8
+#undef DO4
+#undef DO2
+#undef DO1
+    }
+}
+
+#ifdef MDH
+/* Self test and key generation timing for the Diffie-Hellman routines.
+ *
+ * Steps: (1) two keys agree on a shared secret in both directions,
+ * (2) a public key survives export/import, (3) key generation is timed for a
+ * range of sizes, (4) dh_encrypt_key/dh_decrypt_key round trip,
+ * (5) dh_sign_hash/dh_verify_hash accept a good hash and reject a modified one.
+ * Results go to stdout; a hard error terminates the process with exit(-1).
+ */
+void dh_tests(void)
+{
+   unsigned char buf[3][4096];
+   unsigned long x, y, z;
+   int low, high, stat, stat2;
+   dh_key usera, userb;
+   clock_t t1;
+
+/*   if ((errno = dh_test()) != CRYPT_OK) printf("DH Error: %s\n", error_to_string(errno)); */
+
+   dh_sizes(&low, &high);
+   printf("DH Keys from %d to %d supported.\n", low*8, high*8);
+
+   /* make up two keys */
+   if ((errno = dh_make_key(&prng, find_prng("yarrow"), 96, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   if ((errno = dh_make_key(&prng, find_prng("yarrow"), 96, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* make the shared secret, once from each side */
+   x = sizeof(buf[0]);
+   if ((errno = dh_shared_secret(&usera, &userb, buf[0], &x)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   y = sizeof(buf[1]);
+   if ((errno = dh_shared_secret(&userb, &usera, buf[1], &y)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   if (y != x) { printf("DH Shared keys are not same size.\n"); exit(-1); }
+   if (memcmp(buf[0], buf[1], x)) { printf("DH Shared keys not same contents.\n"); exit(-1); }
+
+   /* now export userb */
+   y = sizeof(buf[1]);
+   if ((errno = dh_export(buf[1], &y, PK_PUBLIC, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   dh_free(&userb);
+
+   /* import and make the shared secret again */
+   if ((errno = dh_import(buf[1], y, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   z = sizeof(buf[2]);
+   if ((errno = dh_shared_secret(&usera, &userb, buf[2], &z)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   printf("DH routines: ");
+   if (z != x) { printf("failed.  Size don't match?\n"); exit(-1); }
+   if (memcmp(buf[0], buf[2], x)) { printf("Failed.  Content didn't match.\n"); exit(-1); }
+   printf("Passed\n");
+   dh_free(&usera);
+   dh_free(&userb);
+
+   /* time key generation for each listed size (in bytes) */
+   {
+      static const int sizes[] = { 96, 128, 160, 192, 224, 256, 320, 384, 512 };
+      int ii, tt;
+
+      for (ii = 0; ii < (int)(sizeof(sizes)/sizeof(sizes[0])); ii++) {
+         t1 = XCLOCK();
+         for (tt = 0; tt < 50; tt++) {
+             /* a failed generation must not be freed or counted as a timing sample */
+             if ((errno = dh_make_key(&prng, find_prng("yarrow"), sizes[ii], &usera)) != CRYPT_OK) {
+                printf("Error: %s\n", error_to_string(errno));
+                exit(-1);
+             }
+             dh_free(&usera);
+         }
+         t1 = XCLOCK() - t1;
+         printf("Make dh-%d key took %f msec\n", sizes[ii] * 8, 1000.0 * (((double)t1 / 50.0) / (double)XCLOCKS_PER_SEC));
+      }
+   }
+
+   /* test encrypt_key */
+   if ((errno = dh_make_key(&prng, find_prng("yarrow"), 96, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   for (x = 0; x < 16; x++) buf[0][x] = (unsigned char)x;
+   y = sizeof(buf[1]);
+   if ((errno = dh_encrypt_key(buf[0], 16, buf[1], &y, &prng, find_prng("yarrow"), find_hash("md5"), &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   zeromem(buf[0], sizeof(buf[0]));
+   x = sizeof(buf[0]);
+   if ((errno = dh_decrypt_key(buf[1], y, buf[0], &x, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("DH en/de crypt key routines: ");
+   if (x != 16) { printf("Failed (length)\n"); exit(-1); }
+   for (x = 0; x < 16; x++) if (buf[0][x] != x) { printf("Failed (contents)\n"); exit(-1); }
+   printf("Passed (size %lu)\n", y);
+
+   /* test sign_hash */
+   for (x = 0; x < 16; x++) buf[0][x] = (unsigned char)x;
+   x = sizeof(buf[1]);
+   if ((errno = dh_sign_hash(buf[0], 16, buf[1], &x, &prng, find_prng("yarrow"), &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   /* store the verify result in errno so the message shows the actual failure */
+   if ((errno = dh_verify_hash(buf[1], x, buf[0], 16, &stat, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   /* flip one bit of the hash: verification must now report a mismatch */
+   buf[0][0] ^= 1;
+   if ((errno = dh_verify_hash(buf[1], x, buf[0], 16, &stat2, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("dh_sign/verify_hash: %s (%d,%d), %lu\n", ((stat==1)&&(stat2==0))?"passed":"failed", stat,stat2, x);
+   dh_free(&usera);
+}
+#else
+void dh_tests(void) { printf("MDH not compiled in\n"); }
+#endif
+
+int callback_x = 0;
+/* Progress callback handed to the RNG routines: draws the next frame of a
+ * four-state text spinner followed by a backspace so it redraws in place.
+ */
+void callback(void)
+{
+   static const char spinner[] = "-\\|/";
+
+   callback_x += 1;
+   printf("%c\x08", spinner[callback_x & 3]);
+#ifndef SONY_PS2
+   fflush(stdout);
+#endif
+}
+
+/* Reads 16 bytes from the system RNG, reports the measured rate and the bytes
+ * themselves, then (when YARROW is compiled in) seeds the global prng from
+ * the RNG.  A failure to start yarrow terminates with exit(-1).
+ */
+void rng_tests(void)
+{
+   unsigned char sample[16];
+   clock_t elapsed;
+   int got, idx;
+
+   printf("RNG tests\n");
+
+   elapsed = XCLOCK();
+   got     = rng_get_bytes(sample, sizeof(sample), &callback);
+   elapsed = XCLOCK() - elapsed;
+
+   printf("  %f bytes per second...",
+         (double)got / ((double)elapsed / (double)XCLOCKS_PER_SEC));
+   printf("read %d bytes.\n  ", got);
+
+   idx = 0;
+   while (idx < got) {
+       printf("%02x ", sample[idx]);
+       ++idx;
+   }
+   printf("\n");
+
+#ifdef YARROW
+   errno = rng_make_prng(128, find_prng("yarrow"), &prng, &callback);
+   if (errno != CRYPT_OK) {
+       printf(" starting yarrow error: %s\n", error_to_string(errno));
+       exit(-1);
+   }
+#endif
+}
+
+#ifdef MECC
+/* Self test and key generation timing for the ECC routines.
+ *
+ * Steps: (1) ecc_test() built-in check, (2) two keys agree on a shared secret
+ * in both directions, (3) a public key survives export/import, (4) key
+ * generation is timed for a range of sizes, (5) ecc_encrypt_key/
+ * ecc_decrypt_key round trip, (6) ecc_sign_hash/ecc_verify_hash accept a good
+ * hash and reject a modified one.  Results go to stdout; a hard error
+ * terminates the process with exit(-1).
+ */
+void ecc_tests(void)
+{
+   unsigned char buf[4][4096];
+   unsigned long x, y, z;
+   int stat, stat2, low, high;
+   ecc_key usera, userb;
+   clock_t t1;
+
+   if ((errno = ecc_test()) != CRYPT_OK) { printf("ecc Error: %s\n", error_to_string(errno)); exit(-1); }
+
+   ecc_sizes(&low, &high);
+   printf("ecc Keys from %d to %d supported.\n", low*8, high*8);
+
+   /* make up two keys */
+   if ((errno = ecc_make_key(&prng, find_prng("yarrow"), 24, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   if ((errno = ecc_make_key(&prng, find_prng("yarrow"), 24, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* make the shared secret, once from each side */
+   x = sizeof(buf[0]);
+   if ((errno = ecc_shared_secret(&usera, &userb, buf[0], &x)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   y = sizeof(buf[1]);
+   if ((errno = ecc_shared_secret(&userb, &usera, buf[1], &y)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   if (y != x) { printf("ecc Shared keys are not same size.\n"); exit(-1); }
+
+   if (memcmp(buf[0], buf[1], x)) { printf("ecc Shared keys not same contents.\n"); exit(-1); }
+
+   /* now export userb */
+   y = sizeof(buf[1]);
+   if ((errno = ecc_export(buf[1], &y, PK_PUBLIC, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   ecc_free(&userb);
+   printf("ECC-192 export took %lu bytes\n", y);
+
+   /* import and make the shared secret again */
+   if ((errno = ecc_import(buf[1], y, &userb)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   z = sizeof(buf[2]);
+   if ((errno = ecc_shared_secret(&usera, &userb, buf[2], &z)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   printf("ecc routines: ");
+   if (z != x) { printf("failed.  Size don't match?\n"); exit(-1); }
+   if (memcmp(buf[0], buf[2], x)) { printf("Failed.  Content didn't match.\n"); exit(-1); }
+   printf("Passed\n");
+   ecc_free(&usera);
+   ecc_free(&userb);
+
+   /* time key generation for each listed size (in bytes) */
+   {
+      static const int sizes[] = { 20, 24, 28, 32, 48, 65 };
+      int ii, tt;
+
+      for (ii = 0; ii < (int)(sizeof(sizes)/sizeof(sizes[0])); ii++) {
+         t1 = XCLOCK();
+         for (tt = 0; tt < 25; tt++) {
+             if ((errno = ecc_make_key(&prng, find_prng("yarrow"), sizes[ii], &usera)) != CRYPT_OK) { printf("Error: %s\n", error_to_string(errno)); exit(-1); }
+             ecc_free(&usera);
+         }
+         t1 = XCLOCK() - t1;
+         printf("Make ECC-%d key took %f msec\n", sizes[ii]*8, 1000.0 * (((double)t1/25.0) / (double)XCLOCKS_PER_SEC));
+      }
+   }
+
+   /* test encrypt_key */
+   if ((errno = ecc_make_key(&prng, find_prng("yarrow"), 65, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   for (x = 0; x < 16; x++) buf[0][x] = (unsigned char)x;
+   y = sizeof(buf[1]);
+   if ((errno = ecc_encrypt_key(buf[0], 16, buf[1], &y, &prng, find_prng("yarrow"), find_hash("md5"), &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   zeromem(buf[0], sizeof(buf[0]));
+   x = sizeof(buf[0]);
+   if ((errno = ecc_decrypt_key(buf[1], y, buf[0], &x, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("ECC en/de crypt key routines: ");
+   if (x != 16) { printf("Failed (length)\n"); exit(-1); }
+   for (x = 0; x < 16; x++) if (buf[0][x] != x) { printf("Failed (contents)\n"); exit(-1); }
+   printf("Passed (size: %lu)\n", y);
+
+   /* test sign_hash */
+   for (x = 0; x < 16; x++) buf[0][x] = (unsigned char)x;
+   x = sizeof(buf[1]);
+   if ((errno = ecc_sign_hash(buf[0], 16, buf[1], &x, &prng, find_prng("yarrow"), &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   /* store the verify result in errno so the message shows the actual failure */
+   if ((errno = ecc_verify_hash(buf[1], x, buf[0], 16, &stat, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   /* flip one bit of the hash: verification must now report a mismatch */
+   buf[0][0] ^= 1;
+   if ((errno = ecc_verify_hash(buf[1], x, buf[0], 16, &stat2, &usera)) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   printf("ecc_sign/verify_hash: %s (%d,%d)\n", ((stat==1)&&(stat2==0))?"passed":"failed", stat,stat2);
+   ecc_free(&usera);
+}
+#else
+void ecc_tests(void) { printf("MECC not compiled in\n"); }
+#endif
+
+#ifdef GF
+void gf_tests(void)   /* Demo of the gf_* helpers: runs inversion, raw
+                        * serialisation, the primality check and sqrt/mulmod
+                        * on fixed inputs and prints the results.  Only the
+                        * primality lines print passed/failed; the other
+                        * values are for inspection and nothing here exits.
+                        */
+{
+   gf_int a, b, c, d;
+   int n;
+   unsigned char buf[1024];
+
+   printf("GF tests\n");
+   gf_zero(a);gf_zero(b);gf_zero(c);gf_zero(d);
+
+   /* a == 0x18000000b */
+   a[1] = 1;
+   a[0] = 0x8000000bUL;
+
+   /* b == 0x012345678 */
+   b[0] = 0x012345678UL;
+
+   /* find 1/b mod a */
+   gf_invmod(b,a,c);
+
+   /* find 1/1/b mod a */
+   gf_invmod(c,a,d);   /* inverse of the inverse; presumably d should equal b again -- TODO confirm */
+
+   /* display them */
+   printf("  %08lx %08lx\n", c[0], d[0]);
+
+   /* store as binary string */
+   n = gf_size(a);
+   printf("  a takes %d bytes\n", n);
+   gf_toraw(a, buf);
+   gf_readraw(a, buf, n);   /* read the bytes straight back into a */
+   printf("  a == %08lx%08lx\n", a[1], a[0]);
+
+   /* primality testing */
+   gf_zero(a);
+   a[0] = 0x169;
+   printf("  GF prime: %s, ", gf_is_prime(a)?"passed":"failed");   /* 0x169 is expected to test prime */
+   a[0] = 0x168;
+   printf("  %s\n", gf_is_prime(a)?"failed":"passed");   /* 0x168 is expected to test non-prime */
+
+   /* test sqrt code */
+   gf_zero(a);
+   a[1] = 0x00000001;
+   a[0] = 0x8000000bUL;
+   gf_zero(b);
+   b[0] = 0x12345678UL;
+
+   gf_sqrt(b, a, c);
+   gf_mulmod(c, c, a, b);   /* square c again so the printed value can be compared with the input b */
+   printf("  (%08lx)^2 = %08lx (mod %08lx%08lx) \n", c[0], b[0], a[1], a[0]);
+}
+#else
+void gf_tests(void) { printf("GF not compiled in\n"); }
+#endif
+
+#ifdef MPI
+/* Generates a random 1024-bit (128 byte) prime with the global yarrow PRNG
+ * and prints it in decimal, wrapped at 60 digits per line.  Any failure to
+ * initialise the integer or to find a prime terminates with exit(-1).
+ */
+void test_prime(void)
+{
+   unsigned char buf[1024];
+   mp_int a;
+   int x, ndigits;
+
+   /* make a 1024 bit prime */
+   if (mp_init(&a) != MP_OKAY) {
+      printf("Error: mp_init failed\n");
+      exit(-1);
+   }
+   if ((errno = rand_prime(&a, 128, &prng, find_prng("yarrow"))) != CRYPT_OK) {
+      printf("Error: %s\n", error_to_string(errno));
+      mp_clear(&a);
+      exit(-1);
+   }
+
+   /* dump it; the buffer is zeroed first so it is always a valid C string */
+   zeromem(buf, sizeof(buf));
+   mp_todecimal(&a, buf);
+   printf("1024-bit prime:\n");
+   ndigits = (int)strlen((const char *)buf);   /* measured once, not per iteration */
+   for (x = 0; x < ndigits; ) {
+       printf("%c", buf[x]);
+       if (!(++x % 60)) printf("\\ \n");
+   }
+   printf("\n\n");
+
+   mp_clear(&a);
+}
+#else
+void test_prime(void) { printf("MPI not compiled in\n"); }
+#endif
+
+void register_all_algs(void)   /* Registers every cipher, hash and PRNG
+                                 * descriptor whose feature macro is defined
+                                 * at build time; the null cipher is always
+                                 * registered.  Return values of the
+                                 * register_* calls are not checked here.
+                                 */
+{
+#ifdef BLOWFISH
+   register_cipher(&blowfish_desc);
+#endif
+#ifdef XTEA
+   register_cipher(&xtea_desc);
+#endif
+#ifdef RC5
+   register_cipher(&rc5_desc);
+#endif
+#ifdef RC6
+   register_cipher(&rc6_desc);
+#endif
+#ifdef SAFERP
+   register_cipher(&saferp_desc);
+#endif
+#ifdef SERPENT
+   register_cipher(&serpent_desc);
+#endif
+#ifdef RIJNDAEL
+   register_cipher(&aes_desc);
+#endif
+#ifdef TWOFISH
+   register_cipher(&twofish_desc);
+#endif
+#ifdef SAFER
+   register_cipher(&safer_k64_desc);
+   register_cipher(&safer_sk64_desc);
+   register_cipher(&safer_k128_desc);
+   register_cipher(&safer_sk128_desc);
+#endif
+#ifdef RC2
+   register_cipher(&rc2_desc);
+#endif
+#ifdef DES
+   register_cipher(&des_desc);
+   register_cipher(&des3_desc);
+#endif
+#ifdef CAST5
+   register_cipher(&cast5_desc);
+#endif
+#ifdef NOEKEON
+   register_cipher(&noekeon_desc);
+#endif
+
+   register_cipher(&null_desc);
+
+#ifdef SHA256
+   register_hash(&sha256_desc);
+#endif
+#ifdef TIGER
+   register_hash(&tiger_desc);
+#endif
+#ifdef SHA1
+   register_hash(&sha1_desc);
+#endif
+#ifdef MD5
+   register_hash(&md5_desc);
+#endif
+#ifdef SHA384
+   register_hash(&sha384_desc);
+#endif
+#ifdef SHA512
+   register_hash(&sha512_desc);
+#endif
+#ifdef MD4
+   register_hash(&md4_desc);
+#endif
+#ifdef MD2
+   register_hash(&md2_desc);
+#endif
+
+#ifdef YARROW
+   register_prng(&yarrow_desc);
+#endif
+#ifdef SPRNG
+   register_prng(&sprng_desc);
+#endif
+}
+
+/* Prints one line per key on the ring (ID, system, key type, name, email and
+ * description), walking the list until the NON_KEY terminator node, then a
+ * blank line.
+ */
+void kr_display(pk_key *kr)
+{
+   static const char *system_names[] = { "NON-KEY", "RSA", "DH", "ECC" };
+   static const char *type_names[]   = { "PRIVATE", "PUBLIC", "PRIVATE_OPTIMIZED" };
+   pk_key *node;
+
+   for (node = kr; node->system != NON_KEY; node = node->next) {
+       printf("CRC [%08lx], System [%10s], Type [%20s], %s, %s, %s\n",
+              node->ID,
+              system_names[node->system],
+              type_names[node->key_type],
+              node->name,
+              node->email,
+              node->description);
+   }
+   printf("\n");
+}
+
+void kr_test_makekeys(pk_key **kr)   /* Initialises a fresh key ring in *kr
+                                       * (calls kr_init itself) and adds one
+                                       * DH, one ECC and one RSA key to it.
+                                       * Exits with -1 on the first error.
+                                       */
+{
+   if ((errno = kr_init(kr)) != CRYPT_OK) {
+      printf("KR init error %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* make a DH key */
+   printf("KR: Making DH key...\n");
+   if ((errno = kr_make_key(*kr, &prng, find_prng("yarrow"), DH_KEY, 128, "dhkey", "[email protected]", "dhkey one")) != CRYPT_OK) {
+      printf("Make key error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* make a ECC key */
+   printf("KR: Making ECC key...\n");
+   if ((errno = kr_make_key(*kr, &prng, find_prng("yarrow"), ECC_KEY, 20, "ecckey", "[email protected]", "ecckey one")) != CRYPT_OK) {
+      printf("Make key error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* make a RSA key */
+   printf("KR: Making RSA key...\n");
+   if ((errno = kr_make_key(*kr, &prng, find_prng("yarrow"), RSA_KEY, 128, "rsakey", "[email protected]", "rsakey one")) != CRYPT_OK) {
+      printf("Make key error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+}
+
+/* End-to-end exercise of the key ring (kr_*) API.
+ *
+ * Phases:
+ *   1. export / delete / re-import each key, first in its own type and then
+ *      as a public key;
+ *   2. (unless NO_FILE) save the ring to "ring.dat", reload it and remove the
+ *      file;
+ *   3. for each key: encrypt/decrypt a 16 byte key, sign/verify a 32 byte
+ *      hash (including a deliberately corrupted one) and print a fingerprint;
+ *   4. encrypt to an imported public key, then decrypt after importing the
+ *      matching private key.
+ * Progress goes to stdout; a hard error terminates with exit(-1).
+ */
+void kr_test(void)
+{
+   pk_key *kr, *_kr;
+   unsigned char buf[8192], buf2[8192], buf3[8192];
+   unsigned long len, publen, privlen;
+   int i, j, stat;
+#ifndef NO_FILE   
+   FILE *f;
+#endif
+
+   kr_test_makekeys(&kr);
+
+   printf("The original list:\n");
+   kr_display(kr);
+
+   /* phase 1a: round trip every key in its native type */
+   for (i = 0; i < 3; i++) {
+       len = sizeof(buf);
+       if ((errno = kr_export(kr, kr->ID, kr->key_type, buf, &len)) != CRYPT_OK) {
+          printf("Error exporting key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       printf("Exported key was: %lu bytes\n", len);
+       if ((errno = kr_del(&kr, kr->ID)) != CRYPT_OK) {
+          printf("Error deleting key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       kr_display(kr);
+       if ((errno = kr_import(kr, buf, len)) != CRYPT_OK) {
+          printf("Error importing key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       kr_display(kr);
+   }         
+
+   /* phase 1b: same again, exporting only the public half */
+   for (i = 0; i < 3; i++) {
+       len = sizeof(buf);
+       if ((errno = kr_export(kr, kr->ID, PK_PUBLIC, buf, &len)) != CRYPT_OK) {
+          printf("Error exporting key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       printf("Exported key was: %lu bytes\n", len);
+       if ((errno = kr_del(&kr, kr->ID)) != CRYPT_OK) {
+          printf("Error deleting key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       kr_display(kr);
+       if ((errno = kr_import(kr, buf, len)) != CRYPT_OK) {
+          printf("Error importing key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       kr_display(kr);
+   }
+
+   if ((errno = kr_clear(&kr)) != CRYPT_OK) {
+      printf("Error clearing ring: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+
+/* TEST output to file */
+#ifndef NO_FILE
+
+   /* kr_test_makekeys() calls kr_init() itself; an extra kr_init() here
+      would allocate a ring that is immediately overwritten and leaked */
+   kr_test_makekeys(&kr);
+
+   /* save to file */
+   f = fopen("ring.dat", "wb");
+   if (f == NULL) {
+      printf("kr_save error: unable to open ring.dat for writing\n");
+      exit(-1);
+   }
+   if ((errno = kr_save(kr, f, NULL)) != CRYPT_OK) {
+      printf("kr_save error %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   fclose(f);
+
+   /* delete and load */
+   if ((errno = kr_clear(&kr)) != CRYPT_OK) {
+      printf("clear error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   f = fopen("ring.dat", "rb");
+   if (f == NULL) {
+      printf("kr_load error: unable to open ring.dat for reading\n");
+      exit(-1);
+   }
+   if ((errno = kr_load(&kr, f, NULL)) != CRYPT_OK) {
+      printf("kr_load error %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   fclose(f);
+   remove("ring.dat");
+   printf("After load and save...\n");
+   kr_display(kr);
+  
+   if ((errno = kr_clear(&kr)) != CRYPT_OK) {
+      printf("clear error: %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+#endif
+
+/* test the packet encryption/sign stuff */
+   for (i = 0; i < 32; i++) buf[i] = (unsigned char)i;
+   kr_test_makekeys(&kr);
+   _kr = kr;
+   for (i = 0; i < 3; i++) {
+       printf("Testing a key with system %d, type %d:\t", _kr->system, _kr->key_type);
+       len = sizeof(buf2);
+       if ((errno = kr_encrypt_key(kr, _kr->ID, buf, 16, buf2, &len, &prng, find_prng("yarrow"), find_hash("md5"))) != CRYPT_OK) {
+          printf("Encrypt error, %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       len = sizeof(buf3);
+       if ((errno = kr_decrypt_key(kr, buf2, buf3, &len)) != CRYPT_OK) {
+          printf("decrypt error, %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       if (len != 16 || memcmp(buf3, buf, 16)) {
+          printf("kr_decrypt_key failed, %i, %lu\n", i, len);
+          exit(-1);
+       }
+       printf("kr_encrypt_key passed, ");
+
+       len = sizeof(buf2);
+       if ((errno = kr_sign_hash(kr, _kr->ID, buf, 32, buf2, &len, &prng, find_prng("yarrow"))) != CRYPT_OK) {
+          printf("kr_sign_hash failed, %i, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       printf("kr_sign_hash: ");
+       if ((errno = kr_verify_hash(kr, buf2, buf, 32, &stat)) != CRYPT_OK) {
+          printf("kr_sign_hash failed, %i, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       printf("%s, ", stat?"passed":"failed");
+       /* corrupt one bit: the signature must no longer verify */
+       buf[15] ^= 1;
+       if ((errno = kr_verify_hash(kr, buf2, buf, 32, &stat)) != CRYPT_OK) {
+          printf("kr_sign_hash failed, %i, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+       printf("%s\n", (!stat)?"passed":"failed");
+       buf[15] ^= 1;
+
+       /* note: the fingerprint overwrites the start of buf, which the next
+          iteration then uses as its (still self-consistent) test input */
+       len = sizeof(buf);
+       if ((errno = kr_fingerprint(kr, _kr->ID, find_hash("sha1"), buf, &len)) != CRYPT_OK) {
+          printf("kr_fingerprint failed, %i, %lu\n", i, len);
+          exit(-1);
+       }
+       printf("Fingerprint:  ");
+       for (j = 0; j < 20; j++) {
+           printf("%02x", buf[j]);
+           if (j < 19) printf(":");
+       }
+       printf("\n\n");
+
+       _kr = _kr->next;
+    }
+
+/* Test encrypting/decrypting to a public key */
+/* first dump the other two keys */
+   for (i = 0; i < 2; i++) {
+       if ((errno = kr_del(&kr, kr->ID)) != CRYPT_OK) {
+          printf("Error deleting key %d, %s\n", i, error_to_string(errno));
+          exit(-1);
+       }
+   }
+   kr_display(kr);
+
+   /* now export it as public and private; each blob keeps its own length so
+      the imports below are given the size of the buffer they actually read */
+   publen = sizeof(buf);
+   if ((errno = kr_export(kr, kr->ID, PK_PUBLIC, buf, &publen)) != CRYPT_OK) {
+       printf("Error exporting key  %s\n", error_to_string(errno));
+       exit(-1);
+   }
+
+   /* check boundaries */
+   memset(buf+publen, 0, sizeof(buf)-publen);
+
+   privlen = sizeof(buf2);
+   if ((errno = kr_export(kr, kr->ID, PK_PRIVATE, buf2, &privlen)) != CRYPT_OK) {
+       printf("Error exporting key  %s\n", error_to_string(errno));
+       exit(-1);
+   }
+
+   /* check boundaries */
+   memset(buf2+privlen, 0, sizeof(buf2)-privlen);
+
+   /* delete the key and import the public */
+   kr_clear(&kr);
+   if ((errno = kr_init(&kr)) != CRYPT_OK) {
+      printf("KR init error %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   kr_display(kr);
+   if ((errno = kr_import(kr, buf, publen)) != CRYPT_OK) {
+      printf("Error importing key %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   kr_display(kr);
+   
+   /* now encrypt a buffer */
+   for (i = 0; i < 16; i++) buf[i] = (unsigned char)i;
+   len = sizeof(buf3);
+   if ((errno = kr_encrypt_key(kr, kr->ID, buf, 16, buf3, &len, &prng, find_prng("yarrow"), find_hash("md5"))) != CRYPT_OK) {
+      printf("Encrypt error, %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   /* now delete the key and import the private one */
+   kr_clear(&kr);
+   if ((errno = kr_init(&kr)) != CRYPT_OK) {
+      printf("KR init error %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   kr_display(kr);
+   if ((errno = kr_import(kr, buf2, privlen)) != CRYPT_OK) {
+      printf("Error importing key %s\n", error_to_string(errno));
+      exit(-1);
+   }
+   kr_display(kr);
+
+   /* now decrypt */
+   len = sizeof(buf2);
+   if ((errno = kr_decrypt_key(kr, buf3, buf2, &len)) != CRYPT_OK) {
+      printf("decrypt error, %s\n", error_to_string(errno));
+      exit(-1);
+   }
+
+   printf("KR encrypt to public, decrypt with private: ");
+   if (len == 16 && !memcmp(buf2, buf, 16)) {
+      printf("passed\n"); 
+   } else {
+      printf("failed\n");
+   }
+
+   kr_clear(&kr);
+
+}   
+
+void test_errs(void)
+{
+   #define ERR(x)  printf("%25s => %s\n", #x, error_to_string(x));
+
+   ERR(CRYPT_OK);
+   ERR(CRYPT_ERROR);
+
+   ERR(CRYPT_INVALID_KEYSIZE);
+   ERR(CRYPT_INVALID_ROUNDS);
+   ERR(CRYPT_FAIL_TESTVECTOR);
+
+   ERR(CRYPT_BUFFER_OVERFLOW);
+   ERR(CRYPT_INVALID_PACKET);
+
+   ERR(CRYPT_INVALID_PRNGSIZE);
+   ERR(CRYPT_ERROR_READPRNG);
+
+   ERR(CRYPT_INVALID_CIPHER);
+   ERR(CRYPT_INVALID_HASH);
+   ERR(CRYPT_INVALID_PRNG);
+
+   ERR(CRYPT_MEM);
+
+   ERR(CRYPT_PK_TYPE_MISMATCH);
+   ERR(CRYPT_PK_NOT_PRIVATE);
+
+   ERR(CRYPT_INVALID_ARG);
+
+   ERR(CRYPT_PK_INVALID_TYPE);
+   ERR(CRYPT_PK_INVALID_SYSTEM);
+   ERR(CRYPT_PK_DUP);
+   ERR(CRYPT_PK_NOT_FOUND);
+   ERR(CRYPT_PK_INVALID_SIZE);
+
+   ERR(CRYPT_INVALID_PRIME_SIZE);
+}   
+
+
+
+/* Test driver: registers all algorithms, bootstraps the global yarrow PRNG
+ * with a fixed seed string, prints the build settings and then runs every
+ * self test and benchmark in turn.  Returns 0 if all of them return; the
+ * individual tests call exit(-1) on hard failures.
+ */
+int main(void)
+{
+#ifdef SONY_PS2
+  TIMER_Init();
+#endif
+
+ register_all_algs();
+
+ if ((errno = yarrow_start(&prng)) != CRYPT_OK) {
+    printf("yarrow_start: %s\n", error_to_string(errno));
+ }
+ if ((errno = yarrow_add_entropy("hello", 5, &prng)) != CRYPT_OK) {
+    printf("yarrow_add_entropy: %s\n", error_to_string(errno));
+ }
+ if ((errno = yarrow_ready(&prng)) != CRYPT_OK) {
+    printf("yarrow_ready: %s\n", error_to_string(errno));
+ }
+
+ /* never pass a non-literal as the format string: a '%' in the build
+    settings text would otherwise be interpreted as a conversion */
+ printf("%s", crypt_build_settings);
+ test_errs();
+
+#ifdef HMAC
+  printf("HMAC: %s\n", hmac_test() == CRYPT_OK ? "passed" : "failed");
+#endif
+
+ store_tests();
+ cipher_tests();
+ hash_tests();
+
+ ecb_tests();
+ cbc_tests();
+ ctr_tests();
+ ofb_tests();
+ cfb_tests();
+
+ rng_tests();
+ //test_prime();
+
+ kr_test();
+ rsa_test();
+ pad_test();
+ ecc_tests();
+ dh_tests();
+
+ gf_tests();
+ base64_test();
+
+ time_ecb();
+ time_hash();
+
+#ifdef SONY_PS2
+  TIMER_Shutdown();
+#endif
+
+ return 0;
+}

+ 87 - 30
ecc.c

@@ -239,13 +239,12 @@ static void del_point(ecc_point *p)
    XFREE(p);
 }
 
-
 /* double a point R = 2P, R can be P*/
-static int dbl_point(ecc_point *P, ecc_point *R, mp_int *modulus)
+static int dbl_point(ecc_point *P, ecc_point *R, mp_int *modulus, mp_int *mu)
 {
    mp_int s, tmp, tmpx;
    int res;
-
+   
    if (mp_init_multi(&s, &tmp, &tmpx, NULL) != MP_OKAY) { 
       return CRYPT_MEM;
    }
@@ -254,12 +253,18 @@ static int dbl_point(ecc_point *P, ecc_point *R, mp_int *modulus)
    if (mp_mul_2(&P->y, &tmp) != MP_OKAY)                   { goto error; } /* tmp = 2*y */
    if (mp_invmod(&tmp, modulus, &tmp) != MP_OKAY)          { goto error; } /* tmp = 1/tmp mod modulus */
    if (mp_sqr(&P->x, &s) != MP_OKAY)                       { goto error; } /* s = x^2  */
+   if (mp_reduce(&s, modulus, mu) != MP_OKAY)            { goto error; }
    if (mp_mul_d(&s,(mp_digit)3, &s) != MP_OKAY)            { goto error; } /* s = 3*(x^2) */
    if (mp_sub_d(&s,(mp_digit)3, &s) != MP_OKAY)            { goto error; } /* s = 3*(x^2) - 3 */
-   if (mp_mulmod(&s, &tmp, modulus, &s) != MP_OKAY)        { goto error; } /* s = tmp * s mod modulus */
+   if (mp_cmp_d(&s, 0) == MP_LT) {                         /* if s < 0 add modulus */
+      if (mp_add(&s, modulus, &s) != MP_OKAY)              { goto error; }
+   }
+   if (mp_mul(&s, &tmp, &s) != MP_OKAY)                    { goto error; } /* s = tmp * s mod modulus */
+   if (mp_reduce(&s, modulus, mu) != MP_OKAY)            { goto error; }
 
    /* Xr = s^2 - 2Xp */
    if (mp_sqr(&s,  &tmpx) != MP_OKAY)                      { goto error; } /* tmpx = s^2  */
+   if (mp_reduce(&tmpx, modulus, mu) != MP_OKAY)         { goto error; } /* tmpx = tmpx mod modulus */
    if (mp_sub(&tmpx, &P->x, &tmpx) != MP_OKAY)             { goto error; } /* tmpx = tmpx - x */
    if (mp_submod(&tmpx, &P->x, modulus, &tmpx) != MP_OKAY) { goto error; } /* tmpx = tmpx - x mod modulus */
 
@@ -279,11 +284,11 @@ done:
 }
 
 /* add two different points over Z/pZ, R = P + Q, note R can equal either P or Q */
-static int add_point(ecc_point *P, ecc_point *Q, ecc_point *R, mp_int *modulus)
+static int add_point(ecc_point *P, ecc_point *Q, ecc_point *R, mp_int *modulus, mp_int *mu)
 {
    mp_int s, tmp, tmpx;
    int res;
-
+   
    if (mp_init(&tmp) != MP_OKAY) { 
       return CRYPT_MEM;
    }
@@ -297,7 +302,7 @@ static int add_point(ecc_point *P, ecc_point *Q, ecc_point *R, mp_int *modulus)
    if (mp_cmp(&P->x, &Q->x) == MP_EQ)
       if (mp_cmp(&P->y, &Q->y) == MP_EQ || mp_cmp(&P->y, &tmp) == MP_EQ) {
          mp_clear(&tmp);
-         return dbl_point(P, R, modulus);
+         return dbl_point(P, R, modulus, mu);
       }
 
    if (mp_init_multi(&tmpx, &s, NULL) != MP_OKAY) { 
@@ -306,13 +311,21 @@ static int add_point(ecc_point *P, ecc_point *Q, ecc_point *R, mp_int *modulus)
    }
 
    /* get s = (Yp - Yq)/(Xp-Xq) mod p */
-   if (mp_submod(&P->x, &Q->x, modulus, &tmp) != MP_OKAY)     { goto error; } /* tmp = Px - Qx mod modulus */
+   if (mp_sub(&P->x, &Q->x, &tmp) != MP_OKAY)                 { goto error; } /* tmp = Px - Qx mod modulus */
+   if (mp_cmp_d(&tmp, 0) == MP_LT) {                                          /* if tmp<0 add modulus */
+      if (mp_add(&tmp, modulus, &tmp) != MP_OKAY)             { goto error; }
+   }
    if (mp_invmod(&tmp, modulus, &tmp) != MP_OKAY)             { goto error; } /* tmp = 1/tmp mod modulus */
    if (mp_sub(&P->y, &Q->y, &s) != MP_OKAY)                   { goto error; } /* s = Py - Qy mod modulus */
-   if (mp_mulmod(&s, &tmp, modulus, &s) != MP_OKAY)           { goto error; } /* s = s * tmp mod modulus */
+   if (mp_cmp_d(&s, 0) == MP_LT) {                                            /* if s<0 add modulus */
+      if (mp_add(&s, modulus, &s) != MP_OKAY)                 { goto error; }
+   }
+   if (mp_mul(&s, &tmp, &s) != MP_OKAY)                       { goto error; } /* s = s * tmp mod modulus */
+   if (mp_reduce(&s, modulus, mu) != MP_OKAY)               { goto error; }
 
    /* Xr = s^2 - Xp - Xq */
-   if (mp_sqrmod(&s, modulus, &tmp) != MP_OKAY)               { goto error; } /* tmp = s^2 mod modulus */
+   if (mp_sqr(&s, &tmp) != MP_OKAY)                           { goto error; } /* tmp = s^2 mod modulus */
+   if (mp_reduce(&tmp, modulus, mu) != MP_OKAY)             { goto error; }
    if (mp_sub(&tmp, &P->x, &tmp) != MP_OKAY)                  { goto error; } /* tmp = tmp - Px */
    if (mp_sub(&tmp, &Q->x, &tmpx) != MP_OKAY)                 { goto error; } /* tmpx = tmp - Qx */
 
@@ -334,32 +347,74 @@ done:
 /* perform R = kG where k == integer and G == ecc_point */
 static int ecc_mulmod(mp_int *k, ecc_point *G, ecc_point *R, mp_int *modulus)
 {
-   ecc_point *tG, *M[14];
-   int i, j, z, res;
+   ecc_point *tG, *M[30];
+   int i, j, z, res, Q;
    mp_digit d;
    unsigned char bits[150], m, first;
+   mp_int mu;
+   
+   
+   if ((USED(k) * MP_DIGIT_BIT) > 256) {
+      Q = 5;
+   } else {
+      Q = 4;
+   }
+   
+   if (mp_init(&mu) != MP_OKAY) {
+      return CRYPT_MEM;
+   }
+   
+  /* init barrett reduction */
+  mp_set(&mu, 1); 
+  mp_lshd(&mu, 2 * USED(modulus));
+  if (mp_div(&mu, modulus, &mu, NULL) != MP_OKAY) {
+    mp_clear(&mu);
+    return CRYPT_MEM;
+  }
+   
    
-   /* init M tab */
-   for (i = 0; i < 14; i++) {
+   /* init M tab (alloc here, calculate below)
+    
+    This table holds the first 2^Q multiples of the input base point G, that is 
+    
+       M[x] = x * G
+       
+    Where G is the point and x is a scalar.  The implementation is optimized
+    since M[0] == 0 and M[1] == G so there is no need to waste space for those.  In
+    effect M'[x] == M[x+2] where M'[] is the table we make.  If M[0] or M[1] are needed
+    we handle them with if statements.   
+   
+   */
+   for (i = 0; i < ((1<<Q)-2); i++) {
        M[i] = new_point();
        if (M[i] == NULL) {
           for (j = 0; j < i; j++) {
               del_point(M[j]);
           }
+          mp_clear(&mu);
           return CRYPT_MEM;
        }
    }
    
-   /* get bits of k */
+   /* get bits of k in groupings of Q 
+   
+    The multiplicand is read in groupings of Q bits.  This is because the multiplication
+    routine is a 2^Q-ary left-to-right method (see HAC chapter 14, algorithm 14.82).
+   */
    first = m = (unsigned char)0;
    for (z = i = 0; z < (int)USED(k); z++) {
+       /* grab a digit from the mp_int, these have MP_DIGIT_BIT bits in them */
        d = DIGIT(k, z);
        for (j = 0; j < (int)MP_DIGIT_BIT; j++) {
+           /* OR the bits against an accumulator */
            first |= (d&1)<<(unsigned)(m++);
-           if (m == (unsigned char)4) {
+           /* if the bit count is Q then we have a Q-bit word ready */
+           if (m == (unsigned char)Q) {
+              /* store the four bit word and reset counters */
               bits[i++] = first;
               first = m = (unsigned char)0;
            }
+           /* shift the digit down to extract the next bit */
            d >>= 1;
        }
    }
@@ -371,34 +426,35 @@ static int ecc_mulmod(mp_int *k, ecc_point *G, ecc_point *R, mp_int *modulus)
 
    /* make a copy of G incase R==G */
    tG = new_point();
-   if (tG == NULL)                                          { goto error; }
+   if (tG == NULL)                                               { goto error; }
 
    /* skip leading digits which are zero */   
    --i; while (i != 0 && bits[i] == (unsigned char)0) { --i; }
    
+   /* if the multiplicand has no non-zero Q-bit words it's invalid. */
    if (i == 0) {
       res = CRYPT_INVALID_ARG;
       goto done;
    }
    
-   /* now calc the M tab, note that there are only 14 spots, the normal M[0] is a no-op, and M[1] is the input
+   /* now calc the M tab, note that there are only 2^Q - 2 spots, the normal M[0] is a no-op, and M[1] is the input
       point (saves ram)
    */
    
    /* M[0] now is 2*G */
-   if (dbl_point(G, M[0], modulus) != CRYPT_OK)             { goto error; }
-   for (j = 1; j < 14; j++) {
-       if (add_point(M[j-1], G, M[j], modulus) != CRYPT_OK) { goto error; }
+   if (dbl_point(G, M[0], modulus, &mu) != CRYPT_OK)                  { goto error; }
+   for (j = 1; j < ((1<<Q)-2); j++) {
+       if (add_point(M[j-1], G, M[j], modulus, &mu) != CRYPT_OK)      { goto error; }
    }
   
    /* tG = G */
-   if (mp_copy(&G->x, &tG->x) != MP_OKAY)                   { goto error; }
-   if (mp_copy(&G->y, &tG->y) != MP_OKAY)                   { goto error; }
+   if (mp_copy(&G->x, &tG->x) != MP_OKAY)                        { goto error; }
+   if (mp_copy(&G->y, &tG->y) != MP_OKAY)                        { goto error; }
 
    /* set result M[bits[i]] */
    if (bits[i] == (unsigned char)1) {
-     if (mp_copy(&G->x, &R->x) != MP_OKAY)                  { goto error; }
-     if (mp_copy(&G->y, &R->y) != MP_OKAY)                  { goto error; }
+     if (mp_copy(&G->x, &R->x) != MP_OKAY)                       { goto error; }
+     if (mp_copy(&G->y, &R->y) != MP_OKAY)                       { goto error; }
    } else if (bits[i] >= (unsigned char)2) {
      if (mp_copy(&M[(int)bits[i]-2]->x, &R->x) != MP_OKAY)       { goto error; }
      if (mp_copy(&M[(int)bits[i]-2]->y, &R->y) != MP_OKAY)       { goto error; }
@@ -406,8 +462,8 @@ static int ecc_mulmod(mp_int *k, ecc_point *G, ecc_point *R, mp_int *modulus)
    
    while (--i >= 0) {
        /* double */
-       for (j = 0; j < 4; j++) {
-          if (dbl_point(R, R, modulus) != CRYPT_OK)               { goto error; }
+       for (j = 0; j < Q; j++) {
+          if (dbl_point(R, R, modulus, &mu) != CRYPT_OK)              { goto error; }
        }
        
        /* now based on the value of bits[i] we do ops */
@@ -415,10 +471,10 @@ static int ecc_mulmod(mp_int *k, ecc_point *G, ecc_point *R, mp_int *modulus)
           /* nop */
        } else if (bits[i] == (unsigned char)1) {
           /* add base point */
-          if (add_point(R, tG, R, modulus) != CRYPT_OK)           { goto error; }
+          if (add_point(R, tG, R, modulus, &mu) != CRYPT_OK)          { goto error; }
        } else {
           /* other case */
-          if (add_point(R, M[(int)bits[i] - 2], R, modulus) != CRYPT_OK) { goto error; }
+          if (add_point(R, M[(int)bits[i] - 2], R, modulus, &mu) != CRYPT_OK) { goto error; }
        }
    }
    
@@ -428,9 +484,10 @@ error:
    res = CRYPT_MEM;
 done:
    del_point(tG);
-   for (i = 0; i < 14; i++) {
+   for (i = 0; i < ((1<<Q)-2); i++) {
        del_point(M[i]);
    }
+   mp_clear(&mu);
 #ifdef CLEAN_STACK
    zeromem(bits, sizeof(bits)); 
 #endif

+ 15 - 7
ecc_sys.c

@@ -306,7 +306,7 @@ int ecc_verify_hash(const unsigned char *sig, unsigned long siglen,
 {
    ecc_point *mG;
    ecc_key   pubkey;
-   mp_int b, p, m;
+   mp_int b, p, m, mu;
    unsigned long x, y;
    int res, err;
 
@@ -357,14 +357,14 @@ int ecc_verify_hash(const unsigned char *sig, unsigned long siglen,
    y += 4;
 
    /* init values */
-   if (mp_init_multi(&b, &m, &p, NULL) != MP_OKAY) { 
+   if (mp_init_multi(&b, &m, &p, &mu, NULL) != MP_OKAY) { 
       ecc_free(&pubkey);
       return CRYPT_MEM;
    }
 
    mG = new_point();
    if (mG == NULL) { 
-      mp_clear_multi(&b, &m, &p, NULL);
+      mp_clear_multi(&b, &m, &p, &mu, NULL);
       ecc_free(&pubkey);
       return CRYPT_MEM;
    } 
@@ -378,12 +378,20 @@ int ecc_verify_hash(const unsigned char *sig, unsigned long siglen,
    
    /* load prime */
    if (mp_read_radix(&p, (unsigned char *)sets[key->idx].prime, 64) != MP_OKAY)    { goto error; }
+   
+   /* calculate barrett stuff */
+   mp_set(&mu, 1); 
+   mp_lshd(&mu, 2 * USED(&p));
+   if (mp_div(&mu, &p, &mu, NULL) != MP_OKAY) {
+     res = CRYPT_MEM;
+     goto done;
+   }
 
    /* get bA */
-   if (ecc_mulmod(&b, &pubkey.pubkey, &pubkey.pubkey, &p) != CRYPT_OK)             { goto error; }
+   if (ecc_mulmod(&b, &pubkey.pubkey, &pubkey.pubkey, &p) != CRYPT_OK)                  { goto error; }
    
    /* get bA + Y */
-   if (add_point(&pubkey.pubkey, &key->pubkey, &pubkey.pubkey, &p) != CRYPT_OK)    { goto error; }
+   if (add_point(&pubkey.pubkey, &key->pubkey, &pubkey.pubkey, &p, &mu) != CRYPT_OK)    { goto error; }
 
    /* get mG */
    if (mp_read_radix(&mG->x, (unsigned char *)sets[key->idx].Gx, 64) != MP_OKAY)   { goto error; }
@@ -403,7 +411,7 @@ error:
 done:
    del_point(mG);
    ecc_free(&pubkey);
-   mp_clear_multi(&p, &m, &b, NULL);
-   return CRYPT_OK;
+   mp_clear_multi(&p, &m, &b, &mu, NULL);
+   return res;
 }
 

+ 35 - 0
examples/ch2-01.c

@@ -0,0 +1,35 @@
+/* 
+ * Name      : ch2-01.c
+ * Purpose   : Demonstration of reading the RNG
+ * Author    : Tom St Denis
+ *
+ * History   : v0.81 Initial release
+ */
+ 
+ /* ch2-02-2 */
+ #include <mycrypt.h>
+ 
+ int main(void) 
+ {
+    unsigned char buf[16];
+    unsigned long len;
+    unsigned long ix;   /* unsigned: it is compared against sizeof(buf) below */
+    
+    /* ask the RNG to fill buf; the return value is the number of bytes read */
+    len = rng_get_bytes(buf, sizeof(buf), NULL);
+    
+    /* a short read is a failure, report it through the exit status as well */
+    if (len != sizeof(buf)) {
+       printf("Error: Only read %lu bytes.\n", len);
+       return EXIT_FAILURE;
+    }
+    printf("Read %lu bytes\n", len);
+    for (ix = 0; ix < sizeof(buf); ix++) {
+       printf("%02x ", buf[ix]);
+    }
+    printf("\n");
+    
+    return EXIT_SUCCESS;
+}
+/* ch2-02-2 */
+

+ 80 - 0
legal.txt

@@ -0,0 +1,80 @@
+Legal Issues Regarding LibTomCrypt
+Tom St Denis
+
+The bulk of the code was written or donated under the TDCAL "Tom Doesn't Care About License" license.  It entitles the developer to free rein on
+the use and distribution of derived works, commercial or otherwise.  Certain files are taken from public domain packages.
+
+AES.C
+-----
+Author: Dr Brian Gladman
+Email : [email protected]
+Disclaimer (verbatim)
+----
+/* Copyright in this implementation is held by Dr B R Gladman but I     */
+/* hereby give permission for its free direct or derivative use subject */
+/* to acknowledgment of its origin and compliance with any conditions   */
+/* that the originators of the algorithm place on its exploitation.     */
+----
+Status:  Public Domain, modified [not original]
+
+DES.C
+-----
+Author:  Unknown,  Submitted by Dobes Vandermeer
+Email :  [email protected]
+Disclaimer:  None
+Status:  TDCAL submission by Dobes, modified [not original]
+
+MD4.C
+-----
+Author:  Dobes Vandermeer
+Email : [email protected]
+Disclaimer:  None
+Status:  TDCAL submission by Dobes, modified [not original]
+
+HMAC.C
+------
+Author:  Dobes Vandermeer
+Email: [email protected]
+Disclaimer:  None
+Status:  TDCAL submission by Dobes, modified [not original]
+
+MPI.C
+-----
+Author:  Original [v0.80 and prior] Michael Fromberger, Current [v0.81 and later] Tom St Denis
+Email:  [email protected]
+Disclaimer:  None
+Status:  TDCAL submission by Tom
+
+RC2.C
+-----
+Author: Unknown, found on public domain archive [www.wiretapped.net]
+Email: none
+Disclaimer:  Possible legal issues [should remove RC2/RC5/RC6 to simplify legal issues]
+Status:  Public Domain, questionable legal status, modified [not original]
+
+SAFER.C
+-------
+Author: [copied verbatim]
+----
+* AUTHOR:         Richard De Moliner ([email protected])
+*                 Signal and Information Processing Laboratory
+*                 Swiss Federal Institute of Technology
+*                 CH-8092 Zuerich, Switzerland
+----
+Email: [email protected]
+Disclaimer:  Appears to be Public Domain [not quite sure]
+Status: Public Domain, modified [not original]
+
+SERPENT.C
+---------
+Author:  Dr. Brian Gladman
+Email : [email protected]
+Disclaimer (verbatim)
+----
+/* Copyright in this implementation is held by Dr B R Gladman but I     */
+/* hereby give permission for its free direct or derivative use subject */
+/* to acknowledgment of its origin and compliance with any conditions   */
+/* that the originators of the algorithm place on its exploitation.     */
+----
+Status:  Public Domain, modified [not original]
+

+ 19 - 193
makefile

@@ -9,7 +9,7 @@
 # a build. This is easy to remedy though, for those that have problems.
 
 # The version
-VERSION=0.80
+VERSION=0.81
 
 #ch1-01-1
 # Compiler and Linker Names
@@ -21,200 +21,22 @@ AR=ar
 ARFLAGS=r
 #ch1-01-1
 
-#ch1-01-2
-# here you can set the malloc/calloc/free functions you want
-XMALLOC=malloc
-XCALLOC=calloc
-XREALLOC=realloc
-XFREE=free
-
-# you can redefine the clock
-XCLOCK=clock
-XCLOCKS_PER_SEC=CLOCKS_PER_SEC
-#ch1-01-2
-
 #ch1-01-3
 # Compilation flags. Note the += does not write over the user's CFLAGS!
-CFLAGS += -c -I./ -Wall -Wsign-compare -W -Wno-unused -Werror  \
-   -DXMALLOC=$(XMALLOC) -DXCALLOC=$(XCALLOC) -DXFREE=$(XFREE) \
-   -DXREALLOC=$(XREALLOC) -DXCLOCK=$(XCLOCK) \
-   -DXCLOCKS_PER_SEC=$(XCLOCKS_PER_SEC)
+CFLAGS += -c -I./ -Wall -Wsign-compare -W -Wno-unused -Werror
 
 # optimize for SPEED
 #CFLAGS += -O3 -funroll-loops
 
-# optimize for SIZE 
-CFLAGS += -Os 
+# optimize for SIZE
+CFLAGS += -Os
 
-# compile for DEBUGGING 
+# compile for DEBUGGING
 #CFLAGS += -g3
 #ch1-01-3
 
 #These flags control how the library gets built.
 
-#ch1-01-4
-# Use small code variants of functions when possible?  
-CFLAGS += -DSMALL_CODE
-
-# no file support, when defined the library will not 
-# have any functions that can read/write files 
-# (comment out to have file support)
-#CFLAGS += -DNO_FILE
-
-# Support the UNIX /dev/random or /dev/urandom
-CFLAGS += -DDEVRANDOM
-
-# Use /dev/urandom first on devices where 
-# /dev/random is too slow 
-#CFLAGS += -DTRY_URANDOM_FIRST
-
-# Clean the stack after sensitive functions.  Not 
-# always required... With this defined most of 
-# the ciphers and hashes will clean their stack area
-# after usage with a (sometimes) huge penalty in speed.
-# Normally this is not required if you simply lock your 
-# stack and wipe it when your program is done.
-#
-#CFLAGS += -DCLEAN_STACK
-#ch1-01-4
-
-#ch1-01-5
-# What algorithms to include? comment out and rebuild to remove them
-CFLAGS += -DBLOWFISH
-CFLAGS += -DRC2
-CFLAGS += -DRC5
-CFLAGS += -DRC6
-CFLAGS += -DSERPENT
-CFLAGS += -DSAFERP
-CFLAGS += -DSAFER
-CFLAGS += -DRIJNDAEL
-CFLAGS += -DXTEA
-CFLAGS += -DTWOFISH
-CFLAGS += -DDES
-CFLAGS += -DCAST5
-CFLAGS += -DNOEKEON
-#ch1-01-5
-
-#You can also customize the Twofish code.  All four combinations 
-#of the flags are possible but only three of them make sense.
-#
-#Both undefined:  Very fast, requires ~4.2KB of ram per scheduled key
-#Both defined  :  Slow, requires only ~100 bytes of ram per scheduled key
-#
-#If defined on their own
-#_SMALL defined:  Very Slow, small code only ~100 bytes of ram
-#_TABLES defined: Very fast, not faster than if both were undefined.  Code is ~1KB bigger
-#                 faster keysetup though...
-
-#ch1-01-6
-# Small Ram Variant of Twofish.  For this you must have TWOFISH 
-# defined.  This variant requires about 4kb less memory but 
-# is considerably slower.  It is ideal when high throughput is 
-# less important than conserving memory. By default it is not 
-# defined which means the larger ram (about 4.2Kb used) variant 
-# is built.
-#CFLAGS += -DTWOFISH_SMALL
-
-# Tell Twofish to use precomputed tables.  If you want to use 
-# the small table variant of Twofish you may want to turn 
-# this on.  Essentially it tells Twofish to use precomputed 
-# S-boxes (Q0 and Q1) as well as precomputed GF 
-# multiplications [in the MDS].  This speeds up the cipher 
-# somewhat.
-#CFLAGS += -DTWOFISH_TABLES 
-#ch1-01-6
-
-#Use fast PK routines.  Basically this limits the size of the private key in the
-#DH system to 256 bits.  The group order remains unchanged so the best
-#attacks are still GNFS (for DH upto 2560-bits)
-#
-#This will only speed up the key generation and encryption routines.  It lowers the
-#security so its by default not turned on.  USE AT YOUR RISK!
-#CFLAGS += -DFAST_PK
-
-#ch1-01-7
-# Chaining modes
-CFLAGS += -DCFB
-CFLAGS += -DOFB
-CFLAGS += -DECB
-CFLAGS += -DCBC
-CFLAGS += -DCTR
-#ch1-01-7
-
-#ch1-01-8
-#One-way hashes
-CFLAGS += -DSHA512
-CFLAGS += -DSHA384
-CFLAGS += -DSHA256
-CFLAGS += -DTIGER
-CFLAGS += -DSHA1
-CFLAGS += -DMD5
-CFLAGS += -DMD4
-CFLAGS += -DMD2
-#ch1-01-8
-
-#ch1-01-9
-# prngs 
-CFLAGS += -DYARROW
-CFLAGS += -DSPRNG
-CFLAGS += -DRC4
-#ch1-01-9
-
-#ch1-01-10
-# PK code 
-CFLAGS += -DMRSA
-CFLAGS += -DMDH
-CFLAGS += -DMECC
-CFLAGS += -DKR
-#ch1-01-10
-
-#ch1-01-12
-# Control which built in DH or ECC key paramaters
-# are to be allowed
-CFLAGS += -DDH768
-CFLAGS += -DDH1024
-CFLAGS += -DDH1280
-CFLAGS += -DDH1536
-CFLAGS += -DDH1792
-CFLAGS += -DDH2048
-CFLAGS += -DDH2560
-CFLAGS += -DDH3072
-CFLAGS += -DDH4096
-
-CFLAGS += -DECC160
-CFLAGS += -DECC192
-CFLAGS += -DECC224
-CFLAGS += -DECC256
-CFLAGS += -DECC384
-CFLAGS += -DECC521
-
-#ch1-01-12
-
-#ch1-01-11
-# base64 
-CFLAGS += -DBASE64
-
-# include GF math routines?
-# (not currently used by anything internally)
-#CFLAGS += -DGF
-
-# include large integer math routines? (required by the PK code)
-CFLAGS += -DMPI
-
-# use the fast exptmod operation (used in dsa/rsa/dh and is_prime)
-# This uses slightly more heap than the old code [only during the function call]
-# this is also fairly faster than the previous code
-CFLAGS += -DMPI_FASTEXPT
-
-# use a "low" mem variant of the fast exptmod.  It is still always 
-# faster then the old exptmod but its savings drops off after 
-# 1024 to 2048-bits 
-#CFLAGS += -DMPI_FASTEXPT_LOWMEM
-
-# include HMAC support
-CFLAGS += -DHMAC
-#ch1-01-11
-
 #Output filenames for various targets.
 LIBNAME=libtomcrypt.a
 TEST=test
@@ -224,8 +46,11 @@ SMALL=small
 
 #LIBPATH-The directory for libtomcrypt to be installed to.
 #INCPATH-The directory to install the header files for libtomcrypt.
+#DATAPATH-The directory to install the pdf docs.
+DESTDIR=
 LIBPATH=/usr/lib
 INCPATH=/usr/include
+DATAPATH=/usr/share/doc/libtomcrypt/pdf
 
 #List of objects to compile.
 OBJECTS=keyring.o gf.o mem.o sprng.o ecc.o base64.o dh.o rsa.o \
@@ -246,10 +71,9 @@ LEFTOVERS=*.dvi *.log *.aux *.toc *.idx *.ilg *.ind
 COMPRESSED=crypt.tar.bz2 crypt.zip crypt.tar.gz
 
 #Header files used by libtomcrypt.
-HEADERS=mpi-types.h mpi-config.h mpi.h \
-mycrypt_cfg.h mycrypt_gf.h mycrypt_kr.h \
+HEADERS=mpi.h mycrypt_cfg.h mycrypt_gf.h mycrypt_kr.h \
 mycrypt_misc.h  mycrypt_prng.h mycrypt_cipher.h  mycrypt_hash.h \
-mycrypt_macros.h  mycrypt_pk.h mycrypt.h mycrypt_argchk.h
+mycrypt_macros.h  mycrypt_pk.h mycrypt.h mycrypt_argchk.h mycrypt_custom.h
 
 #The default rule for make builds the libtomcrypt library.
 default:library mycrypt.h mycrypt_cfg.h
@@ -286,17 +110,19 @@ small: library $(SMALLOBJECTS)
 #as root in order to have a high enough permission to write to the correct
 #directories and to set the owner and group to root.
 install: library docs
-	install -g root -o root $(LIBNAME) $(LIBPATH)
-	install -g root -o root $(HEADERS) $(INCPATH)
-	mkdir -p /usr/doc/libtomcrypt/pdf
-	cp crypt.pdf /usr/doc/libtomcrypt/pdf/
+	install -d -g root -o root $(DESTDIR)$(LIBPATH)
+	install -d -g root -o root $(DESTDIR)$(INCPATH)
+	install -d -g root -o root $(DESTDIR)$(DATAPATH)
+	install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
+	install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
+	install -g root -o root crypt.pdf $(DESTDIR)$(DATAPATH)
 
 #This rule cleans the source tree of all compiled code, not including the pdf
 #documentation.
 clean:
 	rm -f $(OBJECTS) $(TESTOBJECTS) $(HASHOBJECTS) $(CRYPTOBJECTS) $(SMALLOBJECTS) $(LEFTOVERS) $(LIBNAME)
 	rm -f $(TEST) $(HASH) $(COMPRESSED)
-	rm -f *stackdump *.lib *.exe *.obj demos/*.obj *.bat makefile.out mycrypt_custom.h
+	rm -f *stackdump *.lib *.exe *.obj demos/*.obj *.bat
 
 #This builds the crypt.pdf file. Note that the rm -f *.pdf has been removed
 #from the clean command! This is because most people would like to keep the
@@ -312,6 +138,6 @@ docs: crypt.tex
        
 #zipup the project (take that!)
 zipup: clean docs
-	chdir .. ; rm -rf crypt* libtomcrypt-$(VERSION) ; mkdir libtomcrypt-$(VERSION) ; \
+	cd .. ; rm -rf crypt* libtomcrypt-$(VERSION) ; mkdir libtomcrypt-$(VERSION) ; \
 	cp -R ./libtomcrypt/* ./libtomcrypt-$(VERSION)/ ; tar -c libtomcrypt-$(VERSION)/* > crypt-$(VERSION).tar ; \
-	bzip2 -9vv crypt-$(VERSION).tar ; zip -9 -r crypt-$(VERSION).zip libtomcrypt-$(VERSION)/*
+	bzip2 -9vv crypt-$(VERSION).tar ; zip -9 -r crypt-$(VERSION).zip libtomcrypt-$(VERSION)/*

+ 24 - 0
makefile.out

@@ -0,0 +1,24 @@
+#makefile generated with config.pl
+#
+#Tom St Denis ([email protected], http://tom.iahu.ca) 
+
+CC = gcc 
+AR = ar 
+LD = ld 
+CFLAGS += -Os -Wall -Wsign-compare -W -Wno-unused -Werror -I./  
+
+default: library
+
+OBJECTS = keyring.o gf.o mem.o sprng.o ecc.o base64.o dh.o rsa.o bits.o yarrow.o cfb.o ofb.o ecb.o ctr.o cbc.o hash.o tiger.o sha1.o md5.o md4.o md2.o sha256.o sha512.o xtea.o aes.o serpent.o des.o safer_tab.o safer.o safer+.o rc4.o rc2.o rc6.o rc5.o cast5.o noekeon.o blowfish.o crypt.o ampi.o mpi.o prime.o twofish.o packet.o hmac.o strings.o
+
+rsa.o: rsa_sys.c
+dh.o: dh_sys.c
+ecc.o: ecc_sys.c
+
+library: $(OBJECTS)
+	 $(AR) r libtomcrypt.a $(OBJECTS)
+	 ranlib libtomcrypt.a
+
+clean:
+	rm -f $(OBJECTS) libtomcrypt.a 
+

+ 0 - 311
makefile.ps2

@@ -1,311 +0,0 @@
-# MAKEFILE for linux GCC
-#
-# Tom St Denis
-# Modified by Clay Culver
-#
-# NOTE: This should later be replaced by autoconf/automake scripts, but for
-# the time being this is actually pretty clean. The only ugly part is
-# handling CFLAGS so that the x86 specific optimizations don't break
-# a build. This is easy to remedy though, for those that have problems.
-
-#Compiler and Linker Names
-CC=ee-gcc
-LD=ee-ld
-
-# PlayStation(tm) 2 specifics
-TOP       = /usr/local/sce/ee
-LIBDIR    = $(TOP)/lib
-INCDIR    = $(TOP)/include/
-COMMONDIR = $(TOP)/../common/include/
-LCFILE    = $(LIBDIR)/app.cmd
-LDFLAGS   = -DSONY_PS2 -DSONY_PS2_EE -Wl,-Map,$(@).map -mno-crt0 -L$(LIBDIR) -lm
-AS        = ee-gcc
-ASFLAGS   = -DSONY_PS2 -DSONY_PS2_EE -c -xassembler-with-cpp -Wa,-al
-EXT       = .elf
-CFLAGS   += -DSONY_PS2 -DSONY_PS2_EE -Wa,-al -Wno-unused -Werror \
-		-fno-common -fno-strict-aliasing -I$(INCDIR) -I$(COMMONDIR)
-
-#Archiver [makes .a files]
-AR=ee-ar
-ARFLAGS=rs
-
-#here you can set the malloc/calloc/free functions you want
-XMALLOC=malloc
-XCALLOC=calloc
-XREALLOC=realloc
-XFREE=free
-
-#you can redefine the clock
-XCLOCK=TIMER_clock
-XCLOCKS_PER_SEC=576000
-
-#Compilation flags. Note the += does not write over the user's CFLAGS!
-CFLAGS += -c -I./ -Wall -Wsign-compare -W -Wno-unused -Werror  \
-   -DXMALLOC=$(XMALLOC) -DXCALLOC=$(XCALLOC) -DXFREE=$(XFREE) \
-   -DXREALLOC=$(XREALLOC) -DXCLOCK=$(XCLOCK) \
-   -DXCLOCKS_PER_SEC=$(XCLOCKS_PER_SEC)
-
-#optimize for SPEED (comment out SIZE line as well)
-#CFLAGS += -O3 -fomit-frame-pointer -funroll-loops
-
-#optimize for SIZE (comment out SPEED line as well)
-CFLAGS += -Os 
-
-#Use small code variants of functions when possible?  (Slows it down!)
-CFLAGS += -DSMALL_CODE
-
-#no file support, when defined the library will not have any functions that can read/write files 
-#(comment out to have file support)
-CFLAGS += -DNO_FILE
-
-#These flags control how the library gets built.
-
-# Clean the stack after sensitive functions.  Not always required... 
-# With this defined most of the ciphers and hashes will clean their stack area
-# after usage with a (sometimes) huge penalty in speed.  Normally this is not
-# required if you simply lock your stack and wipe it when your program is done.
-#
-#CFLAGS += -DCLEAN_STACK
-
-# What algorithms to include? comment out and rebuild to remove em
-CFLAGS += -DBLOWFISH
-CFLAGS += -DRC2
-#CFLAGS += -DRC5
-#CFLAGS += -DRC6
-CFLAGS += -DSERPENT
-CFLAGS += -DSAFERP
-CFLAGS += -DSAFER
-CFLAGS += -DRIJNDAEL
-CFLAGS += -DXTEA
-CFLAGS += -DTWOFISH
-CFLAGS += -DDES
-CFLAGS += -DCAST5
-CFLAGS += -DNOEKEON
-
-#You can also customize the Twofish code.  All four combinations 
-#of the flags are possible but only three of them make sense.
-#
-#Both undefined:  Very fast, requires ~4.2KB of ram per scheduled key
-#Both defined  :  Slow, requires only ~100 bytes of ram per scheduled key
-#
-#If defined on their own
-#_SMALL defined:  Very Slow, small code only ~100 bytes of ram
-#_TABLES defined: Very fast, not faster than if both were undefined.  Code is ~1KB bigger
-#                 faster keysetup though...
-
-# Small Ram Variant of Twofish.  For this you must have TWOFISH defined.  This
-# variant requires about 4kb less memory but is considerably slower.  It is ideal
-# when high throughput is less important than conserving memory. By default it is
-# not defined which means the larger ram (about 4.2Kb used) variant is built.
-# CFLAGS += -DTWOFISH_SMALL
-
-# Tell Twofish to use precomputed tables.  If you want to use the small table
-# variant of Twofish you may want to turn this on.  Essentially it tells Twofish to use
-# precomputed S-boxes (Q0 and Q1) as well as precomputed GF multiplications [in the MDS].
-# This speeds up the cipher somewhat.
-# CFLAGS += -DTWOFISH_TABLES 
-
-#Use fast PK routines.  Basically this limits the size of the private key in the
-#DH system to 256 bits.  The group order remains unchanged so the best
-#attacks are still GNFS (for DH upto 2560-bits)
-#
-#This will only speed up the key generation and encryption routines.  It lowers the
-#security so its by default not turned on.  USE AT YOUR RISK!
-#CFLAGS += -DFAST_PK
-
-# Chaining modes
-CFLAGS += -DCFB
-CFLAGS += -DOFB
-CFLAGS += -DECB
-CFLAGS += -DCBC
-CFLAGS += -DCTR
-
-#One-way hashes
-CFLAGS += -DSHA512
-CFLAGS += -DSHA384
-CFLAGS += -DSHA256
-CFLAGS += -DTIGER
-CFLAGS += -DSHA1
-CFLAGS += -DMD5
-CFLAGS += -DMD4
-CFLAGS += -DMD2
-
-# base64 
-CFLAGS += -DBASE64
-
-# prngs 
-CFLAGS += -DYARROW
-CFLAGS += -DSPRNG
-CFLAGS += -DRC4
-
-# PK code 
-CFLAGS += -DMRSA
-CFLAGS += -DMDH
-CFLAGS += -DMECC
-CFLAGS += -DKR
-
-# Control which built in DH or ECC key paramaters
-# are to be allowed
-CFLAGS += -DDH768
-CFLAGS += -DDH1024
-CFLAGS += -DDH1280
-CFLAGS += -DDH1536
-CFLAGS += -DDH1792
-CFLAGS += -DDH2048
-CFLAGS += -DDH2560
-CFLAGS += -DDH3072
-CFLAGS += -DDH4096
-
-CFLAGS += -DECC160
-CFLAGS += -DECC192
-CFLAGS += -DECC224
-CFLAGS += -DECC256
-CFLAGS += -DECC384
-CFLAGS += -DECC521
-
-# include GF math routines?  (not currently used by anything internally)
-#CFLAGS += -DGF
-
-# include large integer math routines? (required by the PK code)
-CFLAGS += -DMPI
-
-# use the fast exptmod operation (used in dsa/rsa/dh and is_prime)
-# This uses slightly more heap than the old code [only during the function call]
-# this is also fairly faster than the previous code
-CFLAGS += -DMPI_FASTEXPT
-
-# use a "low" mem variant of the fast exptmod.  It is still always 
-# faster then the old exptmod but its savings drops off after 
-# 1024-bits 
-CFLAGS += -DMPI_FASTEXPT_LOWMEM
-
-# include HMAC support
-CFLAGS += -DHMAC
-
-# Have /dev/random or /dev/urandom?
-#CFLAGS += -DDEVRANDOM
-
-#Output filenames for various targets.
-LIBNAME=libtomcrypt.a
-TEST=test$(EXT)
-HASH=hashsum$(EXT)
-CRYPT=encrypt$(EXT)
-SMALL=small$(EXT)
-
-#LIBPATH-The directory for libtomcrypt to be installed to.
-#INCPATH-The directory to install the header files for libtomcrypt.
-LIBPATH=/usr/lib
-INCPATH=/usr/include
-
-#List of objects to compile.
-OBJECTS=keyring.o gf.o mem.o sprng.o ecc.o base64.o dh.o rsa.o \
-bits.o yarrow.o cfb.o ofb.o ecb.o ctr.o cbc.o hash.o tiger.o sha1.o \
-md5.o md4.o md2.o sha256.o sha512.o xtea.o aes.o serpent.o des.o \
-safer_tab.o safer.o safer+.o rc4.o rc2.o rc6.o rc5.o cast5.o noekeon.o blowfish.o crypt.o \
-ampi.o mpi.o prime.o twofish.o packet.o hmac.o strings.o
-
-# PlayStation(tm) 2 C run-time startup module
-PS2CRT0=crt0.o
-
-TESTOBJECTS=$(PS2CRT0) demos/test.o demos/timer.o
-HASHOBJECTS=$(PS2CRT0) demos/hashsum.o
-CRYPTOBJECTS=$(PS2CRT0) demos/encrypt.o
-SMALLOBJECTS=$(PS2CRT0) demos/small.o
-
-#Files left over from making the crypt.pdf.
-LEFTOVERS=*.dvi *.log *.aux *.toc *.idx *.ilg *.ind
-
-#Compressed filenames
-COMPRESSED=crypt.tar.bz2 crypt.zip crypt.tar.gz
-
-#Header files used by libtomcrypt.
-HEADERS=mpi-types.h mpi-config.h mpi.h \
-mycrypt_cfg.h mycrypt_gf.h mycrypt_kr.h \
-mycrypt_misc.h  mycrypt_prng.h mycrypt_cipher.h  mycrypt_hash.h \
-mycrypt_macros.h  mycrypt_pk.h mycrypt.h mycrypt_argchk.h
-
-#The default rule for make builds the libtomcrypt library.
-default:library mycrypt.h mycrypt_cfg.h
-
-#These are the rules to make certain object files.
-rsa.o: rsa.c rsa_sys.c
-ecc.o: ecc.c ecc_sys.c
-dh.o: dh.c dh_sys.c
-aes.o: aes.c aes_tab.c
-sha512.o: sha512.c sha384.c
-
-#This rule makes the libtomcrypt library.
-library: $(OBJECTS) 
-	$(AR) $(ARFLAGS) $(LIBNAME) $(OBJECTS)
-
-#This rule makes the test program included with libtomcrypt
-test: library $(TESTOBJECTS)
-	$(CC) -o $(TEST) -T $(LCFILE) $(LDFLAGS) $(TESTOBJECTS) $(LIBNAME) 
-
-#This rule makes the hash program included with libtomcrypt
-hashsum: library $(HASHOBJECTS)
-	$(CC) -o $(HASH) -T $(LCFILE) $(LDFLAGS) $(HASHOBJECTS) $(LIBNAME)
-
-#makes the crypt program
-crypt: library $(CRYPTOBJECTS)
-	$(CC) -o $(CRYPT) -T $(LCFILE) $(LDFLAGS) $(CRYPTOBJECTS) $(LIBNAME)
-
-#makes the small program
-small: library $(SMALLOBJECTS)
-	$(CC) -o $(SMALL) -T $(LCFILE) $(LDFLAGS) $(SMALLOBJECTS) $(LIBNAME)
-
-# makes the PlayStation(tm) 2 CRT 0 module
-$(PS2CRT0): $(LIBDIR)/crt0.s
-	$(AS) $(ASFLAGS) $(TMPFLAGS) -o $@ $< > $*.lst
-
-#This rule installs the library and the header files. This must be run
-#as root in order to have a high enough permission to write to the correct
-#directories and to set the owner and group to root.
-install: library
-	install -g root -o root $(LIBNAME) $(LIBPATH)
-	install -g root -o root $(HEADERS) $(INCPATH)
-
-#This rule cleans the source tree of all compiled code, not including the pdf
-#documentation.
-clean:
-	rm -f $(OBJECTS) $(TESTOBJECTS) $(HASHOBJECTS) $(CRYPTOBJECTS) $(SMALLOBJECTS) $(LEFTOVERS) $(LIBNAME)
-	rm -f $(TEST) $(HASH) $(COMPRESSED)
-	rm -f *stackdump *.lib *.exe *.obj demos/*.obj zlib/*.obj
-	rm -f *.o *.lst demos/*.o demos/*.lst
-
-#This builds the crypt.pdf file. Note that the rm -f *.pdf has been removed
-#from the clean command! This is because most people would like to keep the
-#nice pre-compiled crypt.pdf that comes with libtomcrypt! We only need to
-#delete it if we are rebuilding it.
-docs: crypt.tex
-	rm -f crypt.pdf
-	rm -f $(LEFTOVERS)
-	latex crypt > /dev/null
-	makeindex crypt > /dev/null
-	pdflatex crypt > /dev/null
-	rm -f $(LEFTOVERS)
-
-#This used to be the zipup target. I have split it into two seperate targets:
-#bz and zip. bz builds a crypt.tar.bz2 package, while zip builds a crypt.zip
-#package. I have removed the dos2unix commands, as this is a Linux makefile,
-#and these should not be needed. I also made it output the target to the
-#current directory instead of the root (/) directory. (Bad Tom!) We are
-#almost assured write permission in the current directory, but not in the root
-#directory. This means any user can now build a BZ image or a zip.
-#NOTE: This removes all pre-built compressed archives during clean.
-bz: clean docs
-	chdir .. ; rm -f crypt.tar.bz2 ; tar -c libtomcrypt/* > crypt.tar ; bzip2 -9v crypt.tar
-
-zip: clean docs
-	chdir .. ; rm -f crypt.zip ; zip -9 -r crypt.zip libtomcrypt/*
-
-#Makes a tar/gz archive of the library.
-gz: clean docs
-	chdir .. ; rm -f crypt.tar.gz ; tar -c libtomcrypt/* > crypt.tar ; gzip -9v crypt.tar
-
-#makes a tar/SZIP archive [slightly better than bzip2]
-szip: clean docs
-	chdir .. ; rm -f crypt.tar.szp ; tar -c libtomcrypt/* > crypt.tar ; szip -b41o64v255 crypt.tar crypt.tar.szp
-
-.c.o:
-	$(CC) $(CFLAGS) $(TMPFLAGS) -c $< -o $*.o > $*.lst

+ 0 - 274
makefile.vc

@@ -1,274 +0,0 @@
-# MAKEFILE for MSVC 6.0 SP5
-#
-# Tom St Denis, [email protected]
-#
-CC=cl
-AR=lib
-
-#here you can set the malloc/calloc/free functions you want
-XMALLOC=malloc
-XCALLOC=calloc
-XREALLOC=realloc
-XFREE=free
-
-#you can redefine the clock
-XCLOCK=clock
-XCLOCKS_PER_SEC=CLOCKS_PER_SEC
-
-CFLAGS = /c /Ogisy1 /Gs /I. /W3 /DWIN32 /DXREALLOC=$(XREALLOC) /DXMALLOC=$(XMALLOC) /DXCALLOC=$(XCALLOC) /DXFREE=$(XFREE) /DXCLOCK=$(XCLOCK) /DXCLOCKS_PER_SEC=$(XCLOCKS_PER_SEC)
-
-#Small code (smaller variants of some block ciphers)
-CFLAGS += /DSMALL_CODE
-
-#These flags control how the library gets built.
-
-#no file support, when defined the library will not have any functions that can read/write files 
-#(comment out to have file support)
-#CFLAGS += /DNO_FILE
-
-#Support the UNIX /dev/random or /dev/urandom
-#CFLAGS += /DDEVRANDOM
-
-# Use /dev/urandom first on devices where /dev/random is too slow */
-#CFLAGS += /DTRY_URANDOM_FIRST
-
-# Clean the stack after sensitive functions.  Not always required... 
-# With this defined most of the ciphers and hashes will clean their stack area
-# after usage with a (sometimes) huge penalty in speed.  Normally this is not
-# required if you simply lock your stack and wipe it when your program is done.
-#
-#CFLAGS += /DCLEAN_STACK
-
-# What algorithms to include? comment out and rebuild to remove em
-CFLAGS += /DBLOWFISH
-CFLAGS += /DRC2
-CFLAGS += /DRC5
-CFLAGS += /DRC6
-CFLAGS += /DSERPENT
-CFLAGS += /DSAFERP
-CFLAGS += /DSAFER
-CFLAGS += /DRIJNDAEL
-CFLAGS += /DXTEA
-CFLAGS += /DTWOFISH
-CFLAGS += /DDES
-CFLAGS += /DCAST5
-CFLAGS += /DNOEKEON
-
-#You can also customize the Twofish code.  All four combinations 
-#of the flags are possible but only three of them make sense.
-#
-#Both undefined:  Very fast, requires ~4.2KB of ram per scheduled key
-#Both defined  :  Slow, requires only ~100 bytes of ram per scheduled key
-#
-#If defined on their own
-#_SMALL defined:  Very Slow, small code only ~100 bytes of ram
-#_TABLES defined: Very fast, not faster than if both were undefined.  Code is ~1KB bigger
-#                 faster keysetup though...
-
-# Small Ram Variant of Twofish.  For this you must have TWOFISH defined.  This
-# variant requires about 4kb less memory but is considerably slower.  It is ideal
-# when high throughput is less important than conserving memory. By default it is
-# not defined which means the larger ram (about 4.2Kb used) variant is built.
-# CFLAGS += /DTWOFISH_SMALL
-
-# Tell Twofish to use precomputed tables.  If you want to use the small table
-# variant of Twofish you may want to turn this on.  Essentially it tells Twofish to use
-# precomputed S-boxes (Q0 and Q1) as well as precomputed GF multiplications [in the MDS].
-# This speeds up the cipher somewhat.
-# CFLAGS += /DTWOFISH_TABLES 
-
-#Use fast PK routines.  Basically this limits the size of the private key in the
-#DH system to 256 bits.  The group order remains unchanged so the best
-#attacks are still GNFS (for DH upto 2560-bits)
-#
-#This will only speed up the key generation and encryption routines.  It lowers the
-#security so its by default not turned on.  USE AT YOUR RISK!
-#CFLAGS += /DFAST_PK
-
-# Chaining modes
-CFLAGS += /DCFB
-CFLAGS += /DOFB
-CFLAGS += /DECB
-CFLAGS += /DCBC
-CFLAGS += /DCTR
-
-#One-way hashes
-CFLAGS += /DSHA512
-CFLAGS += /DSHA384
-CFLAGS += /DSHA256
-CFLAGS += /DTIGER
-CFLAGS += /DSHA1
-CFLAGS += /DMD5
-CFLAGS += /DMD4
-CFLAGS += /DMD2
-
-# base64 
-CFLAGS += /DBASE64
-
-# prngs 
-CFLAGS += /DYARROW
-CFLAGS += /DSPRNG
-CFLAGS += /DRC4
-
-# PK code 
-CFLAGS += /DMRSA
-CFLAGS += /DMDH
-CFLAGS += /DMECC
-CFLAGS += /DKR
-
-# Control which built-in DH or ECC key parameters
-# are to be allowed
-CFLAGS += /DDH768
-CFLAGS += /DDH1024
-CFLAGS += /DDH1280
-CFLAGS += /DDH1536
-CFLAGS += /DDH1792
-CFLAGS += /DDH2048
-CFLAGS += /DDH2560
-CFLAGS += /DDH3072
-CFLAGS += /DDH4096
-
-CFLAGS += /DECC160
-CFLAGS += /DECC192
-CFLAGS += /DECC224
-CFLAGS += /DECC256
-CFLAGS += /DECC384
-CFLAGS += /DECC521
-
-# include GF math routines?  (not currently used by anything internally)
-#CFLAGS += /DGF
-
-# include large integer math routines? (required by the PK code)
-CFLAGS += /DMPI
-
-# use the fast exptmod operation (used in dsa/rsa/dh and is_prime)
-# This uses slightly more heap than the old code [only during the function call]
-# this is also fairly faster than the previous code
-CFLAGS += /DMPI_FASTEXPT
-
-# use a "low" mem variant of the fast exptmod.  It is still always 
-# faster then the old exptmod but its savings drops off after 
-# 1024-bits 
-#CFLAGS += /DMPI_FASTEXPT_LOWMEM
-
-
-# include HMAC support
-CFLAGS += /DHMAC
-
-default: tomcrypt.lib
-
-keyring.obj: keyring.c
-	$(CC) $(CFLAGS) keyring.c
-ampi.obj: ampi.c
-	$(CC) $(CFLAGS) ampi.c
-mpi.obj: mpi.c
-	$(CC) $(CFLAGS) mpi.c
-blowfish.obj: blowfish.c
-	$(CC) $(CFLAGS) blowfish.c
-crypt.obj: crypt.c
-	$(CC) $(CFLAGS) crypt.c
-sha512.obj: sha512.c sha384.c
-	$(CC) $(CFLAGS) sha512.c
-sha256.obj: sha256.c
-	$(CC) $(CFLAGS) sha256.c
-hash.obj: hash.c
-	$(CC) $(CFLAGS) hash.c
-md5.obj: md5.c
-	$(CC) $(CFLAGS) md5.c
-md4.obj: md4.c
-	$(CC) $(CFLAGS) md4.c
-sha1.obj: sha1.c
-	$(CC) $(CFLAGS) sha1.c
-cfb.obj: cfb.c
-	$(CC) $(CFLAGS) cfb.c
-ofb.obj: ofb.c
-	$(CC) $(CFLAGS) ofb.c
-ecb.obj: ecb.c
-	$(CC) $(CFLAGS) ecb.c
-ctr.obj: ctr.c
-	$(CC) $(CFLAGS) ctr.c
-prime.obj: prime.c
-	$(CC) $(CFLAGS) prime.c
-base64.obj: base64.c
-	$(CC) $(CFLAGS) base64.c
-sprng.obj: sprng.c
-	$(CC) $(CFLAGS) sprng.c
-mem.obj: mem.c
-	$(CC) $(CFLAGS) mem.c
-gf.obj: gf.c
-	$(CC) $(CFLAGS) gf.c
-ecc.obj: ecc.c ecc_sys.c
-	$(CC) $(CFLAGS) ecc.c
-yarrow.obj: yarrow.c
-	$(CC) $(CFLAGS) yarrow.c
-bits.obj: bits.c
-	$(CC) $(CFLAGS) bits.c
-rsa.obj: rsa.c
-	$(CC) $(CFLAGS) rsa.c
-rc6.obj: rc6.c
-	$(CC) $(CFLAGS) rc6.c
-des.obj: des.c
-	$(CC) $(CFLAGS) des.c
-tiger.obj: tiger.c
-	$(CC) $(CFLAGS) tiger.c
-dh.obj: dh.c dh_sys.c
-	$(CC) $(CFLAGS) dh.c
-serpent.obj: serpent.c
-	$(CC) $(CFLAGS) serpent.c
-aes.obj: aes.c aes_tab.c
-	$(CC) $(CFLAGS) aes.c
-rc5.obj: rc5.c
-	$(CC) $(CFLAGS) rc5.c
-rc2.obj: rc2.c
-	$(CC) $(CFLAGS) rc2.c
-cbc.obj: cbc.c
-	$(CC) $(CFLAGS) cbc.c
-safer+.obj: safer+.c
-	$(CC) $(CFLAGS) safer+.c
-safer.obj: safer.c
-	$(CC) $(CFLAGS) safer.c
-safer_tab.obj: safer_tab.c
-	$(CC) $(CFLAGS) safer_tab.c
-xtea.obj: xtea.c
-	$(CC) $(CFLAGS) xtea.c
-twofish.obj: twofish.c
-	$(CC) $(CFLAGS) twofish.c
-packet.obj: packet.c
-	$(CC) $(CFLAGS) packet.c
-pack.obj: pack.c
-	$(CC) $(CFLAGS) pack.c
-hmac.obj: hmac.c
-	$(CC) $(CFLAGS) hmac.c
-strings.obj: strings.c
-	$(CC) $(CFLAGS) strings.c
-md2.obj: md2.c
-	$(CC) $(CFLAGS) md2.c
-cast5.obj: cast5.c
-	$(CC) $(CFLAGS) cast5.c
-noekeon.obj: noekeon.c
-	$(CC) $(CFLAGS) noekeon.c
-
-demos/test.obj: demos/test.c
-	$(CC) $(CFLAGS) demos/test.c
-
-demos/hashsum.obj: demos/hashsum.c
-	$(CC) $(CFLAGS) demos/hashsum.c
-
-tomcrypt.lib: keyring.obj gf.obj mem.obj sprng.obj  ecc.obj  base64.obj dh.obj rsa.obj bits.obj hmac.obj  \
-yarrow.obj cfb.obj ofb.obj ecb.obj ctr.obj cbc.obj hash.obj tiger.obj sha1.obj md2.obj md5.obj md4.obj sha256.obj sha512.obj xtea.obj \
-aes.obj serpent.obj safer_tab.obj safer.obj safer+.obj cast5.obj noekeon.obj rc2.obj rc6.obj rc5.obj des.obj blowfish.obj crypt.obj ampi.obj \
-strings.obj mpi.obj prime.obj twofish.obj packet.obj
-	$(AR) /out:tomcrypt.lib keyring.obj gf.obj mem.obj sprng.obj ecc.obj  base64.obj dh.obj rsa.obj hmac.obj \
-bits.obj yarrow.obj cfb.obj ofb.obj ecb.obj ctr.obj cbc.obj hash.obj tiger.obj sha1.obj md2.obj md5.obj md4.obj sha256.obj \
-strings.obj sha512.obj xtea.obj aes.obj serpent.obj safer_tab.obj safer.obj safer+.obj cast5.obj noekeon.obj rc2.obj rc6.obj rc5.obj des.obj \
-blowfish.obj crypt.obj ampi.obj mpi.obj prime.obj twofish.obj packet.obj
-
-
-test.exe: tomcrypt.lib demos/test.obj
-	link /OUT:test.exe test.obj tomcrypt.lib advapi32.lib
-
-hashsum.exe: tomcrypt.lib demos/hashsum.obj
-	link /OUT:hashsum.exe hashsum.obj tomcrypt.lib advapi32.lib
-
-clean:
-	rm -f demos/*.obj *.obj *.exe *.lib

+ 0 - 87
mpi-config.h

@@ -1,87 +0,0 @@
-/* Default configuration for MPI library */
-/* $ID$ */
-
-#ifndef MPI_CONFIG_H_
-#define MPI_CONFIG_H_
-
-/*
-  For boolean options, 
-  0 = no
-  1 = yes
-
-  Other options are documented individually.
-
- */
-
-#ifndef MP_IOFUNC
-#define MP_IOFUNC     0  /* include mp_print() ?                */
-#endif
-
-#ifndef MP_MODARITH
-#define MP_MODARITH   1  /* include modular arithmetic ?        */
-#endif
-
-#ifndef MP_NUMTH
-#define MP_NUMTH      1  /* include number theoretic functions? */
-#endif
-
-#ifndef MP_LOGTAB
-#define MP_LOGTAB     1  /* use table of logs instead of log()? */
-#endif
-
-#ifndef MP_MEMSET
-#define MP_MEMSET     1  /* use memset() to zero buffers?       */
-#endif
-
-#ifndef MP_MEMCPY
-#define MP_MEMCPY     1  /* use memcpy() to copy buffers?       */
-#endif
-
-#ifndef MP_CRYPTO
-#define MP_CRYPTO     1  /* erase memory on free?               */
-#endif
-
-#ifndef MP_ARGCHK
-/*
-  0 = no parameter checks
-  1 = runtime checks, continue execution and return an error to caller
-  2 = assertions; dump core on parameter errors
- */
-#define MP_ARGCHK     1  /* how to check input arguments        */
-#endif
-
-#ifndef MP_DEBUG
-#define MP_DEBUG      0  /* print diagnostic output?            */
-#endif
-
-#ifndef MP_DEFPREC
-#define MP_DEFPREC    64  /* default precision, in digits        */
-#endif
-
-#ifndef MP_MACRO
-#define MP_MACRO      0  /* use macros for frequent calls?      */
-#endif
-
-#ifndef MP_SQUARE
-#define MP_SQUARE     1  /* use separate squaring code?         */
-#endif
-
-#ifndef MP_PTAB_SIZE
-/*
-  When building mpprime.c, we build in a table of small prime
-  values to use for primality testing.  The more you include,
-  the more space they take up.  See primes.c for the possible
-  values (currently 16, 32, 64, 128, 256, and 6542)
- */
-#define MP_PTAB_SIZE  128  /* how many built-in primes?         */
-#endif
-
-#ifndef MP_COMPAT_MACROS
-#define MP_COMPAT_MACROS 1   /* define compatibility macros?    */
-#endif
-
-#endif /* ifndef MPI_CONFIG_H_ */
-
-
-
-

+ 0 - 16
mpi-types.h

@@ -1,16 +0,0 @@
-/* Type definitions generated by 'types.pl' */
-typedef char               mp_sign;
-typedef unsigned short     mp_digit;  /* 2 byte type */
-typedef unsigned int       mp_word;   /* 4 byte type */
-typedef unsigned int       mp_size;
-typedef int                mp_err;
-
-#define MP_DIGIT_BIT       (CHAR_BIT*sizeof(mp_digit))
-#define MP_DIGIT_MAX       USHRT_MAX
-#define MP_WORD_BIT        (CHAR_BIT*sizeof(mp_word))
-#define MP_WORD_MAX        UINT_MAX
-
-#define MP_DIGIT_SIZE      2
-#define DIGIT_FMT          "%04X"
-#define RADIX              (MP_DIGIT_MAX+1)
-

+ 4621 - 3464
mpi.c

@@ -1,4216 +1,5373 @@
- /*
-    mpi.c
-
-    by Michael J. Fromberger <[email protected]>
-    Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
-
-    Arbitrary precision integer arithmetic library
-
-    $ID$
+/* File Generated Automatically by gen.pl */
+
+/* Start: bncore.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
+
+int     KARATSUBA_MUL_CUTOFF = 80,	/* Min. number of digits before Karatsuba multiplication is used. */
+        KARATSUBA_SQR_CUTOFF = 80,	/* Min. number of digits before Karatsuba squaring is used. */
+        MONTGOMERY_EXPT_CUTOFF = 74;	/* max. number of digits that montgomery reductions will help for */
+
+/* End: bncore.c */
+
+/* Start: bn_fast_mp_invmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-
-#include "mycrypt.h"
-
-#ifdef MPI
+/* computes the modular inverse via binary extended euclidean algorithm, 
+ * that is c = 1/a mod b 
+ *
+ * Based on mp_invmod except this is optimized for the case where b is 
+ * odd as per HAC Note 14.64 on pp. 610
+ */
+int
+fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+{
+  mp_int  x, y, u, v, B, D;
+  int     res, neg;
 
-#if MP_DEBUG
-#include <stdio.h>
+  if ((res = mp_init (&x)) != MP_OKAY) {
+    goto __ERR;
+  }
 
-#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);}
-#else
-#define DIAG(T,V)
-#endif
+  if ((res = mp_init (&y)) != MP_OKAY) {
+    goto __X;
+  }
 
-/* 
-   If MP_LOGTAB is not defined, use the math library to compute the
-   logarithms on the fly.  Otherwise, use the static table below.
-   Pick which works best for your system.
- */
-#if MP_LOGTAB
+  if ((res = mp_init (&u)) != MP_OKAY) {
+    goto __Y;
+  }
 
-/* {{{ s_logv_2[] - log table for 2 in various bases */
+  if ((res = mp_init (&v)) != MP_OKAY) {
+    goto __U;
+  }
 
-/*
-  A table of the logs of 2 for various bases (the 0 and 1 entries of
-  this table are meaningless and should not be referenced).  
+  if ((res = mp_init (&B)) != MP_OKAY) {
+    goto __V;
+  }
 
-  This table is used to compute output lengths for the mp_toradix()
-  function.  Since a number n in radix r takes up about log_r(n)
-  digits, we estimate the output size by taking the least integer
-  greater than log_r(n), where:
+  if ((res = mp_init (&D)) != MP_OKAY) {
+    goto __B;
+  }
 
-  log_r(n) = log_2(n) * log_r(2)
+  /* x == modulus, y == value to invert */
+  if ((res = mp_copy (b, &x)) != MP_OKAY) {
+    goto __D;
+  }
+  if ((res = mp_copy (a, &y)) != MP_OKAY) {
+    goto __D;
+  }
 
-  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
-  which are the output bases supported.  
- */
-const float s_logv_2[] = {
-   0.000000000, 0.000000000, 1.000000000, 0.630929754, 	/*  0  1  2  3 */
-   0.500000000, 0.430676558, 0.386852807, 0.356207187, 	/*  4  5  6  7 */
-   0.333333333, 0.315464877, 0.301029996, 0.289064826, 	/*  8  9 10 11 */
-   0.278942946, 0.270238154, 0.262649535, 0.255958025, 	/* 12 13 14 15 */
-   0.250000000, 0.244650542, 0.239812467, 0.235408913, 	/* 16 17 18 19 */
-   0.231378213, 0.227670249, 0.224243824, 0.221064729, 	/* 20 21 22 23 */
-   0.218104292, 0.215338279, 0.212746054, 0.210309918, 	/* 24 25 26 27 */
-   0.208014598, 0.205846832, 0.203795047, 0.201849087, 	/* 28 29 30 31 */
-   0.200000000, 0.198239863, 0.196561632, 0.194959022, 	/* 32 33 34 35 */
-   0.193426404, 0.191958720, 0.190551412, 0.189200360, 	/* 36 37 38 39 */
-   0.187901825, 0.186652411, 0.185449023, 0.184288833, 	/* 40 41 42 43 */
-   0.183169251, 0.182087900, 0.181042597, 0.180031327, 	/* 44 45 46 47 */
-   0.179052232, 0.178103594, 0.177183820, 0.176291434, 	/* 48 49 50 51 */
-   0.175425064, 0.174583430, 0.173765343, 0.172969690, 	/* 52 53 54 55 */
-   0.172195434, 0.171441601, 0.170707280, 0.169991616, 	/* 56 57 58 59 */
-   0.169293808, 0.168613099, 0.167948779, 0.167300179, 	/* 60 61 62 63 */
-   0.166666667
-};
-/* }}} */
-#define LOG_V_2(R)  s_logv_2[(R)]
-
-#else
-
-#include <math.h>
-#define LOG_V_2(R)  (log(2.0)/log(R))
-
-#endif
-
-/* Default precision for newly created mp_int's      */
-static unsigned int s_mp_defprec = MP_DEFPREC;
-
-/* {{{ Digit arithmetic macros */
-
-/*
-  When adding and multiplying digits, the results can be larger than
-  can be contained in an mp_digit.  Thus, an mp_word is used.  These
-  macros mask off the upper and lower digits of the mp_word (the
-  mp_word may be more than 2 mp_digits wide, but we only concern
-  ourselves with the low-order 2 mp_digits)
-
-  If your mp_word DOES have more than 2 mp_digits, you need to
-  uncomment the first line, and comment out the second.
- */
+  if ((res = mp_abs (&y, &y)) != MP_OKAY) {
+    goto __D;
+  }
 
-/* #define  CARRYOUT(W)  (((W)>>DIGIT_BIT)&MP_DIGIT_MAX) */
-#define  CARRYOUT(W)  ((W)>>DIGIT_BIT)
-#define  ACCUM(W)     ((W)&MP_DIGIT_MAX)
+  /* 2. [modified] if x,y are both even then return an error! 
+   * 
+   * That is if gcd(x,y) = 2 * k then obviously there is no inverse.
+   */
+  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
+    res = MP_VAL;
+    goto __D;
+  }
 
-/* }}} */
+  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
+    goto __D;
+  }
+  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
+    goto __D;
+  }
+  mp_set (&D, 1);
 
-/* {{{ Comparison constants */
 
+top:
+  /* 4.  while u is even do */
+  while (mp_iseven (&u) == 1) {
+    /* 4.1 u = u/2 */
+    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
+      goto __D;
+    }
+    /* 4.2 if B is odd then (this variant tracks only B, not A) */
+    if (mp_iseven (&B) == 0) {
+      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
+	goto __D;
+      }
+    }
+    /* B = B/2 (A is not tracked in this variant) */
+    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
+      goto __D;
+    }
+  }
 
-/* }}} */
 
-/* {{{ Constant strings */
+  /* 5.  while v is even do */
+  while (mp_iseven (&v) == 1) {
+    /* 5.1 v = v/2 */
+    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
+      goto __D;
+    }
+    /* 5.2 if D is odd then (this variant tracks only D, not C) */
+    if (mp_iseven (&D) == 0) {
+      /* D = D - x; the division by two is done just below */
+      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
+	goto __D;
+      }
+    }
+    /* D = D/2 (C is not tracked in this variant) */
+    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
+      goto __D;
+    }
+  }
 
-/* Constant strings returned by mp_strerror() */
-static const char *mp_err_string[] = {
-  "unknown result code",     /* say what?            */
-  "boolean true",            /* MP_OKAY, MP_YES      */
-  "boolean false",           /* MP_NO                */
-  "out of memory",           /* MP_MEM               */
-  "argument out of range",   /* MP_RANGE             */
-  "invalid input parameter", /* MP_BADARG            */
-  "result is undefined"      /* MP_UNDEF             */
-};
+  /* 6.  if u >= v then */
+  if (mp_cmp (&u, &v) != MP_LT) {
+    /* u = u - v, B = B - D (A and C are not tracked in this variant) */
+    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+      goto __D;
+    }
 
-/* Value to digit maps for radix conversion   */
+    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+      goto __D;
+    }
+  } else {
+    /* v = v - u, D = D - B (A and C are not tracked in this variant) */
+    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+      goto __D;
+    }
 
-/* s_dmap_1 - standard digits and letters */
-static const char *s_dmap_1 = 
-  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+      goto __D;
+    }
+  }
 
-#if 0
-/* s_dmap_2 - base64 ordering for digits  */
-static const char *s_dmap_2 =
-  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-#endif
+  /* if not zero goto step 4 */
+  if (mp_iszero (&u) == 0) {
+    goto top;
+  }
 
-/* }}} */
+  /* now a = C, b = D, gcd == g*v */
 
-/* {{{ Static function declarations */
+  /* if v != 1 then there is no inverse */
+  if (mp_cmp_d (&v, 1) != MP_EQ) {
+    res = MP_VAL;
+    goto __D;
+  }
 
-/* 
-   If MP_MACRO is false, these will be defined as actual functions;
-   otherwise, suitable macro definitions will be used.  This works
-   around the fact that ANSI C89 doesn't support an 'inline' keyword
-   (although I hear C9x will ... about bloody time).  At present, the
-   macro definitions are identical to the function bodies, but they'll
-   expand in place, instead of generating a function call.
+  /* D now holds the inverse; add b until it is non-negative */
+  neg = a->sign;
+  while (D.sign == MP_NEG) {
+    if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
+      goto __D;
+    }
+  }
+  mp_exch (&D, c);
+  c->sign = neg;
+  res = MP_OKAY;
+
+__D:mp_clear (&D);
+__B:mp_clear (&B);
+__V:mp_clear (&v);
+__U:mp_clear (&u);
+__Y:mp_clear (&y);
+__X:mp_clear (&x);
+__ERR:
+  return res;
+}
 
-   I chose these particular functions to be made into macros because
-   some profiling showed they are called a lot on a typical workload,
-   and yet they are primarily housekeeping.
+/* End: bn_fast_mp_invmod.c */
+
+/* Start: bn_fast_mp_montgomery_reduce.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-#if MP_MACRO == 0
- static void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits           */
- static void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count); /* copy    */
- static void    *s_mp_alloc(size_t nb, size_t ni);       /* general allocator     */
- static void s_mp_free(void *ptr);                   /* general free function */
-#else
-
- /* Even if these are defined as macros, we need to respect the settings
-    of the MP_MEMSET and MP_MEMCPY configuration options...
-  */
- #if MP_MEMSET == 0
-  #define  s_mp_setz(dp, count) \
-       {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;}
- #else
-  #define  s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit))
- #endif /* MP_MEMSET */
-
- #if MP_MEMCPY == 0
-  #define  s_mp_copy(sp, dp, count) \
-       {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];}
- #else
-  #define  s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit))
- #endif /* MP_MEMCPY */
-
- #define  s_mp_alloc(nb, ni)  XCALLOC(nb, ni)
- #define  s_mp_free(ptr) {if(ptr) XFREE(ptr);}
-#endif /* MP_MACRO */
-
-static mp_err s_mp_grow(mp_int *mp, mp_size min);   /* increase allocated size */
-static mp_err s_mp_pad(mp_int *mp, mp_size min);    /* left pad with zeroes    */
-
-static void s_mp_clamp(mp_int *mp);               /* clip leading zeroes     */
-
-static void s_mp_exch(mp_int *a, mp_int *b);      /* swap a and b in place   */
-
-static mp_err s_mp_lshd(mp_int *mp, mp_size p);     /* left-shift by p digits  */
-static void s_mp_rshd(mp_int *mp, mp_size p);     /* right-shift by p digits */
-static void s_mp_div_2d(mp_int *mp, mp_digit d);  /* divide by 2^d in place  */
-static void s_mp_mod_2d(mp_int *mp, mp_digit d);  /* modulo 2^d in place     */
-static mp_err s_mp_mul_2d(mp_int *mp, mp_digit d);  /* multiply by 2^d in place*/
-static void s_mp_div_2(mp_int *mp);               /* divide by 2 in place    */
-static mp_err s_mp_mul_2(mp_int *mp);               /* multiply by 2 in place  */
-mp_digit s_mp_norm(mp_int *a, mp_int *b);      /* normalize for division  */
-static mp_err s_mp_add_d(mp_int *mp, mp_digit d);   /* unsigned digit addition */
-static mp_err s_mp_sub_d(mp_int *mp, mp_digit d);   /* unsigned digit subtract */
-static mp_err s_mp_mul_d(mp_int *mp, mp_digit d);   /* unsigned digit multiply */
-static mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
-		                               /* unsigned digit divide   */
-static mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu);
-                                               /* Barrett reduction       */
-static mp_err s_mp_add(mp_int *a, mp_int *b);       /* magnitude addition      */
-static mp_err s_mp_sub(mp_int *a, mp_int *b);       /* magnitude subtract      */
-static mp_err s_mp_mul(mp_int *a, mp_int *b);       /* magnitude multiply      */
-#if 0
-static void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len);
-                                               /* multiply buffers in place */
-#endif
-#if MP_SQUARE
-static mp_err s_mp_sqr(mp_int *a);                  /* magnitude square        */
-#else
-#define  s_mp_sqr(a) s_mp_mul(a, a)
-#endif
-static mp_err s_mp_div(mp_int *a, mp_int *b);       /* magnitude divide        */
-static mp_err s_mp_2expt(mp_int *a, mp_digit k);    /* a = 2^k                 */
-static int s_mp_cmp(mp_int *a, mp_int *b);       /* magnitude comparison    */
-static int s_mp_cmp_d(mp_int *a, mp_digit d);    /* magnitude digit compare */
-static int s_mp_ispow2(mp_int *v);               /* is v a power of 2?      */
-static int s_mp_ispow2d(mp_digit d);             /* is d a power of 2?      */
-
-static int s_mp_tovalue(char ch, int r);          /* convert ch to value    */
-char     s_mp_todigit(int val, int r, int low); /* convert val to digit   */
-static int s_mp_outlen(int bits, int r);          /* output length in bytes */
-
-/* }}} */
-
-/* {{{ Default precision manipulation */
-
-unsigned int mp_get_prec(void)
-{
-  return s_mp_defprec;
-
-} /* end mp_get_prec() */
-
-void         mp_set_prec(unsigned int prec)
+#include <tommath.h>
+
+/* computes xR^-1 == x (mod N) via Montgomery Reduction 
+ * 
+ * This is an optimized implementation of mp_montgomery_reduce 
+ * which uses the comba method to quickly calculate the columns of the
+ * reduction.  
+ *
+ * Based on Algorithm 14.32 on pp.601 of HAC.
+*/
+int
+fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
 {
-  if(prec == 0)
-    s_mp_defprec = MP_DEFPREC;
-  else
-    s_mp_defprec = prec;
+  int     ix, res, olduse;
+  mp_word W[512];
 
-} /* end mp_set_prec() */
+  /* get old used count */
+  olduse = a->used;
 
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ mp_init(mp) */
+  /* grow a as required */
+  if (a->alloc < m->used + 1) {
+    if ((res = mp_grow (a, m->used + 1)) != MP_OKAY) {
+      return res;
+    }
+  }
 
-/*
-  mp_init(mp)
+  {
+    register mp_word *_W;
+    register mp_digit *tmpa;
 
-  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
-  MP_MEM if memory could not be allocated for the structure.
- */
+    _W = W;
+    tmpa = a->dp;
 
-mp_err mp_init(mp_int *mp)
-{
-  return mp_init_size(mp, s_mp_defprec);
+    /* copy the digits of a */
+    for (ix = 0; ix < a->used; ix++) {
+      *_W++ = *tmpa++;
+    }
 
-} /* end mp_init() */
+    /* zero the high words */
+    for (; ix < m->used * 2 + 1; ix++) {
+      *_W++ = 0;
+    }
+  }
 
-/* }}} */
+  for (ix = 0; ix < m->used; ix++) {
+    /* ui = ai * m' mod b
+     *
+     * We avoid a double precision multiplication (which isn't required)
+     * by casting the value down to a mp_digit.  Note this requires that W[ix-1] have
+     * the carry cleared (see after the inner loop)
+     */
+    register mp_digit ui;
+    ui = (((mp_digit) (W[ix] & MP_MASK)) * mp) & MP_MASK;
+
+    /* a = a + ui * m * b^i
+     *
+     * This is computed in place and on the fly.  The multiplication
+     * by b^i is handled by offseting which columns the results
+     * are added to.
+     *
+     * Note the comba method normally doesn't handle carries in the inner loop
+     * In this case we fix the carry from the previous column since the Montgomery
+     * reduction requires digits of the result (so far) [see above] to work.  This is
+     * handled by fixing up one carry after the inner loop.  The carry fixups are done
+     * in order so after these loops the first m->used words of W[] have the carries
+     * fixed
+     */
+    {
+      register int iy;
+      register mp_digit *tmpx;
+      register mp_word *_W;
 
-/* {{{ mp_init_array(mp[], count) */
+      /* alias for the digits of the modulus */
+      tmpx = m->dp;
 
-mp_err mp_init_array(mp_int mp[], int count)
-{
-  mp_err  res;
-  int     pos;
+      /* Alias for the columns set by an offset of ix */
+      _W = W + ix;
 
-  ARGCHK(mp !=NULL && count > 0, MP_BADARG);
+      /* inner loop */
+      for (iy = 0; iy < m->used; iy++) {
+	*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
+      }
+    }
 
-  for(pos = 0; pos < count; ++pos) {
-    if((res = mp_init(&mp[pos])) != MP_OKAY)
-      goto CLEANUP;
+    /* now fix carry for next digit, W[ix+1] */
+    W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
   }
 
-  return MP_OKAY;
+  /* now fix the rest of the carries */
+  for (++ix; ix <= m->used * 2 + 1; ix++) {
+    W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+  }
 
- CLEANUP:
-  while(--pos >= 0) 
-    mp_clear(&mp[pos]);
+  {
+    register mp_digit *tmpa;
+    register mp_word *_W;
 
-  return res;
+    /* copy out, A = A/b^n
+     *
+     * The result is A/b^n but instead of converting from an array of mp_word
+     * to mp_digit then calling mp_rshd we just copy them in the right
+     * order
+     */
+    tmpa = a->dp;
+    _W = W + m->used;
 
-} /* end mp_init_array() */
+    for (ix = 0; ix < m->used + 1; ix++) {
+      *tmpa++ = *_W++ & ((mp_word) MP_MASK);
+    }
 
-/* }}} */
+    /* zero oldused digits, if the input a was larger than
+     * m->used+1 we'll have to clear the digits */
+    for (; ix < olduse; ix++) {
+      *tmpa++ = 0;
+    }
+  }
 
-/* {{{ mp_init_size(mp, prec) */
+  /* set the max used and clamp */
+  a->used = m->used + 1;
+  mp_clamp (a);
 
-/*
-  mp_init_size(mp, prec)
+  /* if A >= m then A = A - m */
+  if (mp_cmp_mag (a, m) != MP_LT) {
+    return s_mp_sub (a, m, a);
+  }
+  return MP_OKAY;
+}
 
-  Initialize a new zero-valued mp_int with at least the given
-  precision; returns MP_OKAY if successful, or MP_MEM if memory could
-  not be allocated for the structure.
+/* End: bn_fast_mp_montgomery_reduce.c */
+
+/* Start: bn_fast_s_mp_mul_digs.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-
-mp_err mp_init_size(mp_int *mp, mp_size prec)
+#include <tommath.h>
+
+/* Fast (comba) multiplier
+ *
+ * This is the fast column-array [comba] multiplier.  It is designed to compute
+ * the columns of the product first then handle the carries afterwards.  This
+ * has the effect of making the nested loops that compute the columns very
+ * simple and schedulable on super-scalar processors.
+ *
+ * This has been modified to produce a variable number of digits of output so
+ * if say only a half-product is required you don't have to compute the upper half
+ * (a feature required for fast Barrett reduction).
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
+ *
+ */
+int
+fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
-  ARGCHK(mp != NULL && prec > 0, MP_BADARG);
-
-  if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
-    return MP_MEM;
-
-  SIGN(mp) = MP_ZPOS;
-  USED(mp) = 1;
-  ALLOC(mp) = prec;
+  int     olduse, res, pa, ix;
+  mp_word W[512];
 
-  return MP_OKAY;
+  /* grow the destination as required */
+  if (c->alloc < digs) {
+    if ((res = mp_grow (c, digs)) != MP_OKAY) {
+      return res;
+    }
+  }
 
-} /* end mp_init_size() */
+  /* clear temp buf (the columns) */
+  memset (W, 0, sizeof (mp_word) * digs);
+
+  /* calculate the columns */
+  pa = a->used;
+  for (ix = 0; ix < pa; ix++) {
+
+    /* this multiplier has been modified to allow you to control how many digits 
+     * of output are produced.  So at most we want to make up to "digs" digits
+     * of output.
+     *
+     * this adds products to distinct columns (at ix+iy) of W
+     * note that each step through the loop is not dependent on
+     * the previous which means the compiler can easily unroll
+     * the loop without scheduling problems
+     */
+    {
+      register mp_digit tmpx, *tmpy;
+      register mp_word *_W;
+      register int iy, pb;
 
-/* }}} */
+      /* alias for the the word on the left e.g. A[ix] * A[iy] */
+      tmpx = a->dp[ix];
 
-/* {{{ mp_init_copy(mp, from) */
+      /* alias for the right side */
+      tmpy = b->dp;
 
-/*
-  mp_init_copy(mp, from)
+      /* alias for the columns, each step through the loop adds a new
+         term to each column
+       */
+      _W = W + ix;
 
-  Initialize mp as an exact copy of from.  Returns MP_OKAY if
-  successful, MP_MEM if memory could not be allocated for the new
-  structure.
- */
+      /* the number of digits is limited by their placement.  E.g. 
+         we avoid multiplying digits that will end up above the # of
+         digits of precision requested
+       */
+      pb = MIN (b->used, digs - ix);
 
-mp_err mp_init_copy(mp_int *mp, mp_int *from)
-{
-  ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+      for (iy = 0; iy < pb; iy++) {
+	*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
+      }
+    }
 
-  if(mp == from)
-    return MP_OKAY;
+  }
 
-  if((DIGITS(mp) = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
-    return MP_MEM;
+  /* setup dest */
+  olduse = c->used;
+  c->used = digs;
+
+  {
+    register mp_digit *tmpc;
+
+    /* At this point W[] contains the sums of each column.  To get the
+     * correct result we must take the extra bits from each column and
+     * carry them down
+     *
+     * Note that while this adds extra code to the multiplier it saves time
+     * since the carry propagation is removed from the above nested loop.
+     * This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
+     * N^2 + N*c where c is the cost of the shifting.  On very small numbers
+     * this is slower but on most cryptographic size numbers it is faster.
+     */
+    tmpc = c->dp;
+    for (ix = 1; ix < digs; ix++) {
+      W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+      *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+    }
+    *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
 
-  s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
-  USED(mp) = USED(from);
-  ALLOC(mp) = USED(from);
-  SIGN(mp) = SIGN(from);
+    /* clear unused */
+    for (; ix < olduse; ix++) {
+      *tmpc++ = 0;
+    }
+  }
 
+  mp_clamp (c);
   return MP_OKAY;
+}
 
-} /* end mp_init_copy() */
+/* End: bn_fast_s_mp_mul_digs.c */
+
+/* Start: bn_fast_s_mp_mul_high_digs.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* this is a modified version of fast_s_mp_mul_digs that only produces
+ * output digits at or *above* digs.  See the comments for fast_s_mp_mul_digs
+ * to see how it works.
+ *
+ * This is used in the Barrett reduction since for one of the multiplications
+ * only the higher digits were needed.  This essentially halves the work.
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
+ *
+ * c is grown to a->used + b->used + 1 digits when needed.  Only digits "digs"
+ * and above of c->dp are written here; the digits below "digs" are left as
+ * they were.  Returns MP_OKAY, or the error code reported by mp_grow().
+ *
+ * NOTE(review): W[] is a fixed 512-word buffer indexed up to
+ * a->used + b->used with no check in this function, and the memset length
+ * assumes digs <= a->used + b->used + 1 -- presumably guaranteed by callers.
+ */
+int
+fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+{
+  int     oldused, newused, res, pa, pb, ix;
+  mp_word W[512];
 
-/* }}} */
+  /* calculate size of product and allocate more space if required */
+  newused = a->used + b->used + 1;
+  if (c->alloc < newused) {
+    if ((res = mp_grow (c, newused)) != MP_OKAY) {
+      return res;
+    }
+  }
 
-/* {{{ mp_copy(from, to) */
+  /* like the other comba method we compute the columns first */
+  pa = a->used;
+  pb = b->used;
+  memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word));
+  for (ix = 0; ix < pa; ix++) {
+    {
+      register mp_digit tmpx, *tmpy;
+      register int iy;
+      register mp_word *_W;
 
-/*
-  mp_copy(from, to)
+      /* work to do: we only calculate digits that are at "digs" or above,
+       * so for this row start at the first digit of b whose product lands
+       * in column "digs".  Rows with ix > digs already lie entirely above
+       * "digs"; clamp at zero for them instead of indexing b->dp with a
+       * negative offset.
+       */
+      iy = digs - ix;
+      if (iy < 0) {
+        iy = 0;
+      }
 
-  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
-  'to' has already been initialized (if not, use mp_init_copy()
-  instead). If 'from' and 'to' are identical, nothing happens.
- */
+      /* copy of word on the left of A[ix] * B[iy] */
+      tmpx = a->dp[ix];
 
-mp_err mp_copy(mp_int *from, mp_int *to)
-{
-  ARGCHK(from != NULL && to != NULL, MP_BADARG);
+      /* alias for right side */
+      tmpy = b->dp + iy;
 
-  if(from == to)
-    return MP_OKAY;
+      /* alias for the columns of output.  The first product of this row
+       * belongs in column ix + iy, which equals "digs" unless iy was
+       * clamped above
+       */
+      _W = W + (ix + iy);
 
-  { /* copy */
-    mp_digit   *tmp;
+      /* compute column products for digits above the minimum */
+      for (; iy < pb; iy++) {
+	*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
+      }
+    }
+  }
 
-    /*
-      If the allocated buffer in 'to' already has enough space to hold
-      all the used digits of 'from', we'll re-use it to avoid hitting
-      the memory allocater more than necessary; otherwise, we'd have
-      to grow anyway, so we just allocate a hunk and make the copy as
-      usual
-     */
-    if(ALLOC(to) >= USED(from)) {
-      s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
-      s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
-      
-    } else {
-      if((tmp = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
-	return MP_MEM;
+  /* setup dest */
+  oldused = c->used;
+  c->used = newused;
 
-      s_mp_copy(DIGITS(from), tmp, USED(from));
+  /* now convert the array W down to what we need: propagate the carries
+   * upwards and store the masked columns starting at digit "digs"
+   */
+  for (ix = digs + 1; ix < newused; ix++) {
+    W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+    c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+  }
+  c->dp[(pa + pb + 1) - 1] =
+    (mp_digit) (W[(pa + pb + 1) - 1] & ((mp_word) MP_MASK));
 
-      if(DIGITS(to) != NULL) {
-#if MP_CRYPTO
-	s_mp_setz(DIGITS(to), ALLOC(to));
-#endif
-	s_mp_free(DIGITS(to));
-      }
+  /* zero any digits c held beyond the new length */
+  for (; ix < oldused; ix++) {
+    c->dp[ix] = 0;
+  }
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-      DIGITS(to) = tmp;
-      ALLOC(to) = USED(from);
+/* End: bn_fast_s_mp_mul_high_digs.c */
+
+/* Start: bn_fast_s_mp_sqr.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* fast squaring
+ *
+ * This is the comba method where the columns of the product are computed first
+ * then the carries are computed.  This has the effect of making a very simple
+ * inner loop that is executed the most
+ *
+ * W2 represents the outer products and W the inner.  
+ *
+ * A further optimization is made because the inner products are of the form
+ * "A * B * 2".  The *2 part does not need to be computed until the end which is
+ * good because 64-bit shifts are slow!
+ *
+ * Based on Algorithm 14.16 on pp.597 of HAC.
+ *
+ * Computes b = a * a.  b is grown to 2 * a->used + 1 digits when needed and
+ * its sign is set to MP_ZPOS.  Returns MP_OKAY, or the error code reported
+ * by mp_grow().
+ *
+ * NOTE(review): W[] and W2[] are fixed 512-word buffers indexed up to
+ * 2 * a->used with no check in this function, so callers presumably
+ * guarantee 2 * a->used + 1 <= 512 -- confirm at the call sites.
+ */
+int
+fast_s_mp_sqr (mp_int * a, mp_int * b)
+{
+  int     olduse, newused, res, ix, pa;
+  mp_word W2[512], W[512];
+
+  /* calculate size of product and allocate as required */
+  pa = a->used;
+  newused = pa + pa + 1;
+  if (b->alloc < newused) {
+    if ((res = mp_grow (b, newused)) != MP_OKAY) {
+      return res;
     }
+  }
 
-    /* Copy the precision and sign from the original */
-    USED(to) = USED(from);
-    SIGN(to) = SIGN(from);
-  } /* end copy */
-
-  return MP_OKAY;
+  /* zero temp buffer (columns) 
+   * Note that there are two buffers.  Since squaring requires
+   * an outer and inner product and the inner product requires 
+   * computing a product and doubling it (a relatively expensive
+   * op to perform n^2 times if you don't have to) the inner and
+   * outer products are computed in different buffers.  This way
+   * the inner product can be doubled using n doublings instead of
+   * n^2
+   */
+  memset (W, 0, newused * sizeof (mp_word));
+  memset (W2, 0, newused * sizeof (mp_word));
+
+/* note optimization
+ * values in W2 are only written in even locations which means
+ * we can collapse the array to 256 words [and fixup the memset above]
+ * provided we also fix up the summations below.  Ideally
+ * the fixup loop should be unrolled twice to handle the even/odd 
+ * cases, and then a final step to handle odd cases [e.g. newused == odd]
+ *
+ * This will not only save ~8*256 = 2KB of stack but lower the number of
+ * operations required to finally fix up the columns
+ */
 
-} /* end mp_copy() */
+  /* This computes the inner product.  To simplify the inner N^2 loop
+   * the multiplication by two is done afterwards in the N loop.
+   */
+  for (ix = 0; ix < pa; ix++) {
+    /* compute the outer product 
+     *
+     * Note that every outer product is computed 
+     * for a particular column only once which means that 
+     * there is no need to do a double precision addition
+     */
+    W2[ix + ix] = ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
 
-/* }}} */
+    {
+      register mp_digit tmpx, *tmpy;
+      register mp_word *_W;
+      register int iy;
 
-/* {{{ mp_exch(mp1, mp2) */
+      /* copy of left side */
+      tmpx = a->dp[ix];
 
-/*
-  mp_exch(mp1, mp2)
+      /* alias for right side: the digits of a above position ix */
+      tmpy = a->dp + (ix + 1);
 
-  Exchange mp1 and mp2 without allocating any intermediate memory
-  (well, unless you count the stack space needed for this call and the
-  locals it creates...).  This cannot fail.
- */
+      /* the column to store the result in (ix + iy with iy = ix + 1) */
+      _W = W + (ix + ix + 1);
 
-void mp_exch(mp_int *mp1, mp_int *mp2)
-{
-#if MP_ARGCHK == 2
-  assert(mp1 != NULL && mp2 != NULL);
-#else
-  if(mp1 == NULL || mp2 == NULL)
-    return;
-#endif
+      /* inner products */
+      for (iy = ix + 1; iy < pa; iy++) {
+	*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
+      }
+    }
+  }
 
-  s_mp_exch(mp1, mp2);
+  /* setup dest: remember the old digit count so stale digits can be cleared */
+  olduse = b->used;
+  b->used = newused;
 
-} /* end mp_exch() */
+  /* double first value, since the inner products are half of what they should be */
+  W[0] += W[0] + W2[0];
 
-/* }}} */
+  /* now compute digits */
+  {
+    register mp_digit *tmpb;
 
-/* {{{ mp_clear(mp) */
+    tmpb = b->dp;
 
-/*
-  mp_clear(mp)
+    for (ix = 1; ix < newused; ix++) {
+      /* double/add next digit */
+      W[ix] += W[ix] + W2[ix];
 
-  Release the storage used by an mp_int, and void its fields so that
-  if someone calls mp_clear() again for the same int later, we won't
-  get tollchocked.
- */
+      W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+      *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+    }
+    *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
 
-void   mp_clear(mp_int *mp)
-{
-  if(mp == NULL)
-    return;
+    /* clear high: zero any digits b held beyond the new length */
+    for (; ix < olduse; ix++) {
+      *tmpb++ = 0;
+    }
 
-  if(DIGITS(mp) != NULL) {
-#if MP_CRYPTO
-    s_mp_setz(DIGITS(mp), ALLOC(mp));
-#endif
-    s_mp_free(DIGITS(mp));
-    DIGITS(mp) = NULL;
   }
 
-  USED(mp) = 0;
-  ALLOC(mp) = 0;
-
-} /* end mp_clear() */
+  /* fix the sign (since we no longer make a fresh temp) */
+  b->sign = MP_ZPOS;
 
-/* }}} */
+  mp_clamp (b);
+  return MP_OKAY;
+}
 
-/* {{{ mp_clear_array(mp[], count) */
+/* End: bn_fast_s_mp_sqr.c */
+
+/* Start: bn_mp_2expt.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-void   mp_clear_array(mp_int mp[], int count)
+/* computes a = 2^b
+ *
+ * Simple algorithm which zeroes the int, grows it then just sets one bit
+ * as required.
+ *
+ * Returns MP_VAL for a negative exponent, the error code reported by
+ * mp_grow() if the digit array cannot be enlarged, and MP_OKAY otherwise.
+ */
+int
+mp_2expt (mp_int * a, int b)
 {
-//  ARGCHK(mp != NULL && count > 0, MP_BADARG);
-
-  while(--count >= 0) 
-    mp_clear(&mp[count]);
-
-} /* end mp_clear_array() */
+  int     res;
 
-/* }}} */
+  /* a negative exponent has no integer result and would produce a
+   * negative shift count below
+   */
+  if (b < 0) {
+    return MP_VAL;
+  }
+
+  mp_zero (a);
+  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
+    return res;
+  }
+  a->used = b / DIGIT_BIT + 1;
+
+  /* do the shift in mp_digit width: a plain int "1" overflows as soon as
+   * b % DIGIT_BIT reaches the width of int
+   */
+  a->dp[b / DIGIT_BIT] = ((mp_digit) 1) << (b % DIGIT_BIT);
 
-/* {{{ mp_zero(mp) */
+  return MP_OKAY;
+}
 
-/*
-  mp_zero(mp) 
+/* End: bn_mp_2expt.c */
+
+/* Start: bn_mp_abs.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  Set mp to zero.  Does not change the allocated size of the structure,
-  and therefore cannot fail (except on a bad argument, which we ignore)
+/* b = |a| 
+ *
+ * Simple function copies the input and fixes the sign to positive
+ *
+ * Copies a into b with mp_copy() and then sets b->sign to MP_ZPOS.
+ * Returns MP_OKAY, or the error code reported by mp_copy() (in which
+ * case the sign of b is not touched).
  */
-void   mp_zero(mp_int *mp)
+int
+mp_abs (mp_int * a, mp_int * b)
 {
-  if(mp == NULL)
-    return;
-
-  s_mp_setz(DIGITS(mp), ALLOC(mp));
-  USED(mp) = 1;
-  SIGN(mp) = MP_ZPOS;
+  int     res;
+  if ((res = mp_copy (a, b)) != MP_OKAY) {
+    return res;
+  }
+  b->sign = MP_ZPOS;
+  return MP_OKAY;
+}
 
-} /* end mp_zero() */
+/* End: bn_mp_abs.c */
+
+/* Start: bn_mp_add.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* }}} */
+/* high level addition (handles signs)
+ *
+ * Computes c = a + b.  The magnitude work is delegated to s_mp_add() or
+ * s_mp_sub(); this routine only picks which one to call, in which operand
+ * order, and which sign the result gets.  Returns whatever the chosen
+ * low level routine returned.
+ */
+int
+mp_add (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     sign_a, sign_b, out_sign, status, swap;
+
+  /* latch both signs before c (which may alias an input) is written */
+  sign_a = a->sign;
+  sign_b = b->sign;
+
+  if (sign_a == MP_ZPOS && sign_b == MP_ZPOS) {
+    /* (+a) + (+b): add the magnitudes, result is positive */
+    status = s_mp_add (a, b, c);
+    out_sign = MP_ZPOS;
+  } else if (sign_a == MP_ZPOS && sign_b == MP_NEG) {
+    /* (+a) + (-b) == a - b; when |b| > |a| compute -(b - a) instead */
+    swap = (mp_cmp_mag (a, b) == MP_LT);
+    status = swap ? s_mp_sub (b, a, c) : s_mp_sub (a, b, c);
+    out_sign = swap ? MP_NEG : MP_ZPOS;
+  } else if (sign_a == MP_NEG && sign_b == MP_ZPOS) {
+    /* (-a) + (+b) == b - a; when |a| > |b| compute -(a - b) instead */
+    swap = (mp_cmp_mag (a, b) == MP_GT);
+    status = swap ? s_mp_sub (a, b, c) : s_mp_sub (b, a, c);
+    out_sign = swap ? MP_NEG : MP_ZPOS;
+  } else {
+    /* (-a) + (-b) == -(a + b) */
+    status = s_mp_add (a, b, c);
+    out_sign = MP_NEG;
+  }
+
+  /* the sign is stored after the low level call, whatever it returned */
+  c->sign = out_sign;
+  return status;
+}
 
-/* {{{ mp_set(mp, d) */
+/* End: bn_mp_add.c */
+
+/* Start: bn_mp_addmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-void   mp_set(mp_int *mp, mp_digit d)
+/* d = a + b (mod c)
+ *
+ * Forms the sum a + b in a temporary mp_int and then reduces it with
+ * mp_mod(), storing the result in d.
+ *
+ * Returns the error code from mp_init() or mp_add() if either fails,
+ * otherwise whatever mp_mod() returns.  The temporary is released with
+ * mp_clear() on every path after it has been initialised.
+ */
+int
+mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
 {
-  if(mp == NULL)
-    return;
-
-  mp_zero(mp);
-  DIGIT(mp, 0) = d;
+  int     res;
+  mp_int  t;
 
-} /* end mp_set() */
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
 
-/* }}} */
+  if ((res = mp_add (a, b, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
+  res = mp_mod (&t, c, d);
+  mp_clear (&t);
+  return res;
+}
 
-/* {{{ mp_set_int(mp, z) */
+/* End: bn_mp_addmod.c */
+
+/* Start: bn_mp_add_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-mp_err mp_set_int(mp_int *mp, long z)
+/* single digit addition
+ *
+ * Computes c = a + b for a single digit b.  The digit is loaded into a
+ * temporary mp_int with mp_set() and the work is handed to mp_add(), so
+ * sign handling is whatever mp_add() does.
+ *
+ * Returns the error code from mp_init() if the temporary cannot be set
+ * up, otherwise the result of mp_add().  The temporary is always cleared.
+ */
+int
+mp_add_d (mp_int * a, mp_digit b, mp_int * c)
 {
-  int            ix;
-  unsigned long  v = abs(z);
-  mp_err         res;
+  mp_int  t;
+  int     res;
 
-  ARGCHK(mp != NULL, MP_BADARG);
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
+  mp_set (&t, b);
+  res = mp_add (a, &t, c);
 
-  mp_zero(mp);
-  if(z == 0)
-    return MP_OKAY;  /* shortcut for zero */
+  mp_clear (&t);
+  return res;
+}
 
-  for(ix = sizeof(long) - 1; ix >= 0; ix--) {
+/* End: bn_mp_add_d.c */
+
+/* Start: bn_mp_and.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* --- bug in MSVC [first release] */
-  if (ix == -1) break;
-/* --- end of fix */
+/* AND two ints together
+ *
+ * Computes c = a AND b digit by digit.  The operand with more used digits
+ * (b when they are equal) is copied into the temporary t with
+ * mp_init_copy(); x points at the other operand and px is its digit count.
+ * The low px digits of t are ANDed with x, the remaining digits of t are
+ * zeroed, t is clamped and then exchanged into c.
+ *
+ * Returns MP_OKAY, or the error code reported by mp_init_copy().
+ */
+int
+mp_and (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     res, ix, px;
+  mp_int  t, *x;
 
-    if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
+  if (a->used > b->used) {
+    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
       return res;
-
-    res = s_mp_add_d(mp, 
-		     (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
-    if(res != MP_OKAY)
+    }
+    px = b->used;
+    x = b;
+  } else {
+    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
       return res;
+    }
+    px = a->used;
+    x = a;
+  }
+
+  for (ix = 0; ix < px; ix++) {
+    t.dp[ix] &= x->dp[ix];
   }
 
-  if(z < 0)
-    SIGN(mp) = MP_NEG;
+  /* zero digits above the last digit of the shorter mp_int */
+  for (; ix < t.used; ix++) {
+    t.dp[ix] = 0;
+  }
 
+  mp_clamp (&t);
+  mp_exch (c, &t);
+  mp_clear (&t);
   return MP_OKAY;
+}
+
+/* End: bn_mp_and.c */
+
+/* Start: bn_mp_clamp.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* trim unused digits
+ *
+ * Lowers a->used until the most significant used digit is non-zero
+ * (or until no digits are left).  A value left with no digits at all
+ * is zero and is given a positive sign.
+ */
+void
+mp_clamp (mp_int * a)
+{
+  int     top;
+
+  /* walk down from the top used digit past any leading zeros */
+  for (top = a->used; top > 0; top--) {
+    if (a->dp[top - 1] != 0) {
+      break;
+    }
+  }
+  a->used = top;
+
+  /* nothing left: the value is zero, which is stored as positive */
+  if (top == 0) {
+    a->sign = MP_ZPOS;
+  }
+}
 
-} /* end mp_set_int() */
+/* End: bn_mp_clamp.c */
+
+/* Start: bn_mp_clear.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with 
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* }}} */
+/* clear one (frees)
+ *
+ * Releases the digit array of a.  Does nothing when a->dp is already NULL,
+ * so calling it again on a cleared mp_int is harmless.  Only the first
+ * a->used digits are wiped before the buffer is freed, and a->sign is left
+ * untouched.
+ */
+void
+mp_clear (mp_int * a)
+{
+  if (a->dp != NULL) {
 
-/*------------------------------------------------------------------------*/
-/* {{{ Digit arithmetic */
+    /* first zero the digits (the a->used digits in use, not all a->alloc) */
+    memset (a->dp, 0, sizeof (mp_digit) * a->used);
 
-/* {{{ mp_add_d(a, d, b) */
+    /* free ram */
+    free (a->dp);
 
-/*
-  mp_add_d(a, d, b)
+    /* reset members to make debugging easier */
+    a->dp = NULL;
+    a->alloc = a->used = 0;
+  }
+}
 
-  Compute the sum b = a + d, for a single digit d.  Respects the sign of
-  its primary addend (single digits are unsigned anyway).
+/* End: bn_mp_clear.c */
+
+/* Start: bn_mp_cmp.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b)
+/* compare two ints (signed)
+ *
+ * Returns MP_LT, MP_EQ or MP_GT as a is less than, equal to or greater
+ * than b, taking the signs of both operands into account.
+ */
+int
+mp_cmp (mp_int * a, mp_int * b)
 {
-  mp_err   res = MP_OKAY;
+  /* compare based on sign */
+  if (a->sign == MP_NEG && b->sign == MP_ZPOS) {
+    return MP_LT;
+  } else if (a->sign == MP_ZPOS && b->sign == MP_NEG) {
+    return MP_GT;
+  }
+
+  /* same sign: for two negative values the larger magnitude is the
+   * smaller number, so compare the magnitudes with the operands swapped
+   */
+  if (a->sign == MP_NEG) {
+    return mp_cmp_mag (b, a);
+  }
+  return mp_cmp_mag (a, b);
+}
 
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
+/* End: bn_mp_cmp.c */
+
+/* Start: bn_mp_cmp_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  if((res = mp_copy(a, b)) != MP_OKAY)
-    return res;
+/* compare a digit
+ *
+ * Compares the mp_int a against the single digit b and returns MP_LT,
+ * MP_EQ or MP_GT.  A negative a is always MP_LT; an a with more than one
+ * used digit is always MP_GT; otherwise a->dp[0] is compared with b.
+ *
+ * NOTE(review): a->dp[0] is read even when a->used == 0, so a zero value
+ * is presumably expected to keep dp[0] == 0 -- confirm.
+ */
+int
+mp_cmp_d (mp_int * a, mp_digit b)
+{
 
-  if(SIGN(b) == MP_ZPOS) {
-    res = s_mp_add_d(b, d);
-  } else if(s_mp_cmp_d(b, d) >= 0) {
-    res = s_mp_sub_d(b, d);
-  } else {
-    SIGN(b) = MP_ZPOS;
+  if (a->sign == MP_NEG) {
+    return MP_LT;
+  }
 
-    DIGIT(b, 0) = d - DIGIT(b, 0);
+  if (a->used > 1) {
+    return MP_GT;
   }
 
-  return res;
+  if (a->dp[0] > b) {
+    return MP_GT;
+  } else if (a->dp[0] < b) {
+    return MP_LT;
+  } else {
+    return MP_EQ;
+  }
+}
 
-} /* end mp_add_d() */
+/* End: bn_mp_cmp_d.c */
+
+/* Start: bn_mp_cmp_mag.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* }}} */
+/* compare magnitude of two ints (unsigned)
+ *
+ * The signs are ignored.  Returns MP_GT, MP_LT or MP_EQ as |a| is greater
+ * than, less than or equal to |b|.  The comparison by digit count assumes
+ * neither operand has leading zero digits inside its used range.
+ */
+int
+mp_cmp_mag (mp_int * a, mp_int * b)
+{
+  int     n;
 
-/* {{{ mp_sub_d(a, d, b) */
+  /* compare based on # of used digits */
+  if (a->used > b->used) {
+    return MP_GT;
+  } else if (a->used < b->used) {
+    return MP_LT;
+  }
 
-/*
-  mp_sub_d(a, d, b)
+  /* same length: compare digit by digit from the most significant down */
+  for (n = a->used - 1; n >= 0; n--) {
+    if (a->dp[n] > b->dp[n]) {
+      return MP_GT;
+    } else if (a->dp[n] < b->dp[n]) {
+      return MP_LT;
+    }
+  }
+  return MP_EQ;
+}
 
-  Compute the difference b = a - d, for a single digit d.  Respects the
-  sign of its subtrahend (single digits are unsigned anyway).
+/* End: bn_mp_cmp_mag.c */
+
+/* Start: bn_mp_copy.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b)
+/* copy, b = a
+ *
+ * Copies the digit count, sign and digits of a into b.  Nothing is done
+ * when a and b are the same object or share the same digit buffer.
+ *
+ * Returns MP_OKAY, or the error code reported by mp_grow() when b cannot
+ * be enlarged to hold a->used digits.
+ */
+int
+mp_copy (mp_int * a, mp_int * b)
 {
-  mp_err   res;
+  int     res, n;
 
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
+  /* if dst == src (same struct or same digit buffer) do nothing */
+  if (a == b || a->dp == b->dp) {
+    return MP_OKAY;
+  }
 
-  if((res = mp_copy(a, b)) != MP_OKAY)
+  /* grow dest so it can hold a->used digits */
+  if ((res = mp_grow (b, a->used)) != MP_OKAY) {
     return res;
+  }
 
-  if(SIGN(b) == MP_NEG) {
-    if((res = s_mp_add_d(b, d)) != MP_OKAY)
-      return res;
-
-  } else if(s_mp_cmp_d(b, d) >= 0) {
-    if((res = s_mp_sub_d(b, d)) != MP_OKAY)
-      return res;
+  /* copy the digit count and sign over */
+  b->used = a->used;
+  b->sign = a->sign;
 
-  } else {
-    mp_neg(b, b);
+  {
+    register mp_digit *tmpa, *tmpb;
 
-    DIGIT(b, 0) = d - DIGIT(b, 0);
-    SIGN(b) = MP_NEG;
-  }
+    tmpa = a->dp;
+    tmpb = b->dp;
 
-  if(s_mp_cmp_d(b, 0) == 0)
-    SIGN(b) = MP_ZPOS;
+    /* copy all the digits */
+    for (n = 0; n < a->used; n++) {
+      *tmpb++ = *tmpa++;
+    }
 
+    /* clear high digits: everything in b from a->used up to b->alloc */
+    for (; n < b->alloc; n++) {
+      *tmpb++ = 0;
+    }
+  }
   return MP_OKAY;
+}
 
-} /* end mp_sub_d() */
+/* End: bn_mp_copy.c */
+
+/* Start: bn_mp_count_bits.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* }}} */
+/* returns the number of bits in an int
+ *
+ * Returns 0 when a has no used digits.  Otherwise every digit below the
+ * top used one counts as DIGIT_BIT bits, and the top used digit adds one
+ * bit for each right shift needed to bring it down to zero.
+ */
+int
+mp_count_bits (mp_int * a)
+{
+  int     r;
+  mp_digit q;
 
-/* {{{ mp_mul_d(a, d, b) */
+  if (a->used == 0) {
+    return 0;
+  }
 
-/*
-  mp_mul_d(a, d, b)
+  r = (a->used - 1) * DIGIT_BIT;
+  q = a->dp[a->used - 1];
+  while (q > ((mp_digit) 0)) {
+    ++r;
+    q >>= ((mp_digit) 1);
+  }
+  return r;
+}
 
-  Compute the product b = a * d, for a single digit d.  Respects the sign
-  of its multiplicand (single digits are unsigned anyway)
+/* End: bn_mp_count_bits.c */
+
+/* Start: bn_mp_div.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-
-mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b)
+#include <tommath.h>
+
+/* integer signed division. c*b + d == a [e.g. a/b, c=quotient, d=remainder]
+ * HAC pp.598 Algorithm 14.20
+ *
+ * Note that the description in HAC is horribly incomplete.  For example,
+ * it doesn't consider the case where digits are removed from 'x' in the inner
+ * loop.  It also doesn't consider the case that y has fewer than three digits, etc..
+ *
+ * The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases.
+*/
+int
+mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
 {
-  mp_err  res;
+  mp_int  q, x, y, t1, t2;
+  int     res, n, t, i, norm, neg;
 
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
 
-  if(d == 0) {
-    mp_zero(b);
-    return MP_OKAY;
+  /* is divisor zero ? */
+  if (mp_iszero (b) == 1) {
+    return MP_VAL;
   }
 
-  if((res = mp_copy(a, b)) != MP_OKAY)
+  /* if a < b then q=0, r = a */
+  if (mp_cmp_mag (a, b) == MP_LT) {
+    if (d != NULL) {
+      res = mp_copy (a, d);
+    } else {
+      res = MP_OKAY;
+    }
+    if (c != NULL) {
+      mp_zero (c);
+    }
     return res;
+  }
 
-  res = s_mp_mul_d(b, d);
+  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
+    return res;
+  }
+  q.used = a->used + 2;
 
-  return res;
-
-} /* end mp_mul_d() */
-
-/* }}} */
-
-/* {{{ mp_mul_2(a, c) */
-
-mp_err mp_mul_2(mp_int *a, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, c)) != MP_OKAY)
-    return res;
-
-  return s_mp_mul_2(c);
-
-} /* end mp_mul_2() */
-
-/* }}} */
-
-/* {{{ mp_div_d(a, d, q, r) */
-
-/*
-  mp_div_d(a, d, q, r)
-
-  Compute the quotient q = a / d and remainder r = a mod d, for a
-  single digit d.  Respects the sign of its divisor (single digits are
-  unsigned anyway).
- */
-
-mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
-{
-  mp_err   res;
-  mp_digit rem;
-  int      pow;
-
-  ARGCHK(a != NULL, MP_BADARG);
-
-  if(d == 0)
-    return MP_RANGE;
-
-  /* Shortcut for powers of two ... */
-  if((pow = s_mp_ispow2d(d)) >= 0) {
-    mp_digit  mask;
-
-    mask = (1 << pow) - 1;
-    rem = DIGIT(a, 0) & mask;
-
-    if(q) {
-      mp_copy(a, q);
-      s_mp_div_2d(q, (mp_digit)pow);
-    }
-
-    if(r)
-      *r = rem;
-
-    return MP_OKAY;
-  }
-
-  /*
-    If the quotient is actually going to be returned, we'll try to
-    avoid hitting the memory allocator by copying the dividend into it
-    and doing the division there.  This can't be any _worse_ than
-    always copying, and will sometimes be better (since it won't make
-    another copy)
-
-    If it's not going to be returned, we need to allocate a temporary
-    to hold the quotient, which will just be discarded.
-   */
-  if(q) {
-    if((res = mp_copy(a, q)) != MP_OKAY)
-      return res;
-
-    res = s_mp_div_d(q, d, &rem);
-    if(s_mp_cmp_d(q, 0) == MP_EQ)
-      SIGN(q) = MP_ZPOS;
-
-  } else {
-    mp_int  qp;
-
-    if((res = mp_init_copy(&qp, a)) != MP_OKAY)
-      return res;
-
-    res = s_mp_div_d(&qp, d, &rem);
-    if(s_mp_cmp_d(&qp, 0) == 0)
-      SIGN(&qp) = MP_ZPOS;
-
-    mp_clear(&qp);
-  }
-
-  if(r)
-    *r = rem;
-
-  return res;
-
-} /* end mp_div_d() */
-
-/* }}} */
-
-/* {{{ mp_div_2(a, c) */
-
-/*
-  mp_div_2(a, c)
-
-  Compute c = a / 2, disregarding the remainder.
- */
-
-mp_err mp_div_2(mp_int *a, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, c)) != MP_OKAY)
-    return res;
-
-  s_mp_div_2(c);
-
-  return MP_OKAY;
-
-} /* end mp_div_2() */
-
-/* }}} */
-
-/* {{{ mp_expt_d(a, d, b) */
-
-mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c)
-{
-  mp_int   s, x;
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  DIGIT(&s, 0) = 1;
-
-  while(d != 0) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-	goto CLEANUP;
-    }
-
-    d >>= 1;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY)
-      goto CLEANUP;
-  }
-
-  s_mp_exch(&s, c);
-
-CLEANUP:
-  mp_clear(&x);
-X:
-  mp_clear(&s);
-
-  return res;
-
-} /* end mp_expt_d() */
-
-/* }}} */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Full arithmetic */
-
-/* {{{ mp_abs(a, b) */
-
-/*
-  mp_abs(a, b)
-
-  Compute b = |a|.  'a' and 'b' may be identical.
- */
-
-mp_err mp_abs(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, b)) != MP_OKAY)
-    return res;
-
-  SIGN(b) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_abs() */
-
-/* }}} */
-
-/* {{{ mp_neg(a, b) */
-
-/*
-  mp_neg(a, b)
-
-  Compute b = -a.  'a' and 'b' may be identical.
- */
-
-mp_err mp_neg(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, b)) != MP_OKAY)
-    return res;
-
-  if(s_mp_cmp_d(b, 0) == MP_EQ) 
-    SIGN(b) = MP_ZPOS;
-  else 
-    SIGN(b) = (SIGN(b) == MP_NEG) ? MP_ZPOS : MP_NEG;
-
-  return MP_OKAY;
-
-} /* end mp_neg() */
-
-/* }}} */
-
-/* {{{ mp_add(a, b, c) */
-
-/*
-  mp_add(a, b, c)
-
-  Compute c = a + b.  All parameters may be identical.
- */
-
-mp_err mp_add(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err  res;
-  int     cmp;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
-
-    /* Commutativity of addition lets us do this in either order,
-       so we avoid having to use a temporary even if the result 
-       is supposed to replace the output
-     */
-    if(c == b) {
-      if((res = s_mp_add(c, a)) != MP_OKAY)
-	return res;
-    } else {
-      if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
-	return res;
-
-      if((res = s_mp_add(c, b)) != MP_OKAY) 
-	return res;
-    }
-
-  } else if((cmp = s_mp_cmp(a, b)) > 0) {  /* different sign: a > b   */
-
-    /* If the output is going to be clobbered, we will use a temporary
-       variable; otherwise, we'll do it without touching the memory 
-       allocator at all, if possible
-     */
-    if(c == b) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-
-      s_mp_exch(&tmp, c);
-      mp_clear(&tmp);
-
-    } else {
-
-      if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(c, b)) != MP_OKAY)
-	return res;
-
-    }
-
-  } else if(cmp == 0) {             /* different sign, a == b   */
-
-    mp_zero(c);
-    return MP_OKAY;
-
-  } else {                          /* different sign: a < b    */
-
-    /* See above... */
-    if(c == a) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-
-      s_mp_exch(&tmp, c);
-      mp_clear(&tmp);
-
-    } else {
-
-      if(c != b && (res = mp_copy(b, c)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(c, a)) != MP_OKAY)
-	return res;
-
-    }
-  }
-
-  if(USED(c) == 1 && DIGIT(c, 0) == 0)
-    SIGN(c) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_add() */
-
-/* }}} */
-
-/* {{{ mp_sub(a, b, c) */
-
-/*
-  mp_sub(a, b, c)
-
-  Compute c = a - b.  All parameters may be identical.
- */
-
-mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err  res;
-  int     cmp;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(a) != SIGN(b)) {
-    if(c == a) {
-      if((res = s_mp_add(c, b)) != MP_OKAY)
-	return res;
-    } else {
-      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY))
-	return res;
-      if((res = s_mp_add(c, a)) != MP_OKAY)
-	return res;
-      SIGN(c) = SIGN(a);
-    }
-
-  } else if((cmp = s_mp_cmp(a, b)) > 0) { /* Same sign, a > b */
-    if(c == b) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-      s_mp_exch(&tmp, c);
-      mp_clear(&tmp);
-
-    } else {
-      if(c != a && ((res = mp_copy(a, c)) != MP_OKAY))
-	return res;
-
-      if((res = s_mp_sub(c, b)) != MP_OKAY)
-	return res;
-    }
-
-  } else if(cmp == 0) {  /* Same sign, equal magnitude */
-    mp_zero(c);
-    return MP_OKAY;
-
-  } else {               /* Same sign, b > a */
-    if(c == a) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
-	return res;
-
-      if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-      s_mp_exch(&tmp, c);
-      mp_clear(&tmp);
-
-    } else {
-      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) 
-	return res;
-
-      if((res = s_mp_sub(c, a)) != MP_OKAY)
-	return res;
-    }
-
-    SIGN(c) = !SIGN(b);
-  }
-
-  if(USED(c) == 1 && DIGIT(c, 0) == 0)
-    SIGN(c) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_sub() */
-
-/* }}} */
-
-/* {{{ mp_mul(a, b, c) */
-
-/*
-  mp_mul(a, b, c)
-
-  Compute c = a * b.  All parameters may be identical.
- */
-
-mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err   res;
-  mp_sign  sgn;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  sgn = (SIGN(a) == SIGN(b)) ? MP_ZPOS : MP_NEG;
-
-  if(c == b) {
-    if((res = s_mp_mul(c, a)) != MP_OKAY)
-      return res;
-
-  } else {
-    if((res = mp_copy(a, c)) != MP_OKAY)
-      return res;
-
-    if((res = s_mp_mul(c, b)) != MP_OKAY)
-      return res;
-  }
-  
-  if(sgn == MP_ZPOS || s_mp_cmp_d(c, 0) == MP_EQ)
-    SIGN(c) = MP_ZPOS;
-  else
-    SIGN(c) = sgn;
-  
-  return MP_OKAY;
-
-} /* end mp_mul() */
-
-/* }}} */
-
-/* {{{ mp_mul_2d(a, d, c) */
-
-/*
-  mp_mul_2d(a, d, c)
-
-  Compute c = a * 2^d.  a may be the same as c.
- */
-
-mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, c)) != MP_OKAY)
-    return res;
-
-  if(d == 0)
-    return MP_OKAY;
-
-  return s_mp_mul_2d(c, d);
-
-} /* end mp_mul() */
-
-/* }}} */
-
-/* {{{ mp_sqr(a, b) */
-
-#if MP_SQUARE
-mp_err mp_sqr(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, b)) != MP_OKAY)
-    return res;
-
-  if((res = s_mp_sqr(b)) != MP_OKAY)
-    return res;
-
-  SIGN(b) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_sqr() */
-#endif
-
-/* }}} */
-
-/* {{{ mp_div(a, b, q, r) */
-
-/*
-  mp_div(a, b, q, r)
-
-  Compute q = a / b and r = a mod b.  Input parameters may be re-used
-  as output parameters.  If q or r is NULL, that portion of the
-  computation will be discarded (although it will still be computed)
-
-  Pay no attention to the hacker behind the curtain.
- */
-
-mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r)
-{
-  mp_err   res;
-  mp_int   qtmp, rtmp;
-  int      cmp;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) == MP_EQ)
-    return MP_RANGE;
-
-  /* If a <= b, we can compute the solution without division, and
-     avoid any memory allocation
-   */
-  if((cmp = s_mp_cmp(a, b)) < 0) {
-    if(r) {
-      if((res = mp_copy(a, r)) != MP_OKAY)
-	return res;
-    }
-
-    if(q) 
-      mp_zero(q);
-
-    return MP_OKAY;
-
-  } else if(cmp == 0) {
-
-    /* Set quotient to 1, with appropriate sign */
-    if(q) {
-      int qneg = (SIGN(a) != SIGN(b));
-
-      mp_set(q, 1);
-      if(qneg)
-	SIGN(q) = MP_NEG;
-    }
-
-    if(r)
-      mp_zero(r);
-
-    return MP_OKAY;
-  }
-
-  /* If we get here, it means we actually have to do some division */
-
-  /* Set up some temporaries... */
-  if((res = mp_init_copy(&qtmp, a)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&rtmp, b)) != MP_OKAY)
-    goto CLEANUP;
-
-  if((res = s_mp_div(&qtmp, &rtmp)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* Compute the signs for the output  */
-  SIGN(&rtmp) = SIGN(a); /* Sr = Sa              */
-  if(SIGN(a) == SIGN(b))
-    SIGN(&qtmp) = MP_ZPOS;  /* Sq = MP_ZPOS if Sa = Sb */
-  else
-    SIGN(&qtmp) = MP_NEG;   /* Sq = MP_NEG if Sa != Sb */
-
-  if(s_mp_cmp_d(&qtmp, 0) == MP_EQ)
-    SIGN(&qtmp) = MP_ZPOS;
-  if(s_mp_cmp_d(&rtmp, 0) == MP_EQ)
-    SIGN(&rtmp) = MP_ZPOS;
-
-  /* Copy output, if it is needed      */
-  if(q) 
-    s_mp_exch(&qtmp, q);
-
-  if(r) 
-    s_mp_exch(&rtmp, r);
-
-CLEANUP:
-  mp_clear(&rtmp);
-  mp_clear(&qtmp);
-
-  return res;
-
-} /* end mp_div() */
-
-/* }}} */
-
-/* {{{ mp_div_2d(a, d, q, r) */
-
-mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL, MP_BADARG);
-
-  if(q) {
-    if((res = mp_copy(a, q)) != MP_OKAY)
-      return res;
-
-    s_mp_div_2d(q, d);
-  }
-
-  if(r) {
-    if((res = mp_copy(a, r)) != MP_OKAY)
-      return res;
-
-    s_mp_mod_2d(r, d);
-  }
-
-  return MP_OKAY;
-
-} /* end mp_div_2d() */
-
-/* }}} */
-
-/* {{{ mp_expt(a, b, c) */
-
-/*
-  mp_expt(a, b, c)
-
-  Compute c = a ** b, that is, raise a to the b power.  Uses a
-  standard iterative square-and-multiply technique.
- */
-
-mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_int   s, x;
-  mp_err   res;
-  mp_digit d;
-  int      dig, bit;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) < 0)
-    return MP_RANGE;
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-
-  mp_set(&s, 1);
-
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  /* Loop over low-order digits in ascending order */
-  for(dig = 0; dig < (int)(USED(b) - 1); dig++) {
-    d = DIGIT(b, dig);
-
-    /* Loop over bits of each non-maximal digit */
-    for(bit = 0; bit < (int)DIGIT_BIT; bit++) {
-      if(d & 1) {
-	if((res = s_mp_mul(&s, &x)) != MP_OKAY) 
-	  goto CLEANUP;
-      }
-
-      d >>= 1;
-      
-      if((res = s_mp_sqr(&x)) != MP_OKAY)
-	goto CLEANUP;
-    }
-  }
-
-  /* Consider now the last digit... */
-  d = DIGIT(b, dig);
-
-  while(d) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-	goto CLEANUP;
-    }
-
-    d >>= 1;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY)
-      goto CLEANUP;
-  }
-  
-  if(mp_iseven(b))
-    SIGN(&s) = SIGN(a);
-
-  res = mp_copy(&s, c);
-
-CLEANUP:
-  mp_clear(&x);
-X:
-  mp_clear(&s);
-
-  return res;
-
-} /* end mp_expt() */
-
-/* }}} */
-
-/* {{{ mp_2expt(a, k) */
-
-/* Compute a = 2^k */
-
-mp_err mp_2expt(mp_int *a, mp_digit k)
-{
-  ARGCHK(a != NULL, MP_BADARG);
-
-  return s_mp_2expt(a, k);
-
-} /* end mp_2expt() */
-
-/* }}} */
-
-/* {{{ mp_mod(a, m, c) */
-
-/*
-  mp_mod(a, m, c)
-
-  Compute c = a (mod m).  Result will always be 0 <= c < m.
- */
-
-mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-  int     mag;
-
-  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(m) == MP_NEG)
-    return MP_RANGE;
-
-  /*
-     If |a| > m, we need to divide to get the remainder and take the
-     absolute value.  
-
-     If |a| < m, we don't need to do any division, just copy and adjust
-     the sign (if a is negative).
-
-     If |a| == m, we can simply set the result to zero.
-
-     This order is intended to minimize the average path length of the
-     comparison chain on common workloads -- the most frequent cases are
-     that |a| != m, so we do those first.
-   */
-  if((mag = s_mp_cmp(a, m)) > 0) {
-    if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
-      return res;
-    
-    if(SIGN(c) == MP_NEG) {
-      if((res = mp_add(c, m, c)) != MP_OKAY)
-	return res;
-    }
-
-  } else if(mag < 0) {
-    if((res = mp_copy(a, c)) != MP_OKAY)
-      return res;
-
-    if(mp_cmp_z(a) < 0) {
-      if((res = mp_add(c, m, c)) != MP_OKAY)
-	return res;
-
-    }
-    
-  } else {
-    mp_zero(c);
-
-  }
-
-  return MP_OKAY;
-
-} /* end mp_mod() */
-
-/* }}} */
-
-/* {{{ mp_mod_d(a, d, c) */
-
-/*
-  mp_mod_d(a, d, c)
-
-  Compute c = a (mod d).  Result will always be 0 <= c < d
- */
-mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c)
-{
-  mp_err   res;
-  mp_digit rem;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if(s_mp_cmp_d(a, d) > 0) {
-    if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
-      return res;
-
-  } else {
-    if(SIGN(a) == MP_NEG)
-      rem = d - DIGIT(a, 0);
-    else
-      rem = DIGIT(a, 0);
-  }
-
-  if(c)
-    *c = rem;
-
-  return MP_OKAY;
-
-} /* end mp_mod_d() */
-
-/* }}} */
-
-/* {{{ mp_sqrt(a, b) */
-
-/*
-  mp_sqrt(a, b)
-
-  Compute the integer square root of a, and store the result in b.
-  Uses an integer-arithmetic version of Newton's iterative linear
-  approximation technique to determine this value; the result has the
-  following two properties:
-
-     b^2 <= a
-     (b+1)^2 >= a
-
-  It is a range error to pass a negative value.
- */
-mp_err mp_sqrt(mp_int *a, mp_int *b)
-{
-  mp_int   x, t;
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  /* Cannot take square root of a negative value */
-  if(SIGN(a) == MP_NEG)
-    return MP_RANGE;
-
-  /* Special cases for zero and one, trivial     */
-  if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ) 
-    return mp_copy(a, b);
-    
-  /* Initialize the temporaries we'll use below  */
-  if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
-    return res;
-
-  /* Compute an initial guess for the iteration as a itself */
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  for(;;) {
-    /* t = (x * x) - a */
-    mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
-    if((res = mp_sqr(&t, &t)) != MP_OKAY ||
-       (res = mp_sub(&t, a, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-    /* t = t / 2x       */
-    s_mp_mul_2(&x);
-    if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
-      goto CLEANUP;
-    s_mp_div_2(&x);
-
-    /* Terminate the loop, if the quotient is zero */
-    if(mp_cmp_z(&t) == MP_EQ)
-      break;
-
-    /* x = x - t       */
-    if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
-      goto CLEANUP;
-
-  }
-
-  /* Copy result to output parameter */
-  mp_sub_d(&x, 1, &x);
-  s_mp_exch(&x, b);
-
- CLEANUP:
-  mp_clear(&x);
- X:
-  mp_clear(&t); 
-
-  return res;
-
-} /* end mp_sqrt() */
-
-/* }}} */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Modular arithmetic */
-
-#if MP_MODARITH
-/* {{{ mp_addmod(a, b, m, c) */
-
-/*
-  mp_addmod(a, b, m, c)
-
-  Compute c = (a + b) mod m
- */
-
-mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_add(a, b, c)) != MP_OKAY)
-    return res;
-  if((res = mp_mod(c, m, c)) != MP_OKAY)
-    return res;
-
-  return MP_OKAY;
-
-}
-
-/* }}} */
-
-/* {{{ mp_submod(a, b, m, c) */
-
-/*
-  mp_submod(a, b, m, c)
-
-  Compute c = (a - b) mod m
- */
-
-mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_sub(a, b, c)) != MP_OKAY)
-    return res;
-  if((res = mp_mod(c, m, c)) != MP_OKAY)
-    return res;
-
-  return MP_OKAY;
-
-}
-
-/* }}} */
-
-/* {{{ mp_mulmod(a, b, m, c) */
-
-/*
-  mp_mulmod(a, b, m, c)
-
-  Compute c = (a * b) mod m
- */
-
-mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
+  if ((res = mp_init (&t1)) != MP_OKAY) {
+    goto __Q;
+  }
 
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+  if ((res = mp_init (&t2)) != MP_OKAY) {
+    goto __T1;
+  }
 
-  if((res = mp_mul(a, b, c)) != MP_OKAY)
-    return res;
-  if((res = mp_mod(c, m, c)) != MP_OKAY)
-    return res;
+  if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
+    goto __T2;
+  }
 
-  return MP_OKAY;
+  if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
+    goto __X;
+  }
 
-}
+  /* fix the sign */
+  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
+  x.sign = y.sign = MP_ZPOS;
+
+  /* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */
+  norm = 0;
+  while ((y.dp[y.used - 1] & (((mp_digit) 1) << (DIGIT_BIT - 1))) ==
+	 ((mp_digit) 0)) {
+    ++norm;
+    if ((res = mp_mul_2 (&x, &x)) != MP_OKAY) {
+      goto __Y;
+    }
+    if ((res = mp_mul_2 (&y, &y)) != MP_OKAY) {
+      goto __Y;
+    }
+  }
 
-/* }}} */
+  /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
+  n = x.used - 1;
+  t = y.used - 1;
 
-/* {{{ mp_sqrmod(a, m, c) */
+  /* step 2. while (x >= y*b^n-t) do { q[n-t] += 1; x -= y*b^{n-t} } */
+  if ((res = mp_lshd (&y, n - t)) != MP_OKAY) {	/* y = y*b^{n-t} */
+    goto __Y;
+  }
 
-#if MP_SQUARE
-mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_err  res;
+  while (mp_cmp (&x, &y) != MP_LT) {
+    ++(q.dp[n - t]);
+    if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
+      goto __Y;
+    }
+  }
 
-  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+  /* reset y by shifting it back down */
+  mp_rshd (&y, n - t);
 
-  if((res = mp_sqr(a, c)) != MP_OKAY)
-    return res;
-  if((res = mp_mod(c, m, c)) != MP_OKAY)
-    return res;
+  /* step 3. for i from n down to (t + 1) */
+  for (i = n; i >= (t + 1); i--) {
+    if (i > x.alloc)
+      continue;
 
-  return MP_OKAY;
+    /* step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
+    if (x.dp[i] == y.dp[t]) {
+      q.dp[i - t - 1] = ((1UL << DIGIT_BIT) - 1UL);
+    } else {
+      mp_word tmp;
+      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
+      tmp |= ((mp_word) x.dp[i - 1]);
+      tmp /= ((mp_word) y.dp[t]);
+      if (tmp > (mp_word) MP_MASK)
+	tmp = MP_MASK;
+      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
+    }
 
-} /* end mp_sqrmod() */
-#endif
+    /* step 3.2 while (q{i-t-1} * (yt * b + y{t-1})) > xi * b^2 + xi-1 * b + xi-2 do q{i-t-1} -= 1; */
+    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
+    do {
+      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
+
+      /* find left hand */
+      mp_zero (&t1);
+      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
+      t1.dp[1] = y.dp[t];
+      t1.used = 2;
+      if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
+	goto __Y;
+      }
 
-/* }}} */
+      /* find right hand */
+      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
+      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
+      t2.dp[2] = x.dp[i];
+      t2.used = 3;
+    } while (mp_cmp (&t1, &t2) == MP_GT);
 
-/* shrinks the memory required to store a mp_int if possible */
-mp_err mp_shrink(mp_int *a)
-{
-   if (a->used != a->alloc) {
-      if ((a->dp = XREALLOC(a->dp, a->used * sizeof(mp_digit))) == NULL) {
-         return MP_MEM;
-      } else {
-         a->alloc = a->used;
-         return MP_OKAY;
-      }
-   } else {
-      return MP_OKAY;
-   }
-}      
+    /* step 3.3 x = x - q{i-t-1} * y * b^{i-t-1} */
+    if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
+      goto __Y;
+    }
 
-/* {{{ mp_exptmod(a, b, m, c) */
+    if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
+      goto __Y;
+    }
 
-#ifdef MPI_FASTEXPT
+    if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
+      goto __Y;
+    }
 
-/* computes y == g^x mod p */
-mp_err mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
-{
-   mp_int *M, tx, mu, res;
-   int QQQ, QQ, Q, x, *vals, err;
-
-   /* determine the value of Q */
-   x = (USED(X) - 1) * DIGIT_BIT;
-   Q = DIGIT(X, USED(X)-1);
-   while (Q) { 
-      ++x;
-      Q >>= 1;
-   }
-        if (x <= 8)    { Q = 2; }
-   else if (x <= 64)   { Q = 3; }
-   else if (x <= 256)  { Q = 4; }
-   else if (x <= 950)  { Q = 5; }
-   else if (x <= 2755) { Q = 6; }
-   else                { Q = 7; }
-   
-#ifdef MPI_FASTEXPT_LOWMEM
-   if (Q > 5) {
-      Q = 5;
-   }
-#endif
-
-   /* alloc room for table */
-   vals = XCALLOC(sizeof(int), USED(X)*((DIGIT_BIT/Q)+((DIGIT_BIT%Q)?1:0)));
-   if (vals == NULL) { err = MP_MEM; goto _ERR; }
-
-   M    = XCALLOC(sizeof(mp_int), 1<<Q);
-   if (M == NULL)    { err = MP_MEM; goto _VALS; }
-
-   /* init M table */
-   for (x = 0; x < (1<<Q); x++) {
-       if (mp_init(&M[x]) != MP_OKAY) {
-          for (Q = 0; Q < x; Q++) {
-              mp_clear(&M[x]);
-          }
-          err = MP_MEM;
-          goto __M;
-       }
-   }
-
-  /* init the barett reduction */
-  /* mu = b^2k / m */
-  if ((err = mp_init(&mu)) != MP_OKAY) {
-     goto _M;
-  }
-
-  if ((err = mp_init(&res)) != MP_OKAY) {
-     goto _MU;
-  }
-
-  mp_set(&mu, 1); 
-  s_mp_lshd(&mu, 2 * USED(P));
-  if((err = mp_div(&mu, P, &mu, NULL)) != MP_OKAY){
-    goto _RES;
-  }
-
-   /* now init the M array with powers of the base */
-   mp_set(&M[0], 1);
-   if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { goto _RES; }
-   
-   /* shrink first two */
-   for (x = 0; x < 2; x++) {
-      if ((err = mp_shrink(&M[x])) != MP_OKAY) { goto _RES; }
-   }
-   
-   for (x = 2; x < (1<<Q); x++) {
-       if (USED(&M[x]) == 1 && DIGIT(&M[x], 0) == 0) {
-          if ((err = mp_mul(&M[x-1], &M[1], &M[x])) != MP_OKAY)      { goto _RES; }
-          if ((err = s_mp_reduce(&M[x], P, &mu)) != MP_OKAY)         { goto _RES; }
-          if ((err = mp_shrink(&M[x])) != MP_OKAY)                   { goto _RES; }
-          
-          QQQ = x;
-          QQ  = x * 2;
-          while (QQ < (1<<Q)) {
-              if ((err = mp_sqr(&M[QQQ], &M[QQ])) != MP_OKAY)        { goto _RES; }
-              if ((err = s_mp_reduce(&M[QQ], P, &mu)) != MP_OKAY)    { goto _RES; }
-              if ((err = mp_shrink(&M[QQ])) != MP_OKAY)              { goto _RES; }
-              QQQ = QQ;
-              QQ  *= 2;
-          }
-       }
-   }
-   
-   /* now grab the bits */
-   if ((err = mp_init_copy(&tx, X)) != MP_OKAY) {
-      goto _RES;
-   }
-
-   x = 0;
-   while (mp_cmp_d(&tx, 0)) {
-       vals[x++] = DIGIT(&tx, 0) & ((1<<Q)-1);
-       s_mp_div_2d(&tx, Q);
-   }
-
-   /* now set output equal to the first digit exponent */
-   if ((err = mp_copy(&M[vals[--x]], &res)) != MP_OKAY)        { goto _TX; }
-
-   while (--x >= 0) {
-      for (QQ = 0; QQ < Q; QQ++) {
-          if ((err = s_mp_sqr(&res)) != MP_OKAY)               { goto _TX; }
-          if ((err = s_mp_reduce(&res, P, &mu)) != MP_OKAY)    { goto _TX; }
+    /* step 3.4 if x < 0 then { x = x + y*b^{i-t-1}; q{i-t-1} -= 1; } */
+    if (x.sign == MP_NEG) {
+      if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
+	goto __Y;
       }
-      if (vals[x] != 0) {
-         if ((err = s_mp_mul(&res, &M[vals[x]])) != MP_OKAY)   { goto _TX; }
-         if ((err = s_mp_reduce(&res, P, &mu)) != MP_OKAY)     { goto _TX; }
+      if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
+	goto __Y;
       }
-   }
-   s_mp_exch(&res, Y);
-
-   /* free ram */
-_TX:
-   mp_clear(&tx);
-_RES:
-   mp_clear(&res);
-_MU:
-   mp_clear(&mu);
-_M:
-   for (x = 0; x < (1<<Q); x++) {
-       mp_clear(&M[x]);
-   }
-__M:
-   XFREE(M);
-_VALS:
-   XFREE(vals);
-_ERR:
-   return err;
-}
-
-#else 
-
-/*
-  mp_exptmod(a, b, m, c)
-
-  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
-  method with modular reductions at each step. (This is basically the
-  same code as mp_expt(), except for the addition of the reductions)
-  
-  The modular reductions are done using Barrett's algorithm (see
-  s_mp_reduce() below for details)
- */
-
-mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_int   s, x, mu;
-  mp_err   res;
-  mp_digit d, *db = DIGITS(b);
-  mp_size  ub = USED(b);
-  int      dig, bit;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
-    return MP_RANGE;
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-  if((res = mp_mod(&x, m, &x)) != MP_OKAY ||
-     (res = mp_init(&mu)) != MP_OKAY)
-    goto MU;
-
-  mp_set(&s, 1);
-
-  /* mu = b^2k / m */
-  s_mp_add_d(&mu, 1); 
-  s_mp_lshd(&mu, 2 * USED(m));
-  if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* Loop over digits of b in ascending order, except highest order */
-  for(dig = 0; dig < (int)(ub - 1); dig++) {
-    d = *db++;
-
-    /* Loop over the bits of the lower-order digits */
-    for(bit = 0; bit < (int)DIGIT_BIT; bit++) {
-      if(d & 1) {
-	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-	  goto CLEANUP;
-	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
-	  goto CLEANUP;
+      if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
+	goto __Y;
       }
 
-      d >>= 1;
-
-      if((res = s_mp_sqr(&x)) != MP_OKAY)
-	goto CLEANUP;
-      if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
-	goto CLEANUP;
+      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
     }
   }
 
-  /* Now do the last digit... */
-  d = *db;
-
-  while(d) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-	goto CLEANUP;
-      if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
-	goto CLEANUP;
-    }
-
-    d >>= 1;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY)
-      goto CLEANUP;
-    if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
-      goto CLEANUP;
+  /* now q is the quotient and x is the remainder [which we have to normalize] */
+  /* get sign before writing to c */
+  x.sign = a->sign;
+  if (c != NULL) {
+    mp_clamp (&q);
+    mp_exch (&q, c);
+    c->sign = neg;
   }
 
-  s_mp_exch(&s, c);
-
- CLEANUP:
-  mp_clear(&mu);
- MU:
-  mp_clear(&x);
- X:
-  mp_clear(&s);
-
-  return res;
-
-} /* end mp_exptmod() */
-
-#endif
-
-/* }}} */
-
-/* {{{ mp_exptmod_d(a, d, m, c) */
-
-mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c)
-{
-  mp_int   s, x;
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  mp_set(&s, 1);
-
-  while(d != 0) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
-	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
-	goto CLEANUP;
-    }
-
-    d /= 2;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY ||
-       (res = mp_mod(&x, m, &x)) != MP_OKAY)
-      goto CLEANUP;
+  if (d != NULL) {
+    mp_div_2d (&x, norm, &x, NULL);
+    mp_clamp (&x);
+    mp_exch (&x, d);
   }
 
-  s_mp_exch(&s, c);
-
-CLEANUP:
-  mp_clear(&x);
-X:
-  mp_clear(&s);
+  res = MP_OKAY;
 
+__Y:mp_clear (&y);
+__X:mp_clear (&x);
+__T2:mp_clear (&t2);
+__T1:mp_clear (&t1);
+__Q:mp_clear (&q);
   return res;
+}
 
-} /* end mp_exptmod_d() */
-
-/* }}} */
-#endif /* if MP_MODARITH */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Comparison functions */
-
-/* {{{ mp_cmp_z(a) */
-
-/*
-  mp_cmp_z(a)
-
-  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
- */
-
-int    mp_cmp_z(mp_int *a)
-{
-  if(SIGN(a) == MP_NEG)
-    return MP_LT;
-  else if(USED(a) == 1 && DIGIT(a, 0) == 0)
-    return MP_EQ;
-  else
-    return MP_GT;
-
-} /* end mp_cmp_z() */
-
-/* }}} */
-
-/* {{{ mp_cmp_d(a, d) */
-
-/*
-  mp_cmp_d(a, d)
-
-  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
+/* End: bn_mp_div.c */
+
+/* Start: bn_mp_div_2.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-int    mp_cmp_d(mp_int *a, mp_digit d)
-{
-  ARGCHK(a != NULL, MP_EQ);
-
-  if(SIGN(a) == MP_NEG)
-    return MP_LT;
-
-  return s_mp_cmp_d(a, d);
-
-} /* end mp_cmp_d() */
-
-/* }}} */
-
-/* {{{ mp_cmp(a, b) */
-
-int    mp_cmp(mp_int *a, mp_int *b)
+/* b = a/2
+ *
+ * Halves a by shifting its digits right one bit and stores the result
+ * in b.  The low bit of each digit is carried into the top bit
+ * (DIGIT_BIT - 1) of the next lower digit; the bit shifted out of the
+ * lowest digit is dropped.  Digits are read and written at the same
+ * index from the top down, so a and b may be the same mp_int.
+ *
+ * b takes the sign of a.
+ *
+ * Returns MP_OKAY, or the error from mp_grow() when b cannot be
+ * enlarged to a->used digits.
+ */
+int
+mp_div_2 (mp_int * a, mp_int * b)
 {
-  ARGCHK(a != NULL && b != NULL, MP_EQ);
-
-  if(SIGN(a) == SIGN(b)) {
-    int  mag;
+  int     x, res, oldused;
 
-    if((mag = s_mp_cmp(a, b)) == MP_EQ)
-      return MP_EQ;
-
-    if(SIGN(a) == MP_ZPOS)
-      return mag;
-    else
-      return -mag;
-
-  } else if(SIGN(a) == MP_ZPOS) {
-    return MP_GT;
-  } else {
-    return MP_LT;
+  /* make sure b has room for every digit of a */
+  if (b->alloc < a->used) {
+    if ((res = mp_grow (b, a->used)) != MP_OKAY) {
+      return res;
+    }
   }
 
-} /* end mp_cmp() */
-
-/* }}} */
-
-/* {{{ mp_cmp_mag(a, b) */
-
-/*
-  mp_cmp_mag(a, b)
-
-  Compares |a| <=> |b|, and returns an appropriate comparison result
- */
-
-int    mp_cmp_mag(mp_int *a, mp_int *b)
-{
-  ARGCHK(a != NULL && b != NULL, MP_EQ);
-
-  return s_mp_cmp(a, b);
-
-} /* end mp_cmp_mag() */
-
-/* }}} */
-
-/* {{{ mp_cmp_int(a, z) */
-
-/*
-  This just converts z to an mp_int, and uses the existing comparison
-  routines.  This is sort of inefficient, but it's not clear to me how
-  frequently this wil get used anyway.  For small positive constants,
-  you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
- */
-int    mp_cmp_int(mp_int *a, long z)
-{
-  mp_int  tmp;
-  int     out;
-
-  ARGCHK(a != NULL, MP_EQ);
-  
-  mp_init(&tmp); mp_set_int(&tmp, z);
-  out = mp_cmp(a, &tmp);
-  mp_clear(&tmp);
-
-  return out;
-
-} /* end mp_cmp_int() */
-
-/* }}} */
-
-/* {{{ mp_isodd(a) */
+  /* remember how many digits b held so stale ones can be cleared below */
+  oldused = b->used;
+  b->used = a->used;
+  {
+    register mp_digit r, rr, *tmpa, *tmpb;
+
+    /* start at the most significant digit of each operand */
+    tmpa = a->dp + b->used - 1;
+    tmpb = b->dp + b->used - 1;
+    r = 0;
+    for (x = b->used - 1; x >= 0; x--) {
+      /* save the bit about to be shifted out; it is the carry for the next lower digit */
+      rr = *tmpa & 1;
+      *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
+      r = rr;
+    }
 
-/*
-  mp_isodd(a)
+    /* zero the digits b used before that lie above the new length */
+    tmpb = b->dp + b->used;
+    for (x = b->used; x < oldused; x++) {
+      *tmpb++ = 0;
+    }
+  }
+
+  /* the quotient keeps the sign of a; previously b's old sign was left in
+   * place, which could give the wrong sign when b was a different mp_int
+   * from a.  Assigned before mp_clamp() so that any normalisation done
+   * there is not overwritten (mp_clamp() is defined outside this hunk). */
+  b->sign = a->sign;
+  mp_clamp (b);
+  return MP_OKAY;
+}
 
-  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
+/* End: bn_mp_div_2.c */
+
+/* Start: bn_mp_div_2d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-int    mp_isodd(mp_int *a)
-{
-  ARGCHK(a != NULL, 0);
-
-  return (DIGIT(a, 0) & 1);
-
-} /* end mp_isodd() */
-
-/* }}} */
-
-/* {{{ mp_iseven(a) */
-
-int    mp_iseven(mp_int *a)
-{
-  return !mp_isodd(a);
-
-} /* end mp_iseven() */
-
-/* }}} */
+#include <tommath.h>
 
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Number theoretic functions */
-
-#if MP_NUMTH
-/* {{{ mp_gcd(a, b, c) */
-
-/*
-  Like the old mp_gcd() function, except computes the GCD using the
-  binary algorithm due to Josef Stein in 1961 (via Knuth).
- */
-mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+/* shift right by a certain bit count (store quotient in c, remainder in d)
+ *
+ * a - value to shift
+ * b - number of bits to shift by; b <= 0 copies a into c and zeroes d
+ * c - receives a shifted right by b bits
+ * d - optional (may be NULL); receives the result of mp_mod_2d (a, b, ...)
+ *
+ * Returns MP_OKAY, or the error code of the first helper that fails.
+ */
+int
+mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
 {
-  mp_err   res;
-  mp_int   u, v, t;
-  mp_size  k = 0;
+  mp_digit D, r, rr;
+  int     x, res;
+  mp_int  t;
 
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 
-  if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
-      return MP_RANGE;
-  if(mp_cmp_z(a) == MP_EQ) {
-    return mp_copy(b, c);
-  } else if(mp_cmp_z(b) == MP_EQ) {
-    return mp_copy(a, c);
+  /* if the shift count is <= 0 then we do no work */
+  if (b <= 0) {
+    res = mp_copy (a, c);
+    if (d != NULL) {
+      mp_zero (d);
+    }
+    return res;
   }
 
-  if((res = mp_init(&t)) != MP_OKAY)
+  if ((res = mp_init (&t)) != MP_OKAY) {
     return res;
-  if((res = mp_init_copy(&u, a)) != MP_OKAY)
-    goto U;
-  if((res = mp_init_copy(&v, b)) != MP_OKAY)
-    goto V;
-
-  SIGN(&u) = MP_ZPOS;
-  SIGN(&v) = MP_ZPOS;
-
-  /* Divide out common factors of 2 until at least 1 of a, b is even */
-  while(mp_iseven(&u) && mp_iseven(&v)) {
-    s_mp_div_2(&u);
-    s_mp_div_2(&v);
-    ++k;
   }
 
-  /* Initialize t */
-  if(mp_isodd(&u)) {
-    if((res = mp_copy(&v, &t)) != MP_OKAY)
-      goto CLEANUP;
-    
-    /* t = -v */
-    if(SIGN(&v) == MP_ZPOS)
-      SIGN(&t) = MP_NEG;
-    else
-      SIGN(&t) = MP_ZPOS;
-    
-  } else {
-    if((res = mp_copy(&u, &t)) != MP_OKAY)
-      goto CLEANUP;
+  /* get the remainder (kept in t until the quotient has been computed) */
+  if (d != NULL) {
+    if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
+      mp_clear (&t);
+      return res;
+    }
+  }
 
+  /* copy */
+  if ((res = mp_copy (a, c)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
   }
 
-  for(;;) {
-    while(mp_iseven(&t)) {
-      s_mp_div_2(&t);
-    }
+  /* shift by as many digits in the bit count */
+  mp_rshd (c, b / DIGIT_BIT);
 
-    if(mp_cmp_z(&t) == MP_GT) {
-      if((res = mp_copy(&t, &u)) != MP_OKAY)
-	goto CLEANUP;
+  /* shift any bit count < DIGIT_BIT */
+  D = (mp_digit) (b % DIGIT_BIT);
+  if (D != 0) {
+    r = 0;
+    for (x = c->used - 1; x >= 0; x--) {
+      /* get the lower bits of this word in a temp; the mask is built in
+       * mp_digit width so the shift stays defined and untruncated even if
+       * mp_digit is wider than unsigned int */
+      rr = c->dp[x] & ((((mp_digit) 1) << D) - 1);
 
-    } else {
-      if((res = mp_copy(&t, &v)) != MP_OKAY)
-	goto CLEANUP;
+      /* shift the current word and mix in the carry bits from the previous word */
+      c->dp[x] = (c->dp[x] >> D) | (r << (DIGIT_BIT - D));
 
-      /* v = -t */
-      if(SIGN(&t) == MP_ZPOS)
-	SIGN(&v) = MP_NEG;
-      else
-	SIGN(&v) = MP_ZPOS;
+      /* set the carry to the carry bits of the current word found above */
+      r = rr;
     }
-
-    if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-    if(s_mp_cmp_d(&t, 0) == MP_EQ)
-      break;
   }
+  mp_clamp (c);
+  res = MP_OKAY;
+  /* hand the remainder to the caller; t then holds d's old contents */
+  if (d != NULL) {
+    mp_exch (&t, d);
+  }
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-  s_mp_2expt(&v, (mp_digit)k);       /* v = 2^k   */
-  res = mp_mul(&u, &v, c); /* c = u * v */
-
- CLEANUP:
-  mp_clear(&v);
- V:
-  mp_clear(&u);
- U:
-  mp_clear(&t);
-
-  return res;
-
-} /* end mp_bgcd() */
-
-/* }}} */
-
-/* {{{ mp_lcm(a, b, c) */
-
-/* We compute the least common multiple using the rule:
-
-   ab = [a, b](a, b)
-
-   ... by computing the product, and dividing out the gcd.
+/* End: bn_mp_div_2d.c */
+
+/* Start: bn_mp_div_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+/* single digit division
+ *
+ * Wraps the digit b in a temporary mp_int and calls mp_div (a, b, c, rem).
+ * When d is not NULL the lowest digit of the remainder is stored in *d.
+ * Returns the result code of mp_div, or of mp_init if a temporary could
+ * not be created.
+ */
+int
+mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
 {
-  mp_int  gcd, prod;
-  mp_err  res;
+  mp_int  t, t2;
+  int     res;
 
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 
-  /* Set up temporaries */
-  if((res = mp_init(&gcd)) != MP_OKAY)
+  /* t holds the divisor as an mp_int */
+  if ((res = mp_init (&t)) != MP_OKAY) {
     return res;
-  if((res = mp_init(&prod)) != MP_OKAY)
-    goto GCD;
+  }
 
-  if((res = mp_mul(a, b, &prod)) != MP_OKAY)
-    goto CLEANUP;
-  if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
-    goto CLEANUP;
+  /* t2 receives the remainder */
+  if ((res = mp_init (&t2)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
 
-  res = mp_div(&prod, &gcd, c, NULL);
+  mp_set (&t, b);
+  res = mp_div (a, &t, c, &t2);
 
- CLEANUP:
-  mp_clear(&prod);
- GCD:
-  mp_clear(&gcd);
+  /* NOTE(review): *d is written even when mp_div reported an error */
+  if (d != NULL) {
+    *d = t2.dp[0];
+  }
 
+  mp_clear (&t);
+  mp_clear (&t2);
   return res;
+}
 
-} /* end mp_lcm() */
-
-/* }}} */
+/* End: bn_mp_div_d.c */
+
+/* Start: bn_mp_exch.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* {{{ mp_xgcd(a, b, g, x, y) */
+/* swap the contents of a and b
+ *
+ * The two mp_int structures are exchanged by value (struct assignment);
+ * no digits are copied or allocated.
+ */
+void
+mp_exch (mp_int * a, mp_int * b)
+{
+  mp_int  t;
 
-/*
-  mp_xgcd(a, b, g, x, y)
+  t = *a;
+  *a = *b;
+  *b = t;
+}
 
-  Compute g = (a, b) and values x and y satisfying Bezout's identity
-  (that is, ax + by = g).  This uses the extended binary GCD algorithm
-  based on the Stein algorithm used for mp_gcd()
+/* End: bn_mp_exch.c */
+
+/* Start: bn_mp_exptmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y)
+/* computes Y = G^X mod P using a left-to-right sliding window
+ *
+ * The window size is picked from the bit length of X.  When P is odd and
+ * its digit count is above 4 and below MONTGOMERY_EXPT_CUTOFF the work is
+ * handed to mp_exptmod_fast; otherwise mp_reduce is used with the value
+ * prepared by mp_reduce_setup.
+ *
+ * Returns MP_OKAY on success or the error code of the first helper that
+ * fails; every temporary created here is cleared on all exit paths.
+ */
+int
+mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
 {
-  mp_int   gx, xc, yc, u, v, A, B, C, D;
-  mp_int  *clean[9];
-  mp_err   res;
-  int      last = -1;
-
-  if(mp_cmp_z(b) == 0)
-    return MP_RANGE;
-
-  /* Initialize all these variables we need */
-  if((res = mp_init(&u)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &u;
-  if((res = mp_init(&v)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &v;
-  if((res = mp_init(&gx)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &gx;
-  if((res = mp_init(&A)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &A;
-  if((res = mp_init(&B)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &B;
-  if((res = mp_init(&C)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &C;
-  if((res = mp_init(&D)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &D;
-  if((res = mp_init_copy(&xc, a)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &xc;
-  mp_abs(&xc, &xc);
-  if((res = mp_init_copy(&yc, b)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &yc;
-  mp_abs(&yc, &yc);
-
-  mp_set(&gx, 1);
-
-  /* Divide by two until at least one of them is even */
-  while(mp_iseven(&xc) && mp_iseven(&yc)) {
-    s_mp_div_2(&xc);
-    s_mp_div_2(&yc);
-    if((res = s_mp_mul_2(&gx)) != MP_OKAY)
-      goto CLEANUP;
-  }
+  mp_int  M[256], res, mu;
+  mp_digit buf;
+  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
 
-  mp_copy(&xc, &u);
-  mp_copy(&yc, &v);
-  mp_set(&A, 1); mp_set(&D, 1);
 
-  /* Loop through binary GCD algorithm */
-  for(;;) {
-    while(mp_iseven(&u)) {
-      s_mp_div_2(&u);
-
-      if(mp_iseven(&A) && mp_iseven(&B)) {
-	s_mp_div_2(&A); s_mp_div_2(&B);
-      } else {
-	if((res = mp_add(&A, &yc, &A)) != MP_OKAY) goto CLEANUP;
-	s_mp_div_2(&A);
-	if((res = mp_sub(&B, &xc, &B)) != MP_OKAY) goto CLEANUP;
-	s_mp_div_2(&B);
-      }
-    }
+  /* if the modulus is odd use the fast method */
+  if (mp_isodd (P) == 1 && P->used > 4 && P->used < MONTGOMERY_EXPT_CUTOFF) {
+    err = mp_exptmod_fast (G, X, P, Y);
+    return err;
+  }
 
-    while(mp_iseven(&v)) {
-      s_mp_div_2(&v);
+  /* find window size */
+  x = mp_count_bits (X);
+  if (x <= 7) {
+    winsize = 2;
+  } else if (x <= 36) {
+    winsize = 3;
+  } else if (x <= 140) {
+    winsize = 4;
+  } else if (x <= 450) {
+    winsize = 5;
+  } else if (x <= 1303) {
+    winsize = 6;
+  } else if (x <= 3529) {
+    winsize = 7;
+  } else {
+    winsize = 8;
+  }
 
-      if(mp_iseven(&C) && mp_iseven(&D)) {
-	s_mp_div_2(&C); s_mp_div_2(&D);
-      } else {
-	if((res = mp_add(&C, &yc, &C)) != MP_OKAY) goto CLEANUP;
-	s_mp_div_2(&C);
-	if((res = mp_sub(&D, &xc, &D)) != MP_OKAY) goto CLEANUP;
-	s_mp_div_2(&D);
+  /* init G array */
+  for (x = 0; x < (1 << winsize); x++) {
+    if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
+      for (y = 0; y < x; y++) {
+	mp_clear (&M[y]);
       }
+      return err;
     }
+  }
 
-    if(mp_cmp(&u, &v) >= 0) {
-      if((res = mp_sub(&u, &v, &u)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&A, &C, &A)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&B, &D, &B)) != MP_OKAY) goto CLEANUP;
-
-    } else {
-      if((res = mp_sub(&v, &u, &v)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&C, &A, &C)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&D, &B, &D)) != MP_OKAY) goto CLEANUP;
-
-    }
+  /* create mu, used for Barrett reduction */
+  if ((err = mp_init (&mu)) != MP_OKAY) {
+    goto __M;
+  }
+  if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
+    goto __MU;
+  }
 
-    /* If we're done, copy results to output */
-    if(mp_cmp_z(&u) == 0) {
-      if(x)
-	if((res = mp_copy(&C, x)) != MP_OKAY) goto CLEANUP;
+  /* create M table
+   *
+   * The M table contains powers of the input base, e.g. M[x] = G^x mod P
+   *
+   * The first half of the table is not computed though, except for M[0] and M[1]
+   */
+  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
+    goto __MU;
+  }
 
-      if(y)
-	if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP;
-      
-      if(g)
-	if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP;
+  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
+    goto __MU;
+  }
 
-      break;
+  for (x = 0; x < (winsize - 1); x++) {
+    if ((err =
+	 mp_sqr (&M[1 << (winsize - 1)],
+		 &M[1 << (winsize - 1)])) != MP_OKAY) {
+      goto __MU;
+    }
+    if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
+      goto __MU;
     }
   }
 
- CLEANUP:
-  while(last >= 0)
-    mp_clear(clean[last--]);
-
-  return res;
-
-} /* end mp_xgcd() */
-
-/* }}} */
-
-/* {{{ mp_invmod(a, m, c) */
-
-/*
-  mp_invmod(a, m, c)
-
-  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
-  This is equivalent to the question of whether (a, m) = 1.  If not,
-  MP_UNDEF is returned, and there is no inverse.
- */
-
-mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_int  g, x;
-  mp_err  res;
-
-  ARGCHK(a && m && c, MP_BADARG);
-
-  if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
-    return MP_RANGE;
-
-  if((res = mp_init(&g)) != MP_OKAY)
-    return res;
-  if((res = mp_init(&x)) != MP_OKAY)
-    goto X;
-
-  if((res = mp_xgcd(a, m, &g, &x, NULL)) != MP_OKAY)
-    goto CLEANUP;
-
-  if(mp_cmp_d(&g, 1) != MP_EQ) {
-    res = MP_UNDEF;
-    goto CLEANUP;
+  /* create upper table */
+  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
+      goto __MU;
+    }
+    if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
+      goto __MU;
+    }
   }
 
-  res = mp_mod(&x, m, c);
-  SIGN(c) = SIGN(a);
-
-CLEANUP:
-  mp_clear(&x);
-X:
-  mp_clear(&g);
-
-  return res;
-
-} /* end mp_invmod() */
-
-/* }}} */
-#endif /* if MP_NUMTH */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ mp_print(mp, ofp) */
+  /* setup result; from here on every failure must leave through __RES */
+  if ((err = mp_init (&res)) != MP_OKAY) {
+    goto __MU;
+  }
+  mp_set (&res, 1);
+
+  /* set initial mode and bit cnt */
+  mode = 0;
+  bitcnt = 0;
+  buf = 0;
+  digidx = X->used - 1;
+  bitcpy = bitbuf = 0;
+
+  bitcnt = 1;
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      if (digidx == -1) {
+	break;
+      }
+      buf = X->dp[digidx--];
+      bitcnt = (int) DIGIT_BIT;
+    }
 
-#if MP_IOFUNC
-/*
-  mp_print(mp, ofp)
+    /* grab the next msb from the exponent */
+    y = (buf >> (DIGIT_BIT - 1)) & 1;
+    buf <<= 1;
 
-  Print a textual representation of the given mp_int on the output
-  stream 'ofp'.  Output is generated using the internal radix.
- */
+    /* if the bit is zero and mode == 0 then we ignore it
+     * These represent the leading zero bits before the first 1 bit
+     * in the exponent.  Technically this opt is not required but it
+     * does lower the # of trivial squaring/reductions used
+     */
+    if (mode == 0 && y == 0)
+      continue;
 
-void   mp_print(mp_int *mp, FILE *ofp)
-{
-  int   ix;
+    /* if the bit is zero and mode == 1 then we square */
+    if (mode == 1 && y == 0) {
+      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+	goto __RES;
+      }
+      continue;
+    }
 
-  if(mp == NULL || ofp == NULL)
-    return;
+    /* else we add it to the window */
+    bitbuf |= (y << (winsize - ++bitcpy));
+    mode = 2;
+
+    if (bitcpy == winsize) {
+      /* ok window is filled so square as required and multiply */
+      /* square first */
+      for (x = 0; x < winsize; x++) {
+	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	  goto __RES;
+	}
+	if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+	  goto __RES;
+	}
+      }
 
-  fputc((SIGN(mp) == MP_NEG) ? '-' : '+', ofp);
+      /* then multiply (res is live here, so fail through __RES to free it) */
+      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+	goto __RES;
+      }
 
-  for(ix = USED(mp) - 1; ix >= 0; ix--) {
-    fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
+      /* empty window and reset */
+      bitcpy = bitbuf = 0;
+      mode = 1;
+    }
   }
 
-} /* end mp_print() */
-
-#endif /* if MP_IOFUNC */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ More I/O Functions */
+  /* if bits remain then square/multiply */
+  if (mode == 2 && bitcpy > 0) {
+    /* square then multiply if the bit is set */
+    for (x = 0; x < bitcpy; x++) {
+      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+	goto __RES;
+      }
 
-/* {{{ mp_read_signed_bin(mp, str, len) */
+      bitbuf <<= 1;
+      if ((bitbuf & (1 << winsize)) != 0) {
+	/* then multiply */
+	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
+	  goto __RES;
+	}
+	if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+	  goto __RES;
+	}
+      }
+    }
+  }
 
-/* 
-   mp_read_signed_bin(mp, str, len)
+  /* hand the result to the caller; res then holds Y's old contents */
+  mp_exch (&res, Y);
+  err = MP_OKAY;
+__RES:mp_clear (&res);
+__MU:mp_clear (&mu);
+__M:
+  for (x = 0; x < (1 << winsize); x++) {
+    mp_clear (&M[x]);
+  }
+  return err;
+}
 
-   Read in a raw value (base 256) into the given mp_int
+/* End: bn_mp_exptmod.c */
+
+/* Start: bn_mp_exptmod_fast.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-
-mp_err  mp_read_signed_bin(mp_int *mp, unsigned char *str, int len)
+#include <tommath.h>
+
+/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85
+ *
+ * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
+ * The value of k changes based on the size of the exponent.
+ *
+ * Uses Montgomery reduction
+ *
+ * Returns MP_OKAY on success or the error code of the first helper that
+ * fails; the M table and res are cleared on the way out.
+ */
+int
+mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
 {
-  mp_err         res;
+  mp_int  M[256], res;
+  mp_digit buf, mp;
+  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+
+  /* find window size */
+  x = mp_count_bits (X);
+  if (x <= 7) {
+    winsize = 2;
+  } else if (x <= 36) {
+    winsize = 3;
+  } else if (x <= 140) {
+    winsize = 4;
+  } else if (x <= 450) {
+    winsize = 5;
+  } else if (x <= 1303) {
+    winsize = 6;
+  } else if (x <= 3529) {
+    winsize = 7;
+  } else {
+    winsize = 8;
+  }
 
-  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+  /* init G array */
+  for (x = 0; x < (1 << winsize); x++) {
+    if ((err = mp_init (&M[x])) != MP_OKAY) {
+      for (y = 0; y < x; y++) {
+	mp_clear (&M[y]);
+      }
+      return err;
+    }
+  }
 
-  if((res = mp_read_unsigned_bin(mp, str + 1, len - 1)) == MP_OKAY) {
-    /* Get sign from first byte */
-    if(str[0])
-      SIGN(mp) = MP_NEG;
-    else
-      SIGN(mp) = MP_ZPOS;
+  /* now setup montgomery  */
+  if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
+    goto __M;
   }
 
-  return res;
+  /* setup result; if the init fails res must not be cleared, so skip
+   * __RES and only release the M table */
+  if ((err = mp_init (&res)) != MP_OKAY) {
+    goto __M;
+  }
 
-} /* end mp_read_signed_bin() */
+  /* now we need R mod m */
+  if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
+    goto __RES;
+  }
 
-/* }}} */
+  /* create M table
+   *
+   * The M table contains powers of the input base, e.g. M[x] = G^x mod P
+   *
+   * The first half of the table is not computed though, except for M[0] and M[1]
+   */
+  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
+    goto __RES;
+  }
 
-/* {{{ mp_signed_bin_size(mp) */
+  /* now set M[1] to G * R mod m */
+  if ((err = mp_mulmod (&M[1], &res, P, &M[1])) != MP_OKAY) {
+    goto __RES;
+  }
 
-int    mp_signed_bin_size(mp_int *mp)
-{
-  ARGCHK(mp != NULL, 0);
+  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
+    goto __RES;
+  }
 
-  return mp_unsigned_bin_size(mp) + 1;
+  for (x = 0; x < (winsize - 1); x++) {
+    if ((err =
+	 mp_sqr (&M[1 << (winsize - 1)],
+		 &M[1 << (winsize - 1)])) != MP_OKAY) {
+      goto __RES;
+    }
+    if ((err =
+	 mp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
+      goto __RES;
+    }
+  }
 
-} /* end mp_signed_bin_size() */
+  /* create upper table */
+  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
+      goto __RES;
+    }
+    if ((err = mp_montgomery_reduce (&M[x], P, mp)) != MP_OKAY) {
+      goto __RES;
+    }
+  }
 
-/* }}} */
+  /* set initial mode and bit cnt */
+  mode = 0;
+  bitcnt = 0;
+  buf = 0;
+  digidx = X->used - 1;
+  bitcpy = bitbuf = 0;
+
+  bitcnt = 1;
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      if (digidx == -1) {
+	break;
+      }
+      buf = X->dp[digidx--];
+      bitcnt = (int) DIGIT_BIT;
+    }
 
-/* {{{ mp_to_signed_bin(mp, str) */
+    /* grab the next msb from the exponent */
+    y = (buf >> (DIGIT_BIT - 1)) & 1;
+    buf <<= 1;
 
-mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str)
-{
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+    /* if the bit is zero and mode == 0 then we ignore it
+     * These represent the leading zero bits before the first 1 bit
+     * in the exponent.  Technically this opt is not required but it
+     * does lower the # of trivial squaring/reductions used
+     */
+    if (mode == 0 && y == 0)
+      continue;
+
+    /* if the bit is zero and mode == 1 then we square */
+    if (mode == 1 && y == 0) {
+      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+	goto __RES;
+      }
+      continue;
+    }
+
+    /* else we add it to the window */
+    bitbuf |= (y << (winsize - ++bitcpy));
+    mode = 2;
+
+    if (bitcpy == winsize) {
+      /* ok window is filled so square as required and multiply */
+      /* square first */
+      for (x = 0; x < winsize; x++) {
+	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	  goto __RES;
+	}
+	if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+	  goto __RES;
+	}
+      }
 
-  /* Caller responsible for allocating enough memory (use mp_raw_size(mp)) */
-  str[0] = (char)SIGN(mp);
+      /* then multiply */
+      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+	goto __RES;
+      }
 
-  return mp_to_unsigned_bin(mp, str + 1);
+      /* empty window and reset */
+      bitcpy = bitbuf = 0;
+      mode = 1;
+    }
+  }
 
-} /* end mp_to_signed_bin() */
+  /* if bits remain then square/multiply */
+  if (mode == 2 && bitcpy > 0) {
+    /* square then multiply if the bit is set */
+    for (x = 0; x < bitcpy; x++) {
+      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
+	goto __RES;
+      }
+      if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+	goto __RES;
+      }
 
-/* }}} */
+      bitbuf <<= 1;
+      if ((bitbuf & (1 << winsize)) != 0) {
+	/* then multiply */
+	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
+	  goto __RES;
+	}
+	if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+	  goto __RES;
+	}
+      }
+    }
+  }
 
-/* {{{ mp_read_unsigned_bin(mp, str, len) */
+  /* fixup result */
+  if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
+    goto __RES;
+  }
 
-/*
-  mp_read_unsigned_bin(mp, str, len)
+  /* hand the result to the caller; res then holds Y's old contents */
+  mp_exch (&res, Y);
+  err = MP_OKAY;
+__RES:mp_clear (&res);
+__M:
+  for (x = 0; x < (1 << winsize); x++) {
+    mp_clear (&M[x]);
+  }
+  return err;
+}
 
-  Read in an unsigned value (base 256) into the given mp_int
+/* End: bn_mp_exptmod_fast.c */
+
+/* Start: bn_mp_expt_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err  mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len)
+/* computes c = a^b for a single-digit exponent b
+ *
+ * Square-and-multiply over all DIGIT_BIT bits of b, most significant bit
+ * first.  a is copied into a temporary before c is overwritten.  Returns
+ * MP_OKAY or the error code of the first helper that fails.
+ */
+int
+mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
 {
-  int     ix;
-  mp_err  res;
+  int     res, x;
+  mp_int  g;
 
-  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
 
-  mp_zero(mp);
+  if ((res = mp_init_copy (&g, a)) != MP_OKAY) {
+    return res;
+  }
 
-  for(ix = 0; ix < len; ix++) {
-    if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
-      return res;
+  /* set initial result */
+  mp_set (c, 1);
 
-    if((res = mp_add_d(mp, str[ix], mp)) != MP_OKAY)
+  for (x = 0; x < (int) DIGIT_BIT; x++) {
+    if ((res = mp_sqr (c, c)) != MP_OKAY) {
+      mp_clear (&g);
       return res;
-  }
-  
-  return MP_OKAY;
-  
-} /* end mp_read_unsigned_bin() */
+    }
 
-/* }}} */
+    /* test the top bit; the mask is formed in mp_digit width rather than
+     * as a signed int shift */
+    if ((b & (((mp_digit) 1) << (DIGIT_BIT - 1))) != 0) {
+      if ((res = mp_mul (c, &g, c)) != MP_OKAY) {
+	mp_clear (&g);
+	return res;
+      }
+    }
 
-/* {{{ mp_unsigned_bin_size(mp) */
+    b <<= 1;
+  }
 
-int     mp_unsigned_bin_size(mp_int *mp) 
-{
-  mp_digit   topdig;
-  int        count;
+  mp_clear (&g);
+  return MP_OKAY;
+}
 
-  ARGCHK(mp != NULL, 0);
+/* End: bn_mp_expt_d.c */
+
+/* Start: bn_mp_gcd.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* Special case for the value zero */
-  if(USED(mp) == 1 && DIGIT(mp, 0) == 0)
-    return 1;
+/* Greatest Common Divisor using the binary method [Algorithm B, page 338, vol2 of TAOCP]
+ *
+ * Stores gcd (a, b) in c.  If exactly one input is zero the other one is
+ * copied to c; if both are zero c is set to 1.  The result takes the sign
+ * of the inputs when both signs agree and is positive otherwise.
+ * Returns MP_OKAY or the error code of the first helper that fails.
+ */
+int
+mp_gcd (mp_int * a, mp_int * b, mp_int * c)
+{
+  mp_int  u, v, t;
+  int     k, res, neg;
 
-  /* Special case for the value zero */
-  if(USED(mp) == 1 && DIGIT(mp, 0) == 0)
-    return 1;
 
-  count = (USED(mp) - 1) * sizeof(mp_digit);
-  topdig = DIGIT(mp, USED(mp) - 1);
 
-  while(topdig != 0) {
-    ++count;
-    topdig >>= CHAR_BIT;
+  /* if exactly one input is zero then the gcd is the other input */
+  if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
+    return mp_copy (b, c);
+  }
+  if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
+    return mp_copy (a, c);
+  }
+  if (mp_iszero (a) == 1 && mp_iszero (b) == 1) {
+    mp_set (c, 1);
+    return MP_OKAY;
   }
 
-  return count;
-
-} /* end mp_unsigned_bin_size() */
-
-/* }}} */
-
-/* {{{ mp_to_unsigned_bin(mp, str) */
+  /* if both are negative they share (-1) as a common divisor */
+  neg = (a->sign == b->sign) ? a->sign : MP_ZPOS;
 
-mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str)
-{
-  mp_digit      *dp, *end, d;
-  unsigned char *spos;
+  if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
+    return res;
+  }
 
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+  if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
+    goto __U;
+  }
 
-  dp = DIGITS(mp);
-  end = dp + USED(mp) - 1;
-  spos = str;
+  /* must be positive for the remainder of the algorithm */
+  u.sign = v.sign = MP_ZPOS;
 
-  /* Special case for zero, quick test */
-  if(dp == end && *dp == 0) {
-    *str = '\0';
-    return MP_OKAY;
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    goto __V;
   }
 
-  /* Generate digits in reverse order */
-  while(dp < end) {
-    int      ix;
-
-    d = *dp;
-    for(ix = 0; ix < (int)sizeof(mp_digit); ++ix) {
-      *spos = d & UCHAR_MAX;
-      d >>= CHAR_BIT;
-      ++spos;
+  /* B1.  Find power of two */
+  k = 0;
+  while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) {
+    ++k;
+    if ((res = mp_div_2d (&u, 1, &u, NULL)) != MP_OKAY) {
+      goto __T;
+    }
+    if ((res = mp_div_2d (&v, 1, &v, NULL)) != MP_OKAY) {
+      goto __T;
     }
-
-    ++dp;
   }
 
-  /* Now handle last digit specially, high order zeroes are not written */
-  d = *end;
-  while(d != 0) {
-    *spos = d & UCHAR_MAX;
-    d >>= CHAR_BIT;
-    ++spos;
+  /* B2.  Initialize */
+  if ((u.dp[0] & 1) == 1) {
+    if ((res = mp_copy (&v, &t)) != MP_OKAY) {
+      goto __T;
+    }
+    t.sign = MP_NEG;
+  } else {
+    if ((res = mp_copy (&u, &t)) != MP_OKAY) {
+      goto __T;
+    }
   }
 
-  /* Reverse everything to get digits in the correct order */
-  while(--spos > str) {
-    unsigned char t = *str;
-    *str = *spos;
-    *spos = t;
+  do {
+    /* B3 (and B4).  Halve t, if even */
+    while (t.used != 0 && (t.dp[0] & 1) == 0) {
+      if ((res = mp_div_2d (&t, 1, &t, NULL)) != MP_OKAY) {
+	goto __T;
+      }
+    }
 
-    ++str;
-  }
+    /* B5.  if t>0 then u=t otherwise v=-t */
+    if (t.used != 0 && t.sign != MP_NEG) {
+      if ((res = mp_copy (&t, &u)) != MP_OKAY) {
+	goto __T;
+      }
+    } else {
+      if ((res = mp_copy (&t, &v)) != MP_OKAY) {
+	goto __T;
+      }
+      v.sign = (v.sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+    }
 
-  return MP_OKAY;
+    /* B6.  t = u - v, if t != 0 loop otherwise terminate */
+    if ((res = mp_sub (&u, &v, &t)) != MP_OKAY) {
+      goto __T;
+    }
+  }
+  while (t.used != 0);
 
-} /* end mp_to_unsigned_bin() */
+  /* restore the common power of two removed in B1 */
+  if ((res = mp_mul_2d (&u, k, &u)) != MP_OKAY) {
+    goto __T;
+  }
 
-/* }}} */
+  mp_exch (&u, c);
+  c->sign = neg;
+  res = MP_OKAY;
+  /* cleanup in reverse order of creation: __V is entered once u and v
+   * exist, __U once only u exists */
+__T:mp_clear (&t);
+__V:mp_clear (&v);
+__U:mp_clear (&u);
+  return res;
+}
 
-/* {{{ mp_count_bits(mp) */
+/* End: bn_mp_gcd.c */
+
+/* Start: bn_mp_grow.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-int    mp_count_bits(mp_int *mp)
+/* grow as required
+ *
+ * Ensures a has room for at least size digits.  Nothing happens when
+ * a->alloc is already >= size.  Newly added digits are zeroed.  Returns
+ * MP_OKAY, or MP_MEM if the reallocation fails, in which case a is left
+ * unchanged and still owns its original digit buffer.
+ */
+int
+mp_grow (mp_int * a, int size)
 {
-  int      len;
-  mp_digit d;
+  int     i, n;
+  mp_digit *tmp;
 
-  ARGCHK(mp != NULL, MP_BADARG);
+  /* if the alloc size is smaller alloc more ram */
+  if (a->alloc < size) {
+    size += (MP_PREC * 2) - (size & (MP_PREC - 1));	/* ensure there are always at least MP_PREC digits extra on top */
 
-  len = DIGIT_BIT * (USED(mp) - 1);
-  d = DIGIT(mp, USED(mp) - 1);
+    /* reallocate through a temporary so the old buffer is not lost
+     * (leaked) when realloc fails */
+    tmp = realloc (a->dp, sizeof (mp_digit) * size);
+    if (tmp == NULL) {
+      return MP_MEM;
+    }
+    a->dp = tmp;
 
-  while(d != 0) {
-    ++len;
-    d >>= 1;
+    /* zero the digits between the old and the new allocation size */
+    n = a->alloc;
+    a->alloc = size;
+    for (i = n; i < a->alloc; i++) {
+      a->dp[i] = 0;
+    }
   }
+  return MP_OKAY;
+}
 
-  return len;
-  
-} /* end mp_count_bits() */
+/* End: bn_mp_grow.c */
+
+/* Start: bn_mp_init.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with 
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* init a new bigint
+ *
+ * Allocates MP_PREC zeroed digits for a and marks it as holding no digits
+ * (used == 0) with a positive sign.  Returns MP_OKAY, or MP_MEM when the
+ * allocation fails (a->dp is then NULL and the other fields are not set).
+ */
+int
+mp_init (mp_int * a)
+{
 
-/* }}} */
+  /* allocate ram required and clear it */
+  a->dp = calloc (sizeof (mp_digit), MP_PREC);
+  if (a->dp == NULL) {
+    return MP_MEM;
+  }
 
-/* {{{ mp_read_radix(mp, str, radix) */
+  /* set the used to zero, allocated digit to the default precision
+   * and sign to positive */
+  a->used = 0;
+  a->alloc = MP_PREC;
+  a->sign = MP_ZPOS;
 
-/*
-  mp_read_radix(mp, str, radix)
+  return MP_OKAY;
+}
 
-  Read an integer from the given string, and set mp to the resulting
-  value.  The input is presumed to be in base 10.  Leading non-digit
-  characters are ignored, and the function reads until a non-digit
-  character or the end of the string.
+/* End: bn_mp_init.c */
+
+/* Start: bn_mp_init_copy.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-mp_err  mp_read_radix(mp_int *mp, unsigned char *str, int radix)
+/* creates "a" then copies b into it */
+int
+mp_init_copy (mp_int * a, mp_int * b)
 {
-  int     ix = 0, val = 0;
-  mp_err  res;
-  mp_sign sig = MP_ZPOS;
-
-  ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, 
-	 MP_BADARG);
+  int     res;
 
-  mp_zero(mp);
-
-  /* Skip leading non-digit characters until a digit or '-' or '+' */
-  while(str[ix] && 
-	(s_mp_tovalue(str[ix], radix) < 0) && 
-	str[ix] != '-' &&
-	str[ix] != '+') {
-    ++ix;
+  if ((res = mp_init (a)) != MP_OKAY) {
+    return res;
   }
+  res = mp_copy (b, a);
+  return res;
+}
 
-  if(str[ix] == '-') {
-    sig = MP_NEG;
-    ++ix;
-  } else if(str[ix] == '+') {
-    sig = MP_ZPOS; /* this is the default anyway... */
-    ++ix;
-  }
+/* End: bn_mp_init_copy.c */
+
+/* Start: bn_mp_init_size.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
-    if((res = s_mp_mul_d(mp, (mp_digit)radix)) != MP_OKAY)
-      return res;
-    if((res = s_mp_add_d(mp, (mp_digit)val)) != MP_OKAY)
-      return res;
-    ++ix;
-  }
+/* init an mp_int and grow it to a given size */
+int
+mp_init_size (mp_int * a, int size)
+{
 
-  if(s_mp_cmp_d(mp, 0) == MP_EQ)
-    SIGN(mp) = MP_ZPOS;
-  else
-    SIGN(mp) = sig;
+  /* pad up so there are at least 16 zero digits */
+  size += (MP_PREC * 2) - (size & (MP_PREC - 1));	/* ensure there are always at least 16 digits extra on top */
+  a->dp = calloc (sizeof (mp_digit), size);
+  if (a->dp == NULL) {
+    return MP_MEM;
+  }
+  a->used = 0;
+  a->alloc = size;
+  a->sign = MP_ZPOS;
 
   return MP_OKAY;
+}
 
-} /* end mp_read_radix() */
-
-/* }}} */
-
-/* {{{ mp_radix_size(mp, radix) */
+/* End: bn_mp_init_size.c */
+
+/* Start: bn_mp_invmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-int    mp_radix_size(mp_int *mp, int radix)
+int
+mp_invmod (mp_int * a, mp_int * b, mp_int * c)
 {
-  int  len;
-  ARGCHK(mp != NULL, 0);
+  mp_int  x, y, u, v, A, B, C, D;
+  int     res;
 
-  len = s_mp_outlen(mp_count_bits(mp), radix) + 1; /* for NUL terminator */
-
-  if(mp_cmp_z(mp) < 0)
-    ++len; /* for sign */
+  /* b cannot be negative */
+  if (b->sign == MP_NEG) {
+    return MP_VAL;
+  }
 
-  return len;
+  /* if the modulus is odd we can use a faster routine instead */
+  if (mp_iseven (b) == 0) {
+    return fast_mp_invmod (a, b, c);
+  }
 
-} /* end mp_radix_size() */
+  if ((res = mp_init (&x)) != MP_OKAY) {
+    goto __ERR;
+  }
 
-/* }}} */
+  if ((res = mp_init (&y)) != MP_OKAY) {
+    goto __X;
+  }
 
-/* {{{ mp_value_radix_size(num, qty, radix) */
+  if ((res = mp_init (&u)) != MP_OKAY) {
+    goto __Y;
+  }
 
-/* num = number of digits
-   qty = number of bits per digit
-   radix = target base
-   
-   Return the number of digits in the specified radix that would be
-   needed to express 'num' digits of 'qty' bits each.
- */
-int    mp_value_radix_size(int num, int qty, int radix)
-{
-  ARGCHK(num >= 0 && qty > 0 && radix >= 2 && radix <= MAX_RADIX, 0);
+  if ((res = mp_init (&v)) != MP_OKAY) {
+    goto __U;
+  }
 
-  return s_mp_outlen(num * qty, radix);
+  if ((res = mp_init (&A)) != MP_OKAY) {
+    goto __V;
+  }
 
-} /* end mp_value_radix_size() */
+  if ((res = mp_init (&B)) != MP_OKAY) {
+    goto __A;
+  }
 
-/* }}} */
+  if ((res = mp_init (&C)) != MP_OKAY) {
+    goto __B;
+  }
 
-/* {{{ mp_toradix(mp, str, radix) */
+  if ((res = mp_init (&D)) != MP_OKAY) {
+    goto __C;
+  }
 
-mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix)
-{
-  int  ix, pos = 0;
+  /* x = a, y = b */
+  if ((res = mp_copy (a, &x)) != MP_OKAY) {
+    goto __D;
+  }
+  if ((res = mp_copy (b, &y)) != MP_OKAY) {
+    goto __D;
+  }
 
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
-  ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
+  if ((res = mp_abs (&x, &x)) != MP_OKAY) {
+    goto __D;
+  }
 
-  if(mp_cmp_z(mp) == MP_EQ) {
-    str[0] = '0';
-    str[1] = '\0';
-  } else {
-    mp_err   res;
-    mp_int   tmp;
-    mp_sign  sgn;
-    mp_digit rem, rdx = (mp_digit)radix;
-    char     ch;
+  /* 2. [modified] if x,y are both even then return an error! */
+  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
+    res = MP_VAL;
+    goto __D;
+  }
 
-    if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
-      return res;
+  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
+    goto __D;
+  }
+  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
+    goto __D;
+  }
+  mp_set (&A, 1);
+  mp_set (&D, 1);
 
-    /* Save sign for later, and take absolute value */
-    sgn = SIGN(&tmp); SIGN(&tmp) = MP_ZPOS;
 
-    /* Generate output digits in reverse order      */
-    while(mp_cmp_z(&tmp) != 0) {
-      if((res = s_mp_div_d(&tmp, rdx, &rem)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
+top:
+  /* 4.  while u is even do */
+  while (mp_iseven (&u) == 1) {
+    /* 4.1 u = u/2 */
+    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
+      goto __D;
+    }
+    /* 4.2 if A or B is odd then */
+    if (mp_iseven (&A) == 0 || mp_iseven (&B) == 0) {
+      /* A = (A+y)/2, B = (B-x)/2 */
+      if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
+	goto __D;
+      }
+      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
+	goto __D;
       }
-
-      /* Generate digits, use capital letters */
-      ch = s_mp_todigit(rem, radix, 0);
-
-      str[pos++] = ch;
     }
+    /* A = A/2, B = B/2 */
+    if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
+      goto __D;
+    }
+    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
+      goto __D;
+    }
+  }
 
-    /* Add - sign if original value was negative */
-    if(sgn == MP_NEG)
-      str[pos++] = '-';
-
-    /* Add trailing NUL to end the string        */
-    str[pos--] = '\0';
-
-    /* Reverse the digits and sign indicator     */
-    ix = 0;
-    while(ix < pos) {
-      char tmp = str[ix];
 
-      str[ix] = str[pos];
-      str[pos] = tmp;
-      ++ix;
-      --pos;
+  /* 5.  while v is even do */
+  while (mp_iseven (&v) == 1) {
+    /* 5.1 v = v/2 */
+    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
+      goto __D;
+    }
+    /* 5.2 if C or D is odd then */
+    if (mp_iseven (&C) == 0 || mp_iseven (&D) == 0) {
+      /* C = (C+y)/2, D = (D-x)/2 */
+      if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
+	goto __D;
+      }
+      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
+	goto __D;
+      }
+    }
+    /* C = C/2, D = D/2 */
+    if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
+      goto __D;
+    }
+    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
+      goto __D;
     }
-    
-    mp_clear(&tmp);
   }
 
-  return MP_OKAY;
+  /* 6.  if u >= v then */
+  if (mp_cmp (&u, &v) != MP_LT) {
+    /* u = u - v, A = A - C, B = B - D */
+    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+      goto __D;
+    }
 
-} /* end mp_toradix() */
+    if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
+      goto __D;
+    }
 
-/* }}} */
+    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+      goto __D;
+    }
+  } else {
+    /* v = v - u, C = C - A, D = D - B */
+    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+      goto __D;
+    }
 
-/* {{{ mp_char2value(ch, r) */
+    if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
+      goto __D;
+    }
 
-int    mp_char2value(char ch, int r)
-{
-  return s_mp_tovalue(ch, r);
+    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+      goto __D;
+    }
+  }
 
-} /* end mp_tovalue() */
+  /* if not zero goto step 4 */
+  if (mp_iszero (&u) == 0)
+    goto top;
 
-/* }}} */
+  /* now a = C, b = D, gcd == g*v */
 
-/* }}} */
+  /* if v != 1 then there is no inverse */
+  if (mp_cmp_d (&v, 1) != MP_EQ) {
+    res = MP_VAL;
+    goto __D;
+  }
 
-/* {{{ mp_strerror(ec) */
+  /* C is now the inverse, hand it to the caller through c */
+  mp_exch (&C, c);
+  res = MP_OKAY;
+
+__D:mp_clear (&D);
+__C:mp_clear (&C);
+__B:mp_clear (&B);
+__A:mp_clear (&A);
+__V:mp_clear (&v);
+__U:mp_clear (&u);
+__Y:mp_clear (&y);
+__X:mp_clear (&x);
+__ERR:
+  return res;
+}
 
-/*
-  mp_strerror(ec)
+/* End: bn_mp_invmod.c */
+
+/* Start: bn_mp_jacobi.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  Return a string describing the meaning of error code 'ec'.  The
-  string returned is allocated in static memory, so the caller should
-  not attempt to modify or free the memory associated with this
-  string.
+/* computes the jacobi c = (a | n) (or Legendre if n is prime)
+ * HAC pp. 73 Algorithm 2.149
  */
-const char  *mp_strerror(mp_err ec)
+int
+mp_jacobi (mp_int * a, mp_int * n, int *c)
 {
-  int   aec = (ec < 0) ? -ec : ec;
+  mp_int  a1, n1, e;
+  int     s, r, res;
+  mp_digit residue;
 
-  /* Code values are negative, so the senses of these comparisons
-     are accurate */
-  if(ec < MP_LAST_CODE || ec > MP_OKAY) {
-    return mp_err_string[0];  /* unknown error code */
-  } else {
-    return mp_err_string[aec + 1];
+  /* step 1.  if a == 0, return 0 */
+  if (mp_iszero (a) == 1) {
+    *c = 0;
+    return MP_OKAY;
+  }
+
+  /* step 2.  if a == 1, return 1 */
+  if (mp_cmp_d (a, 1) == MP_EQ) {
+    *c = 1;
+    return MP_OKAY;
   }
 
-} /* end mp_strerror() */
+  /* default */
+  s = 0;
 
-/* }}} */
+  /* step 3.  write a = a1 * 2^e  */
+  if ((res = mp_init_copy (&a1, a)) != MP_OKAY) {
+    return res;
+  }
 
-/*========================================================================*/
-/*------------------------------------------------------------------------*/
-/* Static function definitions (internal use only)                        */
+  if ((res = mp_init (&n1)) != MP_OKAY) {
+    goto __A1;
+  }
 
-/* {{{ Memory management */
+  if ((res = mp_init (&e)) != MP_OKAY) {
+    goto __N1;
+  }
 
-/* {{{ s_mp_grow(mp, min) */
+  while (mp_iseven (&a1) == 1) {
+    if ((res = mp_add_d (&e, 1, &e)) != MP_OKAY) {
+      goto __E;
+    }
 
-/* Make sure there are at least 'min' digits allocated to mp              */
-static mp_err s_mp_grow(mp_int *mp, mp_size min)
-{
-  if(min > ALLOC(mp)) {
-    mp_digit   *tmp;
+    if ((res = mp_div_2 (&a1, &a1)) != MP_OKAY) {
+      goto __E;
+    }
+  }
 
-    /* Set min to next nearest default precision block size */
-    min = ((min + (s_mp_defprec - 1)) / s_mp_defprec) * s_mp_defprec;
+  /* step 4.  if e is even set s=1 */
+  if (mp_iseven (&e) == 1) {
+    s = 1;
+  } else {
+    /* else set s=1 if n = 1/7 (mod 8) or s=-1 if n = 3/5 (mod 8) */
+    if ((res = mp_mod_d (n, 8, &residue)) != MP_OKAY) {
+      goto __E;
+    }
 
-    if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
-      return MP_MEM;
+    if (residue == 1 || residue == 7) {
+      s = 1;
+    } else if (residue == 3 || residue == 5) {
+      s = -1;
+    }
+  }
 
-    s_mp_copy(DIGITS(mp), tmp, USED(mp));
+  /* step 5.  if n == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */
+  if ((res = mp_mod_d (n, 4, &residue)) != MP_OKAY) {
+    goto __E;
+  }
+  if (residue == 3) {
+    if ((res = mp_mod_d (&a1, 4, &residue)) != MP_OKAY) {
+      goto __E;
+    }
+    if (residue == 3) {
+      s = -s;
+    }
+  }
 
-#if MP_CRYPTO
-    s_mp_setz(DIGITS(mp), ALLOC(mp));
-#endif
-    s_mp_free(DIGITS(mp));
-    DIGITS(mp) = tmp;
-    ALLOC(mp) = min;
+  /* if a1 == 1 we're done */
+  if (mp_cmp_d (&a1, 1) == MP_EQ) {
+    *c = s;
+  } else {
+    /* n1 = n mod a1 */
+    if ((res = mp_mod (n, &a1, &n1)) != MP_OKAY) {
+      goto __E;
+    }
+    if ((res = mp_jacobi (&n1, &a1, &r)) != MP_OKAY) {
+      goto __E;
+    }
+    *c = s * r;
   }
 
-  return MP_OKAY;
+  /* done */
+  res = MP_OKAY;
+__E:mp_clear (&e);
+__N1:mp_clear (&n1);
+__A1:mp_clear (&a1);
+  return res;
+}
 
-} /* end s_mp_grow() */
+/* End: bn_mp_jacobi.c */
+
+/* Start: bn_mp_karatsuba_mul.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* c = |a| * |b| using Karatsuba Multiplication using three half size multiplications
+ *
+ * Let B represent the radix [e.g. 2**DIGIT_BIT] and let n represent half of the number of digits in the min(a,b)
+ *
+ * a = a1 * B^n + a0
+ * b = b1 * B^n + b0
+ *
+ * Then, a * b => a1b1 * B^2n + (a0b0 + a1b1 - (a1 - a0)(b1 - b0)) * B^n + a0b0
+ *
+ * Note that a1b1 and a0b0 are used twice and only need to be computed once.  So in total
+ * three half size (half # of digit) multiplications are performed, a0b0, a1b1 and (a1-a0)(b1-b0)
+ *
+ * Note that a multiplication of half the digits requires 1/4th the number of single precision 
+ * multiplications so in total after one call 25% of the single precision multiplications are saved.
+ * Note also that the call to mp_mul can end up back in this function if the a0, a1, b0, or b1 are above
+ * the threshold.  This is known as divide-and-conquer and leads to the famous O(N^lg(3)) or O(N^1.584) work which
+ * is asymptotically lower than the standard O(N^2) that the baseline/comba methods use.  Generally though the
+ * overhead of this method doesn't pay off until a certain size (N ~ 80) is reached.
+ */
+int
+mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
+{
+  mp_int  x0, x1, y0, y1, t1, t2, x0y0, x1y1;
+  int     B, err, x;
+
+
+  err = MP_MEM;
+
+  /* min # of digits */
+  B = MIN (a->used, b->used);
+
+  /* now divide in two */
+  B = B / 2;
+
+  /* init copy all the temps */
+  if (mp_init_size (&x0, B) != MP_OKAY)
+    goto ERR;
+  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
+    goto X0;
+  if (mp_init_size (&y0, B) != MP_OKAY)
+    goto X1;
+  if (mp_init_size (&y1, b->used - B) != MP_OKAY)
+    goto Y0;
+
+  /* init temps */
+  if (mp_init (&t1) != MP_OKAY)
+    goto Y1;
+  if (mp_init (&t2) != MP_OKAY)
+    goto T1;
+  if (mp_init (&x0y0) != MP_OKAY)
+    goto T2;
+  if (mp_init (&x1y1) != MP_OKAY)
+    goto X0Y0;
+
+  /* now shift the digits */
+  x0.sign = x1.sign = a->sign;
+  y0.sign = y1.sign = b->sign;
+
+  x0.used = y0.used = B;
+  x1.used = a->used - B;
+  y1.used = b->used - B;
+
+  /* we copy the digits directly instead of using higher level functions
+   * since we also need to shift the digits
+   */
+  for (x = 0; x < B; x++) {
+    x0.dp[x] = a->dp[x];
+    y0.dp[x] = b->dp[x];
+  }
+  for (x = B; x < a->used; x++) {
+    x1.dp[x - B] = a->dp[x];
+  }
+  for (x = B; x < b->used; x++) {
+    y1.dp[x - B] = b->dp[x];
+  }
 
-/* }}} */
+  /* only need to clamp the lower words since by definition the upper words x1/y1 must
+   * have a known number of digits
+   */
+  mp_clamp (&x0);
+  mp_clamp (&y0);
+
+  /* now calc the products x0y0 and x1y1 */
+  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
+    goto X1Y1;			/* x0y0 = x0*y0 */
+  if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
+    goto X1Y1;			/* x1y1 = x1*y1 */
+
+  /* now calc x1-x0 and y1-y0 */
+  if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
+    goto X1Y1;			/* t1 = x1 - x0 */
+  if (mp_sub (&y1, &y0, &t2) != MP_OKAY)
+    goto X1Y1;			/* t2 = y1 - y0 */
+  if (mp_mul (&t1, &t2, &t1) != MP_OKAY)
+    goto X1Y1;			/* t1 = (x1 - x0) * (y1 - y0) */
+
+  /* add x0y0 */
+  if (mp_add (&x0y0, &x1y1, &t2) != MP_OKAY)
+    goto X1Y1;			/* t2 = x0y0 + x1y1 */
+  if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
+    goto X1Y1;			/* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
+
+  /* shift by B */
+  if (mp_lshd (&t1, B) != MP_OKAY)
+    goto X1Y1;			/* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
+  if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
+    goto X1Y1;			/* x1y1 = x1y1 << 2*B */
+
+  if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
+    goto X1Y1;			/* t1 = x0y0 + t1 */
+  if (mp_add (&t1, &x1y1, c) != MP_OKAY)
+    goto X1Y1;			/* t1 = x0y0 + t1 + x1y1 */
+
+  err = MP_OKAY;
+
+X1Y1:mp_clear (&x1y1);
+X0Y0:mp_clear (&x0y0);
+T2:mp_clear (&t2);
+T1:mp_clear (&t1);
+Y1:mp_clear (&y1);
+Y0:mp_clear (&y0);
+X1:mp_clear (&x1);
+X0:mp_clear (&x0);
+ERR:
+  return err;
+}
 
-/* {{{ s_mp_pad(mp, min) */
+/* End: bn_mp_karatsuba_mul.c */
+
+/* Start: bn_mp_karatsuba_sqr.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Make sure the used size of mp is at least 'min', growing if needed     */
-static mp_err s_mp_pad(mp_int *mp, mp_size min)
+/* Karatsuba squaring, computes b = a*a using three half size squarings
+ *
+ * See comments of mp_karatsuba_mul for details.  It is essentially the same algorithm
+ * but merely tuned to perform recursive squarings.
+ */
+int
+mp_karatsuba_sqr (mp_int * a, mp_int * b)
 {
-  if(min > USED(mp)) {
-    mp_err  res;
-
-    /* Make sure there is room to increase precision  */
-    if(min > ALLOC(mp) && (res = s_mp_grow(mp, min)) != MP_OKAY)
-      return res;
+  mp_int  x0, x1, t1, t2, x0x0, x1x1;
+  int     B, err, x;
 
-    /* Increase precision; should already be 0-filled */
-    USED(mp) = min;
-  }
 
-  return MP_OKAY;
+  err = MP_MEM;
 
-} /* end s_mp_pad() */
+  /* min # of digits */
+  B = a->used;
 
-/* }}} */
+  /* now divide in two */
+  B = B / 2;
 
-/* {{{ s_mp_setz(dp, count) */
+  /* init copy all the temps */
+  if (mp_init_size (&x0, B) != MP_OKAY)
+    goto ERR;
+  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
+    goto X0;
 
-#if MP_MACRO == 0
-/* Set 'count' digits pointed to by dp to be zeroes                       */
-static void s_mp_setz(mp_digit *dp, mp_size count)
-{
-#if MP_MEMSET == 0
-  int  ix;
+  /* init temps */
+  if (mp_init (&t1) != MP_OKAY)
+    goto X1;
+  if (mp_init (&t2) != MP_OKAY)
+    goto T1;
+  if (mp_init (&x0x0) != MP_OKAY)
+    goto T2;
+  if (mp_init (&x1x1) != MP_OKAY)
+    goto X0X0;
 
-  for(ix = 0; ix < count; ix++)
-    dp[ix] = 0;
-#else
-  memset(dp, 0, count * sizeof(mp_digit));
-#endif
+  /* now shift the digits */
+  for (x = 0; x < B; x++) {
+    x0.dp[x] = a->dp[x];
+  }
 
-} /* end s_mp_setz() */
-#endif
+  for (x = B; x < a->used; x++) {
+    x1.dp[x - B] = a->dp[x];
+  }
 
-/* }}} */
+  x0.used = B;
+  x1.used = a->used - B;
+
+  mp_clamp (&x0);
+
+  /* now calc the products x0*x0 and x1*x1 */
+  if (mp_sqr (&x0, &x0x0) != MP_OKAY)
+    goto X1X1;			/* x0x0 = x0*x0 */
+  if (mp_sqr (&x1, &x1x1) != MP_OKAY)
+    goto X1X1;			/* x1x1 = x1*x1 */
+
+  /* now calc (x1 - x0)**2 */
+  if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
+    goto X1X1;			/* t1 = x1 - x0 */
+  if (mp_sqr (&t1, &t1) != MP_OKAY)
+    goto X1X1;			/* t1 = (x1 - x0) * (y1 - y0) */
+
+  /* add x0y0 */
+  if (mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
+    goto X1X1;			/* t2 = x0y0 + x1y1 */
+  if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
+    goto X1X1;			/* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
+
+  /* shift by B */
+  if (mp_lshd (&t1, B) != MP_OKAY)
+    goto X1X1;			/* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
+  if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
+    goto X1X1;			/* x1y1 = x1y1 << 2*B */
+
+  if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
+    goto X1X1;			/* t1 = x0y0 + t1 */
+  if (mp_add (&t1, &x1x1, b) != MP_OKAY)
+    goto X1X1;			/* t1 = x0y0 + t1 + x1y1 */
+
+  err = MP_OKAY;
+
+X1X1:mp_clear (&x1x1);
+X0X0:mp_clear (&x0x0);
+T2:mp_clear (&t2);
+T1:mp_clear (&t1);
+X1:mp_clear (&x1);
+X0:mp_clear (&x0);
+ERR:
+  return err;
+}
 
-/* {{{ s_mp_copy(sp, dp, count) */
+/* End: bn_mp_karatsuba_sqr.c */
+
+/* Start: bn_mp_lcm.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-#if MP_MACRO == 0
-/* Copy 'count' digits from sp to dp                                      */
-static void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count)
+/* computes least common multiple as a*b / gcd(a, b) */
+int
+mp_lcm (mp_int * a, mp_int * b, mp_int * c)
 {
-#if MP_MEMCPY == 0
-  int  ix;
+  int     res;
+  mp_int  t;
+
 
-  for(ix = 0; ix < count; ix++)
-    dp[ix] = sp[ix];
-#else
-  memcpy(dp, sp, count * sizeof(mp_digit));
-#endif
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
+
+  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
 
-} /* end s_mp_copy() */
-#endif
+  if ((res = mp_gcd (a, b, c)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
 
-/* }}} */
+  res = mp_div (&t, c, c, NULL);
+  mp_clear (&t);
+  return res;
+}
 
-/* {{{ s_mp_alloc(nb, ni) */
+/* End: bn_mp_lcm.c */
+
+/* Start: bn_mp_lshd.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-#if MP_MACRO == 0
-/* Allocate ni records of nb bytes each, and return a pointer to that     */
-static void    *s_mp_alloc(size_t nb, size_t ni)
+/* shift left a certain amount of digits */
+int
+mp_lshd (mp_int * a, int b)
 {
-  return XCALLOC(nb, ni);
+  int     x, res;
 
-} /* end s_mp_alloc() */
-#endif
 
-/* }}} */
-
-/* {{{ s_mp_free(ptr) */
+  /* if the shift count is zero or negative there is nothing to do */
+  if (b <= 0) {
+    return MP_OKAY;
+  }
 
-#if MP_MACRO == 0
-/* Free the memory pointed to by ptr                                      */
-static void s_mp_free(void *ptr)
-{
-  if(ptr)
-    XFREE(ptr);
+  /* grow to fit the new digits */
+  if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
+    return res;
+  }
 
-} /* end s_mp_free() */
-#endif
+  /* increment the used count by the shift amount, then copy upwards */
+  a->used += b;
+  for (x = a->used - 1; x >= b; x--) {
+    a->dp[x] = a->dp[x - b];
+  }
 
-/* }}} */
+  /* zero the lower digits */
+  for (x = 0; x < b; x++) {
+    a->dp[x] = 0;
+  }
+  mp_clamp (a);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_clamp(mp) */
+/* End: bn_mp_lshd.c */
+
+/* Start: bn_mp_mod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Remove leading zeroes from the given value                             */
-static void s_mp_clamp(mp_int *mp)
+/* c = a mod b, 0 <= c < b */
+int
+mp_mod (mp_int * a, mp_int * b, mp_int * c)
 {
-  mp_size   du = USED(mp);
-  mp_digit *zp = DIGITS(mp) + du - 1;
+  mp_int  t;
+  int     res;
 
-  while(du > 1 && !*zp--)
-    --du;
 
-  USED(mp) = du;
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
 
-} /* end s_mp_clamp() */
+  if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
 
+  if (t.sign == MP_NEG) {
+    res = mp_add (b, &t, c);
+  } else {
+    res = MP_OKAY;
+    mp_exch (&t, c);
+  }
 
-/* }}} */
+  mp_clear (&t);
+  return res;
+}
 
-/* {{{ s_mp_exch(a, b) */
+/* End: bn_mp_mod.c */
+
+/* Start: bn_mp_mod_2d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Exchange the data for a and b; (b, a) = (a, b)                         */
-static void s_mp_exch(mp_int *a, mp_int *b)
+/* calc a value mod 2^b */
+int
+mp_mod_2d (mp_int * a, int b, mp_int * c)
 {
-  mp_int   tmp;
-
-  tmp = *a;
-  *a = *b;
-  *b = tmp;
+  int     x, res;
 
-} /* end s_mp_exch() */
 
-/* }}} */
+  /* if b is <= 0 then zero the int */
+  if (b <= 0) {
+    mp_zero (c);
+    return MP_OKAY;
+  }
 
-/* }}} */
+  /* if the modulus is larger than the value then just return a copy */
+  if (b > (int) (a->used * DIGIT_BIT)) {
+    res = mp_copy (a, c);
+    return res;
+  }
 
-/* {{{ Arithmetic helpers */
+  /* copy */
+  if ((res = mp_copy (a, c)) != MP_OKAY) {
+    return res;
+  }
 
-/* {{{ s_mp_lshd(mp, p) */
+  /* zero digits above the last digit of the modulus */
+  for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
+    c->dp[x] = 0;
+  }
+  /* clear the digit that is not completely outside/inside the modulus */
+  c->dp[b / DIGIT_BIT] &=
+    (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) -
+		((mp_digit) 1));
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-/* 
-   Shift mp leftward by p digits, growing if needed, and zero-filling
-   the in-shifted digits at the right end.  This is a convenient
-   alternative to multiplication by powers of the radix
- */   
+/* End: bn_mp_mod_2d.c */
+
+/* Start: bn_mp_mod_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-static mp_err s_mp_lshd(mp_int *mp, mp_size p)
+int
+mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
 {
-  mp_err   res;
-  mp_size  pos;
-  mp_digit *dp;
-  int     ix;
+  mp_int  t, t2;
+  int     res;
 
-  if(p == 0)
-    return MP_OKAY;
 
-  if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
+  if ((res = mp_init (&t)) != MP_OKAY) {
     return res;
+  }
 
-  pos = USED(mp) - 1;
-  dp = DIGITS(mp);
-
-  /* Shift all the significant figures over as needed */
-  for(ix = pos - p; ix >= 0; ix--) 
-    dp[ix + p] = dp[ix];
+  if ((res = mp_init (&t2)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
 
-  /* Fill the bottom digits with zeroes */
-  for(ix = 0; ix < (int)p; ix++)
-    dp[ix] = 0;
+  mp_set (&t, b);
+  mp_div (a, &t, NULL, &t2);
 
+  if (t2.sign == MP_NEG) {
+    if ((res = mp_add_d (&t2, b, &t2)) != MP_OKAY) {
+      mp_clear (&t);
+      mp_clear (&t2);
+      return res;
+    }
+  }
+  *c = t2.dp[0];
+  mp_clear (&t);
+  mp_clear (&t2);
   return MP_OKAY;
+}
 
-} /* end s_mp_lshd() */
-
-/* }}} */
-
-/* {{{ s_mp_rshd(mp, p) */
-
-/* 
-   Shift mp rightward by p digits.  Maintains the invariant that
-   digits above the precision are all zero.  Digits shifted off the
-   end are lost.  Cannot fail.
+/* End: bn_mp_mod_d.c */
+
+/* Start: bn_mp_montgomery_calc_normalization.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-
-static void s_mp_rshd(mp_int *mp, mp_size p)
+#include <tommath.h>
+
+/* calculates a = B^n mod b for Montgomery reduction
+ * Where B is the base [e.g. 2^DIGIT_BIT].  
+ * B^n mod b is computed by first computing
+ * A = B^(n-1) which doesn't require a reduction but a simple OR.
+ * then C = A * B = B^n is computed by performing up to DIGIT_BIT
+ * shifts with subtractions when the result is not less than b.
+ *
+ * The method is slightly modified to shift B unconditionally up to just under
+ * the leading bit of b.  This saves a lot of multiple precision shifting.
+ */
+int
+mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
 {
-  mp_size  ix;
-  mp_digit *dp;
+  int     x, bits, res;
 
-  if(p == 0)
-    return;
+  /* how many bits of last digit does b use */
+  bits = mp_count_bits (b) % DIGIT_BIT;
 
-  /* Shortcut when all digits are to be shifted off */
-  if(p >= USED(mp)) {
-    s_mp_setz(DIGITS(mp), ALLOC(mp));
-    USED(mp) = 1;
-    SIGN(mp) = MP_ZPOS;
-    return;
+  /* compute A = B^(n-1) * 2^(bits-1) */
+  if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
+    return res;
   }
 
-  /* Shift all the significant figures over as needed */
-  dp = DIGITS(mp);
-  for(ix = p; ix < USED(mp); ix++)
-    dp[ix - p] = dp[ix];
-
+  /* now compute C = A * B mod b */
+  for (x = bits - 1; x < DIGIT_BIT; x++) {
+    if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
+      return res;
+    }
+    if (mp_cmp_mag (a, b) != MP_LT) {
+      if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
+	return res;
+      }
+    }
+  }
 
-  /* Fill the top digits with zeroes */
-  
-  ix -= p;
-  while(ix < USED(mp))
-    dp[ix++] = 0;
+  return MP_OKAY;
+}
 
-  /* Strip off any leading zeroes    */
-  s_mp_clamp(mp);
+/* End: bn_mp_montgomery_calc_normalization.c */
+
+/* Start: bn_mp_montgomery_reduce.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-} /* end s_mp_rshd() */
+/* computes a = a * R^-1 (mod m) via Montgomery reduction, with R = b^(m->used) and b the digit radix */
+int
+mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
+{
+  int     ix, res, digs;
+  mp_digit ui;
 
-/* }}} */
+  digs = m->used * 2 + 1;
+  if ((digs < 512)
+      && digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+    return fast_mp_montgomery_reduce (a, m, mp);
+  }
 
-/* {{{ s_mp_div_2(mp) */
+  if (a->alloc < m->used * 2 + 1) {
+    if ((res = mp_grow (a, m->used * 2 + 1)) != MP_OKAY) {
+      return res;
+    }
+  }
+  a->used = m->used * 2 + 1;
+
+  for (ix = 0; ix < m->used; ix++) {
+    /* ui = ai * m' mod b */
+    ui = (a->dp[ix] * mp) & MP_MASK;
+
+    /* a = a + ui * m * b^i */
+    {
+      register int iy;
+      register mp_digit *tmpx, *tmpy, mu;
+      register mp_word r;
+
+      /* aliases */
+      tmpx = m->dp;
+      tmpy = a->dp + ix;
+
+      mu = 0;
+      for (iy = 0; iy < m->used; iy++) {
+	r =
+	  ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) +
+	  ((mp_word) * tmpy);
+	mu = (r >> ((mp_word) DIGIT_BIT));
+	*tmpy++ = (r & ((mp_word) MP_MASK));
+      }
+      /* propagate carries */
+      while (mu) {
+	*tmpy += mu;
+	mu = (*tmpy >> DIGIT_BIT) & 1;
+	*tmpy++ &= MP_MASK;
+      }
+    }
+  }
 
-/* Divide by two -- take advantage of radix properties to do it fast      */
-static void s_mp_div_2(mp_int *mp)
-{
-  s_mp_div_2d(mp, 1);
+  /* A = A/b^n */
+  mp_rshd (a, m->used);
 
-} /* end s_mp_div_2() */
+  /* if A >= m then A = A - m */
+  if (mp_cmp_mag (a, m) != MP_LT) {
+    return s_mp_sub (a, m, a);
+  }
 
-/* }}} */
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_mul_2(mp) */
+/* End: bn_mp_montgomery_reduce.c */
+
+/* Start: bn_mp_montgomery_setup.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-static mp_err s_mp_mul_2(mp_int *mp)
+/* sets up Montgomery reduction: stores -1/a mod b in *mp, where b = 2^DIGIT_BIT is the digit radix */
+int
+mp_montgomery_setup (mp_int * a, mp_digit * mp)
 {
-  int      ix;
-  mp_digit kin = 0, kout, *dp = DIGITS(mp);
-  mp_err   res;
-
-  /* Shift digits leftward by 1 bit */
-  for(ix = 0; ix < (int)USED(mp); ix++) {
-    kout = (dp[ix] >> (DIGIT_BIT - 1)) & 1;
-    dp[ix] = (dp[ix] << 1) | kin;
+  mp_int  t, tt;
+  int     res;
 
-    kin = kout;
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
   }
 
-  /* Deal with rollover from last digit */
-  if(kin) {
-    if(ix >= (int)ALLOC(mp)) {
-      if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
-	return res;
-      dp = DIGITS(mp);
-    }
-
-    dp[ix] = kin;
-    USED(mp) += 1;
+  if ((res = mp_init (&tt)) != MP_OKAY) {
+    goto __T;
   }
 
-  return MP_OKAY;
+  /* tt = b */
+  tt.dp[0] = 0;
+  tt.dp[1] = 1;
+  tt.used = 2;
+
+  /* t = m mod b */
+  t.dp[0] = a->dp[0];
+  t.used = 1;
 
-} /* end s_mp_mul_2() */
+  /* t = 1/m mod b */
+  if ((res = mp_invmod (&t, &tt, &t)) != MP_OKAY) {
+    goto __TT;
+  }
 
-/* }}} */
+  /* t = -1/m mod b */
+  *mp = ((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - t.dp[0];
 
-/* {{{ s_mp_mod_2d(mp, d) */
+  res = MP_OKAY;
+__TT:mp_clear (&tt);
+__T:mp_clear (&t);
+  return res;
+}
 
-/*
-  Remainder the integer by 2^d, where d is a number of bits.  This
-  amounts to a bitwise AND of the value, and does not require the full
-  division code
+/* End: bn_mp_montgomery_setup.c */
+
+/* Start: bn_mp_mul.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-static void s_mp_mod_2d(mp_int *mp, mp_digit d)
-{
-  unsigned int  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
-  unsigned int  ix;
-  mp_digit      dmask, *dp = DIGITS(mp);
+#include <tommath.h>
 
-  if(ndig >= USED(mp))
-    return;
+/* high level multiplication (handles sign) */
+int
+mp_mul (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     res, neg;
+  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
+  if (MIN (a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
+    res = mp_karatsuba_mul (a, b, c);
+  } else {
+    res = s_mp_mul (a, b, c);
+  }
+  c->sign = neg;
+  return res;
+}
 
-  /* Flush all the bits above 2^d in its digit */
-  dmask = (1 << nbit) - 1;
-  dp[ndig] &= dmask;
+/* End: bn_mp_mul.c */
+
+/* Start: bn_mp_mulmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* Flush all digits above the one with 2^d in it */
-  for(ix = ndig + 1; ix < USED(mp); ix++)
-    dp[ix] = 0;
+/* d = a * b (mod c) */
+int
+mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+  int     res;
+  mp_int  t;
 
-  s_mp_clamp(mp);
-} /* end s_mp_mod_2d() */
 
-/* }}} */
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
 
-/* {{{ s_mp_mul_2d(mp, d) */
+  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
+  res = mp_mod (&t, c, d);
+  mp_clear (&t);
+  return res;
+}
 
-/*
-  Multiply by the integer 2^d, where d is a number of bits.  This
-  amounts to a bitwise shift of the value, and does not require the
-  full multiplication code.
+/* End: bn_mp_mulmod.c */
+
+/* Start: bn_mp_mul_2.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-static mp_err s_mp_mul_2d(mp_int *mp, mp_digit d)
-{
-  mp_err   res;
-  mp_digit save, next, mask, *dp;
-  mp_size  used;
-  int      ix;
-
-  if((res = s_mp_lshd(mp, d / DIGIT_BIT)) != MP_OKAY)
-    return res;
+#include <tommath.h>
 
-  dp = DIGITS(mp); used = USED(mp);
-  d %= DIGIT_BIT;
+/* b = a*2
+ *
+ * Shifts the digits of a left by one bit into b (a and b may be the same
+ * object).  Only b's digits and b->used are written here; b->sign is left
+ * untouched.  Returns MP_OKAY, or the error code from mp_grow.
+ */
+int
+mp_mul_2 (mp_int * a, mp_int * b)
+{
+  int     x, res, oldused;
 
-  mask = (1 << d) - 1;
+  /* Optimization: should copy and shift at the same time */
 
-  /* If the shift requires another digit, make sure we've got one to
-     work with */
-  if((dp[used - 1] >> (DIGIT_BIT - d)) & mask) {
-    if((res = s_mp_grow(mp, used + 1)) != MP_OKAY)
+  if (b->alloc < a->used) {
+    if ((res = mp_grow (b, a->used)) != MP_OKAY) {
       return res;
-    dp = DIGITS(mp);
-  }
-
-  /* Do the shifting... */
-  save = 0;
-  for(ix = 0; ix < (int)used; ix++) {
-    next = (dp[ix] >> (DIGIT_BIT - d)) & mask;
-    dp[ix] = (dp[ix] << d) | save;
-    save = next;
+    }
   }
 
-  /* If, at this point, we have a nonzero carryout into the next
-     digit, we'll increase the size by one digit, and store it...
-   */
-  if(save) {
-    dp[used] = save;
-    USED(mp) += 1;
-  }
+  /* remember the old size so stale high digits of b can be zeroed below */
+  oldused = b->used;
+  b->used = a->used;
 
-  s_mp_clamp(mp);
-  return MP_OKAY;
+  /* shift every digit left by one bit, carrying the top bit upward */
+  {
+    register mp_digit r, rr, *tmpa, *tmpb;
 
-} /* end s_mp_mul_2d() */
+    r = 0;
+    tmpa = a->dp;
+    tmpb = b->dp;
+    for (x = 0; x < b->used; x++) {
+      rr = *tmpa >> (DIGIT_BIT - 1);
+      *tmpb++ = ((*tmpa++ << 1) | r) & MP_MASK;
+      r = rr;
+    }
 
-/* }}} */
+    /* new leading digit? */
+    if (r != 0) {
+      if (b->alloc == b->used) {
+	if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) {
+	  return res;
+	}
+      }
+      /* add a MSB of 1.  Index through b->dp rather than tmpb: mp_grow may
+       * have moved the digit buffer, which would leave tmpb dangling.
+       */
+      b->dp[b->used] = 1;
+      ++b->used;
+    }
 
-/* {{{ s_mp_div_2d(mp, d) */
+    /* clear any digits b had above the new top digit */
+    tmpb = b->dp + b->used;
+    for (x = b->used; x < oldused; x++) {
+      *tmpb++ = 0;
+    }
+  }
+  return MP_OKAY;
+}
 
-/*
-  Divide the integer by 2^d, where d is a number of bits.  This
-  amounts to a bitwise shift of the value, and does not require the
-  full division code (used in Barrett reduction, see below)
+/* End: bn_mp_mul_2.c */
+
+/* Start: bn_mp_mul_2d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-static void s_mp_div_2d(mp_int *mp, mp_digit d)
-{
-  int       ix;
-  mp_digit  save, next, mask, *dp = DIGITS(mp);
+#include <tommath.h>
 
-  s_mp_rshd(mp, d / DIGIT_BIT);
-  d %= DIGIT_BIT;
+/* shift left by a certain bit count */
+int
+mp_mul_2d (mp_int * a, int b, mp_int * c)
+{
+  mp_digit d, r, rr;
+  int     x, res;
 
-  mask = (1 << d) - 1;
 
-  save = 0;
-  for(ix = USED(mp) - 1; ix >= 0; ix--) {
-    next = dp[ix] & mask;
-    dp[ix] = (dp[ix] >> d) | (save << (DIGIT_BIT - d));
-    save = next;
+  /* copy */
+  if ((res = mp_copy (a, c)) != MP_OKAY) {
+    return res;
   }
 
-  s_mp_clamp(mp);
-
-} /* end s_mp_div_2d() */
+  if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
+    return res;
+  }
 
-/* }}} */
+  /* shift by as many digits in the bit count */
+  if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
+    return res;
+  }
+  c->used = c->alloc;
 
-/* {{{ s_mp_norm(a, b) */
+  /* shift any bit count < DIGIT_BIT */
+  d = (mp_digit) (b % DIGIT_BIT);
+  if (d != 0) {
+    r = 0;
+    for (x = 0; x < c->used; x++) {
+      /* get the higher bits of the current word */
+      rr = (c->dp[x] >> (DIGIT_BIT - d)) & ((mp_digit) ((1U << d) - 1U));
 
-/*
-  s_mp_norm(a, b)
+      /* shift the current word and OR in the carry */
+      c->dp[x] = ((c->dp[x] << d) | r) & MP_MASK;
 
-  Normalize a and b for division, where b is the divisor.  In order
-  that we might make good guesses for quotient digits, we want the
-  leading digit of b to be at least half the radix, which we
-  accomplish by multiplying a and b by a constant.  This constant is
-  returned (so that it can be divided back out of the remainder at the
-  end of the division process).
+      /* set the carry to the carry bits of the current word */
+      r = rr;
+    }
+  }
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-  We multiply by the smallest power of 2 that gives us a leading digit
-  at least half the radix.  By choosing a power of 2, we simplify the 
-  multiplication and division steps to simple shifts.
+/* End: bn_mp_mul_2d.c */
+
+/* Start: bn_mp_mul_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-mp_digit s_mp_norm(mp_int *a, mp_int *b)
+#include <tommath.h>
+
+/* multiply by a digit */
+int
+mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
 {
-  mp_digit  t, d = 0;
+  int     res, pa, olduse;
 
-  t = DIGIT(b, USED(b) - 1);
-  while(t < (RADIX / 2)) {
-    t <<= 1;
-    ++d;
-  }
-    
-  if(d != 0) {
-    s_mp_mul_2d(a, d);
-    s_mp_mul_2d(b, d);
+  pa = a->used;
+  if (c->alloc < pa + 1) {
+    if ((res = mp_grow (c, pa + 1)) != MP_OKAY) {
+      return res;
+    }
   }
 
-  return d;
+  olduse = c->used;
+  c->used = pa + 1;
+
+  {
+    register mp_digit u, *tmpa, *tmpc;
+    register mp_word r;
+    register int ix;
 
-} /* end s_mp_norm() */
+    tmpc = c->dp + c->used;
+    for (ix = c->used; ix < olduse; ix++) {
+      *tmpc++ = 0;
+    }
 
-/* }}} */
+    tmpa = a->dp;
+    tmpc = c->dp;
 
-/* }}} */
+    u = 0;
+    for (ix = 0; ix < pa; ix++) {
+      r = ((mp_word) u) + ((mp_word) * tmpa++) * ((mp_word) b);
+      *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
+      u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+    }
+    *tmpc = u;
+  }
 
-/* {{{ Primitive digit arithmetic */
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_add_d(mp, d) */
+/* End: bn_mp_mul_d.c */
+
+/* Start: bn_mp_neg.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Add d to |mp| in place                                                 */
-static mp_err s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
+/* b = -a */
+int
+mp_neg (mp_int * a, mp_int * b)
 {
-  mp_word   w, k = 0;
-  mp_size   ix = 1, used = USED(mp);
-  mp_digit *dp = DIGITS(mp);
-
-  w = dp[0] + d;
-  dp[0] = ACCUM(w);
-  k = CARRYOUT(w);
-
-  while(ix < used && k) {
-    w = dp[ix] + k;
-    dp[ix] = ACCUM(w);
-    k = CARRYOUT(w);
-    ++ix;
+  int     res;
+  if ((res = mp_copy (a, b)) != MP_OKAY) {
+    return res;
   }
+  b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+  return MP_OKAY;
+}
 
-  if(k != 0) {
-    mp_err  res;
+/* End: bn_mp_neg.c */
+
+/* Start: bn_mp_n_root.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* find the n'th root of an integer 
+ *
+ * Result found such that (c)^b <= a and (c+1)^b > a 
+ *
+ * This algorithm uses Newton's approximation x[i+1] = x[i] - f(x[i])/f'(x[i]) 
+ * which will find the root in log(N) time where each step involves a fair bit.  This
+ * is not meant to find huge roots [square and cube at most].
+ *
+ * a is the input, b the root degree, c receives the root (with a's sign).
+ * Returns MP_VAL when b is even and a is negative, otherwise MP_OKAY or the
+ * error code of the first helper call that fails.  The sign of a is forced
+ * positive while the root is computed and is put back before returning,
+ * on the error paths as well as on success.
+ */
+int
+mp_n_root (mp_int * a, mp_digit b, mp_int * c)
+{
+  mp_int  t1, t2, t3;
+  int     res, neg;
 
-    if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
-      return res;
+  /* input must be positive if b is even */
+  if ((b & 1) == 0 && a->sign == MP_NEG) {
+    return MP_VAL;
+  }
 
-    DIGIT(mp, ix) = k;
+  if ((res = mp_init (&t1)) != MP_OKAY) {
+    return res;
   }
 
-  return MP_OKAY;
+  if ((res = mp_init (&t2)) != MP_OKAY) {
+    goto __T1;
+  }
 
-} /* end s_mp_add_d() */
+  if ((res = mp_init (&t3)) != MP_OKAY) {
+    goto __T2;
+  }
 
-/* }}} */
+  /* if a is negative fudge the sign but keep track */
+  neg = a->sign;
+  a->sign = MP_ZPOS;
 
-/* {{{ s_mp_sub_d(mp, d) */
+  /* t2 = 2 */
+  mp_set (&t2, 2);
 
-/* Subtract d from |mp| in place, assumes |mp| > d                        */
-static mp_err s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
-{
-  mp_word   w, b = 0;
-  mp_size   ix = 1, used = USED(mp);
-  mp_digit *dp = DIGITS(mp);
-
-  /* Compute initial subtraction    */
-  w = (RADIX + dp[0]) - d;
-  b = CARRYOUT(w) ? 0 : 1;
-  dp[0] = ACCUM(w);
-
-  /* Propagate borrows leftward     */
-  while(b && ix < used) {
-    w = (RADIX + dp[ix]) - b;
-    b = CARRYOUT(w) ? 0 : 1;
-    dp[ix] = ACCUM(w);
-    ++ix;
-  }
+  do {
+    /* t1 = t2 */
+    if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
+      goto __T3;
+    }
 
-  /* Remove leading zeroes          */
-  s_mp_clamp(mp);
+    /* t2 = t1 - ((t1^b - a) / (b * t1^(b-1))) */
+    if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {	/* t3 = t1^(b-1) */
+      goto __T3;
+    }
 
-  /* If we have a borrow out, it's a violation of the input invariant */
-  if(b)
-    return MP_RANGE;
-  else
-    return MP_OKAY;
+    /* numerator */
+    if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {	/* t2 = t1^b */
+      goto __T3;
+    }
 
-} /* end s_mp_sub_d() */
+    if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {	/* t2 = t1^b - a */
+      goto __T3;
+    }
 
-/* }}} */
+    if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {	/* t3 = t1^(b-1) * b  */
+      goto __T3;
+    }
 
-/* {{{ s_mp_mul_d(a, d) */
+    if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {	/* t3 = (t1^b - a)/(b * t1^(b-1)) */
+      goto __T3;
+    }
 
-/* Compute a = a * d, single digit multiplication                         */
-static mp_err s_mp_mul_d(mp_int *a, mp_digit d)
-{
-  mp_word w, k = 0;
-  mp_size ix, max;
-  mp_err  res;
-  mp_digit *dp = DIGITS(a);
-
-  /*
-    Single-digit multiplication will increase the precision of the
-    output by at most one digit.  However, we can detect when this
-    will happen -- if the high-order digit of a, times d, gives a
-    two-digit result, then the precision of the result will increase;
-    otherwise it won't.  We use this fact to avoid calling s_mp_pad()
-    unless absolutely necessary.
-   */
-  max = USED(a);
-  w = dp[max - 1] * d;
-  if(CARRYOUT(w) != 0) {
-    if((res = s_mp_pad(a, max + 1)) != MP_OKAY)
-      return res;
-    dp = DIGITS(a);
+    if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
+      goto __T3;
+    }
   }
+  while (mp_cmp (&t1, &t2) != MP_EQ);
 
-  for(ix = 0; ix < max; ix++) {
-    w = (dp[ix] * d) + k;
-    dp[ix] = ACCUM(w);
-    k = CARRYOUT(w);
-  }
+  /* result can be off by a few so check: step t1 down while t1^b > a */
+  for (;;) {
+    if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
+      goto __T3;
+    }
 
-  /* If there is a precision increase, take care of it here; the above
-     test guarantees we have enough storage to do this safely.
-   */
-  if(k) {
-    dp[max] = k; 
-    USED(a) = max + 1;
+    if (mp_cmp (&t2, a) == MP_GT) {
+      if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
+	goto __T3;
+      }
+    } else {
+      break;
+    }
   }
 
-  s_mp_clamp(a);
+  /* reset the sign of a first */
+  a->sign = neg;
 
-  return MP_OKAY;
-  
-} /* end s_mp_mul_d() */
+  /* set the result */
+  mp_exch (&t1, c);
 
-/* }}} */
+  /* set the sign of the result */
+  c->sign = neg;
 
-/* {{{ s_mp_div_d(mp, d, r) */
+  res = MP_OKAY;
 
-/*
-  s_mp_div_d(mp, d, r)
+__T3:
+  /* every jump to __T3 happens after a->sign was forced to MP_ZPOS, so put
+   * the caller's sign back here too; on success this just repeats the
+   * assignment made above (and equals c->sign when a and c are the same).
+   */
+  a->sign = neg;
+  mp_clear (&t3);
+__T2:mp_clear (&t2);
+__T1:mp_clear (&t1);
+  return res;
+}
 
-  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
-  single digit d.  If r is null, the remainder will be discarded.
+/* End: bn_mp_n_root.c */
+
+/* Start: bn_mp_or.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-static mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+/* OR two ints together */
+int
+mp_or (mp_int * a, mp_int * b, mp_int * c)
 {
-  mp_word   w = 0, t;
-  mp_int    quot;
-  mp_err    res;
-  mp_digit *dp = DIGITS(mp), *qp;
-  int       ix;
-
-  if(d == 0)
-    return MP_RANGE;
-
-  /* Make room for the quotient */
-  if((res = mp_init_size(&quot, USED(mp))) != MP_OKAY)
-    return res;
+  int     res, ix, px;
+  mp_int  t, *x;
 
-  USED(&quot) = USED(mp); /* so clamping will work below */
-  qp = DIGITS(&quot);
-
-  /* Divide without subtraction */
-  for(ix = USED(mp) - 1; ix >= 0; ix--) {
-    w = (w << DIGIT_BIT) | dp[ix];
-
-    if(w >= d) {
-      t = w / d;
-      w = w % d;
-    } else {
-      t = 0;
+  if (a->used > b->used) {
+    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+      return res;
     }
+    px = b->used;
+    x = b;
+  } else {
+    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
+      return res;
+    }
+    px = a->used;
+    x = a;
+  }
 
-    qp[ix] = t;
+  for (ix = 0; ix < px; ix++) {
+    t.dp[ix] |= x->dp[ix];
   }
+  mp_clamp (&t);
+  mp_exch (c, &t);
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-  /* Deliver the remainder, if desired */
-  if(r)
-    *r = w;
+/* End: bn_mp_or.c */
+
+/* Start: bn_mp_rand.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  s_mp_clamp(&quot);
-  mp_exch(&quot, mp);
-  mp_clear(&quot);
+/* makes a pseudo-random int of a given size
+ *
+ * a receives a value built from exactly `digits` digits drawn from rand();
+ * the most significant digit is non-zero.  For digits <= 0, a is set to zero.
+ * Returns MP_OKAY, or the error code from mp_add_d / mp_lshd.
+ * rand() is not a cryptographic generator.
+ */
+int
+mp_rand (mp_int * a, int digits)
+{
+  int     res;
+  mp_digit d;
 
-  return MP_OKAY;
+  mp_zero (a);
+  if (digits <= 0) {
+    return MP_OKAY;
+  }
 
-} /* end s_mp_div_d() */
+  /* first place a random non-zero digit; mask it because rand() can
+   * return more than DIGIT_BIT bits
+   */
+  do {
+    d = ((mp_digit) abs (rand ())) & MP_MASK;
+  } while (d == 0);
 
-/* }}} */
+  if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
+    return res;
+  }
+
+  /* one digit is already in place, so append digits - 1 more */
+  while (--digits > 0) {
+    if ((res = mp_lshd (a, 1)) != MP_OKAY) {
+      return res;
+    }
 
-/* }}} */
+    if ((res = mp_add_d (a, ((mp_digit) abs (rand ())) & MP_MASK, a)) != MP_OKAY) {
+      return res;
+    }
+  }
 
-/* {{{ Primitive full arithmetic */
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_add(a, b) */
+/* End: bn_mp_rand.c */
+
+/* Start: bn_mp_read_signed_bin.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Compute a = |a| + |b|                                                  */
-static mp_err s_mp_add(mp_int *a, mp_int *b)        /* magnitude addition      */
+/* read signed bin, big endian, first byte is the sign: 0 == positive, any non-zero value == negative */
+int
+mp_read_signed_bin (mp_int * a, unsigned char *b, int c)
 {
-  mp_word   w = 0;
-  mp_digit *pa, *pb;
-  mp_size   ix, used = USED(b);
-  mp_err    res;
+  int     res;
 
-  /* Make sure a has enough precision for the output value */
-  if((used > USED(a)) && (res = s_mp_pad(a, used)) != MP_OKAY)
+  if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) {
     return res;
-
-  /*
-    Add up all digits up to the precision of b.  If b had initially
-    the same precision as a, or greater, we took care of it by the
-    padding step above, so there is no problem.  If b had initially
-    less precision, we'll have to make sure the carry out is duly
-    propagated upward among the higher-order digits of the sum.
-   */
-  pa = DIGITS(a);
-  pb = DIGITS(b);
-  for(ix = 0; ix < used; ++ix) {
-    w += *pa + *pb++;
-    *pa++ = ACCUM(w);
-    w = CARRYOUT(w);
   }
+  a->sign = ((b[0] == (unsigned char) 0) ? MP_ZPOS : MP_NEG);
+  return MP_OKAY;
+}
 
-  /* If we run out of 'b' digits before we're actually done, make
-     sure the carries get propagated upward...  
-   */
-  used = USED(a);
-  while(w && ix < used) {
-    w += *pa;
-    *pa++ = ACCUM(w);
-    w = CARRYOUT(w);
-    ++ix;
-  }
+/* End: bn_mp_read_signed_bin.c */
+
+/* Start: bn_mp_read_unsigned_bin.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* If there's an overall carry out, increase precision and include
-     it.  We could have done this initially, but why touch the memory
-     allocator unless we're sure we have to?
-   */
-  if(w) {
-    if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+/* reads an unsigned char array, assumes the msb is stored first [big endian] */
+int
+mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c)
+{
+  int     res;
+  mp_zero (a);
+  while (c-- > 0) {
+    if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
       return res;
+    }
 
-    DIGIT(a, ix) = w;  /* pa may not be valid after s_mp_pad() call */
+    if (DIGIT_BIT != 7) {
+      a->dp[0] |= *b++;
+      a->used += 1;
+    } else {
+      a->dp[0] = (*b & MP_MASK);
+      a->dp[1] |= ((*b++ >> 7U) & 1);
+      a->used += 2;
+    }
   }
-
+  mp_clamp (a);
   return MP_OKAY;
+}
+
+/* End: bn_mp_read_unsigned_bin.c */
+
+/* Start: bn_mp_reduce.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-} /* end s_mp_add() */
+/* pre-calculate the value required for Barrett reduction
+ * For a given modulus "b" it calculates the value required in "a"
+ */
+int
+mp_reduce_setup (mp_int * a, mp_int * b)
+{
+  int     res;
 
-/* }}} */
 
-/* {{{ s_mp_sub(a, b) */
+  if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
+    return res;
+  }
+  res = mp_div (a, b, a, NULL);
+  return res;
+}
 
-/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
-static mp_err s_mp_sub(mp_int *a, mp_int *b)        /* magnitude subtract      */
+/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup 
+ * From HAC pp.604 Algorithm 14.42 
+ */
+int
+mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
 {
-  mp_word   w = 0;
-  mp_digit *pa, *pb;
-  mp_size   ix, used = USED(b);
-
-  /*
-    Subtract and propagate borrow.  Up to the precision of b, this
-    accounts for the digits of b; after that, we just make sure the
-    carries get to the right place.  This saves having to pad b out to
-    the precision of a just to make the loops work right...
-   */
-  pa = DIGITS(a);
-  pb = DIGITS(b);
+  mp_int  q;
+  int     res, um = m->used;
 
-  for(ix = 0; ix < used; ++ix) {
-    w = (RADIX + *pa) - w - *pb++;
-    *pa++ = ACCUM(w);
-    w = CARRYOUT(w) ? 0 : 1;
-  }
 
-  used = USED(a);
-  while(ix < used) {
-    w = RADIX + *pa - w;
-    *pa++ = ACCUM(w);
-    w = CARRYOUT(w) ? 0 : 1;
-    ++ix;
+  if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
+    return res;
   }
 
-  /* Clobber any leading zeroes we created    */
-  s_mp_clamp(a);
+  mp_rshd (&q, um - 1);		/* q1 = x / b^(k-1)  */
 
-  /* 
-     If there was a borrow out, then |b| > |a| in violation
-     of our input invariant.  We've already done the work,
-     but we'll at least complain about it...
-   */
-  if(w)
-    return MP_RANGE;
-  else
-    return MP_OKAY;
+  /* according to HAC this optimization is ok */
+  if (((unsigned long) m->used) > (1UL << (unsigned long) (DIGIT_BIT - 1UL))) {
+    if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
+      goto CLEANUP;
+    }
+  } else {
+    if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
+      goto CLEANUP;
+    }
+  }
 
-} /* end s_mp_sub() */
+  mp_rshd (&q, um + 1);		/* q3 = q2 / b^(k+1) */
 
-/* }}} */
+  /* x = x mod b^(k+1), quick (no division) */
+  if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
+    goto CLEANUP;
+  }
 
-/* {{{ s_mp_mul(a, b) */
+  /* q = q * m mod b^(k+1), quick (no division) */
+  if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
+    goto CLEANUP;
+  }
 
-/* Compute a = |a| * |b|                                                  */
-static mp_err s_mp_mul(mp_int *a, mp_int *b)
-{
-  mp_word   w, k = 0;
-  mp_int    tmp;
-  mp_err    res;
-  mp_size   ix, jx, ua = USED(a), ub = USED(b);
-  mp_digit *pa, *pb, *pt, *pbt;
+  /* x = x - q */
+  if ((res = mp_sub (x, &q, x)) != MP_OKAY)
+    goto CLEANUP;
 
-  if((res = mp_init_size(&tmp, ua + ub)) != MP_OKAY)
-    return res;
+  /* If x < 0, add b^(k+1) to it */
+  if (mp_cmp_d (x, 0) == MP_LT) {
+    mp_set (&q, 1);
+    if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
+      goto CLEANUP;
+    if ((res = mp_add (x, &q, x)) != MP_OKAY)
+      goto CLEANUP;
+  }
+
+  /* Back off if it's too big */
+  while (mp_cmp (x, m) != MP_LT) {
+    if ((res = s_mp_sub (x, m, x)) != MP_OKAY)
+      break;
+  }
 
-  /* This has the effect of left-padding with zeroes... */
-  USED(&tmp) = ua + ub;
+CLEANUP:
+  mp_clear (&q);
 
-  /* We're going to need the base value each iteration */
-  pbt = DIGITS(&tmp);
+  return res;
+}
 
-  /* Outer loop:  Digits of b */
+/* End: bn_mp_reduce.c */
+
+/* Start: bn_mp_rshd.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  pb = DIGITS(b);
-  for(ix = 0; ix < ub; ++ix, ++pb) {
-    if(*pb == 0) 
-      continue;
+/* shift right a certain number of digits */
+void
+mp_rshd (mp_int * a, int b)
+{
+  int     x;
 
-    /* Inner product:  Digits of a */
-    pa = DIGITS(a);
-    for(jx = 0; jx < ua; ++jx, ++pa) {
-      pt = pbt + ix + jx;
-      w = *pb * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
-    }
 
-    pbt[ix + jx] = k;
-    k = 0;
+  /* if b <= 0 then ignore it */
+  if (b <= 0) {
+    return;
   }
 
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
+  /* if b > used then simply zero it and return */
+  if (a->used < b) {
+    mp_zero (a);
+    return;
+  }
 
-  mp_clear(&tmp);
+  /* shift the digits down */
+  for (x = 0; x < (a->used - b); x++) {
+    a->dp[x] = a->dp[x + b];
+  }
 
-  return MP_OKAY;
+  /* zero the top digits */
+  for (; x < a->used; x++) {
+    a->dp[x] = 0;
+  }
+  mp_clamp (a);
+}
 
-} /* end s_mp_mul() */
+/* End: bn_mp_rshd.c */
+
+/* Start: bn_mp_set.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Compute a = |a| * |b| max of digs digits */
-static mp_err s_mp_mul_dig(mp_int *a, mp_int *b, int digs)
+/* set to a digit */
+void
+mp_set (mp_int * a, mp_digit b)
 {
-  mp_word   w, k = 0;
-  mp_int    tmp;
-  mp_err    res;
-  mp_size   ix, jx, ua = USED(a), ub = USED(b);
-  mp_digit *pa, *pb, *pt, *pbt;
-
-  if((res = mp_init_size(&tmp, digs+1)) != MP_OKAY)
-    return res;
-
-  /* This has the effect of left-padding with zeroes... */
-  USED(&tmp) = digs+1;
+  mp_zero (a);
+  a->dp[0] = b & MP_MASK;
+  a->used = (a->dp[0] != 0) ? 1 : 0;
+}
 
-  /* We're going to need the base value each iteration */
-  pbt = DIGITS(&tmp);
+/* End: bn_mp_set.c */
+
+/* Start: bn_mp_set_int.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* Outer loop:  Digits of b */
+/* set a 32-bit const */
+int
+mp_set_int (mp_int * a, unsigned long b)
+{
+  int     x, res;
 
-  ub = MIN(digs, (int)ub);
-  ua = MIN(digs, (int)ua);
+  mp_zero (a);
 
-  pb = DIGITS(b);
-  for(ix = 0; ix < ub; ++ix, ++pb) {
-    if(*pb == 0) 
-      continue;
+  /* set four bits at a time, the simplest way to handle the "what if DIGIT_BIT==7" case */
+  for (x = 0; x < 8; x++) {
 
-    /* Inner product:  Digits of a */
-    pa = DIGITS(a);
-    for(jx = 0; jx < ua; ++jx, ++pa) {
-      if ((int)(ix+jx) > digs) { break; }
-      pt = pbt + ix + jx;
-      w = *pb * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
-    }
-    if ((int)(ix + jx) < digs) {
-       pbt[ix + jx] = k;
+    /* shift the number up four bits */
+    if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
+      return res;
     }
-    k = 0;
-  }
 
-  USED(&tmp) = digs;
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
+    /* OR in the top four bits of the source */
+    a->dp[0] |= (b >> 28) & 15;
 
-  mp_clear(&tmp);
-
-  return MP_OKAY;
+    /* shift the source up to the next four bits */
+    b <<= 4;
 
-} /* end s_mp_mul() */
+    /* ensure that digits are not clamped off */
+    a->used += 32 / DIGIT_BIT + 1;
+  }
 
-/* }}} */
+  mp_clamp (a);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_kmul(a, b, out, len) */
+/* End: bn_mp_set_int.c */
+
+/* Start: bn_mp_shrink.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-#if 0
-static void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len)
+/* shrink a bignum */
+int
+mp_shrink (mp_int * a)
 {
-  mp_word   w, k = 0;
-  mp_size   ix, jx;
-  mp_digit *pa, *pt;
-
-  for(ix = 0; ix < len; ++ix, ++b) {
-    if(*b == 0)
-      continue;
-    
-    pa = a;
-    for(jx = 0; jx < len; ++jx, ++pa) {
-      pt = out + ix + jx;
-      w = *b * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
+  if (a->alloc != a->used) {
+    if ((a->dp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
+      return MP_MEM;
     }
-
-    out[ix + jx] = k;
-    k = 0;
+    a->alloc = a->used;
   }
+  return MP_OKAY;
+}
 
-} /* end s_mp_kmul() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_sqr(a) */
-
-/*
-  Computes the square of a, in place.  This can be done more
-  efficiently than a general multiplication, because many of the
-  computation steps are redundant when squaring.  The inner product
-  step is a bit more complicated, but we save a fair number of
-  iterations of the multiplication loop.
+/* End: bn_mp_shrink.c */
+
+/* Start: bn_mp_signed_bin_size.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
-#if MP_SQUARE
-static mp_err s_mp_sqr(mp_int *a)
-{
-  mp_word  w, k = 0;
-  mp_int   tmp;
-  mp_err   res;
-  mp_size  ix, jx, kx, used = USED(a);
-  mp_digit *pa1, *pa2, *pt, *pbt;
+#include <tommath.h>
 
-  if((res = mp_init_size(&tmp, 2 * used)) != MP_OKAY)
-    return res;
+/* get the size for a signed equivalent
+ *
+ * Returns the byte count reported by mp_unsigned_bin_size (a) plus one.
+ * The extra byte corresponds to the leading sign byte that
+ * mp_to_signed_bin stores at b[0] ahead of the unsigned encoding.
+ */
+int
+mp_signed_bin_size (mp_int * a)
+{
+  return 1 + mp_unsigned_bin_size (a);
+}
 
-  /* Left-pad with zeroes */
-  USED(&tmp) = 2 * used;
+/* End: bn_mp_signed_bin_size.c */
+
+/* Start: bn_mp_sqr.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* We need the base value each time through the loop */
-  pbt = DIGITS(&tmp);
+/* computes b = a*a
+ *
+ * Uses mp_karatsuba_sqr when a->used is above KARATSUBA_SQR_CUTOFF and
+ * s_mp_sqr otherwise.  The sign of b is set to MP_ZPOS whether or not the
+ * squaring routine succeeded, and that routine's result code is returned.
+ */
+int
+mp_sqr (mp_int * a, mp_int * b)
+{
+  int     res;
+  if (a->used > KARATSUBA_SQR_CUTOFF) {
+    res = mp_karatsuba_sqr (a, b);
+  } else {
+    res = s_mp_sqr (a, b);
+  }
+  b->sign = MP_ZPOS;		/* a square is never negative */
+  return res;
+}
 
-  pa1 = DIGITS(a);
-  for(ix = 0; ix < used; ++ix, ++pa1) {
-    if(*pa1 == 0)
-      continue;
+/* End: bn_mp_sqr.c */
+
+/* Start: bn_mp_sqrmod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-    w = DIGIT(&tmp, ix + ix) + (*pa1 * *pa1);
+/* c = a * a (mod b) */
+int
+mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     res;
+  mp_int  t;
 
-    pbt[ix + ix] = ACCUM(w);
-    k = CARRYOUT(w);
 
-    /*
-      The inner product is computed as:
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
 
-         (C, S) = t[i,j] + 2 a[i] a[j] + C
+  if ((res = mp_sqr (a, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
+  res = mp_mod (&t, b, c);
+  mp_clear (&t);
+  return res;
+}
 
-      This can overflow what can be represented in an mp_word, and
-      since C arithmetic does not provide any way to check for
-      overflow, we have to check explicitly for overflow conditions
-      before they happen.
-     */
-    for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) {
-      mp_word  u = 0, v;
-      
-      /* Store this in a temporary to avoid indirections later */
-      pt = pbt + ix + jx;
-
-      /* Compute the multiplicative step */
-      w = *pa1 * *pa2;
-
-      /* If w is more than half MP_WORD_MAX, the doubling will
-	 overflow, and we need to record a carry out into the next
-	 word */
-      u = (w >> (MP_WORD_BIT - 1)) & 1;
-
-      /* Double what we've got, overflow will be ignored as defined
-	 for C arithmetic (we've already noted if it is to occur)
-       */
-      w *= 2;
+/* End: bn_mp_sqrmod.c */
+
+/* Start: bn_mp_sub.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-      /* Compute the additive step */
-      v = *pt + k;
+/* high level subtraction (handles signs) */
+int
+mp_sub (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     sa, sb, res;
 
-      /* If we do not already have an overflow carry, check to see
-	 if the addition will cause one, and set the carry out if so 
-       */
-      u |= ((MP_WORD_MAX - v) < w);
 
-      /* Add in the rest, again ignoring overflow */
-      w += v;
+  sa = a->sign;
+  sb = b->sign;
 
-      /* Set the i,j digit of the output */
-      *pt = ACCUM(w);
+  /* handle four cases */
+  if (sa == MP_ZPOS && sb == MP_ZPOS) {
+    /* both positive, a - b, but if b>a then we do -(b - a) */
+    if (mp_cmp_mag (a, b) == MP_LT) {
+      /* b>a */
+      res = s_mp_sub (b, a, c);
+      c->sign = MP_NEG;
+    } else {
+      res = s_mp_sub (a, b, c);
+      c->sign = MP_ZPOS;
+    }
+  } else if (sa == MP_ZPOS && sb == MP_NEG) {
+    /* a - -b == a + b  */
+    res = s_mp_add (a, b, c);
+    c->sign = MP_ZPOS;
+  } else if (sa == MP_NEG && sb == MP_ZPOS) {
+    /* -a - b == -(a + b) */
+    res = s_mp_add (a, b, c);
+    c->sign = MP_NEG;
+  } else {
+    /* -a - -b == b - a, but if a>b == -(a - b) */
+    if (mp_cmp_mag (a, b) == MP_GT) {
+      res = s_mp_sub (a, b, c);
+      c->sign = MP_NEG;
+    } else {
+      res = s_mp_sub (b, a, c);
+      c->sign = MP_ZPOS;
+    }
+  }
 
-      /* Save carry information for the next iteration of the loop.
-	 This is why k must be an mp_word, instead of an mp_digit */
-      k = CARRYOUT(w) | (u << DIGIT_BIT);
+  return res;
+}
 
-    } /* for(jx ...) */
+/* End: bn_mp_sub.c */
+
+/* Start: bn_mp_submod.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-    /* Set the last digit in the cycle and reset the carry */
-    k = DIGIT(&tmp, ix + jx) + k;
-    pbt[ix + jx] = ACCUM(k);
-    k = CARRYOUT(k);
+/* d = a - b (mod c) */
+int
+mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+{
+  int     res;
+  mp_int  t;
 
-    /* If we are carrying out, propagate the carry to the next digit
-       in the output.  This may cascade, so we have to be somewhat
-       circumspect -- but we will have enough precision in the output
-       that we won't overflow 
-     */
-    kx = 1;
-    while(k) {
-      k = pbt[ix + jx + kx] + 1;
-      pbt[ix + jx + kx] = ACCUM(k);
-      k = CARRYOUT(k);
-      ++kx;
-    }
-  } /* for(ix ...) */
 
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
 
-  mp_clear(&tmp);
+  if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
+    mp_clear (&t);
+    return res;
+  }
+  res = mp_mod (&t, c, d);
+  mp_clear (&t);
+  return res;
+}
 
-  return MP_OKAY;
+/* End: bn_mp_submod.c */
+
+/* Start: bn_mp_sub_d.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-} /* end s_mp_sqr() */
-#endif
+/* single digit subtraction */
+int
+mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
+{
+  mp_int  t;
+  int     res;
 
-/* }}} */
 
-/* {{{ s_mp_div(a, b) */
+  if ((res = mp_init (&t)) != MP_OKAY) {
+    return res;
+  }
+  mp_set (&t, b);
+  res = mp_sub (a, &t, c);
 
-/*
-  s_mp_div(a, b)
+  mp_clear (&t);
+  return res;
+}
 
-  Compute a = a / b and b = a mod b.  Assumes b > a.
+/* End: bn_mp_sub_d.c */
+
+/* Start: bn_mp_to_signed_bin.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-static mp_err s_mp_div(mp_int *a, mp_int *b)
+/* store in signed [big endian] format */
+int
+mp_to_signed_bin (mp_int * a, unsigned char *b)
 {
-  mp_int   quot, rem, t;
-  mp_word  q;
-  mp_err   res;
-  mp_digit d;
-  int      ix;
+  int     res;
 
-  if(mp_cmp_z(b) == 0)
-    return MP_RANGE;
+  if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) {
+    return res;
+  }
+  b[0] = (unsigned char) ((a->sign == MP_ZPOS) ? 0 : 1);
+  return MP_OKAY;
+}
 
-  /* Shortcut if b is power of two */
-  if((ix = s_mp_ispow2(b)) >= 0) {
-    mp_copy(a, b);  /* need this for remainder */
-    s_mp_div_2d(a, (mp_digit)ix);
-    s_mp_mod_2d(b, (mp_digit)ix);
+/* End: bn_mp_to_signed_bin.c */
+
+/* Start: bn_mp_to_unsigned_bin.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-    return MP_OKAY;
-  }
+/* store in unsigned [big endian] format */
+int
+mp_to_unsigned_bin (mp_int * a, unsigned char *b)
+{
+  int     x, res;
+  mp_int  t;
 
-  /* Allocate space to store the quotient */
-  if((res = mp_init_size(&quot, USED(a))) != MP_OKAY)
+  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
     return res;
+  }
 
-  /* A working temporary for division     */
-  if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
-    goto T;
-
-  /* Allocate space for the remainder     */
-  if((res = mp_init_size(&rem, USED(a))) != MP_OKAY)
-    goto REM;
+  x = 0;
+  while (mp_iszero (&t) == 0) {
+    if (DIGIT_BIT != 7) {
+      b[x++] = (unsigned char) (t.dp[0] & 255);
+    } else {
+      b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
+    }
+    if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
+      mp_clear (&t);
+      return res;
+    }
+  }
+  bn_reverse (b, x);
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-  /* Normalize to optimize guessing       */
-  d = s_mp_norm(a, b);
+/* End: bn_mp_to_unsigned_bin.c */
+
+/* Start: bn_mp_unsigned_bin_size.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* Perform the division itself...woo!   */
-  ix = USED(a) - 1;
+/* get the size for an unsigned equivalent
+ *
+ * Returns mp_count_bits (a) converted to bytes, rounded up: size / 8 plus
+ * one more byte when size is not a multiple of 8.
+ */
+int
+mp_unsigned_bin_size (mp_int * a)
+{
+  int     size = mp_count_bits (a);
+  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
+}
 
-  while(ix >= 0) {
-    /* Find a partial substring of a which is at least b */
-    while(s_mp_cmp(&rem, b) < 0 && ix >= 0) {
-      if((res = s_mp_lshd(&rem, 1)) != MP_OKAY) 
-	goto CLEANUP;
+/* End: bn_mp_unsigned_bin_size.c */
+
+/* Start: bn_mp_xor.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-      if((res = s_mp_lshd(&quot, 1)) != MP_OKAY)
-	goto CLEANUP;
+/* XOR two ints together */
+int
+mp_xor (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     res, ix, px;
+  mp_int  t, *x;
 
-      DIGIT(&rem, 0) = DIGIT(a, ix);
-      s_mp_clamp(&rem);
-      --ix;
+  if (a->used > b->used) {
+    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+      return res;
+    }
+    px = b->used;
+    x = b;
+  } else {
+    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
+      return res;
     }
+    px = a->used;
+    x = a;
+  }
 
-    /* If we didn't find one, we're finished dividing    */
-    if(s_mp_cmp(&rem, b) < 0) 
-      break;    
+  for (ix = 0; ix < px; ix++) {
+    t.dp[ix] ^= x->dp[ix];
+  }
+  mp_clamp (&t);
+  mp_exch (c, &t);
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-    /* Compute a guess for the next quotient digit       */
-    q = DIGIT(&rem, USED(&rem) - 1);
-    if(q <= DIGIT(b, USED(b) - 1) && USED(&rem) > 1)
-      q = (q << DIGIT_BIT) | DIGIT(&rem, USED(&rem) - 2);
+/* End: bn_mp_xor.c */
+
+/* Start: bn_mp_zero.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-    q /= DIGIT(b, USED(b) - 1);
+/* set to zero
+ *
+ * Resets the sign to MP_ZPOS and used to 0, then clears every allocated
+ * digit (all a->alloc of them, not just the previously used ones).
+ */
+void
+mp_zero (mp_int * a)
+{
+  a->sign = MP_ZPOS;
+  a->used = 0;
+  memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
+}
 
-    /* The guess can be as much as RADIX + 1 */
-    if(q >= RADIX)
-      q = RADIX - 1;
+/* End: bn_mp_zero.c */
+
+/* Start: bn_radix.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-    /* See what that multiplies out to                   */
-    mp_copy(b, &t);
-    if((res = s_mp_mul_d(&t, (mp_digit)q)) != MP_OKAY)
-      goto CLEANUP;
+/* chars used in radix conversions */
+static const char *s_rmap =
+  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
 
-    /* 
-       If it's too big, back it off.  We should not have to do this
-       more than once, or, in rare cases, twice.  Knuth describes a
-       method by which this could be reduced to a maximum of once, but
-       I didn't implement that here.
-     */
-    while(s_mp_cmp(&t, &rem) > 0) {
-      --q;
-      s_mp_sub(&t, b);
-    }
 
-    /* At this point, q should be the right next digit   */
-    if((res = s_mp_sub(&rem, &t)) != MP_OKAY)
-      goto CLEANUP;
+/* read a string [ASCII] in a given radix */
+int
+mp_read_radix (mp_int * a, char *str, int radix)
+{
+  int     y, res, neg;
+  char    ch;
 
-    /*
-      Include the digit in the quotient.  We allocated enough memory
-      for any quotient we could ever possibly get, so we should not
-      have to check for failures here
-     */
-    DIGIT(&quot, 0) = q;
+  if (radix < 2 || radix > 64) {
+    return MP_VAL;
   }
 
-  /* Denormalize remainder                */
-  if(d != 0) 
-    s_mp_div_2d(&rem, d);
+  if (*str == '-') {
+    ++str;
+    neg = MP_NEG;
+  } else {
+    neg = MP_ZPOS;
+  }
 
-  s_mp_clamp(&quot);
-  s_mp_clamp(&rem);
+  mp_zero (a);
+  while (*str) {
+    ch = (char) ((radix < 36) ? toupper (*str) : *str);
+    for (y = 0; y < 64; y++) {
+      if (ch == s_rmap[y]) {
+	break;
+      }
+    }
 
-  /* Copy quotient back to output         */
-  s_mp_exch(&quot, a);
-  
-  /* Copy remainder back to output        */
-  s_mp_exch(&rem, b);
+    if (y < radix) {
+      if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
+	return res;
+      }
+      if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
+	return res;
+      }
+    } else {
+      break;
+    }
+    ++str;
+  }
+  a->sign = neg;
+  return MP_OKAY;
+}
 
-CLEANUP:
-  mp_clear(&rem);
-REM:
-  mp_clear(&t);
-T:
-  mp_clear(&quot);
+/* stores a bignum as an ASCII string in a given radix (2..64) */
+int
+mp_toradix (mp_int * a, char *str, int radix)
+{
+  int     res, digs;
+  mp_int  t;
+  mp_digit d;
+  char   *_s = str;
 
-  return res;
+  if (radix < 2 || radix > 64) {
+    return MP_VAL;
+  }
 
-} /* end s_mp_div() */
+  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+    return res;
+  }
 
-/* }}} */
+  if (t.sign == MP_NEG) {
+    ++_s;
+    *str++ = '-';
+    t.sign = MP_ZPOS;
+  }
 
-/* {{{ s_mp_2expt(a, k) */
+  digs = 0;
+  while (mp_iszero (&t) == 0) {
+    if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+      mp_clear (&t);
+      return res;
+    }
+    *str++ = s_rmap[d];
+    ++digs;
+  }
+  bn_reverse ((unsigned char *) _s, digs);
+  *str++ = '\0';
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-static mp_err s_mp_2expt(mp_int *a, mp_digit k)
+/* returns size of ASCII representation */
+int
+mp_radix_size (mp_int * a, int radix)
 {
-  mp_err    res;
-  mp_size   dig, bit;
-
-  dig = k / DIGIT_BIT;
-  bit = k % DIGIT_BIT;
-
-  mp_zero(a);
-  if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
-    return res;
-  
-  DIGIT(a, dig) |= (1 << bit);
+  int     res, digs;
+  mp_int  t;
+  mp_digit d;
 
-  return MP_OKAY;
+  /* special case for binary */
+  if (radix == 2) {
+    return mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
+  }
 
-} /* end s_mp_2expt() */
+  if (radix < 2 || radix > 64) {
+    return 0;
+  }
 
-/* }}} */
+  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+    return 0;
+  }
 
-/* {{{ s_mp_reduce(x, m, mu) */
+  digs = 0;
+  if (t.sign == MP_NEG) {
+    ++digs;
+    t.sign = MP_ZPOS;
+  }
 
-/*
-  Compute Barrett reduction, x (mod m), given a precomputed value for
-  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
-  faster than straight division, when many reductions by the same
-  value of m are required (such as in modular exponentiation).  This
-  can nearly halve the time required to do modular exponentiation,
-  as compared to using the full integer divide to reduce.
+  while (mp_iszero (&t) == 0) {
+    if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+      mp_clear (&t);
+      return 0;
+    }
+    ++digs;
+  }
+  mp_clear (&t);
+  return digs + 1;
+}
 
-  This algorithm was derived from the _Handbook of Applied
-  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
-  pp. 603-604.  
+/* End: bn_radix.c */
+
+/* Start: bn_reverse.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
  */
+#include <tommath.h>
 
-static mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
+/* reverse an array, used for radix code */
+void
+bn_reverse (unsigned char *s, int len)
 {
-  mp_int   q;
-  mp_err   res;
-  mp_size  um = USED(m);
-
-  if((res = mp_init_copy(&q, x)) != MP_OKAY)
-    return res;
-
-  s_mp_rshd(&q, um - 1);       /* q1 = x / b^(k-1)  */
-  s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
-  s_mp_rshd(&q, um + 1);       /* q3 = q2 / b^(k+1) */
-
-  /* x = x mod b^(k+1), quick (no division) */
-  s_mp_mod_2d(x, (mp_digit)(DIGIT_BIT * (um + 1)));
+  int     ix, iy;
+  unsigned char t;
+
+  ix = 0;
+  iy = len - 1;
+  while (ix < iy) {
+    t = s[ix];
+    s[ix] = s[iy];
+    s[iy] = t;
+    ++ix;
+    --iy;
+  }
+}
 
-  /* q = q * m mod b^(k+1), quick (no division) */
-  s_mp_mul_dig(&q, m, um + 1);
-//  s_mp_mod_2d(&q, (mp_digit)(DIGIT_BIT * (um + 1)));
+/* End: bn_reverse.c */
+
+/* Start: bn_s_mp_add.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  /* x = x - q */
-  if((res = mp_sub(x, &q, x)) != MP_OKAY)
-    goto CLEANUP;
+/* low level addition, based on HAC pp.594, Algorithm 14.7 */
+int
+s_mp_add (mp_int * a, mp_int * b, mp_int * c)
+{
+  mp_int *x;
+  int     olduse, res, min, max;
 
-  /* If x < 0, add b^(k+1) to it */
-  if(mp_cmp_z(x) < 0) {
-    mp_set(&q, 1);
-    if((res = s_mp_lshd(&q, um + 1)) != MP_OKAY)
-      goto CLEANUP;
-    if((res = mp_add(x, &q, x)) != MP_OKAY)
-      goto CLEANUP;
+  /* find sizes, we let |a| <= |b| which means we have to sort
+   * them.  "x" will point to the input with the most digits
+   */
+  if (a->used > b->used) {
+    min = b->used;
+    max = a->used;
+    x = a;
+  } else if (a->used < b->used) {
+    min = a->used;
+    max = b->used;
+    x = b;
+  } else {
+    min = max = a->used;
+    x = NULL;
   }
 
-  /* Back off if it's too big */
-  while(mp_cmp(x, m) >= 0) {
-    if((res = s_mp_sub(x, m)) != MP_OKAY)
-      break;
+  /* init result */
+  if (c->alloc < max + 1) {
+    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
+      return res;
+    }
   }
 
- CLEANUP:
-  mp_clear(&q);
-
-  return res;
+  olduse = c->used;
+  c->used = max + 1;
 
-} /* end s_mp_reduce() */
+  /* add digits from lower part */
 
-/* }}} */
+  /* set the carry to zero */
+  {
+    register mp_digit u, *tmpa, *tmpb, *tmpc;
+    register int i;
 
-/* }}} */
+    /* alias for digit pointers */
+    tmpa = a->dp;
+    tmpb = b->dp;
+    tmpc = c->dp;
 
-/* {{{ Primitive comparisons */
+    u = 0;
+    for (i = 0; i < min; i++) {
+      /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
+      *tmpc = *tmpa++ + *tmpb++ + u;
 
-/* {{{ s_mp_cmp(a, b) */
+      /* U = carry bit of T[i] */
+      u = *tmpc >> DIGIT_BIT;
 
-/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
-static int s_mp_cmp(mp_int *a, mp_int *b)
-{
-  mp_size   ua = USED(a), ub = USED(b);
+      /* take away carry bit from T[i] */
+      *tmpc++ &= MP_MASK;
+    }
 
-  if(ua > ub)
-    return MP_GT;
-  else if(ua < ub)
-    return MP_LT;
-  else {
-    int      ix = ua - 1;
-    mp_digit *ap = DIGITS(a) + ix, *bp = DIGITS(b) + ix;
+    /* now copy the higher words, if any: when A or B has more digits than the other, add those (plus the carry) in */
+    if (min != max) {
+      for (; i < max; i++) {
+	/* T[i] = X[i] + U */
+	*tmpc = x->dp[i] + u;
 
-    while(ix >= 0) {
-      if(*ap > *bp)
-	return MP_GT;
-      else if(*ap < *bp)
-	return MP_LT;
+	/* U = carry bit of T[i] */
+	u = *tmpc >> DIGIT_BIT;
 
-      --ap; --bp; --ix;
+	/* take away carry bit from T[i] */
+	*tmpc++ &= MP_MASK;
+      }
     }
 
-    return MP_EQ;
-  }
+    /* add carry */
+    *tmpc++ = u;
 
-} /* end s_mp_cmp() */
+    /* clear digits above used (since we may not have grown result above) */
+    for (i = c->used; i < olduse; i++) {
+      *tmpc++ = 0;
+    }
+  }
 
-/* }}} */
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_cmp_d(a, d) */
+/* End: bn_s_mp_add.c */
+
+/* Start: bn_s_mp_mul_digs.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
-static int s_mp_cmp_d(mp_int *a, mp_digit d)
+/* multiplies |a| * |b| and only computes up to digs digits of the result.
+ * HAC pp. 595, Algorithm 14.12, modified so you can control how many digits
+ * of output are created.
+ */
+int
+s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
-  mp_size  ua = USED(a);
-  mp_digit *ap = DIGITS(a);
+  mp_int  t;
+  int     res, pa, pb, ix, iy;
+  mp_digit u;
+  mp_word r;
+  mp_digit tmpx, *tmpt, *tmpy;
 
-  if(ua > 1)
-    return MP_GT;
 
-  if(*ap < d) 
-    return MP_LT;
-  else if(*ap > d)
-    return MP_GT;
-  else
-    return MP_EQ;
+  /* can we use the fast multiplier? 
+   *
+   * The fast multiplier can be used if the output will have less than 
+   * 512 digits and the number of digits won't affect carry propagation
+   */
+  if ((digs < 512)
+      && digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+    return fast_s_mp_mul_digs (a, b, c, digs);
+  }
+
+  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
+    return res;
+  }
+  t.used = digs;
+
+  /* compute the digits of the product directly */
+  pa = a->used;
+  for (ix = 0; ix < pa; ix++) {
+    /* set the carry to zero */
+    u = 0;
+
+    /* limit ourselves to making digs digits of output */
+    pb = MIN (b->used, digs - ix);
+
+    /* setup some aliases */
+    tmpx = a->dp[ix];
+    tmpt = &(t.dp[ix]);
+    tmpy = b->dp;
+
+    /* compute the columns of the output and propagate the carry */
+    for (iy = 0; iy < pb; iy++) {
+      /* compute the column as a mp_word */
+      r =
+	((mp_word) * tmpt) + ((mp_word) tmpx) * ((mp_word) * tmpy++) +
+	((mp_word) u);
+
+      /* the new column is the lower part of the result */
+      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+
+      /* get the carry word from the result */
+      u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+    }
+    if (ix + iy < digs)
+      *tmpt = u;
+  }
 
-} /* end s_mp_cmp_d() */
+  mp_clamp (&t);
+  mp_exch (&t, c);
 
-/* }}} */
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_ispow2(v) */
+/* End: bn_s_mp_mul_digs.c */
+
+/* Start: bn_s_mp_mul_high_digs.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-/*
-  Returns -1 if the value is not a power of two; otherwise, it returns
-  k such that v = 2^k, i.e. lg(v).
+/* multiplies |a| * |b| and does not compute the lower digs digits 
+ * [meant to get the higher part of the product]
  */
-static int s_mp_ispow2(mp_int *v)
+int
+s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
-  mp_digit d, *dp;
-  mp_size  uv = USED(v);
-  int      extra = 0, ix;
-
-  d = DIGIT(v, uv - 1); /* most significant digit of v */
+  mp_int  t;
+  int     res, pa, pb, ix, iy;
+  mp_digit u;
+  mp_word r;
+  mp_digit tmpx, *tmpt, *tmpy;
+
+
+  /* can we use the fast multiplier? */
+  if (((a->used + b->used + 1) < 512)
+      && MAX (a->used,
+	      b->used) <
+      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+    return fast_s_mp_mul_high_digs (a, b, c, digs);
+  }
 
-  while(d && ((d & 1) == 0)) {
-    d >>= 1;
-    ++extra;
+  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
+    return res;
   }
+  t.used = a->used + b->used + 1;
 
-  if(d == 1) {
-    ix = uv - 2;
-    dp = DIGITS(v) + ix;
+  pa = a->used;
+  pb = b->used;
+  for (ix = 0; ix < pa; ix++) {
+    /* clear the carry */
+    u = 0;
 
-    while(ix >= 0) {
-      if(*dp)
-	return -1; /* not a power of two */
+    /* left hand side of A[ix] * B[iy] */
+    tmpx = a->dp[ix];
 
-      --dp; --ix;
-    }
+    /* alias to the address of where the digits will be stored */
+    tmpt = &(t.dp[digs]);
 
-    return ((uv - 1) * DIGIT_BIT) + extra;
-  } 
+    /* alias for where to read the right hand side from */
+    tmpy = b->dp + (digs - ix);
 
-  return -1;
+    for (iy = digs - ix; iy < pb; iy++) {
+      /* calculate the double precision result */
+      r =
+	((mp_word) * tmpt) + ((mp_word) tmpx) * ((mp_word) * tmpy++) +
+	((mp_word) u);
 
-} /* end s_mp_ispow2() */
+      /* get the lower part */
+      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
 
-/* }}} */
+      /* carry the carry */
+      u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+    }
+    *tmpt = u;
+  }
+  mp_clamp (&t);
+  mp_exch (&t, c);
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-/* {{{ s_mp_ispow2d(d) */
+/* End: bn_s_mp_mul_high_digs.c */
+
+/* Start: bn_s_mp_sqr.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-static int s_mp_ispow2d(mp_digit d)
+/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
+int
+s_mp_sqr (mp_int * a, mp_int * b)
 {
-  int   pow = 0;
-
-  while((d & 1) == 0) {
-    ++pow; d >>= 1;
+  mp_int  t;
+  int     res, ix, iy, pa;
+  mp_word r, u;
+  mp_digit tmpx, *tmpt;
+
+  /* can we use the fast multiplier? */
+  if (((a->used * 2 + 1) < 512)
+      && a->used <
+      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT) - 1))) {
+    return fast_s_mp_sqr (a, b);
   }
 
-  if(d == 1)
-    return pow;
-
-  return -1;
+  pa = a->used;
+  if ((res = mp_init_size (&t, pa + pa + 1)) != MP_OKAY) {
+    return res;
+  }
+  t.used = pa + pa + 1;
 
-} /* end s_mp_ispow2d() */
+  for (ix = 0; ix < pa; ix++) {
+    /* first calculate the digit at 2*ix */
+    /* calculate double precision result */
+    r =
+      ((mp_word) t.dp[ix + ix]) +
+      ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
 
-/* }}} */
+    /* store lower part in result */
+    t.dp[ix + ix] = (mp_digit) (r & ((mp_word) MP_MASK));
 
-/* }}} */
+    /* get the carry */
+    u = (r >> ((mp_word) DIGIT_BIT));
 
-/* {{{ Primitive I/O helpers */
+    /* left hand side of A[ix] * A[iy] */
+    tmpx = a->dp[ix];
 
-/* {{{ s_mp_tovalue(ch, r) */
+    /* alias for where to store the results */
+    tmpt = &(t.dp[ix + ix + 1]);
+    for (iy = ix + 1; iy < pa; iy++) {
+      /* first calculate the product */
+      r = ((mp_word) tmpx) * ((mp_word) a->dp[iy]);
 
-/*
-  Convert the given character to its digit value, in the given radix.
-  If the given character is not understood in the given radix, -1 is
-  returned.  Otherwise the digit's numeric value is returned.
+      /* now calculate the double precision result, note we use
+       * addition instead of *2 since it's easier to optimize
+       */
+      r = ((mp_word) * tmpt) + r + r + ((mp_word) u);
 
-  The results will be odd if you use a radix < 2 or > 62, you are
-  expected to know what you're up to.
- */
-static int s_mp_tovalue(char ch, int r)
-{
-  int    val, xch;
-  
-  if(r > 36)
-    xch = ch;
-  else
-    xch = toupper(ch);
-
-  if(isdigit(xch))
-    val = xch - '0';
-  else if(isupper(xch))
-    val = xch - 'A' + 10;
-  else if(islower(xch))
-    val = xch - 'a' + 36;
-  else if(xch == '+')
-    val = 62;
-  else if(xch == '/')
-    val = 63;
-  else 
-    return -1;
-
-  if(val < 0 || val >= r)
-    return -1;
-
-  return val;
-
-} /* end s_mp_tovalue() */
-
-/* }}} */
-
-/* {{{ s_mp_todigit(val, r, low) */
-
-/*
-  Convert val to a radix-r digit, if possible.  If val is out of range
-  for r, returns zero.  Otherwise, returns an ASCII character denoting
-  the value in the given radix.
-
-  The results may be odd if you use a radix < 2 or > 64, you are
-  expected to know what you're doing.
- */
-  
-char     s_mp_todigit(int val, int r, int low)
-{
-  char   ch;
+      /* store lower part */
+      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
 
-  if(val < 0 || val >= r)
-    return 0;
+      /* get carry */
+      u = (r >> ((mp_word) DIGIT_BIT));
+    }
+    r = ((mp_word) * tmpt) + u;
+    *tmpt = (mp_digit) (r & ((mp_word) MP_MASK));
+    u = (r >> ((mp_word) DIGIT_BIT));
+    /* propagate upwards */
+    ++tmpt;
+    while (u != ((mp_word) 0)) {
+      r = ((mp_word) * tmpt) + ((mp_word) 1);
+      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
+      u = (r >> ((mp_word) DIGIT_BIT));
+    }
+  }
 
-  ch = s_dmap_1[val];
+  mp_clamp (&t);
+  mp_exch (&t, b);
+  mp_clear (&t);
+  return MP_OKAY;
+}
 
-  if(r <= 36 && low)
-    ch = tolower(ch);
+/* End: bn_s_mp_sqr.c */
+
+/* Start: bn_s_mp_sub.c */
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#include <tommath.h>
 
-  return ch;
+/* low level subtraction (assumes a > b), HAC pp.595 Algorithm 14.9 */
+int
+s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
+{
+  int     olduse, res, min, max;
 
-} /* end s_mp_todigit() */
+  /* find sizes */
+  min = b->used;
+  max = a->used;
 
-/* }}} */
+  /* init result */
+  if (c->alloc < max) {
+    if ((res = mp_grow (c, max)) != MP_OKAY) {
+      return res;
+    }
+  }
+  olduse = c->used;
+  c->used = max;
+
+  /* sub digits from lower part */
+
+  {
+    register mp_digit u, *tmpa, *tmpb, *tmpc;
+    register int i;
+
+    /* alias for digit pointers */
+    tmpa = a->dp;
+    tmpb = b->dp;
+    tmpc = c->dp;
+
+    /* set carry to zero */
+    u = 0;
+    for (i = 0; i < min; i++) {
+      /* T[i] = A[i] - B[i] - U */
+      *tmpc = *tmpa++ - *tmpb++ - u;
+
+      /* U = carry bit of T[i] 
+       * Note this saves performing an AND operation since 
+       * if a carry does occur it will propagate all the way to the
+       * MSB.  As a result a single shift is required to get the carry
+       */
+      u = *tmpc >> (CHAR_BIT * sizeof (mp_digit) - 1);
 
-/* {{{ s_mp_outlen(bits, radix) */
+      /* Clear carry from T[i] */
+      *tmpc++ &= MP_MASK;
+    }
 
-/* 
-   Return an estimate for how long a string is needed to hold a radix
-   r representation of a number with 'bits' significant bits.
+    /* now copy higher words if any, i.e. if A has more digits than B */
+    for (; i < max; i++) {
+      /* T[i] = A[i] - U */
+      *tmpc = *tmpa++ - u;
 
-   Does not include space for a sign or a NUL terminator.
- */
-static int s_mp_outlen(int bits, int r)
-{
-  return (int)((double)bits * LOG_V_2(r));
+      /* U = carry bit of T[i] */
+      u = *tmpc >> (CHAR_BIT * sizeof (mp_digit) - 1);
 
-} /* end s_mp_outlen() */
+      /* Clear carry from T[i] */
+      *tmpc++ &= MP_MASK;
+    }
 
-/* }}} */
+    /* clear digits above used (since we may not have grown result above) */
+    for (i = c->used; i < olduse; i++) {
+      *tmpc++ = 0;
+    }
+  }
 
-/* }}} */
+  mp_clamp (c);
+  return MP_OKAY;
+}
 
-#endif /* MPI */
+/* End: bn_s_mp_sub.c */
 
-/*------------------------------------------------------------------------*/
-/* HERE THERE BE DRAGONS                                                  */
 
- 
+/* EOF */

+ 0 - 227
mpi.h

@@ -1,227 +0,0 @@
-/*
-    mpi.h
-
-    by Michael J. Fromberger <[email protected]>
-    Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
-
-    Arbitrary precision integer arithmetic library
-
-    $ID$
- */
-
-#ifndef _H_MPI_
-#define _H_MPI_
-
-#include "mpi-config.h"
-
-#define  MP_LT       -1
-#define  MP_EQ        0
-#define  MP_GT        1
-
-#if MP_DEBUG
-#undef MP_IOFUNC
-#define MP_IOFUNC 1
-#endif
-
-#if MP_IOFUNC
-#include <stdio.h>
-#include <ctype.h>
-#endif
-
-#include <limits.h>
-
-#define  MP_NEG  1
-#define  MP_ZPOS 0
-
-/* Included for compatibility... */
-#define  NEG     MP_NEG
-#define  ZPOS    MP_ZPOS
-
-#define  MP_OKAY          0 /* no error, all is well */
-#define  MP_YES           0 /* yes (boolean result)  */
-#define  MP_NO           -1 /* no (boolean result)   */
-#define  MP_MEM          -2 /* out of memory         */
-#define  MP_RANGE        -3 /* argument out of range */
-#define  MP_BADARG       -4 /* invalid parameter     */
-#define  MP_UNDEF        -5 /* answer is undefined   */
-#define  MP_LAST_CODE    MP_UNDEF
-
-#include "mpi-types.h"
-
-/* Included for compatibility... */
-#define DIGIT_BIT         MP_DIGIT_BIT
-#define DIGIT_MAX         MP_DIGIT_MAX
-
-/* Macros for accessing the mp_int internals           */
-#define  SIGN(MP)     ((MP)->sign)
-#define  USED(MP)     ((MP)->used)
-#define  ALLOC(MP)    ((MP)->alloc)
-#define  DIGITS(MP)   ((MP)->dp)
-#define  DIGIT(MP,N)  (MP)->dp[(N)]
-
-#if MP_ARGCHK == 1
-#define  ARGCHK(X,Y)  {if(!(X)){return (Y);}}
-#elif MP_ARGCHK == 2
-#include <assert.h>
-#define  ARGCHK(X,Y)  assert(X)
-#else
-#define  ARGCHK(X,Y)  /*  */
-#endif
-
-/* This defines the maximum I/O base (minimum is 2)   */
-#define MAX_RADIX         64
-
-typedef struct {
-  mp_sign       sign;    /* sign of this quantity      */
-  mp_size       alloc;   /* how many digits allocated  */
-  mp_size       used;    /* how many digits used       */
-  mp_digit     *dp;      /* the digits themselves      */
-} mp_int;
-
-/*------------------------------------------------------------------------*/
-/* Default precision                                                      */
-
-unsigned int mp_get_prec(void);
-void         mp_set_prec(unsigned int prec);
-
-/*------------------------------------------------------------------------*/
-/* Memory management                                                      */
-
-mp_err mp_init(mp_int *mp);
-mp_err mp_init_array(mp_int mp[], int count);
-mp_err mp_init_size(mp_int *mp, mp_size prec);
-mp_err mp_init_copy(mp_int *mp, mp_int *from);
-mp_err mp_copy(mp_int *from, mp_int *to);
-void   mp_exch(mp_int *mp1, mp_int *mp2);
-void   mp_clear(mp_int *mp);
-void   mp_clear_array(mp_int mp[], int count);
-void   mp_zero(mp_int *mp);
-void   mp_set(mp_int *mp, mp_digit d);
-mp_err mp_set_int(mp_int *mp, long z);
-mp_err mp_shrink(mp_int *a);
-
-
-/*------------------------------------------------------------------------*/
-/* Single digit arithmetic                                                */
-
-mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b);
-mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b);
-mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b);
-mp_err mp_mul_2(mp_int *a, mp_int *c);
-mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
-mp_err mp_div_2(mp_int *a, mp_int *c);
-mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c);
-
-/*------------------------------------------------------------------------*/
-/* Sign manipulations                                                     */
-
-mp_err mp_abs(mp_int *a, mp_int *b);
-mp_err mp_neg(mp_int *a, mp_int *b);
-
-/*------------------------------------------------------------------------*/
-/* Full arithmetic                                                        */
-
-mp_err mp_add(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c);
-#if MP_SQUARE
-mp_err mp_sqr(mp_int *a, mp_int *b);
-#else
-#define mp_sqr(a, b) mp_mul(a, a, b)
-#endif
-mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r);
-mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r);
-mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_2expt(mp_int *a, mp_digit k);
-mp_err mp_sqrt(mp_int *a, mp_int *b);
-
-/*------------------------------------------------------------------------*/
-/* Modular arithmetic                                                     */
-
-#if MP_MODARITH
-mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c);
-mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c);
-mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c);
-mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c);
-mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c);
-#if MP_SQUARE
-mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c);
-#else
-#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
-#endif
-mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c);
-mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c);
-#endif /* MP_MODARITH */
-
-/*------------------------------------------------------------------------*/
-/* Comparisons                                                            */
-
-int    mp_cmp_z(mp_int *a);
-int    mp_cmp_d(mp_int *a, mp_digit d);
-int    mp_cmp(mp_int *a, mp_int *b);
-int    mp_cmp_mag(mp_int *a, mp_int *b);
-int    mp_cmp_int(mp_int *a, long z);
-int    mp_isodd(mp_int *a);
-int    mp_iseven(mp_int *a);
-
-/*------------------------------------------------------------------------*/
-/* Number theoretic                                                       */
-
-#if MP_NUMTH
-mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
-mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y);
-mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c);
-#endif /* end MP_NUMTH */
-
-/*------------------------------------------------------------------------*/
-/* Input and output                                                       */
-
-#if MP_IOFUNC
-void   mp_print(mp_int *mp, FILE *ofp);
-#endif /* end MP_IOFUNC */
-
-/*------------------------------------------------------------------------*/
-/* Base conversion                                                        */
-
-#define BITS     1
-#define BYTES    CHAR_BIT
-
-mp_err mp_read_signed_bin(mp_int *mp, unsigned char *str, int len);
-int    mp_signed_bin_size(mp_int *mp);
-mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str);
-
-mp_err mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len);
-int    mp_unsigned_bin_size(mp_int *mp);
-mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str);
-
-int    mp_count_bits(mp_int *mp);
-
-#if MP_COMPAT_MACROS
-#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
-#define mp_raw_size(mp)           mp_signed_bin_size(mp)
-#define mp_toraw(mp, str)         mp_to_signed_bin((mp), (str))
-#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len))
-#define mp_mag_size(mp)           mp_unsigned_bin_size(mp)
-#define mp_tomag(mp, str)         mp_to_unsigned_bin((mp), (str))
-#endif
-
-mp_err mp_read_radix(mp_int *mp, unsigned char *str, int radix);
-int    mp_radix_size(mp_int *mp, int radix);
-int    mp_value_radix_size(int num, int qty, int radix);
-mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix);
-
-int    mp_char2value(char ch, int r);
-
-#define mp_tobinary(M, S)  mp_toradix((M), (S), 2)
-#define mp_tooctal(M, S)   mp_toradix((M), (S), 8)
-#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
-#define mp_tohex(M, S)     mp_toradix((M), (S), 16)
-
-/*------------------------------------------------------------------------*/
-/* Error strings                                                          */
-
-const  char  *mp_strerror(mp_err ec);
-
-#endif /* end _H_MPI_ */

+ 0 - 4216
mpi.old

@@ -1,4216 +0,0 @@
- /*
-    mpi.c
-
-    by Michael J. Fromberger <[email protected]>
-    Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
-
-    Arbitrary precision integer arithmetic library
-
-    $Id: mpi.c,v 1.22 2001/09/14 15:11:20 sting Exp sting $
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-
-#include "mycrypt.h"
-
-#ifdef MPI
-
-#if MP_DEBUG
-#include <stdio.h>
-
-#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);}
-#else
-#define DIAG(T,V)
-#endif
-
-/* 
-   If MP_LOGTAB is not defined, use the math library to compute the
-   logarithms on the fly.  Otherwise, use the static table below.
-   Pick which works best for your system.
- */
-#if MP_LOGTAB
-
-/* {{{ s_logv_2[] - log table for 2 in various bases */
-
-/*
-  A table of the logs of 2 for various bases (the 0 and 1 entries of
-  this table are meaningless and should not be referenced).  
-
-  This table is used to compute output lengths for the mp_toradix()
-  function.  Since a number n in radix r takes up about log_r(n)
-  digits, we estimate the output size by taking the least integer
-  greater than log_r(n), where:
-
-  log_r(n) = log_2(n) * log_r(2)
-
-  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
-  which are the output bases supported.  
- */
-const float s_logv_2[] = {
-   0.000000000, 0.000000000, 1.000000000, 0.630929754, 	/*  0  1  2  3 */
-   0.500000000, 0.430676558, 0.386852807, 0.356207187, 	/*  4  5  6  7 */
-   0.333333333, 0.315464877, 0.301029996, 0.289064826, 	/*  8  9 10 11 */
-   0.278942946, 0.270238154, 0.262649535, 0.255958025, 	/* 12 13 14 15 */
-   0.250000000, 0.244650542, 0.239812467, 0.235408913, 	/* 16 17 18 19 */
-   0.231378213, 0.227670249, 0.224243824, 0.221064729, 	/* 20 21 22 23 */
-   0.218104292, 0.215338279, 0.212746054, 0.210309918, 	/* 24 25 26 27 */
-   0.208014598, 0.205846832, 0.203795047, 0.201849087, 	/* 28 29 30 31 */
-   0.200000000, 0.198239863, 0.196561632, 0.194959022, 	/* 32 33 34 35 */
-   0.193426404, 0.191958720, 0.190551412, 0.189200360, 	/* 36 37 38 39 */
-   0.187901825, 0.186652411, 0.185449023, 0.184288833, 	/* 40 41 42 43 */
-   0.183169251, 0.182087900, 0.181042597, 0.180031327, 	/* 44 45 46 47 */
-   0.179052232, 0.178103594, 0.177183820, 0.176291434, 	/* 48 49 50 51 */
-   0.175425064, 0.174583430, 0.173765343, 0.172969690, 	/* 52 53 54 55 */
-   0.172195434, 0.171441601, 0.170707280, 0.169991616, 	/* 56 57 58 59 */
-   0.169293808, 0.168613099, 0.167948779, 0.167300179, 	/* 60 61 62 63 */
-   0.166666667
-};
-/* }}} */
-#define LOG_V_2(R)  s_logv_2[(R)]
-
-#else
-
-#include <math.h>
-#define LOG_V_2(R)  (log(2.0)/log(R))
-
-#endif
-
-/* Default precision for newly created mp_int's      */
-static unsigned static int s_mp_defprec = MP_DEFPREC;
-
-/* {{{ Digit arithmetic macros */
-
-/*
-  When adding and multiplying digits, the results can be larger than
-  can be contained in an mp_digit.  Thus, an mp_word is used.  These
-  macros mask off the upper and lower digits of the mp_word (the
-  mp_word may be more than 2 mp_digits wide, but we only concern
-  ourselves with the low-order 2 mp_digits)
-
-  If your mp_word DOES have more than 2 mp_digits, you need to
-  uncomment the first line, and comment out the second.
- */
-
-/* #define  CARRYOUT(W)  (((W)>>DIGIT_BIT)&MP_DIGIT_MAX) */
-#define  CARRYOUT(W)  ((W)>>DIGIT_BIT)
-#define  ACCUM(W)     ((W)&MP_DIGIT_MAX)
-
-/* }}} */
-
-/* {{{ Comparison constants */
-
-
-/* }}} */
-
-/* {{{ Constant strings */
-
-/* Constant strings returned by mp_strerror() */
-static const char *mp_err_string[] = {
-  "unknown result code",     /* say what?            */
-  "boolean true",            /* MP_OKAY, MP_YES      */
-  "boolean false",           /* MP_NO                */
-  "out of memory",           /* MP_MEM               */
-  "argument out of range",   /* MP_RANGE             */
-  "invalid input parameter", /* MP_BADARG            */
-  "result is undefined"      /* MP_UNDEF             */
-};
-
-/* Value to digit maps for radix conversion   */
-
-/* s_dmap_1 - standard digits and letters */
-static const char *s_dmap_1 = 
-  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
-
-#if 0
-/* s_dmap_2 - base64 ordering for digits  */
-static const char *s_dmap_2 =
-  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-#endif
-
-/* }}} */
-
-/* {{{ Static function declarations */
-
-/* 
-   If MP_MACRO is false, these will be defined as actual functions;
-   otherwise, suitable macro definitions will be used.  This works
-   around the fact that ANSI C89 doesn't support an 'inline' keyword
-   (although I hear C9x will ... about bloody time).  At present, the
-   macro definitions are identical to the function bodies, but they'll
-   expand in place, instead of generating a function call.
-
-   I chose these particular functions to be made into macros because
-   some profiling showed they are called a lot on a typical workload,
-   and yet they are primarily housekeeping.
- */
-#if MP_MACRO == 0
- void     s_mp_setz(mp_digit *dp, mp_size count); /* zero digits           */
- void     s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count); /* copy    */
- void    *s_mp_alloc(size_t nb, size_t ni);       /* general allocator     */
- void     s_mp_free(void *ptr);                   /* general free function */
-#else
-
- /* Even if these are defined as macros, we need to respect the settings
-    of the MP_MEMSET and MP_MEMCPY configuration options...
-  */
- #if MP_MEMSET == 0
-  #define  s_mp_setz(dp, count) \
-       {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;}
- #else
-  #define  s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit))
- #endif /* MP_MEMSET */
-
- #if MP_MEMCPY == 0
-  #define  s_mp_copy(sp, dp, count) \
-       {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];}
- #else
-  #define  s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit))
- #endif /* MP_MEMCPY */
-
- #define  s_mp_alloc(nb, ni)  XCALLOC(nb, ni)
- #define  s_mp_free(ptr) {if(ptr) XFREE(ptr);}
-#endif /* MP_MACRO */
-
-static mp_err s_mp_grow(mp_int *mp, mp_size min);   /* increase allocated size */
-static mp_err s_mp_pad(mp_int *mp, mp_size min);    /* left pad with zeroes    */
-
-void     s_mp_clamp(mp_int *mp);               /* clip leading zeroes     */
-
-void     s_mp_exch(mp_int *a, mp_int *b);      /* swap a and b in place   */
-
-static mp_err s_mp_lshd(mp_int *mp, mp_size p);     /* left-shift by p digits  */
-void     s_mp_rshd(mp_int *mp, mp_size p);     /* right-shift by p digits */
-void     s_mp_div_2d(mp_int *mp, mp_digit d);  /* divide by 2^d in place  */
-void     s_mp_mod_2d(mp_int *mp, mp_digit d);  /* modulo 2^d in place     */
-static mp_err s_mp_mul_2d(mp_int *mp, mp_digit d);  /* multiply by 2^d in place*/
-void     s_mp_div_2(mp_int *mp);               /* divide by 2 in place    */
-static mp_err s_mp_mul_2(mp_int *mp);               /* multiply by 2 in place  */
-mp_digit s_mp_norm(mp_int *a, mp_int *b);      /* normalize for division  */
-static mp_err s_mp_add_d(mp_int *mp, mp_digit d);   /* unsigned digit addition */
-static mp_err s_mp_sub_d(mp_int *mp, mp_digit d);   /* unsigned digit subtract */
-static mp_err s_mp_mul_d(mp_int *mp, mp_digit d);   /* unsigned digit multiply */
-static mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
-		                               /* unsigned digit divide   */
-static mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu);
-                                               /* Barrett reduction       */
-static mp_err s_mp_add(mp_int *a, mp_int *b);       /* magnitude addition      */
-static mp_err s_mp_sub(mp_int *a, mp_int *b);       /* magnitude subtract      */
-static mp_err s_mp_mul(mp_int *a, mp_int *b);       /* magnitude multiply      */
-#if 0
-void     s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len);
-                                               /* multiply buffers in place */
-#endif
-#if MP_SQUARE
-static mp_err s_mp_sqr(mp_int *a);                  /* magnitude square        */
-#else
-#define  s_mp_sqr(a) s_mp_mul(a, a)
-#endif
-static mp_err s_mp_div(mp_int *a, mp_int *b);       /* magnitude divide        */
-static mp_err s_mp_2expt(mp_int *a, mp_digit k);    /* a = 2^k                 */
-static int s_mp_cmp(mp_int *a, mp_int *b);       /* magnitude comparison    */
-static int s_mp_cmp_d(mp_int *a, mp_digit d);    /* magnitude digit compare */
-static int s_mp_ispow2(mp_int *v);               /* is v a power of 2?      */
-static int s_mp_ispow2d(mp_digit d);             /* is d a power of 2?      */
-
-static int s_mp_tovalue(char ch, int r);          /* convert ch to value    */
-char     s_mp_todigit(int val, int r, int low); /* convert val to digit   */
-static int s_mp_outlen(int bits, int r);          /* output length in bytes */
-
-/* }}} */
-
-/* {{{ Default precision manipulation */
-
-unsigned int mp_get_prec(void)
-{
-  return s_mp_defprec;
-
-} /* end mp_get_prec() */
-
-void         mp_set_prec(unsigned int prec)
-{
-  if(prec == 0)
-    s_mp_defprec = MP_DEFPREC;
-  else
-    s_mp_defprec = prec;
-
-} /* end mp_set_prec() */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ mp_init(mp) */
-
-/*
-  mp_init(mp)
-
-  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
-  MP_MEM if memory could not be allocated for the structure.
- */
-
-mp_err mp_init(mp_int *mp)
-{
-  return mp_init_size(mp, s_mp_defprec);
-
-} /* end mp_init() */
-
-/* }}} */
-
-/* {{{ mp_init_array(mp[], count) */
-
-mp_err mp_init_array(mp_int mp[], int count)
-{
-  mp_err  res;
-  int     pos;
-
-  ARGCHK(mp !=NULL && count > 0, MP_BADARG);
-
-  for(pos = 0; pos < count; ++pos) {
-    if((res = mp_init(&mp[pos])) != MP_OKAY)
-      goto CLEANUP;
-  }
-
-  return MP_OKAY;
-
- CLEANUP:
-  while(--pos >= 0) 
-    mp_clear(&mp[pos]);
-
-  return res;
-
-} /* end mp_init_array() */
-
-/* }}} */
-
-/* {{{ mp_init_size(mp, prec) */
-
-/*
-  mp_init_size(mp, prec)
-
-  Initialize a new zero-valued mp_int with at least the given
-  precision; returns MP_OKAY if successful, or MP_MEM if memory could
-  not be allocated for the structure.
- */
-
-mp_err mp_init_size(mp_int *mp, mp_size prec)
-{
-  ARGCHK(mp != NULL && prec > 0, MP_BADARG);
-
-  if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
-    return MP_MEM;
-
-  SIGN(mp) = MP_ZPOS;
-  USED(mp) = 1;
-  ALLOC(mp) = prec;
-
-  return MP_OKAY;
-
-} /* end mp_init_size() */
-
-/* }}} */
-
-/* {{{ mp_init_copy(mp, from) */
-
-/*
-  mp_init_copy(mp, from)
-
-  Initialize mp as an exact copy of from.  Returns MP_OKAY if
-  successful, MP_MEM if memory could not be allocated for the new
-  structure.
- */
-
-mp_err mp_init_copy(mp_int *mp, mp_int *from)
-{
-  ARGCHK(mp != NULL && from != NULL, MP_BADARG);
-
-  if(mp == from)
-    return MP_OKAY;
-
-  if((DIGITS(mp) = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
-    return MP_MEM;
-
-  s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
-  USED(mp) = USED(from);
-  ALLOC(mp) = USED(from);
-  SIGN(mp) = SIGN(from);
-
-  return MP_OKAY;
-
-} /* end mp_init_copy() */
-
-/* }}} */
-
-/* {{{ mp_copy(from, to) */
-
-/*
-  mp_copy(from, to)
-
-  Make 'to' hold the same value as 'from'.  'to' must already be
-  initialized (use mp_init_copy() otherwise).  When both arguments are
-  the same object nothing happens.  Returns MP_OKAY, or MP_MEM if a
-  larger buffer was needed and could not be allocated (in which case
-  'to' is left untouched).
- */
-
-mp_err mp_copy(mp_int *from, mp_int *to)
-{
-  mp_size  need;
-
-  ARGCHK(from != NULL && to != NULL, MP_BADARG);
-
-  if(from == to)
-    return MP_OKAY;
-
-  need = USED(from);
-
-  if(ALLOC(to) < need) {
-    /* Destination is too small: build the copy in a new buffer, and
-       only then release the old one */
-    mp_digit *fresh = s_mp_alloc(need, sizeof(mp_digit));
-
-    if(fresh == NULL)
-      return MP_MEM;
-
-    s_mp_copy(DIGITS(from), fresh, need);
-
-    if(DIGITS(to) != NULL) {
-#if MP_CRYPTO
-      /* Scrub the old digits before handing the memory back */
-      s_mp_setz(DIGITS(to), ALLOC(to));
-#endif
-      s_mp_free(DIGITS(to));
-    }
-
-    DIGITS(to) = fresh;
-    ALLOC(to) = need;
-
-  } else {
-    /* Destination is big enough: clear the digits above the ones we
-       are about to write, then copy in place without allocating */
-    s_mp_setz(DIGITS(to) + need, ALLOC(to) - need);
-    s_mp_copy(DIGITS(from), DIGITS(to), need);
-  }
-
-  /* Carry over the digit count and sign of the source */
-  USED(to) = need;
-  SIGN(to) = SIGN(from);
-
-  return MP_OKAY;
-
-} /* end mp_copy() */
-
-/* }}} */
-
-/* {{{ mp_exch(mp1, mp2) */
-
-/*
-  mp_exch(mp1, mp2)
-
-  Swap the contents of mp1 and mp2 via s_mp_exch(), without
-  allocating any intermediate memory.  With MP_ARGCHK == 2 a NULL
-  argument trips an assertion; otherwise a NULL argument makes the
-  call a silent no-op.
- */
-
-void mp_exch(mp_int *mp1, mp_int *mp2)
-{
-#if MP_ARGCHK == 2
-  assert(mp1 != NULL && mp2 != NULL);
-  s_mp_exch(mp1, mp2);
-#else
-  if(mp1 != NULL && mp2 != NULL)
-    s_mp_exch(mp1, mp2);
-#endif
-
-} /* end mp_exch() */
-
-/* }}} */
-
-/* {{{ mp_clear(mp) */
-
-/*
-  mp_clear(mp)
-
-  Give back the digit storage owned by an mp_int and reset its fields,
-  so that a second mp_clear() on the same object is harmless.  A NULL
-  argument is ignored.
- */
-
-void   mp_clear(mp_int *mp)
-{
-  if(mp != NULL) {
-
-    if(DIGITS(mp) != NULL) {
-#if MP_CRYPTO
-      /* Wipe the digits before the memory is released */
-      s_mp_setz(DIGITS(mp), ALLOC(mp));
-#endif
-      s_mp_free(DIGITS(mp));
-      DIGITS(mp) = NULL;
-    }
-
-    ALLOC(mp) = 0;
-    USED(mp) = 0;
-  }
-
-} /* end mp_clear() */
-
-/* }}} */
-
-/* {{{ mp_clear_array(mp[], count) */
-
-/*
-  mp_clear_array(mp[], count)
-
-  Call mp_clear() on each of the first 'count' elements of the array,
-  from the last element down to the first.  A NULL array or a
-  non-positive count is a no-op; the function returns nothing, so bad
-  arguments cannot be reported.
- */
-void   mp_clear_array(mp_int mp[], int count)
-{
-  /* Without this guard a NULL array with count > 0 would hand
-     mp_clear() a bogus non-NULL element address */
-  if(mp == NULL)
-    return;
-
-  while(--count >= 0) 
-    mp_clear(&mp[count]);
-
-} /* end mp_clear_array() */
-
-/* }}} */
-
-/* {{{ mp_zero(mp) */
-
-/*
-  mp_zero(mp) 
-
-  Make mp equal to zero: every allocated digit is cleared, one digit
-  is marked as used and the sign is set non-negative.  The allocation
-  is left as it is, so this cannot fail; a NULL argument is ignored.
- */
-void   mp_zero(mp_int *mp)
-{
-  if(mp != NULL) {
-    s_mp_setz(DIGITS(mp), ALLOC(mp));
-    SIGN(mp) = MP_ZPOS;
-    USED(mp) = 1;
-  }
-
-} /* end mp_zero() */
-
-/* }}} */
-
-/* {{{ mp_set(mp, d) */
-
-/* Set mp to the single-digit value d; a NULL mp is ignored */
-void   mp_set(mp_int *mp, mp_digit d)
-{
-  if(mp != NULL) {
-    /* Reset to zero first, then drop d into the lowest digit */
-    mp_zero(mp);
-    DIGIT(mp, 0) = d;
-  }
-
-} /* end mp_set() */
-
-/* }}} */
-
-/* {{{ mp_set_int(mp, z) */
-
-/*
-  mp_set_int(mp, z)
-
-  Set mp to the value of the long integer z.  The magnitude of z is
-  fed in one byte at a time, most significant byte first, and the
-  sign is applied at the end.  Returns MP_OKAY, or the error code of
-  s_mp_mul_2d() / s_mp_add_d() if one of them fails.
- */
-mp_err mp_set_int(mp_int *mp, long z)
-{
-  int            ix;
-  unsigned long  v;
-  mp_err         res;
-
-  ARGCHK(mp != NULL, MP_BADARG);
-
-  mp_zero(mp);
-  if(z == 0)
-    return MP_OKAY;  /* shortcut for zero */
-
-  /* Take the magnitude in unsigned arithmetic.  abs() works on int and
-     would truncate a long where long is wider than int; negating in
-     unsigned long is also well defined for the most negative value. */
-  v = (z < 0) ? (0UL - (unsigned long)z) : (unsigned long)z;
-
-  for(ix = (int)sizeof(long) - 1; ix >= 0; ix--) {
-
-/* --- bug in MSVC [first release] */
-  if (ix == -1) break;
-/* --- end of fix */
-
-    /* Make room for the next byte ... */
-    if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
-      return res;
-
-    /* ... and add it in */
-    res = s_mp_add_d(mp, 
-                     (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
-    if(res != MP_OKAY)
-      return res;
-  }
-
-  if(z < 0)
-    SIGN(mp) = MP_NEG;
-
-  return MP_OKAY;
-
-} /* end mp_set_int() */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Digit arithmetic */
-
-/* {{{ mp_add_d(a, d, b) */
-
-/*
-  mp_add_d(a, d, b)
-
-  Compute the sum b = a + d, for a single digit d.  Respects the sign of
-  its primary addend (single digits are unsigned anyway).  A zero
-  result is always returned with a non-negative sign.  Returns MP_OKAY
-  or the error code of the helper that failed.
- */
-
-mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b)
-{
-  mp_err   res = MP_OKAY;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if((res = mp_copy(a, b)) != MP_OKAY)
-    return res;
-
-  if(SIGN(b) == MP_ZPOS) {
-    /* Non-negative: plain digit addition */
-    res = s_mp_add_d(b, d);
-  } else if(s_mp_cmp_d(b, d) >= 0) {
-    /* Negative and at least as large as d: shrink the magnitude */
-    res = s_mp_sub_d(b, d);
-  } else {
-    /* Negative but smaller than d: the result is the positive
-       single digit d - |b| */
-    SIGN(b) = MP_ZPOS;
-
-    DIGIT(b, 0) = d - DIGIT(b, 0);
-  }
-
-  /* -d + d would otherwise leave a zero that is still marked negative,
-     which mp_cmp_z() reports as less than zero; mp_sub_d() normalizes
-     the same way */
-  if(res == MP_OKAY && s_mp_cmp_d(b, 0) == 0)
-    SIGN(b) = MP_ZPOS;
-
-  return res;
-
-} /* end mp_add_d() */
-
-/* }}} */
-
-/* {{{ mp_sub_d(a, d, b) */
-
-/*
-  mp_sub_d(a, d, b)
-
-  Compute b = a - d for a single (unsigned) digit d, honouring the
-  sign of a.  A zero result is always given a non-negative sign.
-  Returns MP_OKAY or the error code of the helper that failed.
- */
-
-mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  res = mp_copy(a, b);
-  if(res != MP_OKAY)
-    return res;
-
-  if(SIGN(b) == MP_NEG) {
-    /* Negative minuend: the magnitude grows, the sign stays */
-    res = s_mp_add_d(b, d);
-    if(res != MP_OKAY)
-      return res;
-
-  } else if(s_mp_cmp_d(b, d) < 0) {
-    /* Non-negative but smaller than d: result is -(d - b) */
-    mp_neg(b, b);
-
-    DIGIT(b, 0) = d - DIGIT(b, 0);
-    SIGN(b) = MP_NEG;
-
-  } else {
-    /* Non-negative and at least d: plain digit subtraction */
-    res = s_mp_sub_d(b, d);
-    if(res != MP_OKAY)
-      return res;
-  }
-
-  /* Never hand back a zero that is marked negative */
-  if(s_mp_cmp_d(b, 0) == 0)
-    SIGN(b) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_sub_d() */
-
-/* }}} */
-
-/* {{{ mp_mul_d(a, d, b) */
-
-/*
-  mp_mul_d(a, d, b)
-
-  Compute b = a * d for a single (unsigned) digit d; the sign of a is
-  carried over by the copy.  Multiplying by zero just zeroes b.
-  Returns MP_OKAY or the error code of the helper that failed.
- */
-
-mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if(d == 0) {
-    mp_zero(b);
-    return MP_OKAY;
-  }
-
-  /* Work on a copy of a held in b */
-  res = mp_copy(a, b);
-  if(res == MP_OKAY)
-    res = s_mp_mul_d(b, d);
-
-  return res;
-
-} /* end mp_mul_d() */
-
-/* }}} */
-
-/* {{{ mp_mul_2(a, c) */
-
-/* Compute c = a * 2: copy a into c, then let s_mp_mul_2() double it */
-mp_err mp_mul_2(mp_int *a, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  res = mp_copy(a, c);
-  if(res != MP_OKAY)
-    return res;
-
-  return s_mp_mul_2(c);
-
-} /* end mp_mul_2() */
-
-/* }}} */
-
-/* {{{ mp_div_d(a, d, q, r) */
-
-/*
-  mp_div_d(a, d, q, r)
-
-  Compute the quotient q = a / d and remainder r = a mod d, for a
-  single digit d.  Either q or r may be NULL if that result is not
-  wanted.  The remainder is reported as an unsigned digit.  A zero
-  quotient is always returned with a non-negative sign.
-
-  Returns MP_RANGE if d is zero, MP_OKAY on success, or the error code
-  of the helper that failed.
- */
-
-mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
-{
-  mp_err   res;
-  mp_digit rem;
-  int      shift;
-
-  ARGCHK(a != NULL, MP_BADARG);
-
-  if(d == 0)
-    return MP_RANGE;
-
-  /* Shortcut for powers of two ... */
-  if((shift = s_mp_ispow2d(d)) >= 0) {
-    mp_digit  mask;
-
-    /* Shift a digit-typed 1: an int-typed '1 << shift' overflows once
-       shift reaches the top bit of int on wide-digit builds */
-    mask = ((mp_digit)1 << shift) - 1;
-
-    /* Taken before q is written, so q may be the same object as a */
-    rem = DIGIT(a, 0) & mask;
-
-    if(q) {
-      /* A failed copy must not be shifted and reported as success */
-      if((res = mp_copy(a, q)) != MP_OKAY)
-        return res;
-
-      s_mp_div_2d(q, (mp_digit)shift);
-
-      /* Same zero normalization as the general path below */
-      if(s_mp_cmp_d(q, 0) == MP_EQ)
-        SIGN(q) = MP_ZPOS;
-    }
-
-    if(r)
-      *r = rem;
-
-    return MP_OKAY;
-  }
-
-  /*
-    If the quotient is actually going to be returned, we'll try to
-    avoid hitting the memory allocator by copying the dividend into it
-    and doing the division there.  This can't be any _worse_ than
-    always copying, and will sometimes be better (since it won't make
-    another copy)
-
-    If it's not going to be returned, we need to allocate a temporary
-    to hold the quotient, which will just be discarded.
-   */
-  if(q) {
-    if((res = mp_copy(a, q)) != MP_OKAY)
-      return res;
-
-    res = s_mp_div_d(q, d, &rem);
-    if(s_mp_cmp_d(q, 0) == MP_EQ)
-      SIGN(q) = MP_ZPOS;
-
-  } else {
-    mp_int  qp;
-
-    if((res = mp_init_copy(&qp, a)) != MP_OKAY)
-      return res;
-
-    res = s_mp_div_d(&qp, d, &rem);
-    if(s_mp_cmp_d(&qp, 0) == 0)
-      SIGN(&qp) = MP_ZPOS;
-
-    mp_clear(&qp);
-  }
-
-  if(r)
-    *r = rem;
-
-  return res;
-
-} /* end mp_div_d() */
-
-/* }}} */
-
-/* {{{ mp_div_2(a, c) */
-
-/*
-  mp_div_2(a, c)
-
-  Compute c = a / 2; the remainder is thrown away.  Returns MP_OKAY,
-  or the error code of mp_copy() if a could not be copied into c.
- */
-
-mp_err mp_div_2(mp_int *a, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  res = mp_copy(a, c);
-  if(res == MP_OKAY)
-    s_mp_div_2(c);   /* halve the copy in place */
-
-  return res;
-
-} /* end mp_div_2() */
-
-/* }}} */
-
-/* {{{ mp_expt_d(a, d, b) */
-
-/*
-  mp_expt_d(a, d, c)
-
-  Raise a to the single-digit power d by square-and-multiply, walking
-  the bits of d from the lowest upward, and store the result in c.
-  Returns MP_OKAY or the error code of the helper that failed; on
-  failure c is not written.
- */
-mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c)
-{
-  mp_int   acc, base;
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  res = mp_init(&acc);
-  if(res != MP_OKAY)
-    return res;
-
-  res = mp_init_copy(&base, a);
-  if(res != MP_OKAY)
-    goto DROP_ACC;
-
-  /* The accumulator starts out as 1 */
-  DIGIT(&acc, 0) = 1;
-
-  for(; d != 0; d >>= 1) {
-    /* Fold the current power of a into the result when its bit is set */
-    if((d & 1) && (res = s_mp_mul(&acc, &base)) != MP_OKAY)
-      goto DROP_BASE;
-
-    /* Advance to the next power of two of the base */
-    if((res = s_mp_sqr(&base)) != MP_OKAY)
-      goto DROP_BASE;
-  }
-
-  s_mp_exch(&acc, c);
-
-DROP_BASE:
-  mp_clear(&base);
-DROP_ACC:
-  mp_clear(&acc);
-
-  return res;
-
-} /* end mp_expt_d() */
-
-/* }}} */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Full arithmetic */
-
-/* {{{ mp_abs(a, b) */
-
-/*
-  mp_abs(a, b)
-
-  Store the absolute value of a in b.  The two arguments may be the
-  same object.  Returns MP_OKAY, or the error code of mp_copy().
- */
-
-mp_err mp_abs(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  res = mp_copy(a, b);
-  if(res == MP_OKAY)
-    SIGN(b) = MP_ZPOS;   /* same digits, sign forced non-negative */
-
-  return res;
-
-} /* end mp_abs() */
-
-/* }}} */
-
-/* {{{ mp_neg(a, b) */
-
-/*
-  mp_neg(a, b)
-
-  Store -a in b.  The two arguments may be the same object.  Zero
-  stays non-negative.  Returns MP_OKAY, or the error code of
-  mp_copy().
- */
-
-mp_err mp_neg(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  res = mp_copy(a, b);
-  if(res != MP_OKAY)
-    return res;
-
-  if(s_mp_cmp_d(b, 0) == MP_EQ) {
-    SIGN(b) = MP_ZPOS;         /* zero never carries a minus sign */
-  } else if(SIGN(b) == MP_NEG) {
-    SIGN(b) = MP_ZPOS;
-  } else {
-    SIGN(b) = MP_NEG;
-  }
-
-  return MP_OKAY;
-
-} /* end mp_neg() */
-
-/* }}} */
-
-/* {{{ mp_add(a, b, c) */
-
-/*
-  mp_add(a, b, c)
-
-  Compute c = a + b.  All parameters may be identical.
-
-  The work is dispatched on the signs of the operands.  With equal
-  signs the s_mp_add() path is taken.  With different signs the
-  operands are compared with s_mp_cmp() (the comparison that
-  mp_cmp_mag() exposes as |a| <=> |b|) and the s_mp_sub() path is
-  taken with the larger operand as the minuend; operands that compare
-  equal give zero.  A temporary is only allocated when the output
-  aliases the operand that must be subtracted.
-
-  Returns MP_OKAY, or the error code of the first helper that fails.
- */
-
-mp_err mp_add(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err  res;
-  int     cmp;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
-
-    /* Commutativity of addition lets us do this in either order,
-       so we avoid having to use a temporary even if the result 
-       is supposed to replace the output
-     */
-    if(c == b) {
-      if((res = s_mp_add(c, a)) != MP_OKAY)
-	return res;
-    } else {
-      if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
-	return res;
-
-      if((res = s_mp_add(c, b)) != MP_OKAY) 
-	return res;
-    }
-
-  } else if((cmp = s_mp_cmp(a, b)) > 0) {  /* different sign: a > b   */
-
-    /* If the output is going to be clobbered, we will use a temporary
-       variable; otherwise, we'll do it without touching the memory 
-       allocator at all, if possible
-     */
-    if(c == b) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-
-      s_mp_exch(&tmp, c);   /* c takes the result; tmp now holds c's old storage */
-      mp_clear(&tmp);
-
-    } else {
-
-      if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(c, b)) != MP_OKAY)
-	return res;
-
-    }
-
-  } else if(cmp == 0) {             /* different sign, a == b   */
-
-    mp_zero(c);
-    return MP_OKAY;
-
-  } else {                          /* different sign: a < b    */
-
-    /* See above... */
-    if(c == a) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-
-      s_mp_exch(&tmp, c);   /* c takes the result; tmp now holds c's old storage */
-      mp_clear(&tmp);
-
-    } else {
-
-      if(c != b && (res = mp_copy(b, c)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(c, a)) != MP_OKAY)
-	return res;
-
-    }
-  }
-
-  if(USED(c) == 1 && DIGIT(c, 0) == 0)   /* a zero result is never left negative */
-    SIGN(c) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_add() */
-
-/* }}} */
-
-/* {{{ mp_sub(a, b, c) */
-
-/*
-  mp_sub(a, b, c)
-
-  Compute c = a - b.  All parameters may be identical.
-
-  With different signs the s_mp_add() path is taken and the result
-  carries the sign of a.  With equal signs the operands are compared
-  with s_mp_cmp() (the comparison that mp_cmp_mag() exposes as
-  |a| <=> |b|): the smaller is subtracted from the larger via
-  s_mp_sub(), and the sign is flipped when b was the larger one.
-  Operands that compare equal give zero.
-
-  Returns MP_OKAY, or the error code of the first helper that fails.
- */
-
-mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err  res;
-  int     cmp;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(a) != SIGN(b)) {
-    if(c == a) {
-      if((res = s_mp_add(c, b)) != MP_OKAY)
-	return res;
-    } else {
-      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY))
-	return res;
-      if((res = s_mp_add(c, a)) != MP_OKAY)
-	return res;
-      SIGN(c) = SIGN(a);   /* c was seeded from b, so its sign must be replaced by a's */
-    }
-
-  } else if((cmp = s_mp_cmp(a, b)) > 0) { /* Same sign, a > b */
-    if(c == b) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
-	return res;
-      if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-      s_mp_exch(&tmp, c);   /* c takes the result; tmp now holds c's old storage */
-      mp_clear(&tmp);
-
-    } else {
-      if(c != a && ((res = mp_copy(a, c)) != MP_OKAY))
-	return res;
-
-      if((res = s_mp_sub(c, b)) != MP_OKAY)
-	return res;
-    }
-
-  } else if(cmp == 0) {  /* Same sign, equal magnitude */
-    mp_zero(c);
-    return MP_OKAY;
-
-  } else {               /* Same sign, b > a */
-    if(c == a) {
-      mp_int  tmp;
-
-      if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
-	return res;
-
-      if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
-	mp_clear(&tmp);
-	return res;
-      }
-      s_mp_exch(&tmp, c);   /* c takes the result; tmp now holds c's old storage */
-      mp_clear(&tmp);
-
-    } else {
-      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) 
-	return res;
-
-      if((res = s_mp_sub(c, a)) != MP_OKAY)
-	return res;
-    }
-
-    /* NOTE(review): logical negation only flips the sign if MP_ZPOS and
-       MP_NEG are defined as 0 and 1 -- confirm against the header */
-    SIGN(c) = !SIGN(b);
-  }
-
-  if(USED(c) == 1 && DIGIT(c, 0) == 0)   /* a zero result is never left negative */
-    SIGN(c) = MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_sub() */
-
-/* }}} */
-
-/* {{{ mp_mul(a, b, c) */
-
-/*
-  mp_mul(a, b, c)
-
-  Compute c = a * b.  Any of the parameters may be the same object.
-  The product is negative only when the operand signs differ and the
-  product is not zero.  Returns MP_OKAY or the error code of the
-  helper that failed.
- */
-
-mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_err   res;
-  mp_sign  sgn;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  /* Work out the result sign up front: c may alias a or b, and is
-     about to be overwritten */
-  sgn = (SIGN(a) == SIGN(b)) ? MP_ZPOS : MP_NEG;
-
-  if(c == b) {
-    /* c already holds b, so multiply a into it */
-    res = s_mp_mul(c, a);
-  } else {
-    res = mp_copy(a, c);
-    if(res == MP_OKAY)
-      res = s_mp_mul(c, b);
-  }
-
-  if(res != MP_OKAY)
-    return res;
-
-  SIGN(c) = (sgn == MP_NEG && s_mp_cmp_d(c, 0) != MP_EQ) ? MP_NEG : MP_ZPOS;
-
-  return MP_OKAY;
-
-} /* end mp_mul() */
-
-/* }}} */
-
-/* {{{ mp_mul_2d(a, d, c) */
-
-/*
-  mp_mul_2d(a, d, c)
-
-  Compute c = a * 2^d.  a and c may be the same object.  Returns
-  MP_OKAY or the error code of the helper that failed.
- */
-
-mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  res = mp_copy(a, c);
-
-  /* Nothing more to do if the copy failed or there is no shift */
-  if(res != MP_OKAY || d == 0)
-    return res;
-
-  return s_mp_mul_2d(c, d);
-
-} /* end mp_mul_2d() */
-
-/* }}} */
-
-/* {{{ mp_sqr(a, b) */
-
-#if MP_SQUARE
-/* Compute b = a * a; the result is always marked non-negative.
-   Returns MP_OKAY or the error code of the helper that failed. */
-mp_err mp_sqr(mp_int *a, mp_int *b)
-{
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  res = mp_copy(a, b);
-  if(res == MP_OKAY)
-    res = s_mp_sqr(b);
-  if(res == MP_OKAY)
-    SIGN(b) = MP_ZPOS;
-
-  return res;
-
-} /* end mp_sqr() */
-#endif
-
-/* }}} */
-
-/* {{{ mp_div(a, b, q, r) */
-
-/*
-  mp_div(a, b, q, r)
-
-  Compute q = a / b and r = a mod b.  Input parameters may be re-used
-  as output parameters.  If q or r is NULL, that portion of the
-  computation will be discarded (although it will still be computed)
-
-  The remainder takes the sign of a; the quotient is negative when the
-  signs of a and b differ.  Zero results are always non-negative.
-  Returns MP_RANGE when b is zero, MP_OKAY on success, or the error
-  code of the helper that failed.
-
-  Pay no attention to the hacker behind the curtain.
- */
-
-mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r)
-{
-  mp_err   res;
-  mp_int   qtmp, rtmp;
-  int      cmp;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) == MP_EQ)
-    return MP_RANGE;
-
-  /* If |a| <= |b| (s_mp_cmp() is the comparison mp_cmp_mag() exposes
-     as |a| <=> |b|), we can compute the solution without division,
-     and avoid any memory allocation
-   */
-  if((cmp = s_mp_cmp(a, b)) < 0) {
-    if(r) {   /* r is written before q is zeroed, in case q is the same object as a */
-      if((res = mp_copy(a, r)) != MP_OKAY)
-	return res;
-    }
-
-    if(q) 
-      mp_zero(q);
-
-    return MP_OKAY;
-
-  } else if(cmp == 0) {
-
-    /* Set quotient to 1, with appropriate sign */
-    if(q) {
-      int qneg = (SIGN(a) != SIGN(b));   /* read the signs before q (possibly a or b) is overwritten */
-
-      mp_set(q, 1);
-      if(qneg)
-	SIGN(q) = MP_NEG;
-    }
-
-    if(r)
-      mp_zero(r);
-
-    return MP_OKAY;
-  }
-
-  /* If we get here, it means we actually have to do some division */
-
-  /* Set up some temporaries... */
-  if((res = mp_init_copy(&qtmp, a)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&rtmp, b)) != MP_OKAY)
-    goto CLEANUP;   /* NOTE(review): CLEANUP also clears rtmp, which was not initialized on this path */
-
-  if((res = s_mp_div(&qtmp, &rtmp)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* Compute the signs for the output  */
-  SIGN(&rtmp) = SIGN(a); /* Sr = Sa              */
-  if(SIGN(a) == SIGN(b))
-    SIGN(&qtmp) = MP_ZPOS;  /* Sq = MP_ZPOS if Sa = Sb */
-  else
-    SIGN(&qtmp) = MP_NEG;   /* Sq = MP_NEG if Sa != Sb */
-
-  if(s_mp_cmp_d(&qtmp, 0) == MP_EQ)
-    SIGN(&qtmp) = MP_ZPOS;
-  if(s_mp_cmp_d(&rtmp, 0) == MP_EQ)
-    SIGN(&rtmp) = MP_ZPOS;
-
-  /* Copy output, if it is needed      */
-  if(q) 
-    s_mp_exch(&qtmp, q);
-
-  if(r) 
-    s_mp_exch(&rtmp, r);
-
-CLEANUP:
-  mp_clear(&rtmp);
-  mp_clear(&qtmp);
-
-  return res;
-
-} /* end mp_div() */
-
-/* }}} */
-
-/* {{{ mp_div_2d(a, d, q, r) */
-
-/*
-  mp_div_2d(a, d, q, r)
-
-  Compute q = a / 2^d and r = a mod 2^d.  Either q or r may be NULL if
-  that result is not wanted.  q or r may be the same object as a (but
-  q and r must be distinct from each other).  Returns MP_OKAY, or the
-  error code of mp_copy() if an output could not be filled.
- */
-mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL, MP_BADARG);
-
-  /* When the quotient overwrites a, the remainder has to be taken
-     first; otherwise it would be derived from the already shifted
-     value */
-  if(r != NULL && q == a) {
-    if((res = mp_copy(a, r)) != MP_OKAY)
-      return res;
-
-    s_mp_mod_2d(r, d);
-    r = NULL;   /* remainder is done, skip it below */
-  }
-
-  if(q) {
-    if((res = mp_copy(a, q)) != MP_OKAY)
-      return res;
-
-    s_mp_div_2d(q, d);
-  }
-
-  if(r) {
-    if((res = mp_copy(a, r)) != MP_OKAY)
-      return res;
-
-    s_mp_mod_2d(r, d);
-  }
-
-  return MP_OKAY;
-
-} /* end mp_div_2d() */
-
-/* }}} */
-
-/* {{{ mp_expt(a, b, c) */
-
-/*
-  mp_expt(a, b, c)
-
-  Compute c = a ** b, that is, raise a to the b power.  Uses a
-  standard iterative square-and-multiply technique.
-
-  The exponent must not be negative (MP_RANGE otherwise).  The result
-  is negative exactly when a is negative and b is odd.  Returns
-  MP_OKAY or the error code of the helper that failed; on failure c
-  is not written.
- */
-
-mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
-{
-  mp_int   s, x;
-  mp_err   res;
-  mp_digit d;
-  int      dig, bit;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) < 0)
-    return MP_RANGE;
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-
-  mp_set(&s, 1);
-
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  /* Loop over low-order digits in ascending order */
-  for(dig = 0; dig < (int)(USED(b) - 1); dig++) {
-    d = DIGIT(b, dig);
-
-    /* Loop over bits of each non-maximal digit */
-    for(bit = 0; bit < (int)DIGIT_BIT; bit++) {
-      if(d & 1) {
-        if((res = s_mp_mul(&s, &x)) != MP_OKAY) 
-          goto CLEANUP;
-      }
-
-      d >>= 1;
-      
-      if((res = s_mp_sqr(&x)) != MP_OKAY)
-        goto CLEANUP;
-    }
-  }
-
-  /* Consider now the last digit... */
-  d = DIGIT(b, dig);
-
-  while(d) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-        goto CLEANUP;
-    }
-
-    d >>= 1;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY)
-      goto CLEANUP;
-  }
-  
-  /* Fix up the sign explicitly: an odd power keeps the sign of the
-     base, an even power is never negative.  (The earlier code copied
-     the base's sign for EVEN exponents, giving (-2)^2 = -4.) */
-  if(mp_isodd(b))
-    SIGN(&s) = SIGN(a);
-  else
-    SIGN(&s) = MP_ZPOS;
-
-  res = mp_copy(&s, c);
-
-CLEANUP:
-  mp_clear(&x);
-X:
-  mp_clear(&s);
-
-  return res;
-
-} /* end mp_expt() */
-
-/* }}} */
-
-/* {{{ mp_2expt(a, k) */
-
-/* Set a to 2^k; argument-checked front end for s_mp_2expt() */
-
-mp_err mp_2expt(mp_int *a, mp_digit k)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL, MP_BADARG);
-
-  res = s_mp_2expt(a, k);
-
-  return res;
-
-} /* end mp_2expt() */
-
-/* }}} */
-
-/* {{{ mp_mod(a, m, c) */
-
-/*
-  mp_mod(a, m, c)
-
-  Compute c = a (mod m).  Result will always be 0 <= c < m.
-
-  A negative modulus is rejected with MP_RANGE.  Otherwise returns
-  MP_OKAY, or the error code of the helper that failed.  c may be the
-  same object as a (the mp_*mod() wrappers call it that way).
- */
-
-mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-  int     mag;
-
-  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  if(SIGN(m) == MP_NEG)
-    return MP_RANGE;
-
-  /*
-     If |a| > m, we need to divide to get the remainder and take the
-     absolute value.  
-
-     If |a| < m, we don't need to do any division, just copy and adjust
-     the sign (if a is negative).
-
-     If |a| == m, we can simply set the result to zero.
-
-     This order is intended to minimize the average path length of the
-     comparison chain on common workloads -- the most frequent cases are
-     that |a| != m, so we do those first.
-   */
-  if((mag = s_mp_cmp(a, m)) > 0) {
-    if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
-      return res;
-    
-    if(SIGN(c) == MP_NEG) {   /* mp_div() gives the remainder the sign of a */
-      if((res = mp_add(c, m, c)) != MP_OKAY)
-	return res;
-    }
-
-  } else if(mag < 0) {
-    if((res = mp_copy(a, c)) != MP_OKAY)
-      return res;
-
-    if(mp_cmp_z(a) < 0) {   /* lift a negative value into the range 0..m-1 */
-      if((res = mp_add(c, m, c)) != MP_OKAY)
-	return res;
-
-    }
-    
-  } else {
-    mp_zero(c);
-
-  }
-
-  return MP_OKAY;
-
-} /* end mp_mod() */
-
-/* }}} */
-
-/* {{{ mp_mod_d(a, d, c) */
-
-/*
-  mp_mod_d(a, d, c)
-
-  Compute c = a (mod d).  Result will always be 0 <= c < d, for
-  negative a as well.  Returns MP_RANGE if d is zero, MP_OKAY on
-  success, or the error code of mp_div_d().
- */
-mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c)
-{
-  mp_err   res;
-  mp_digit rem;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  /* Reject a zero modulus up front, whatever the value of a */
-  if(d == 0)
-    return MP_RANGE;
-
-  /* Divide whenever |a| >= d.  The earlier test used '>' and so
-     returned d itself, instead of 0, when a was equal to d. */
-  if(s_mp_cmp_d(a, d) >= 0) {
-    if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
-      return res;
-
-  } else {
-    /* |a| < d: a has a single significant digit */
-    rem = DIGIT(a, 0);
-  }
-
-  /* rem is taken to be |a| mod d at this point (mp_div_d() reports an
-     unsigned digit); fold a negative a into the range 0..d-1 */
-  if(SIGN(a) == MP_NEG && rem != 0)
-    rem = d - rem;
-
-  /* Kept as a guard for builds where ARGCHK compiles to nothing */
-  if(c)
-    *c = rem;
-
-  return MP_OKAY;
-
-} /* end mp_mod_d() */
-
-/* }}} */
-
-/* {{{ mp_sqrt(a, b) */
-
-/*
-  mp_sqrt(a, b)
-
-  Compute the integer square root of a, and store the result in b.
-  Uses an integer-arithmetic version of Newton's iterative linear
-  approximation technique to determine this value; the result has the
-  following two properties:
-
-     b^2 <= a
-     (b+1)^2 >= a
-
-  It is a range error to pass a negative value.
-
-  Returns MP_RANGE for negative a, MP_OKAY on success, or the error
-  code of the helper that failed (b is not written in that case).
-  Zero and one are returned unchanged.
- */
-mp_err mp_sqrt(mp_int *a, mp_int *b)
-{
-  mp_int   x, t;
-  mp_err   res;
-
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  /* Cannot take square root of a negative value */
-  if(SIGN(a) == MP_NEG)
-    return MP_RANGE;
-
-  /* Special cases for zero and one, trivial     */
-  if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ) 
-    return mp_copy(a, b);
-    
-  /* Initialize the temporaries we'll use below  */
-  if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
-    return res;
-
-  /* Compute an initial guess for the iteration as a itself */
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-
-  for(;;) {
-    /* t = (x * x) - a */
-    mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
-    if((res = mp_sqr(&t, &t)) != MP_OKAY ||
-       (res = mp_sub(&t, a, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-    /* t = t / 2x       */
-    s_mp_mul_2(&x);   /* x is doubled only for the division and halved again right after */
-    if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
-      goto CLEANUP;
-    s_mp_div_2(&x);
-
-    /* Terminate the loop, if the quotient is zero */
-    if(mp_cmp_z(&t) == MP_EQ)
-      break;
-
-    /* x = x - t       */
-    if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
-      goto CLEANUP;
-
-  }
-
-  /* Copy result to output parameter */
-  mp_sub_d(&x, 1, &x);   /* NOTE(review): return value is not checked; res still holds MP_OKAY from mp_div() */
-  s_mp_exch(&x, b);
-
- CLEANUP:
-  mp_clear(&x);
- X:
-  mp_clear(&t); 
-
-  return res;
-
-} /* end mp_sqrt() */
-
-/* }}} */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Modular arithmetic */
-
-#if MP_MODARITH
-/* {{{ mp_addmod(a, b, m, c) */
-
-/*
-  mp_addmod(a, b, m, c)
-
-  Compute c = (a + b) mod m: the sum is formed in c with mp_add() and
-  then reduced in place with mp_mod().  Returns MP_OKAY or the error
-  code of whichever step failed.
- */
-
-mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  res = mp_add(a, b, c);
-  if(res == MP_OKAY)
-    res = mp_mod(c, m, c);
-
-  return res;
-
-}
-
-/* }}} */
-
-/* {{{ mp_submod(a, b, m, c) */
-
-/*
-  mp_submod(a, b, m, c)
-
-  Compute c = (a - b) mod m: the difference is formed in c with
-  mp_sub() and then reduced in place with mp_mod().  Returns MP_OKAY
-  or the error code of whichever step failed.
- */
-
-mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  res = mp_sub(a, b, c);
-  if(res == MP_OKAY)
-    res = mp_mod(c, m, c);
-
-  return res;
-
-}
-
-/* }}} */
-
-/* {{{ mp_mulmod(a, b, m, c) */
-
-/*
-  mp_mulmod(a, b, m, c)
-
-  Compute c = (a * b) mod m: the product is formed in c with mp_mul()
-  and then reduced in place with mp_mod().  Returns MP_OKAY or the
-  error code of whichever step failed.
- */
-
-mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  res = mp_mul(a, b, c);
-  if(res == MP_OKAY)
-    res = mp_mod(c, m, c);
-
-  return res;
-
-}
-
-/* }}} */
-
-/* {{{ mp_sqrmod(a, m, c) */
-
-#if MP_SQUARE
-/* Compute c = (a * a) mod m: square into c with mp_sqr(), then reduce
-   in place with mp_mod().  Returns MP_OKAY or the failing step's code. */
-mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_err  res;
-
-  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  res = mp_sqr(a, c);
-  if(res == MP_OKAY)
-    res = mp_mod(c, m, c);
-
-  return res;
-
-} /* end mp_sqrmod() */
-#endif
-
-/* }}} */
-
-/*
-  mp_shrink(a)
-
-  Shrinks the memory required to store a mp_int if possible, by
-  resizing the digit buffer to exactly the number of digits in use.
-  Returns MP_OKAY on success (including when there is nothing to do),
-  or MP_MEM if the reallocation fails; in that case a is left fully
-  intact with its original buffer.
- */
-mp_err mp_shrink(mp_int *a)
-{
-   mp_digit *tmp;
-
-   ARGCHK(a != NULL, MP_BADARG);
-
-   /* already tight, or nothing in use (a zero-size request has
-      implementation-defined results, so it is never issued) */
-   if (a->used == a->alloc || a->used == 0) {
-      return MP_OKAY;
-   }
-
-   /* Resize through a temporary: assigning the result straight to
-      a->dp would lose (and leak) the old buffer on failure */
-   tmp = XREALLOC(a->dp, a->used * sizeof(mp_digit));
-   if (tmp == NULL) {
-      return MP_MEM;
-   }
-
-   a->dp    = tmp;
-   a->alloc = a->used;
-   return MP_OKAY;
-}      
-
-/* {{{ mp_exptmod(a, b, m, c) */
-
-#ifdef MPI_FASTEXPT
-
-/*
-  mp_exptmod(G, X, P, Y)      [MPI_FASTEXPT variant]
-
-  Computes Y == G^X mod P.  The exponent is cut into Q-bit windows
-  (Q is picked from the bit length of X) and a table M[0 .. 2^Q - 1]
-  of powers of G is precomputed, so that each window costs Q
-  squarings and at most one multiplication.  Reductions go through
-  s_mp_reduce() with the precomputed value mu.
-
-  Returns MP_RANGE if X is negative or P is not positive, MP_MEM if a
-  table cannot be allocated, otherwise MP_OKAY or the error code of
-  the helper that failed.  Y is only written on success.
- */
-mp_err mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
-{
-   mp_int *M, tx, mu, res;
-   mp_digit top;
-   int QQQ, QQ, Q, x, *vals, err;
-
-   ARGCHK(G != NULL && X != NULL && P != NULL && Y != NULL, MP_BADARG);
-
-   /* same domain check as the non-windowed version; a negative
-      exponent would also keep the window extraction below from ever
-      terminating */
-   if (mp_cmp_z(X) < 0 || mp_cmp_z(P) <= 0) {
-      return MP_RANGE;
-   }
-
-   /* determine the value of Q: count the bits of the exponent.  The
-      top digit is examined as an mp_digit so that a set high bit can
-      never turn into a negative int */
-   x = (USED(X) - 1) * DIGIT_BIT;
-   for (top = DIGIT(X, USED(X)-1); top != 0; top >>= 1) {
-      ++x;
-   }
-        if (x <= 8)    { Q = 2; }
-   else if (x <= 64)   { Q = 3; }
-   else if (x <= 256)  { Q = 4; }
-   else if (x <= 950)  { Q = 5; }
-   else if (x <= 2755) { Q = 6; }
-   else                { Q = 7; }
-   
-#ifdef MPI_FASTEXPT_LOWMEM
-   if (Q > 5) {
-      Q = 5;
-   }
-#endif
-
-   /* alloc room for table: one slot per Q-bit window of the exponent */
-   vals = XCALLOC(sizeof(int), USED(X)*((DIGIT_BIT/Q)+((DIGIT_BIT%Q)?1:0)));
-   if (vals == NULL) { err = MP_MEM; goto DONE; }
-
-   M    = XCALLOC(sizeof(mp_int), 1<<Q);
-   if (M == NULL)    { err = MP_MEM; goto FREE_VALS; }
-
-   /* init M table */
-   for (x = 0; x < (1<<Q); x++) {
-       if (mp_init(&M[x]) != MP_OKAY) {
-          /* release the entries that WERE initialized (indices below
-             x); Q itself must stay untouched */
-          for (QQ = 0; QQ < x; QQ++) {
-              mp_clear(&M[QQ]);
-          }
-          err = MP_MEM;
-          goto FREE_M;
-       }
-   }
-
-  /* init the barett reduction */
-  /* mu = b^2k / m */
-  if ((err = mp_init(&mu)) != MP_OKAY) {
-     goto DONE_M;
-  }
-
-  if ((err = mp_init(&res)) != MP_OKAY) {
-     goto DONE_MU;
-  }
-
-  mp_set(&mu, 1); 
-  s_mp_lshd(&mu, 2 * USED(P));
-  if((err = mp_div(&mu, P, &mu, NULL)) != MP_OKAY){
-    goto DONE_RES;
-  }
-
-   /* now init the M array with powers of the base */
-   mp_set(&M[0], 1);
-   if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { goto DONE_RES; }
-   
-   /* shrink first two */
-   for (x = 0; x < 2; x++) {
-      if ((err = mp_shrink(&M[x])) != MP_OKAY) { goto DONE_RES; }
-   }
-   
-   /* fill the remaining entries: an entry that is still zero has not
-      been computed yet; it is derived from its predecessor, and its
-      repeated squares (x*2, x*4, ...) are filled in at the same time */
-   for (x = 2; x < (1<<Q); x++) {
-       if (USED(&M[x]) == 1 && DIGIT(&M[x], 0) == 0) {
-          if ((err = mp_mul(&M[x-1], &M[1], &M[x])) != MP_OKAY)      { goto DONE_RES; }
-          if ((err = s_mp_reduce(&M[x], P, &mu)) != MP_OKAY)         { goto DONE_RES; }
-          if ((err = mp_shrink(&M[x])) != MP_OKAY)                   { goto DONE_RES; }
-          
-          QQQ = x;
-          QQ  = x * 2;
-          while (QQ < (1<<Q)) {
-              if ((err = mp_sqr(&M[QQQ], &M[QQ])) != MP_OKAY)        { goto DONE_RES; }
-              if ((err = s_mp_reduce(&M[QQ], P, &mu)) != MP_OKAY)    { goto DONE_RES; }
-              if ((err = mp_shrink(&M[QQ])) != MP_OKAY)              { goto DONE_RES; }
-              QQQ = QQ;
-              QQ  *= 2;
-          }
-       }
-   }
-   
-   /* now grab the bits: vals[0] is the lowest window of the exponent */
-   if ((err = mp_init_copy(&tx, X)) != MP_OKAY) {
-      goto DONE_RES;
-   }
-
-   x = 0;
-   while (mp_cmp_d(&tx, 0)) {
-       vals[x++] = DIGIT(&tx, 0) & ((1<<Q)-1);
-       s_mp_div_2d(&tx, Q);
-   }
-
-   if (x == 0) {
-      /* zero exponent: no windows were produced, so there is no
-         vals[--x] to start from; the answer is 1 reduced mod P */
-      mp_set(&res, 1);
-      if ((err = mp_mod(&res, P, &res)) != MP_OKAY)               { goto DONE_TX; }
-   } else {
-      /* now set output equal to the first digit exponent */
-      if ((err = mp_copy(&M[vals[--x]], &res)) != MP_OKAY)        { goto DONE_TX; }
-
-      while (--x >= 0) {
-         for (QQ = 0; QQ < Q; QQ++) {
-             if ((err = s_mp_sqr(&res)) != MP_OKAY)               { goto DONE_TX; }
-             if ((err = s_mp_reduce(&res, P, &mu)) != MP_OKAY)    { goto DONE_TX; }
-         }
-         if (vals[x] != 0) {
-            if ((err = s_mp_mul(&res, &M[vals[x]])) != MP_OKAY)   { goto DONE_TX; }
-            if ((err = s_mp_reduce(&res, P, &mu)) != MP_OKAY)     { goto DONE_TX; }
-         }
-      }
-   }
-   s_mp_exch(&res, Y);
-
-   /* free ram */
-DONE_TX:
-   mp_clear(&tx);
-DONE_RES:
-   mp_clear(&res);
-DONE_MU:
-   mp_clear(&mu);
-DONE_M:
-   for (x = 0; x < (1<<Q); x++) {
-       mp_clear(&M[x]);
-   }
-FREE_M:
-   XFREE(M);
-FREE_VALS:
-   XFREE(vals);
-DONE:
-   return err;
-}
-
-#else 
-
-/*
-  mp_exptmod(a, b, m, c)
-
-  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
-  method with modular reductions at each step. (This is basically the
-  same code as mp_expt(), except for the addition of the reductions)
-  
-  The modular reductions are done using Barrett's algorithm (see
-  s_mp_reduce() below for details)
-
-  Returns MP_RANGE if b is negative or m is not positive, otherwise
-  MP_OKAY or the error code of the helper that failed.  c is only
-  written on success.
- */
-
-mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
-{
-  mp_int   s, x, mu;
-  mp_err   res;
-  mp_digit d, *db;
-  mp_size  ub;
-  int      dig, bit;
-
-  /* All four pointers are used below, so all four are checked */
-  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
-    return MP_RANGE;
-
-  /* Only look inside b once the argument check has passed (these used
-     to be initializers, i.e. evaluated before the check) */
-  db = DIGITS(b);
-  ub = USED(b);
-
-  if((res = mp_init(&s)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&x, a)) != MP_OKAY)
-    goto X;
-  if((res = mp_mod(&x, m, &x)) != MP_OKAY ||
-     (res = mp_init(&mu)) != MP_OKAY)
-    goto MU;
-
-  mp_set(&s, 1);
-
-  /* mu = b^2k / m */
-  s_mp_add_d(&mu, 1); 
-  s_mp_lshd(&mu, 2 * USED(m));
-  if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* Loop over digits of b in ascending order, except highest order */
-  for(dig = 0; dig < (int)(ub - 1); dig++) {
-    d = *db++;
-
-    /* Loop over the bits of the lower-order digits */
-    for(bit = 0; bit < (int)DIGIT_BIT; bit++) {
-      if(d & 1) {
-        if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-          goto CLEANUP;
-        if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
-          goto CLEANUP;
-      }
-
-      d >>= 1;
-
-      if((res = s_mp_sqr(&x)) != MP_OKAY)
-        goto CLEANUP;
-      if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
-        goto CLEANUP;
-    }
-  }
-
-  /* Now do the last digit... */
-  d = *db;
-
-  while(d) {
-    if(d & 1) {
-      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
-        goto CLEANUP;
-      if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
-        goto CLEANUP;
-    }
-
-    d >>= 1;
-
-    if((res = s_mp_sqr(&x)) != MP_OKAY)
-      goto CLEANUP;
-    if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
-      goto CLEANUP;
-  }
-
-  s_mp_exch(&s, c);
-
- CLEANUP:
-  mp_clear(&mu);
- MU:
-  mp_clear(&x);
- X:
-  mp_clear(&s);
-
-  return res;
-
-} /* end mp_exptmod() */
-
-#endif
-
-/* }}} */
-
-/* {{{ mp_exptmod_d(a, d, m, c) */
-
-/*
-  mp_exptmod_d(a, d, m, c)
-
-  Compute c = (a ** d) mod m for a single-digit exponent d, by
-  square-and-multiply with an mp_mod() reduction after every step.
-  Returns MP_OKAY or the error code of the helper that failed; on
-  failure c is not written.
- */
-mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c)
-{
-  mp_int   acc, base;
-  mp_err   res;
-
-  ARGCHK(a != NULL && c != NULL, MP_BADARG);
-
-  res = mp_init(&acc);
-  if(res != MP_OKAY)
-    return res;
-
-  res = mp_init_copy(&base, a);
-  if(res != MP_OKAY)
-    goto DROP_ACC;
-
-  mp_set(&acc, 1);
-
-  for(; d != 0; d /= 2) {
-    /* Multiply the current power in when the low bit is set */
-    if(d & 1) {
-      if((res = s_mp_mul(&acc, &base)) != MP_OKAY)
-        goto DROP_BASE;
-      if((res = mp_mod(&acc, m, &acc)) != MP_OKAY)
-        goto DROP_BASE;
-    }
-
-    /* Square the base for the next bit */
-    if((res = s_mp_sqr(&base)) != MP_OKAY)
-      goto DROP_BASE;
-    if((res = mp_mod(&base, m, &base)) != MP_OKAY)
-      goto DROP_BASE;
-  }
-
-  s_mp_exch(&acc, c);
-
-DROP_BASE:
-  mp_clear(&base);
-DROP_ACC:
-  mp_clear(&acc);
-
-  return res;
-
-} /* end mp_exptmod_d() */
-
-/* }}} */
-#endif /* if MP_MODARITH */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Comparison functions */
-
-/* {{{ mp_cmp_z(a) */
-
-/*
-  mp_cmp_z(a)
-
-  Three-way comparison of a against zero: MP_LT when the sign is
-  negative, MP_EQ when a is the single digit 0, MP_GT otherwise.
- */
-
-int    mp_cmp_z(mp_int *a)
-{
-  if(SIGN(a) == MP_NEG)
-    return MP_LT;
-
-  return (USED(a) == 1 && DIGIT(a, 0) == 0) ? MP_EQ : MP_GT;
-
-} /* end mp_cmp_z() */
-
-/* }}} */
-
-/* {{{ mp_cmp_d(a, d) */
-
-/*
-  mp_cmp_d(a, d)
-
-  Three-way comparison of a against the single digit d.  A negative a
-  is always smaller (d is unsigned); anything else is settled by
-  s_mp_cmp_d().
- */
-
-int    mp_cmp_d(mp_int *a, mp_digit d)
-{
-  ARGCHK(a != NULL, MP_EQ);
-
-  return (SIGN(a) == MP_NEG) ? MP_LT : s_mp_cmp_d(a, d);
-
-} /* end mp_cmp_d() */
-
-/* }}} */
-
-/* {{{ mp_cmp(a, b) */
-
-/*
-  mp_cmp(a, b)
-
-  Signed three-way comparison of a and b.  Operands of different sign
-  are ordered by sign alone; operands of equal sign are ordered by
-  s_mp_cmp(), with the outcome reversed when both are negative.
- */
-int    mp_cmp(mp_int *a, mp_int *b)
-{
-  int  mag;
-
-  ARGCHK(a != NULL && b != NULL, MP_EQ);
-
-  /* Different signs: the non-negative one is the larger */
-  if(SIGN(a) != SIGN(b))
-    return (SIGN(a) == MP_ZPOS) ? MP_GT : MP_LT;
-
-  mag = s_mp_cmp(a, b);
-  if(mag == MP_EQ)
-    return MP_EQ;
-
-  /* Same sign: for two negatives the ordering is the opposite one */
-  return (SIGN(a) == MP_ZPOS) ? mag : -mag;
-
-} /* end mp_cmp() */
-
-/* }}} */
-
-/* {{{ mp_cmp_mag(a, b) */
-
-/*
-  mp_cmp_mag(a, b)
-
-  Compares |a| <=> |b|, and returns an appropriate comparison result
- */
-
-int    mp_cmp_mag(mp_int *a, mp_int *b)
-{
-  int  mag;
-
-  ARGCHK(a != NULL && b != NULL, MP_EQ);
-
-  /* Signs are ignored; only the digit arrays are compared */
-  mag = s_mp_cmp(a, b);
-
-  return mag;
-
-} /* end mp_cmp_mag() */
-
-/* }}} */
-
-/* {{{ mp_cmp_int(a, z) */
-
-/*
-  This just converts z to an mp_int, and uses the existing comparison
-  routines.  This is sort of inefficient, but it's not clear to me how
-  frequently this will get used anyway.  For small positive constants,
-  you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
- */
-int    mp_cmp_int(mp_int *a, long z)
-{
-  /*
-    Compare a <=> z by building a temporary mp_int from z.  The result
-    is whatever mp_cmp() returns.  The only error channel is the return
-    value, so a failed allocation reports MP_EQ, the same fallback used
-    for a bad argument.
-   */
-  mp_int  tmp;
-  int     out;
-
-  ARGCHK(a != NULL, MP_EQ);
-
-  /* Do not touch tmp if it could not be initialized */
-  if(mp_init(&tmp) != MP_OKAY)
-    return MP_EQ;
-
-  mp_set_int(&tmp, z);
-  out = mp_cmp(a, &tmp);
-  mp_clear(&tmp);
-
-  return out;
-
-} /* end mp_cmp_int() */
-
-/* }}} */
-
-/* {{{ mp_isodd(a) */
-
-/*
-  mp_isodd(a)
-
-  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
- */
-int    mp_isodd(mp_int *a)
-{
-  mp_digit  low;
-
-  ARGCHK(a != NULL, 0);
-
-  /* Parity is carried entirely by the least significant digit */
-  low = DIGIT(a, 0);
-
-  return (low & 1);
-
-} /* end mp_isodd() */
-
-/* }}} */
-
-/* {{{ mp_iseven(a) */
-
-int    mp_iseven(mp_int *a)
-{
-  /* Logical complement of mp_isodd(): 1 when a is even, 0 otherwise */
-  return mp_isodd(a) ? 0 : 1;
-
-} /* end mp_iseven() */
-
-/* }}} */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ Number theoretic functions */
-
-#if MP_NUMTH
-/* {{{ mp_gcd(a, b, c) */
-
-/*
-  Like the old mp_gcd() function, except computes the GCD using the
-  binary algorithm due to Josef Stein in 1961 (via Knuth).
- */
-mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
-{ /* Binary GCD on the magnitudes of a and b: c receives u * 2^k, where k
-     counts the factors of 2 stripped from both operands.  If both inputs
-     are zero MP_RANGE is returned; if exactly one is zero the other is
-     copied to c unchanged.  Otherwise returns the status of the final
-     mp_mul(), or the first helper error encountered. */
-  mp_err   res;
-  mp_int   u, v, t;
-  mp_size  k = 0; /* number of common factors of 2 removed */
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
-      return MP_RANGE;
-  if(mp_cmp_z(a) == MP_EQ) {
-    return mp_copy(b, c);
-  } else if(mp_cmp_z(b) == MP_EQ) {
-    return mp_copy(a, c);
-  }
-
-  if((res = mp_init(&t)) != MP_OKAY)
-    return res;
-  if((res = mp_init_copy(&u, a)) != MP_OKAY)
-    goto U;
-  if((res = mp_init_copy(&v, b)) != MP_OKAY)
-    goto V;
-
-  SIGN(&u) = MP_ZPOS; /* work on magnitudes only */
-  SIGN(&v) = MP_ZPOS;
-
-  /* Divide out common factors of 2 until at least 1 of u, v is odd */
-  while(mp_iseven(&u) && mp_iseven(&v)) {
-    s_mp_div_2(&u);
-    s_mp_div_2(&v);
-    ++k;
-  }
-
-  /* Initialize t: -v when u is odd, otherwise a copy of u */
-  if(mp_isodd(&u)) {
-    if((res = mp_copy(&v, &t)) != MP_OKAY)
-      goto CLEANUP;
-    
-    /* t = -v */
-    if(SIGN(&v) == MP_ZPOS)
-      SIGN(&t) = MP_NEG;
-    else
-      SIGN(&t) = MP_ZPOS;
-    
-  } else {
-    if((res = mp_copy(&u, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-  }
-
-  for(;;) {
-    while(mp_iseven(&t)) { /* strip factors of 2 from t */
-      s_mp_div_2(&t);
-    }
-
-    if(mp_cmp_z(&t) == MP_GT) { /* positive t replaces u */
-      if((res = mp_copy(&t, &u)) != MP_OKAY)
-	goto CLEANUP;
-
-    } else { /* otherwise -t replaces v */
-      if((res = mp_copy(&t, &v)) != MP_OKAY)
-	goto CLEANUP;
-
-      /* v = -t */
-      if(SIGN(&t) == MP_ZPOS)
-	SIGN(&v) = MP_NEG;
-      else
-	SIGN(&v) = MP_ZPOS;
-    }
-
-    if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-    if(s_mp_cmp_d(&t, 0) == MP_EQ) /* u == v: finished */
-      break;
-  }
-
-  s_mp_2expt(&v, (mp_digit)k);       /* v = 2^k; NOTE(review): result not checked */
-  res = mp_mul(&u, &v, c); /* c = u * v */
-
- CLEANUP:
-  mp_clear(&v);
- V:
-  mp_clear(&u);
- U:
-  mp_clear(&t);
-
-  return res;
-
-} /* end mp_gcd() */
-
-/* }}} */
-
-/* {{{ mp_lcm(a, b, c) */
-
-/* We compute the least common multiple using the rule:
-
-   ab = [a, b](a, b)
-
-   ... by computing the product, and dividing out the gcd.
- */
-
-mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
-{
-  /* c = (a * b) / gcd(a, b); returns the first error from any step */
-  mp_int  divisor, product;
-  mp_err  res;
-
-  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
-
-  res = mp_init(&divisor);
-  if(res != MP_OKAY)
-    return res;
-
-  res = mp_init(&product);
-  if(res == MP_OKAY) {
-    /* Each step runs only if everything before it succeeded */
-    res = mp_mul(a, b, &product);
-    if(res == MP_OKAY)
-      res = mp_gcd(a, b, &divisor);
-    if(res == MP_OKAY)
-      res = mp_div(&product, &divisor, c, NULL);
-
-    mp_clear(&product);
-  }
-
-  mp_clear(&divisor);
-
-  return res;
-
-} /* end mp_lcm() */
-
-/* }}} */
-
-/* {{{ mp_xgcd(a, b, g, x, y) */
-
-/*
-  mp_xgcd(a, b, g, x, y)
-
-  Compute g = (a, b) and values x and y satisfying Bezout's identity
-  (that is, ax + by = g).  This uses the extended binary GCD algorithm
-  based on the Stein algorithm used for mp_gcd()
- */
-
-mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y)
-{
-  /*
-    Extended binary GCD on |a| and |b|.  On success g (if non-NULL)
-    receives gx * v, and x and y (each if non-NULL) receive the
-    cofactors C and D built up by the loop below.  Returns MP_RANGE
-    when b is zero, otherwise MP_OKAY or the first helper error.
-
-    Temporaries are pushed onto clean[] as they are initialized, so the
-    cleanup code releases exactly the ones that exist.
-   */
-  mp_int   gx, xc, yc, u, v, A, B, C, D;
-  mp_int  *clean[9];
-  mp_err   res;
-  int      last = -1;
-
-  /* Both operands are dereferenced immediately below */
-  ARGCHK(a != NULL && b != NULL, MP_BADARG);
-
-  if(mp_cmp_z(b) == 0)
-    return MP_RANGE;
-
-  /* Initialize all these variables we need */
-  if((res = mp_init(&u)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &u;
-  if((res = mp_init(&v)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &v;
-  if((res = mp_init(&gx)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &gx;
-  if((res = mp_init(&A)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &A;
-  if((res = mp_init(&B)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &B;
-  if((res = mp_init(&C)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &C;
-  if((res = mp_init(&D)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &D;
-  if((res = mp_init_copy(&xc, a)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &xc;
-  mp_abs(&xc, &xc);
-  if((res = mp_init_copy(&yc, b)) != MP_OKAY) goto CLEANUP;
-  clean[++last] = &yc;
-  mp_abs(&yc, &yc);
-
-  mp_set(&gx, 1);
-
-  /* Divide by two until at least one of them is odd */
-  while(mp_iseven(&xc) && mp_iseven(&yc)) {
-    s_mp_div_2(&xc);
-    s_mp_div_2(&yc);
-    if((res = s_mp_mul_2(&gx)) != MP_OKAY)
-      goto CLEANUP;
-  }
-
-  /* mp_copy() can fail, so its status is checked like every other step */
-  if((res = mp_copy(&xc, &u)) != MP_OKAY) goto CLEANUP;
-  if((res = mp_copy(&yc, &v)) != MP_OKAY) goto CLEANUP;
-  mp_set(&A, 1); mp_set(&D, 1);
-
-  /* Loop through binary GCD algorithm */
-  for(;;) {
-    while(mp_iseven(&u)) {
-      s_mp_div_2(&u);
-
-      if(mp_iseven(&A) && mp_iseven(&B)) {
-        s_mp_div_2(&A); s_mp_div_2(&B);
-      } else {
-        if((res = mp_add(&A, &yc, &A)) != MP_OKAY) goto CLEANUP;
-        s_mp_div_2(&A);
-        if((res = mp_sub(&B, &xc, &B)) != MP_OKAY) goto CLEANUP;
-        s_mp_div_2(&B);
-      }
-    }
-
-    while(mp_iseven(&v)) {
-      s_mp_div_2(&v);
-
-      if(mp_iseven(&C) && mp_iseven(&D)) {
-        s_mp_div_2(&C); s_mp_div_2(&D);
-      } else {
-        if((res = mp_add(&C, &yc, &C)) != MP_OKAY) goto CLEANUP;
-        s_mp_div_2(&C);
-        if((res = mp_sub(&D, &xc, &D)) != MP_OKAY) goto CLEANUP;
-        s_mp_div_2(&D);
-      }
-    }
-
-    if(mp_cmp(&u, &v) >= 0) {
-      if((res = mp_sub(&u, &v, &u)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&A, &C, &A)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&B, &D, &B)) != MP_OKAY) goto CLEANUP;
-
-    } else {
-      if((res = mp_sub(&v, &u, &v)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&C, &A, &C)) != MP_OKAY) goto CLEANUP;
-      if((res = mp_sub(&D, &B, &D)) != MP_OKAY) goto CLEANUP;
-
-    }
-
-    /* If we're done, copy results to output */
-    if(mp_cmp_z(&u) == 0) {
-      if(x)
-        if((res = mp_copy(&C, x)) != MP_OKAY) goto CLEANUP;
-
-      if(y)
-        if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP;
-
-      if(g)
-        if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP;
-
-      break;
-    }
-  }
-
- CLEANUP:
-  /* Release only the temporaries that were successfully initialized */
-  while(last >= 0)
-    mp_clear(clean[last--]);
-
-  return res;
-
-} /* end mp_xgcd() */
-
-/* }}} */
-
-/* {{{ mp_invmod(a, m, c) */
-
-/*
-  mp_invmod(a, m, c)
-
-  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
-  This is equivalent to the question of whether (a, m) = 1.  If not,
-  MP_UNDEF is returned, and there is no inverse.
- */
-
-mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c)
-{
-  mp_int  gcd, inv;
-  mp_err  res;
-
-  ARGCHK(a && m && c, MP_BADARG);
-
-  /* Neither the value nor the modulus may be zero */
-  if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
-    return MP_RANGE;
-
-  res = mp_init(&gcd);
-  if(res != MP_OKAY)
-    return res;
-
-  res = mp_init(&inv);
-  if(res == MP_OKAY) {
-    res = mp_xgcd(a, m, &gcd, &inv, NULL);
-
-    if(res == MP_OKAY) {
-      if(mp_cmp_d(&gcd, 1) != MP_EQ) {
-        /* (a, m) != 1, so no inverse exists */
-        res = MP_UNDEF;
-      } else {
-        /* Reduce the cofactor into c and give it the sign of a */
-        res = mp_mod(&inv, m, c);
-        SIGN(c) = SIGN(a);
-      }
-    }
-
-    mp_clear(&inv);
-  }
-
-  mp_clear(&gcd);
-
-  return res;
-
-} /* end mp_invmod() */
-
-/* }}} */
-#endif /* if MP_NUMTH */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ mp_print(mp, ofp) */
-
-#if MP_IOFUNC
-/*
-  mp_print(mp, ofp)
-
-  Print a textual representation of the given mp_int on the output
-  stream 'ofp'.  Output is generated using the internal radix.
- */
-
-void   mp_print(mp_int *mp, FILE *ofp)
-{
-  int   ix;
-
-  /* Nothing to do without both a value and a stream */
-  if(mp == NULL || ofp == NULL)
-    return;
-
-  /* The sign character is always written, '+' included */
-  if(SIGN(mp) == MP_NEG)
-    fputc('-', ofp);
-  else
-    fputc('+', ofp);
-
-  /* Digits go out most significant first, each formatted by DIGIT_FMT */
-  ix = USED(mp) - 1;
-  while(ix >= 0) {
-    fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
-    --ix;
-  }
-
-} /* end mp_print() */
-
-#endif /* if MP_IOFUNC */
-
-/* }}} */
-
-/*------------------------------------------------------------------------*/
-/* {{{ More I/O Functions */
-
-/* {{{ mp_read_signed_bin(mp, str, len) */
-
-/* 
-   mp_read_signed_bin(mp, str, len)
-
-   Read in a raw value (base 256) into the given mp_int
- */
-
-mp_err  mp_read_signed_bin(mp_int *mp, unsigned char *str, int len)
-{
-  mp_err         res;
-
-  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
-
-  /* Everything after the leading sign byte is the magnitude */
-  res = mp_read_unsigned_bin(mp, str + 1, len - 1);
-  if(res != MP_OKAY)
-    return res;
-
-  /* A non-zero first byte marks a negative value */
-  SIGN(mp) = str[0] ? MP_NEG : MP_ZPOS;
-
-  return res;
-
-} /* end mp_read_signed_bin() */
-
-/* }}} */
-
-/* {{{ mp_signed_bin_size(mp) */
-
-int    mp_signed_bin_size(mp_int *mp)
-{
-  int  len;
-
-  ARGCHK(mp != NULL, 0);
-
-  /* Magnitude bytes plus one byte for the sign */
-  len = mp_unsigned_bin_size(mp);
-
-  return len + 1;
-
-} /* end mp_signed_bin_size() */
-
-/* }}} */
-
-/* {{{ mp_to_signed_bin(mp, str) */
-
-mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str)
-{
-  unsigned char *mag;
-
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
-
-  /* Caller responsible for allocating enough memory (use mp_raw_size(mp)) */
-  mag = str + 1;
-
-  /* Byte 0 carries the sign, the magnitude follows */
-  *str = (char)SIGN(mp);
-
-  return mp_to_unsigned_bin(mp, mag);
-
-} /* end mp_to_signed_bin() */
-
-/* }}} */
-
-/* {{{ mp_read_unsigned_bin(mp, str, len) */
-
-/*
-  mp_read_unsigned_bin(mp, str, len)
-
-  Read in an unsigned value (base 256) into the given mp_int
- */
-
-mp_err  mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len)
-{
-  int     pos;
-  mp_err  res;
-
-  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
-
-  mp_zero(mp);
-
-  /* Horner evaluation in base 2^CHAR_BIT: shift up one byte, add the next */
-  pos = 0;
-  while(pos < len) {
-    res = s_mp_mul_2d(mp, CHAR_BIT);
-    if(res != MP_OKAY)
-      return res;
-
-    res = mp_add_d(mp, str[pos], mp);
-    if(res != MP_OKAY)
-      return res;
-
-    ++pos;
-  }
-
-  return MP_OKAY;
-
-} /* end mp_read_unsigned_bin() */
-
-/* }}} */
-
-/* {{{ mp_unsigned_bin_size(mp) */
-
-int     mp_unsigned_bin_size(mp_int *mp) 
-{
-  mp_digit   top;
-  int        nbytes;
-
-  ARGCHK(mp != NULL, 0);
-
-  /* Zero still occupies one byte */
-  if(USED(mp) == 1 && DIGIT(mp, 0) == 0)
-    return 1;
-
-  /* Every digit below the top one contributes its full width */
-  nbytes = (USED(mp) - 1) * sizeof(mp_digit);
-
-  /* The top digit contributes only its significant bytes */
-  for(top = DIGIT(mp, USED(mp) - 1); top != 0; top >>= CHAR_BIT)
-    ++nbytes;
-
-  return nbytes;
-
-} /* end mp_unsigned_bin_size() */
-
-/* }}} */
-
-/* {{{ mp_to_unsigned_bin(mp, str) */
-
-mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str)
-{ /* Write the magnitude of mp into str as raw bytes, most significant
-     byte first, with no leading zero bytes (zero is written as a single
-     0 byte).  Always returns MP_OKAY once the arguments pass ARGCHK.
-     NOTE(review): no buffer length is passed in; the caller presumably
-     sizes str with mp_unsigned_bin_size() -- confirm. */
-  mp_digit      *dp, *end, d;
-  unsigned char *spos;
-
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
-
-  dp = DIGITS(mp);
-  end = dp + USED(mp) - 1; /* points at the most significant digit */
-  spos = str;              /* next byte to write */
-
-  /* Special case for zero, quick test */
-  if(dp == end && *dp == 0) {
-    *str = '\0';
-    return MP_OKAY;
-  }
-
-  /* Generate bytes in reverse order: every digit below the top one is
-     written in full, least significant byte first */
-  while(dp < end) {
-    int      ix;
-
-    d = *dp;
-    for(ix = 0; ix < (int)sizeof(mp_digit); ++ix) {
-      *spos = d & UCHAR_MAX;
-      d >>= CHAR_BIT;
-      ++spos;
-    }
-
-    ++dp;
-  }
-
-  /* Now handle last digit specially, high order zeroes are not written */
-  d = *end;
-  while(d != 0) {
-    *spos = d & UCHAR_MAX;
-    d >>= CHAR_BIT;
-    ++spos;
-  }
-
-  /* Reverse everything to get bytes in the correct order; str walks up
-     from the front while spos walks down from the last byte written */
-  while(--spos > str) {
-    unsigned char t = *str;
-    *str = *spos;
-    *spos = t;
-
-    ++str;
-  }
-
-  return MP_OKAY;
-
-} /* end mp_to_unsigned_bin() */
-
-/* }}} */
-
-/* {{{ mp_count_bits(mp) */
-
-int    mp_count_bits(mp_int *mp)
-{
-  int      nbits;
-  mp_digit top;
-
-  ARGCHK(mp != NULL, MP_BADARG);
-
-  /* All digits below the top one are counted at full width */
-  nbits = DIGIT_BIT * (USED(mp) - 1);
-
-  /* Add the significant bits of the top digit */
-  for(top = DIGIT(mp, USED(mp) - 1); top != 0; top >>= 1)
-    ++nbits;
-
-  return nbits;
-
-} /* end mp_count_bits() */
-
-/* }}} */
-
-/* {{{ mp_read_radix(mp, str, radix) */
-
-/*
-  mp_read_radix(mp, str, radix)
-
-  Read an integer from the given string, and set mp to the resulting
-  value.  The input is interpreted in the base given by 'radix'.
-  Leading non-digit characters are ignored, an optional '-' or '+'
-  sign is accepted, and the function then reads until a non-digit
-  character or the end of the string.
- */
-
-mp_err  mp_read_radix(mp_int *mp, unsigned char *str, int radix)
-{
-  int     pos = 0, val;
-  mp_err  res;
-  mp_sign sig = MP_ZPOS;
-
-  ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, 
-         MP_BADARG);
-
-  mp_zero(mp);
-
-  /* Skip ahead to the first digit of this radix, sign character, or NUL */
-  for(;;) {
-    if(str[pos] == '\0')
-      break;
-    if(s_mp_tovalue(str[pos], radix) >= 0)
-      break;
-    if(str[pos] == '-' || str[pos] == '+')
-      break;
-    ++pos;
-  }
-
-  /* Consume an optional sign; positive is already the default */
-  if(str[pos] == '-') {
-    sig = MP_NEG;
-    ++pos;
-  } else if(str[pos] == '+') {
-    ++pos;
-  }
-
-  /* Accumulate digits: mp = mp * radix + digit, until a non-digit */
-  for(;;) {
-    val = s_mp_tovalue(str[pos], radix);
-    if(val < 0)
-      break;
-
-    res = s_mp_mul_d(mp, (mp_digit)radix);
-    if(res != MP_OKAY)
-      return res;
-
-    res = s_mp_add_d(mp, (mp_digit)val);
-    if(res != MP_OKAY)
-      return res;
-
-    ++pos;
-  }
-
-  /* Zero is always stored as non-negative, whatever sign was read */
-  SIGN(mp) = (s_mp_cmp_d(mp, 0) == MP_EQ) ? MP_ZPOS : sig;
-
-  return MP_OKAY;
-
-} /* end mp_read_radix() */
-
-/* }}} */
-
-/* {{{ mp_radix_size(mp, radix) */
-
-int    mp_radix_size(mp_int *mp, int radix)
-{
-  int  len;
-  ARGCHK(mp != NULL, 0);
-
-  /* Digits needed for the magnitude in this radix */
-  len = s_mp_outlen(mp_count_bits(mp), radix);
-
-  /* One more for the NUL terminator, and another for '-' if negative */
-  len += (mp_cmp_z(mp) < 0) ? 2 : 1;
-
-  return len;
-
-} /* end mp_radix_size() */
-
-/* }}} */
-
-/* {{{ mp_value_radix_size(num, qty, radix) */
-
-/* num = number of digits
-   qty = number of bits per digit
-   radix = target base
-   
-   Return the number of digits in the specified radix that would be
-   needed to express 'num' digits of 'qty' bits each.
- */
-int    mp_value_radix_size(int num, int qty, int radix)
-{
-  int  bits;
-
-  ARGCHK(num >= 0 && qty > 0 && radix >= 2 && radix <= MAX_RADIX, 0);
-
-  /* Total bit count of 'num' digits of 'qty' bits each */
-  bits = num * qty;
-
-  return s_mp_outlen(bits, radix);
-
-} /* end mp_value_radix_size() */
-
-/* }}} */
-
-/* {{{ mp_toradix(mp, str, radix) */
-
-mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix)
-{
-  /*
-    Write mp into str as a NUL-terminated string in the given radix,
-    with a leading '-' for negative values.  Digits are produced least
-    significant first and the buffer is reversed at the end.
-   */
-  int      lo, hi = 0;
-  mp_err   res;
-  mp_int   work;
-  mp_sign  sgn;
-  mp_digit rem, rdx;
-  char     ch;
-
-  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
-  ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
-
-  /* Zero needs no division loop */
-  if(mp_cmp_z(mp) == MP_EQ) {
-    str[0] = '0';
-    str[1] = '\0';
-    return MP_OKAY;
-  }
-
-  rdx = (mp_digit)radix;
-
-  res = mp_init_copy(&work, mp);
-  if(res != MP_OKAY)
-    return res;
-
-  /* Remember the sign, then work on the absolute value */
-  sgn = SIGN(&work);
-  SIGN(&work) = MP_ZPOS;
-
-  /* Peel off one output digit per division by the radix */
-  while(mp_cmp_z(&work) != 0) {
-    res = s_mp_div_d(&work, rdx, &rem);
-    if(res != MP_OKAY) {
-      mp_clear(&work);
-      return res;
-    }
-
-    /* Third argument 0 selects capital letters */
-    ch = s_mp_todigit(rem, radix, 0);
-    str[hi++] = ch;
-  }
-
-  /* The sign goes last so that the reversal moves it to the front */
-  if(sgn == MP_NEG)
-    str[hi++] = '-';
-
-  /* Terminate, leaving hi on the last real character */
-  str[hi--] = '\0';
-
-  /* Reverse digits and sign in place */
-  for(lo = 0; lo < hi; ++lo, --hi) {
-    ch = str[lo];
-    str[lo] = str[hi];
-    str[hi] = ch;
-  }
-
-  mp_clear(&work);
-
-  return MP_OKAY;
-
-} /* end mp_toradix() */
-
-/* }}} */
-
-/* {{{ mp_char2value(ch, r) */
-
-int    mp_char2value(char ch, int r)
-{
-  /* Public wrapper around the internal character-to-value helper */
-  int  val = s_mp_tovalue(ch, r);
-
-  return val;
-
-} /* end mp_char2value() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ mp_strerror(ec) */
-
-/*
-  mp_strerror(ec)
-
-  Return a string describing the meaning of error code 'ec'.  The
-  string returned is allocated in static memory, so the caller should
-  not attempt to modify or free the memory associated with this
-  string.
- */
-const char  *mp_strerror(mp_err ec)
-{
-  int   aec = (ec < 0) ? -ec : ec;
-
-  /* Code values are negative, so anything outside
-     [MP_LAST_CODE, MP_OKAY] is not a known code */
-  if(ec < MP_LAST_CODE || ec > MP_OKAY)
-    return mp_err_string[0];  /* unknown error code */
-
-  /* Slot 0 holds the "unknown" text, so known codes are shifted by one */
-  return mp_err_string[aec + 1];
-
-} /* end mp_strerror() */
-
-/* }}} */
-
-/*========================================================================*/
-/*------------------------------------------------------------------------*/
-/* Static function definitions (internal use only)                        */
-
-/* {{{ Memory management */
-
-/* {{{ s_mp_grow(mp, min) */
-
-/* Make sure there are at least 'min' digits allocated to mp              */
-static mp_err s_mp_grow(mp_int *mp, mp_size min)
-{
-  mp_digit   *fresh;
-
-  /* Already big enough: nothing to do */
-  if(min <= ALLOC(mp))
-    return MP_OKAY;
-
-  /* Round the request up to a whole number of default precision blocks */
-  min = ((min + (s_mp_defprec - 1)) / s_mp_defprec) * s_mp_defprec;
-
-  fresh = s_mp_alloc(min, sizeof(mp_digit));
-  if(fresh == NULL)
-    return MP_MEM;
-
-  /* Carry the used digits over to the new buffer */
-  s_mp_copy(DIGITS(mp), fresh, USED(mp));
-
-#if MP_CRYPTO
-  /* Wipe the old buffer before it is released */
-  s_mp_setz(DIGITS(mp), ALLOC(mp));
-#endif
-  s_mp_free(DIGITS(mp));
-
-  DIGITS(mp) = fresh;
-  ALLOC(mp) = min;
-
-  return MP_OKAY;
-
-} /* end s_mp_grow() */
-
-/* }}} */
-
-/* {{{ s_mp_pad(mp, min) */
-
-/* Make sure the used size of mp is at least 'min', growing if needed     */
-static mp_err s_mp_pad(mp_int *mp, mp_size min)
-{
-  mp_err  res;
-
-  /* Precision is already at least 'min' */
-  if(min <= USED(mp))
-    return MP_OKAY;
-
-  /* Enlarge the allocation first if the new precision will not fit */
-  if(min > ALLOC(mp)) {
-    res = s_mp_grow(mp, min);
-    if(res != MP_OKAY)
-      return res;
-  }
-
-  /* Increase precision; should already be 0-filled */
-  USED(mp) = min;
-
-  return MP_OKAY;
-
-} /* end s_mp_pad() */
-
-/* }}} */
-
-/* {{{ s_mp_setz(dp, count) */
-
-#if MP_MACRO == 0
-/* Set 'count' digits pointed to by dp to be zeroes                       */
-void s_mp_setz(mp_digit *dp, mp_size count)
-{
-#if MP_MEMSET == 0
-  /* Index has the same type as 'count', so the loop bound comparison
-     never mixes signed and unsigned operands */
-  mp_size  ix;
-
-  for(ix = 0; ix < count; ix++)
-    dp[ix] = 0;
-#else
-  memset(dp, 0, count * sizeof(mp_digit));
-#endif
-
-} /* end s_mp_setz() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_copy(sp, dp, count) */
-
-#if MP_MACRO == 0
-/* Copy 'count' digits from sp to dp                                      */
-void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count)
-{
-#if MP_MEMCPY == 0
-  /* Index has the same type as 'count', so the loop bound comparison
-     never mixes signed and unsigned operands */
-  mp_size  ix;
-
-  for(ix = 0; ix < count; ix++)
-    dp[ix] = sp[ix];
-#else
-  memcpy(dp, sp, count * sizeof(mp_digit));
-#endif
-
-} /* end s_mp_copy() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_alloc(nb, ni) */
-
-#if MP_MACRO == 0
-/* Allocate ni records of nb bytes each, and return a pointer to that     */
-void    *s_mp_alloc(size_t nb, size_t ni)
-{
-  /* Thin wrapper: both counts are passed straight through to XCALLOC */
-  void  *mem = XCALLOC(nb, ni);
-
-  return mem;
-
-} /* end s_mp_alloc() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_free(ptr) */
-
-#if MP_MACRO == 0
-/* Free the memory pointed to by ptr                                      */
-void     s_mp_free(void *ptr)
-{
-  /* A null pointer is never handed to XFREE */
-  if(ptr == NULL)
-    return;
-
-  XFREE(ptr);
-
-} /* end s_mp_free() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_clamp(mp) */
-
-/* Remove leading zeroes from the given value                             */
-void     s_mp_clamp(mp_int *mp)
-{
-  mp_size   len = USED(mp);
-  mp_digit *digits = DIGITS(mp);
-
-  /* Drop zero digits from the top, but always keep at least one digit */
-  while(len > 1 && digits[len - 1] == 0)
-    --len;
-
-  USED(mp) = len;
-
-} /* end s_mp_clamp() */
-
-
-/* }}} */
-
-/* {{{ s_mp_exch(a, b) */
-
-/* Exchange the data for a and b; (b, a) = (a, b)                         */
-void     s_mp_exch(mp_int *a, mp_int *b)
-{
-  /* Swap by whole-structure assignment; no digits are copied */
-  mp_int   held = *b;
-
-  *b = *a;
-  *a = held;
-
-} /* end s_mp_exch() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ Arithmetic helpers */
-
-/* {{{ s_mp_lshd(mp, p) */
-
-/* 
-   Shift mp leftward by p digits, growing if needed, and zero-filling
-   the in-shifted digits at the right end.  This is a convenient
-   alternative to multiplication by powers of the radix
- */   
-
-static mp_err s_mp_lshd(mp_int *mp, mp_size p)
-{
-  mp_err    res;
-  mp_digit *digits;
-  int       src;
-
-  if(p == 0)
-    return MP_OKAY;
-
-  /* Make room for p more digits before moving anything */
-  res = s_mp_pad(mp, USED(mp) + p);
-  if(res != MP_OKAY)
-    return res;
-
-  digits = DIGITS(mp);
-
-  /* Move digits up by p places, top first so nothing is overwritten early */
-  src = (USED(mp) - 1) - p;
-  while(src >= 0) {
-    digits[src + p] = digits[src];
-    --src;
-  }
-
-  /* Zero the p vacated low-order digits */
-  for(src = 0; src < (int)p; ++src)
-    digits[src] = 0;
-
-  return MP_OKAY;
-
-} /* end s_mp_lshd() */
-
-/* }}} */
-
-/* {{{ s_mp_rshd(mp, p) */
-
-/* 
-   Shift mp rightward by p digits.  Maintains the invariant that
-   digits above the precision are all zero.  Digits shifted off the
-   end are lost.  Cannot fail.
- */
-
-void     s_mp_rshd(mp_int *mp, mp_size p)
-{
-  mp_size   pos, used;
-  mp_digit *digits;
-
-  if(p == 0)
-    return;
-
-  /* Shifting out every digit leaves the value zero */
-  if(p >= USED(mp)) {
-    s_mp_setz(DIGITS(mp), ALLOC(mp));
-    USED(mp) = 1;
-    SIGN(mp) = MP_ZPOS;
-    return;
-  }
-
-  digits = DIGITS(mp);
-  used = USED(mp);
-
-  /* Move the surviving digits down by p places */
-  for(pos = p; pos < used; pos++)
-    digits[pos - p] = digits[pos];
-
-  /* Zero the p digits vacated at the top */
-  for(pos = used - p; pos < used; pos++)
-    digits[pos] = 0;
-
-  /* Strip off any leading zeroes    */
-  s_mp_clamp(mp);
-
-} /* end s_mp_rshd() */
-
-/* }}} */
-
-/* {{{ s_mp_div_2(mp) */
-
-/* Divide by two -- take advantage of radix properties to do it fast      */
-void     s_mp_div_2(mp_int *mp)
-{
-  /* Halving is a right shift by a single bit */
-  mp_digit  one_bit = 1;
-
-  s_mp_div_2d(mp, one_bit);
-
-} /* end s_mp_div_2() */
-
-/* }}} */
-
-/* {{{ s_mp_mul_2(mp) */
-
-static mp_err s_mp_mul_2(mp_int *mp)
-{
-  /* Double mp in place by shifting every digit left one bit */
-  int       pos, used = (int)USED(mp);
-  mp_digit  carry_in = 0, carry_out, *digits = DIGITS(mp);
-  mp_err    res;
-
-  for(pos = 0; pos < used; pos++) {
-    /* The top bit of this digit becomes the low bit of the next one */
-    carry_out = (digits[pos] >> (DIGIT_BIT - 1)) & 1;
-    digits[pos] = (digits[pos] << 1) | carry_in;
-
-    carry_in = carry_out;
-  }
-
-  /* A bit shifted out of the top digit needs one more digit of storage */
-  if(carry_in) {
-    if(pos >= (int)ALLOC(mp)) {
-      res = s_mp_grow(mp, ALLOC(mp) + 1);
-      if(res != MP_OKAY)
-        return res;
-      digits = DIGITS(mp);   /* the buffer may have moved */
-    }
-
-    digits[pos] = carry_in;
-    USED(mp) += 1;
-  }
-
-  return MP_OKAY;
-
-} /* end s_mp_mul_2() */
-
-/* }}} */
-
-/* {{{ s_mp_mod_2d(mp, d) */
-
-/*
-  Remainder the integer by 2^d, where d is a number of bits.  This
-  amounts to a bitwise AND of the value, and does not require the full
-  division code
- */
-void     s_mp_mod_2d(mp_int *mp, mp_digit d)
-{
-  /* ndig: index of the digit containing bit d; nbit: bit offset in it */
-  unsigned int  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
-  unsigned int  ix;
-  mp_digit      dmask, *dp = DIGITS(mp);
-
-  /* The value is already smaller than 2^d */
-  if(ndig >= USED(mp))
-    return;
-
-  /* Flush all the bits above 2^d in its digit.  The mask is built in
-     mp_digit rather than int, so a large nbit cannot overflow a signed
-     int during the shift. */
-  dmask = ((mp_digit)1 << nbit) - 1;
-  dp[ndig] &= dmask;
-
-  /* Flush all digits above the one with 2^d in it */
-  for(ix = ndig + 1; ix < USED(mp); ix++)
-    dp[ix] = 0;
-
-  s_mp_clamp(mp);
-} /* end s_mp_mod_2d() */
-
-/* }}} */
-
-/* {{{ s_mp_mul_2d(mp, d) */
-
-/*
-  Multiply by the integer 2^d, where d is a number of bits.  This
-  amounts to a bitwise shift of the value, and does not require the
-  full multiplication code.
- */
-static mp_err s_mp_mul_2d(mp_int *mp, mp_digit d)
-{
-  mp_err   res;
-  mp_digit save, next, mask, *dp;
-  mp_size  used;
-  int      ix;
-
-  /* Whole-digit part of the shift */
-  if((res = s_mp_lshd(mp, d / DIGIT_BIT)) != MP_OKAY)
-    return res;
-
-  d %= DIGIT_BIT;
-
-  /* No sub-digit shift left to do.  Returning here gives the same
-     result as running the loop with d == 0, and avoids shifting a
-     digit by DIGIT_BIT bits in the expressions below. */
-  if(d == 0) {
-    s_mp_clamp(mp);
-    return MP_OKAY;
-  }
-
-  dp = DIGITS(mp); used = USED(mp);
-
-  /* Build the mask in mp_digit so the shift is not done in signed int */
-  mask = ((mp_digit)1 << d) - 1;
-
-  /* If the shift requires another digit, make sure we've got one to
-     work with */
-  if((dp[used - 1] >> (DIGIT_BIT - d)) & mask) {
-    if((res = s_mp_grow(mp, used + 1)) != MP_OKAY)
-      return res;
-    dp = DIGITS(mp);   /* the buffer may have moved */
-  }
-
-  /* Do the shifting: the top d bits of each digit carry into the next */
-  save = 0;
-  for(ix = 0; ix < (int)used; ix++) {
-    next = (dp[ix] >> (DIGIT_BIT - d)) & mask;
-    dp[ix] = (dp[ix] << d) | save;
-    save = next;
-  }
-
-  /* If, at this point, we have a nonzero carryout into the next
-     digit, we'll increase the size by one digit, and store it...
-   */
-  if(save) {
-    dp[used] = save;
-    USED(mp) += 1;
-  }
-
-  s_mp_clamp(mp);
-  return MP_OKAY;
-
-} /* end s_mp_mul_2d() */
-
-/* }}} */
-
-/* {{{ s_mp_div_2d(mp, d) */
-
-/*
-  Divide the integer by 2^d, where d is a number of bits.  This
-  amounts to a bitwise shift of the value, and does not require the
-  full division code (used in Barrett reduction, see below)
- */
-void     s_mp_div_2d(mp_int *mp, mp_digit d)
-{
-  int       ix;
-  mp_digit  save, next, mask, *dp = DIGITS(mp);
-
-  /* Whole-digit part of the shift */
-  s_mp_rshd(mp, d / DIGIT_BIT);
-  d %= DIGIT_BIT;
-
-  /* No sub-digit shift left to do.  Returning here gives the same
-     result as running the loop with d == 0, and avoids shifting by
-     DIGIT_BIT bits in the expression below. */
-  if(d == 0) {
-    s_mp_clamp(mp);
-    return;
-  }
-
-  /* Build the mask in mp_digit so the shift is not done in signed int */
-  mask = ((mp_digit)1 << d) - 1;
-
-  /* Walk from the top digit down; the low d bits of each digit become
-     the high bits of the digit below it */
-  save = 0;
-  for(ix = USED(mp) - 1; ix >= 0; ix--) {
-    next = dp[ix] & mask;
-    dp[ix] = (dp[ix] >> d) | (save << (DIGIT_BIT - d));
-    save = next;
-  }
-
-  s_mp_clamp(mp);
-
-} /* end s_mp_div_2d() */
-
-/* }}} */
-
-/* {{{ s_mp_norm(a, b) */
-
-/*
-  s_mp_norm(a, b)
-
-  Normalize a and b for division, where b is the divisor.  In order
-  that we might make good guesses for quotient digits, we want the
-  leading digit of b to be at least half the radix, which we
-  accomplish by multiplying a and b by a constant.  This constant is
-  returned (so that it can be divided back out of the remainder at the
-  end of the division process).
-
-  We multiply by the smallest power of 2 that gives us a leading digit
-  at least half the radix.  By choosing a power of 2, we simplify the 
-  multiplication and division steps to simple shifts.
- */
-mp_digit s_mp_norm(mp_int *a, mp_int *b)
-{ /* Shifts a and b left by the same bit count d, chosen so the top
-     digit of b reaches at least RADIX / 2, and returns d. */
-  mp_digit  t, d = 0;
-
-  t = DIGIT(b, USED(b) - 1); /* most significant digit of b */
-  while(t < (RADIX / 2)) { /* NOTE(review): does not terminate if t is 0; presumably b is non-zero here -- confirm callers */
-    t <<= 1;
-    ++d;
-  }
-    
-  if(d != 0) {
-    s_mp_mul_2d(a, d); /* NOTE(review): mp_err result is discarded */
-    s_mp_mul_2d(b, d); /* NOTE(review): mp_err result is discarded */
-  }
-
-  return d;
-
-} /* end s_mp_norm() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ Primitive digit arithmetic */
-
-/* {{{ s_mp_add_d(mp, d) */
-
-/* Add d to |mp| in place                                                 */
-static mp_err s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
-{
-  mp_word   sum, carry;
-  mp_size   pos, ndigits = USED(mp);
-  mp_digit *digits = DIGITS(mp);
-
-  /* The lowest digit absorbs d */
-  sum = digits[0] + d;
-  digits[0] = ACCUM(sum);
-  carry = CARRYOUT(sum);
-
-  /* Ripple the carry upward until it dies out or the digits run out */
-  for(pos = 1; pos < ndigits && carry; ++pos) {
-    sum = digits[pos] + carry;
-    digits[pos] = ACCUM(sum);
-    carry = CARRYOUT(sum);
-  }
-
-  /* A carry out of the top digit needs one more digit of precision */
-  if(carry != 0) {
-    mp_err  res = s_mp_pad(mp, USED(mp) + 1);
-
-    if(res != MP_OKAY)
-      return res;
-
-    DIGIT(mp, pos) = carry;
-  }
-
-  return MP_OKAY;
-
-} /* end s_mp_add_d() */
-
-/* }}} */
-
-/* {{{ s_mp_sub_d(mp, d) */
-
-/* Subtract d from |mp| in place, assumes |mp| > d                        */
-static mp_err s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
-{
-  mp_word   diff, borrow;
-  mp_size   pos, ndigits = USED(mp);
-  mp_digit *digits = DIGITS(mp);
-
-  /* Adding RADIX first keeps the intermediate non-negative; a missing
-     carry-out afterwards means a borrow was needed */
-  diff = (RADIX + digits[0]) - d;
-  borrow = CARRYOUT(diff) ? 0 : 1;
-  digits[0] = ACCUM(diff);
-
-  /* Propagate the borrow through the higher digits */
-  for(pos = 1; borrow && pos < ndigits; ++pos) {
-    diff = (RADIX + digits[pos]) - borrow;
-    borrow = CARRYOUT(diff) ? 0 : 1;
-    digits[pos] = ACCUM(diff);
-  }
-
-  /* Remove leading zeroes          */
-  s_mp_clamp(mp);
-
-  /* A borrow out of the top digit means |mp| < d on entry */
-  return borrow ? MP_RANGE : MP_OKAY;
-
-} /* end s_mp_sub_d() */
-
-/* }}} */
-
-/* {{{ s_mp_mul_d(a, d) */
-
-/* Compute a = a * d, single digit multiplication                         */
-static mp_err s_mp_mul_d(mp_int *a, mp_digit d)
-{
-  mp_word w, k = 0;
-  mp_size ix, max;
-  mp_err  res;
-  mp_digit *dp = DIGITS(a);
-
-  /*
-    Single-digit multiplication will increase the precision of the
-    output by at most one digit.  However, we can detect when this
-    will happen -- if the high-order digit of a, times d, gives a
-    two-digit result, then the precision of the result will increase;
-    otherwise it won't.  We use this fact to avoid calling s_mp_pad()
-    unless absolutely necessary.
-
-    Each product is formed in mp_word (note the casts).  Without them
-    the multiplication is carried out in the promoted digit type, which
-    can overflow or truncate before the result is widened.
-   */
-  max = USED(a);
-  w = (mp_word)dp[max - 1] * d;
-  if(CARRYOUT(w) != 0) {
-    if((res = s_mp_pad(a, max + 1)) != MP_OKAY)
-      return res;
-    dp = DIGITS(a);   /* the buffer may have moved */
-  }
-
-  for(ix = 0; ix < max; ix++) {
-    w = ((mp_word)dp[ix] * d) + k;
-    dp[ix] = ACCUM(w);
-    k = CARRYOUT(w);
-  }
-
-  /* If there is a precision increase, take care of it here; the above
-     test guarantees we have enough storage to do this safely.
-   */
-  if(k) {
-    dp[max] = k; 
-    USED(a) = max + 1;
-  }
-
-  s_mp_clamp(a);
-
-  return MP_OKAY;
-  
-} /* end s_mp_mul_d() */
-
-/* }}} */
-
-/* {{{ s_mp_div_d(mp, d, r) */
-
-/*
-  s_mp_div_d(mp, d, r)
-
-  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
-  single digit d.  If r is null, the remainder will be discarded.
- */
-
-static mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
-{
-  mp_word   rem = 0;
-  mp_int    quot;
-  mp_err    res;
-  mp_digit *src = DIGITS(mp), *dst;
-  int       pos;
-
-  /* Division by zero is rejected before anything is allocated */
-  if(d == 0)
-    return MP_RANGE;
-
-  /* The quotient never needs more digits than the dividend */
-  res = mp_init_size(&quot, USED(mp));
-  if(res != MP_OKAY)
-    return res;
-
-  USED(&quot) = USED(mp); /* so clamping will work below */
-  dst = DIGITS(&quot);
-
-  /* Schoolbook long division, most significant digit first; 'rem'
-     carries the running remainder into the next digit */
-  for(pos = USED(mp) - 1; pos >= 0; pos--) {
-    rem = (rem << DIGIT_BIT) | src[pos];
-
-    /* When rem < d the quotient digit is 0 and rem is unchanged */
-    dst[pos] = rem / d;
-    rem = rem % d;
-  }
-
-  /* Deliver the remainder, if desired */
-  if(r)
-    *r = rem;
-
-  /* Swap the quotient into mp; the old digits are freed with quot */
-  s_mp_clamp(&quot);
-  mp_exch(&quot, mp);
-  mp_clear(&quot);
-
-  return MP_OKAY;
-
-} /* end s_mp_div_d() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ Primitive full arithmetic */
-
-/* {{{ s_mp_add(a, b) */
-
-/* Compute a = |a| + |b|                                                  */
-static mp_err s_mp_add(mp_int *a, mp_int *b)        /* magnitude addition      */
-{
-  mp_word   acc = 0;
-  mp_digit *da, *db;
-  mp_size   pos, nb = USED(b), na;
-  mp_err    res;
-
-  /* a must hold at least as many digits as b before the digit loop */
-  if(nb > USED(a)) {
-    res = s_mp_pad(a, nb);
-    if(res != MP_OKAY)
-      return res;
-  }
-
-  /* Fetched after padding, since padding may move a's buffer */
-  da = DIGITS(a);
-  db = DIGITS(b);
-
-  /* Add digit by digit over the length of b, carrying in 'acc' */
-  for(pos = 0; pos < nb; ++pos) {
-    acc += da[pos] + db[pos];
-    da[pos] = ACCUM(acc);
-    acc = CARRYOUT(acc);
-  }
-
-  /* b is exhausted; keep propagating the carry through the rest of a */
-  na = USED(a);
-  for(; acc && pos < na; ++pos) {
-    acc += da[pos];
-    da[pos] = ACCUM(acc);
-    acc = CARRYOUT(acc);
-  }
-
-  /* A carry out of the top digit needs one more digit of precision */
-  if(acc) {
-    res = s_mp_pad(a, na + 1);
-    if(res != MP_OKAY)
-      return res;
-
-    DIGIT(a, pos) = acc;  /* da may not be valid after s_mp_pad() call */
-  }
-
-  return MP_OKAY;
-
-} /* end s_mp_add() */
-
-/* }}} */
-
-/* {{{ s_mp_sub(a, b) */
-
-/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
-static mp_err s_mp_sub(mp_int *a, mp_int *b)        /* magnitude subtract      */
-{
-  mp_word   w = 0;   /* holds each digit difference, then the borrow */
-  mp_digit *da = DIGITS(a), *db = DIGITS(b);
-  mp_size   pos, nb = USED(b), na;
-
-  /* Subtract b's digits, borrowing as needed.  RADIX is added first so
-     the intermediate never goes negative; a missing carry-out means a
-     borrow from the next digit. */
-  for(pos = 0; pos < nb; ++pos) {
-    w = (RADIX + da[pos]) - w - db[pos];
-    da[pos] = ACCUM(w);
-    w = CARRYOUT(w) ? 0 : 1;
-  }
-
-  /* b is exhausted; push any outstanding borrow through the rest of a */
-  na = USED(a);
-  for(; pos < na; ++pos) {
-    w = RADIX + da[pos] - w;
-    da[pos] = ACCUM(w);
-    w = CARRYOUT(w) ? 0 : 1;
-  }
-
-  /* Clobber any leading zeroes we created    */
-  s_mp_clamp(a);
-
-  /* A borrow out of the top digit means |b| > |a| on entry; the work
-     has been done anyway, but the violation is reported */
-  return w ? MP_RANGE : MP_OKAY;
-
-} /* end s_mp_sub() */
-
-/* }}} */
-
-/* {{{ s_mp_mul(a, b) */
-
-/*
-  Compute a = |a| * |b|, in place.
-
-  The product is built up in a temporary of USED(a) + USED(b) digits
-  (mp_init_size() is relied upon to hand those digits back zeroed) and
-  is then swapped into a.  Returns MP_OKAY on success, or the error
-  from mp_init_size() if the temporary could not be allocated, in which
-  case a is left untouched.
- */
-static mp_err s_mp_mul(mp_int *a, mp_int *b)
-{
-  mp_word   w, k = 0;
-  mp_int    tmp;
-  mp_err    res;
-  mp_size   ix, jx, ua = USED(a), ub = USED(b);
-  mp_digit *pa, *pb, *pt, *pbt;
-
-  if((res = mp_init_size(&tmp, ua + ub)) != MP_OKAY)
-    return res;
-
-  /* This has the effect of left-padding with zeroes... */
-  USED(&tmp) = ua + ub;
-
-  /* We're going to need the base value each iteration */
-  pbt = DIGITS(&tmp);
-
-  /* Outer loop:  Digits of b */
-
-  pb = DIGITS(b);
-  for(ix = 0; ix < ub; ++ix, ++pb) {
-    /* A zero digit of b contributes nothing to the product */
-    if(*pb == 0) 
-      continue;
-
-    /* Inner product:  Digits of a */
-    pa = DIGITS(a);
-    for(jx = 0; jx < ua; ++jx, ++pa) {
-      pt = pbt + ix + jx;
-      /* Widen before multiplying so the digit product is formed in
-         mp_word precision instead of overflowing the digit type */
-      w = (mp_word)*pb * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
-    }
-
-    /* The carry out of this row lands one digit above it */
-    pbt[ix + jx] = k;
-    k = 0;
-  }
-
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
-
-  /* tmp now holds the old contents of a; release them */
-  mp_clear(&tmp);
-
-  return MP_OKAY;
-
-} /* end s_mp_mul() */
-
-/*
-  Compute a = |a| * |b|, keeping at most the low 'digs' digits of the
-  product, in place.
-
-  The work is done in a temporary of digs + 1 digits which is swapped
-  into a at the end.  Returns MP_OKAY on success, or the error from
-  mp_init_size() if the temporary could not be allocated, in which case
-  a is left untouched.
- */
-static mp_err s_mp_mul_dig(mp_int *a, mp_int *b, int digs)
-{
-  mp_word   w, k = 0;
-  mp_int    tmp;
-  mp_err    res;
-  mp_size   ix, jx, ua = USED(a), ub = USED(b);
-  mp_digit *pa, *pb, *pt, *pbt;
-
-  if((res = mp_init_size(&tmp, digs+1)) != MP_OKAY)
-    return res;
-
-  /* This has the effect of left-padding with zeroes... */
-  USED(&tmp) = digs+1;
-
-  /* We're going to need the base value each iteration */
-  pbt = DIGITS(&tmp);
-
-  /* Digits of either operand at position >= digs cannot reach the
-     part of the product that is kept, so do not visit them */
-  ub = MIN(digs, (int)ub);
-  ua = MIN(digs, (int)ua);
-
-  /* Outer loop:  Digits of b */
-  pb = DIGITS(b);
-  for(ix = 0; ix < ub; ++ix, ++pb) {
-    if(*pb == 0) 
-      continue;
-
-    /* Inner product:  Digits of a */
-    pa = DIGITS(a);
-    for(jx = 0; jx < ua; ++jx, ++pa) {
-      /* Stop once the output position is past the temporary */
-      if ((int)(ix+jx) > digs) { break; }
-      pt = pbt + ix + jx;
-      /* Widen before multiplying so the digit product is formed in
-         mp_word precision instead of overflowing the digit type */
-      w = (mp_word)*pb * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
-    }
-    /* Only store the row's carry if it falls inside the kept digits */
-    if ((int)(ix + jx) < digs) {
-       pbt[ix + jx] = k;
-    }
-    k = 0;
-  }
-
-  /* Drop the extra working digit, then trim leading zeroes */
-  USED(&tmp) = digs;
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
-
-  mp_clear(&tmp);
-
-  return MP_OKAY;
-
-} /* end s_mp_mul_dig() */
-
-/* }}} */
-
-/* {{{ s_mp_kmul(a, b, out, len) */
-
-#if 0
-void   s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len)
-{
-  mp_word   w, k = 0;
-  mp_size   ix, jx;
-  mp_digit *pa, *pt;
-
-  for(ix = 0; ix < len; ++ix, ++b) {
-    if(*b == 0)
-      continue;
-    
-    pa = a;
-    for(jx = 0; jx < len; ++jx, ++pa) {
-      pt = out + ix + jx;
-      w = *b * *pa + k + *pt;
-      *pt = ACCUM(w);
-      k = CARRYOUT(w);
-    }
-
-    out[ix + jx] = k;
-    k = 0;
-  }
-
-} /* end s_mp_kmul() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_sqr(a) */
-
-/*
-  Computes the square of a, in place.  This can be done more
-  efficiently than a general multiplication, because many of the
-  computation steps are redundant when squaring.  The inner product
-  step is a bit more complicated, but we save a fair number of
-  iterations of the multiplication loop.
- */
-#if MP_SQUARE
-static mp_err s_mp_sqr(mp_int *a)
-{
-  mp_word  w, k = 0;
-  mp_int   tmp;
-  mp_err   res;
-  mp_size  ix, jx, kx, used = USED(a);
-  mp_digit *pa1, *pa2, *pt, *pbt;
-
-  if((res = mp_init_size(&tmp, 2 * used)) != MP_OKAY)
-    return res;
-
-  /* Left-pad with zeroes */
-  USED(&tmp) = 2 * used;
-
-  /* We need the base value each time through the loop */
-  pbt = DIGITS(&tmp);
-
-  pa1 = DIGITS(a);
-  for(ix = 0; ix < used; ++ix, ++pa1) {
-    if(*pa1 == 0)
-      continue;
-
-    w = DIGIT(&tmp, ix + ix) + (*pa1 * *pa1);
-
-    pbt[ix + ix] = ACCUM(w);
-    k = CARRYOUT(w);
-
-    /*
-      The inner product is computed as:
-
-         (C, S) = t[i,j] + 2 a[i] a[j] + C
-
-      This can overflow what can be represented in an mp_word, and
-      since C arithmetic does not provide any way to check for
-      overflow, we have to check explicitly for overflow conditions
-      before they happen.
-     */
-    for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) {
-      mp_word  u = 0, v;
-      
-      /* Store this in a temporary to avoid indirections later */
-      pt = pbt + ix + jx;
-
-      /* Compute the multiplicative step */
-      w = *pa1 * *pa2;
-
-      /* If w is more than half MP_WORD_MAX, the doubling will
-	 overflow, and we need to record a carry out into the next
-	 word */
-      u = (w >> (MP_WORD_BIT - 1)) & 1;
-
-      /* Double what we've got, overflow will be ignored as defined
-	 for C arithmetic (we've already noted if it is to occur)
-       */
-      w *= 2;
-
-      /* Compute the additive step */
-      v = *pt + k;
-
-      /* If we do not already have an overflow carry, check to see
-	 if the addition will cause one, and set the carry out if so 
-       */
-      u |= ((MP_WORD_MAX - v) < w);
-
-      /* Add in the rest, again ignoring overflow */
-      w += v;
-
-      /* Set the i,j digit of the output */
-      *pt = ACCUM(w);
-
-      /* Save carry information for the next iteration of the loop.
-	 This is why k must be an mp_word, instead of an mp_digit */
-      k = CARRYOUT(w) | (u << DIGIT_BIT);
-
-    } /* for(jx ...) */
-
-    /* Set the last digit in the cycle and reset the carry */
-    k = DIGIT(&tmp, ix + jx) + k;
-    pbt[ix + jx] = ACCUM(k);
-    k = CARRYOUT(k);
-
-    /* If we are carrying out, propagate the carry to the next digit
-       in the output.  This may cascade, so we have to be somewhat
-       circumspect -- but we will have enough precision in the output
-       that we won't overflow 
-     */
-    kx = 1;
-    while(k) {
-      k = pbt[ix + jx + kx] + 1;
-      pbt[ix + jx + kx] = ACCUM(k);
-      k = CARRYOUT(k);
-      ++kx;
-    }
-  } /* for(ix ...) */
-
-  s_mp_clamp(&tmp);
-  s_mp_exch(&tmp, a);
-
-  mp_clear(&tmp);
-
-  return MP_OKAY;
-
-} /* end s_mp_sqr() */
-#endif
-
-/* }}} */
-
-/* {{{ s_mp_div(a, b) */
-
-/*
-  s_mp_div(a, b)
-
-  Compute a = a / b and b = a mod b.  Assumes b > a.
- */
-
-static mp_err s_mp_div(mp_int *a, mp_int *b)
-{
-  mp_int   quot, rem, t;
-  mp_word  q;
-  mp_err   res;
-  mp_digit d;
-  int      ix;
-
-  if(mp_cmp_z(b) == 0)
-    return MP_RANGE;
-
-  /* Shortcut if b is power of two */
-  if((ix = s_mp_ispow2(b)) >= 0) {
-    mp_copy(a, b);  /* need this for remainder */
-    s_mp_div_2d(a, (mp_digit)ix);
-    s_mp_mod_2d(b, (mp_digit)ix);
-
-    return MP_OKAY;
-  }
-
-  /* Allocate space to store the quotient */
-  if((res = mp_init_size(&quot, USED(a))) != MP_OKAY)
-    return res;
-
-  /* A working temporary for division     */
-  if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
-    goto T;
-
-  /* Allocate space for the remainder     */
-  if((res = mp_init_size(&rem, USED(a))) != MP_OKAY)
-    goto REM;
-
-  /* Normalize to optimize guessing       */
-  d = s_mp_norm(a, b);
-
-  /* Perform the division itself...woo!   */
-  ix = USED(a) - 1;
-
-  while(ix >= 0) {
-    /* Find a partial substring of a which is at least b */
-    while(s_mp_cmp(&rem, b) < 0 && ix >= 0) {
-      if((res = s_mp_lshd(&rem, 1)) != MP_OKAY) 
-	goto CLEANUP;
-
-      if((res = s_mp_lshd(&quot, 1)) != MP_OKAY)
-	goto CLEANUP;
-
-      DIGIT(&rem, 0) = DIGIT(a, ix);
-      s_mp_clamp(&rem);
-      --ix;
-    }
-
-    /* If we didn't find one, we're finished dividing    */
-    if(s_mp_cmp(&rem, b) < 0) 
-      break;    
-
-    /* Compute a guess for the next quotient digit       */
-    q = DIGIT(&rem, USED(&rem) - 1);
-    if(q <= DIGIT(b, USED(b) - 1) && USED(&rem) > 1)
-      q = (q << DIGIT_BIT) | DIGIT(&rem, USED(&rem) - 2);
-
-    q /= DIGIT(b, USED(b) - 1);
-
-    /* The guess can be as much as RADIX + 1 */
-    if(q >= RADIX)
-      q = RADIX - 1;
-
-    /* See what that multiplies out to                   */
-    mp_copy(b, &t);
-    if((res = s_mp_mul_d(&t, (mp_digit)q)) != MP_OKAY)
-      goto CLEANUP;
-
-    /* 
-       If it's too big, back it off.  We should not have to do this
-       more than once, or, in rare cases, twice.  Knuth describes a
-       method by which this could be reduced to a maximum of once, but
-       I didn't implement that here.
-     */
-    while(s_mp_cmp(&t, &rem) > 0) {
-      --q;
-      s_mp_sub(&t, b);
-    }
-
-    /* At this point, q should be the right next digit   */
-    if((res = s_mp_sub(&rem, &t)) != MP_OKAY)
-      goto CLEANUP;
-
-    /*
-      Include the digit in the quotient.  We allocated enough memory
-      for any quotient we could ever possibly get, so we should not
-      have to check for failures here
-     */
-    DIGIT(&quot, 0) = q;
-  }
-
-  /* Denormalize remainder                */
-  if(d != 0) 
-    s_mp_div_2d(&rem, d);
-
-  s_mp_clamp(&quot);
-  s_mp_clamp(&rem);
-
-  /* Copy quotient back to output         */
-  s_mp_exch(&quot, a);
-  
-  /* Copy remainder back to output        */
-  s_mp_exch(&rem, b);
-
-CLEANUP:
-  mp_clear(&rem);
-REM:
-  mp_clear(&t);
-T:
-  mp_clear(&quot);
-
-  return res;
-
-} /* end s_mp_div() */
-
-/* }}} */
-
-/* {{{ s_mp_2expt(a, k) */
-
-/*
-  Set a = 2^k.  a is zeroed, padded out so that digit k / DIGIT_BIT
-  exists, and bit k % DIGIT_BIT of that digit is set.  Returns MP_OKAY,
-  or the error from s_mp_pad() if a could not be padded.
- */
-static mp_err s_mp_2expt(mp_int *a, mp_digit k)
-{
-  mp_err    res;
-  mp_size   dig, bit;
-
-  dig = k / DIGIT_BIT;
-  bit = k % DIGIT_BIT;
-
-  mp_zero(a);
-  if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
-    return res;
-  
-  /* Shift a digit-typed one so the shift is carried out at the width
-     of mp_digit rather than at the width of int */
-  DIGIT(a, dig) |= ((mp_digit)1 << bit);
-
-  return MP_OKAY;
-
-} /* end s_mp_2expt() */
-
-/* }}} */
-
-/* {{{ s_mp_reduce(x, m, mu) */
-
-/*
-  Compute Barrett reduction, x (mod m), given a precomputed value for
-  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
-  faster than straight division, when many reductions by the same
-  value of m are required (such as in modular exponentiation).  This
-  can nearly halve the time required to do modular exponentiation,
-  as compared to using the full integer divide to reduce.
-
-  This algorithm was derived from the _Handbook of Applied
-  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
-  pp. 603-604.  
- */
-
-/*
-  Reduces x in place modulo m using the precomputed value mu.
-  Returns MP_OKAY on success; any error reported by the helper
-  routines is passed back to the caller, with the working copy q
-  released on every path after it has been created.
- */
-static mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
-{
-  mp_int   q;
-  mp_err   res;
-  mp_size  um = USED(m);
-
-  if((res = mp_init_copy(&q, x)) != MP_OKAY)
-    return res;
-
-  s_mp_rshd(&q, um - 1);       /* q1 = x / b^(k-1)  */
-
-  /* q2 = q1 * mu; the multiply allocates, so it can fail */
-  if((res = s_mp_mul(&q, mu)) != MP_OKAY)
-    goto CLEANUP;
-
-  s_mp_rshd(&q, um + 1);       /* q3 = q2 / b^(k+1) */
-
-  /* x = x mod b^(k+1), quick (no division) */
-  s_mp_mod_2d(x, (mp_digit)(DIGIT_BIT * (um + 1)));
-
-  /* q = q * m mod b^(k+1), quick (no division).  s_mp_mul_dig() keeps
-     at most um + 1 digits of the product, so no separate
-     s_mp_mod_2d() pass over q is needed afterwards. */
-  if((res = s_mp_mul_dig(&q, m, um + 1)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* x = x - q */
-  if((res = mp_sub(x, &q, x)) != MP_OKAY)
-    goto CLEANUP;
-
-  /* If x < 0, add b^(k+1) to it */
-  if(mp_cmp_z(x) < 0) {
-    mp_set(&q, 1);
-    if((res = s_mp_lshd(&q, um + 1)) != MP_OKAY)
-      goto CLEANUP;
-    if((res = mp_add(x, &q, x)) != MP_OKAY)
-      goto CLEANUP;
-  }
-
-  /* Back off if it's too big */
-  while(mp_cmp(x, m) >= 0) {
-    if((res = s_mp_sub(x, m)) != MP_OKAY)
-      break;
-  }
-
- CLEANUP:
-  mp_clear(&q);
-
-  return res;
-
-} /* end s_mp_reduce() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ Primitive comparisons */
-
-/* {{{ s_mp_cmp(a, b) */
-
-/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
-static int s_mp_cmp(mp_int *a, mp_int *b)
-{
-  mp_size   ua = USED(a), ub = USED(b);
-
-  if(ua > ub)
-    return MP_GT;
-  else if(ua < ub)
-    return MP_LT;
-  else {
-    int      ix = ua - 1;
-    mp_digit *ap = DIGITS(a) + ix, *bp = DIGITS(b) + ix;
-
-    while(ix >= 0) {
-      if(*ap > *bp)
-	return MP_GT;
-      else if(*ap < *bp)
-	return MP_LT;
-
-      --ap; --bp; --ix;
-    }
-
-    return MP_EQ;
-  }
-
-} /* end s_mp_cmp() */
-
-/* }}} */
-
-/* {{{ s_mp_cmp_d(a, d) */
-
-/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
-static int s_mp_cmp_d(mp_int *a, mp_digit d)
-{
-  mp_size  ua = USED(a);
-  mp_digit *ap = DIGITS(a);
-
-  if(ua > 1)
-    return MP_GT;
-
-  if(*ap < d) 
-    return MP_LT;
-  else if(*ap > d)
-    return MP_GT;
-  else
-    return MP_EQ;
-
-} /* end s_mp_cmp_d() */
-
-/* }}} */
-
-/* {{{ s_mp_ispow2(v) */
-
-/*
-  Returns -1 if the value is not a power of two; otherwise, it returns
-  k such that v = 2^k, i.e. lg(v).
- */
-static int s_mp_ispow2(mp_int *v)
-{
-  mp_digit d, *dp;
-  mp_size  uv = USED(v);
-  int      extra = 0, ix;
-
-  d = DIGIT(v, uv - 1); /* most significant digit of v */
-
-  while(d && ((d & 1) == 0)) {
-    d >>= 1;
-    ++extra;
-  }
-
-  if(d == 1) {
-    ix = uv - 2;
-    dp = DIGITS(v) + ix;
-
-    while(ix >= 0) {
-      if(*dp)
-	return -1; /* not a power of two */
-
-      --dp; --ix;
-    }
-
-    return ((uv - 1) * DIGIT_BIT) + extra;
-  } 
-
-  return -1;
-
-} /* end s_mp_ispow2() */
-
-/* }}} */
-
-/* {{{ s_mp_ispow2d(d) */
-
-/*
-  Returns -1 if d is not a power of two (zero included); otherwise
-  returns k such that d = 2^k.
- */
-static int s_mp_ispow2d(mp_digit d)
-{
-  int   pow = 0;
-
-  /* Zero has no set bit, so the shift loop below would never end */
-  if(d == 0)
-    return -1;
-
-  /* Strip trailing zero bits, counting them as we go */
-  while((d & 1) == 0) {
-    ++pow; d >>= 1;
-  }
-
-  /* A power of two has exactly one set bit left at this point */
-  if(d == 1)
-    return pow;
-
-  return -1;
-
-} /* end s_mp_ispow2d() */
-
-/* }}} */
-
-/* }}} */
-
-/* {{{ Primitive I/O helpers */
-
-/* {{{ s_mp_tovalue(ch, r) */
-
-/*
-  Convert the given character to its digit value, in the given radix.
-  If the given character is not understood in the given radix, -1 is
-  returned.  Otherwise the digit's numeric value is returned.
-
-  For radix <= 36 letters are folded to upper case first, so both
-  cases mean 10..35.  Above that, upper-case letters are 10..35,
-  lower-case letters are 36..61, '+' is 62 and '/' is 63.
-
-  The results will be odd if you use a radix < 2 or > 64, you are
-  expected to know what you're up to.
- */
-static int s_mp_tovalue(char ch, int r)
-{
-  int    val, xch;
-  
-  /* The <ctype.h> functions require a value representable as
-     unsigned char (or EOF); plain char may be negative, so convert
-     before classifying */
-  xch = (unsigned char)ch;
-  if(r <= 36)
-    xch = toupper(xch);
-
-  if(isdigit(xch))
-    val = xch - '0';
-  else if(isupper(xch))
-    val = xch - 'A' + 10;
-  else if(islower(xch))
-    val = xch - 'a' + 36;
-  else if(xch == '+')
-    val = 62;
-  else if(xch == '/')
-    val = 63;
-  else 
-    return -1;
-
-  /* Reject digits that do not exist in the requested radix */
-  if(val < 0 || val >= r)
-    return -1;
-
-  return val;
-
-} /* end s_mp_tovalue() */
-
-/* }}} */
-
-/* {{{ s_mp_todigit(val, r, low) */
-
-/*
-  Convert val to a radix-r digit, if possible.  If val is out of range
-  for r, returns zero.  Otherwise, returns an ASCII character denoting
-  the value in the given radix.
-
-  The results may be odd if you use a radix < 2 or > 64, you are
-  expected to know what you're doing.
- */
-  
-char     s_mp_todigit(int val, int r, int low)
-{
-  char   ch;
-
-  if(val < 0 || val >= r)
-    return 0;
-
-  ch = s_dmap_1[val];
-
-  if(r <= 36 && low)
-    ch = tolower(ch);
-
-  return ch;
-
-} /* end s_mp_todigit() */
-
-/* }}} */
-
-/* {{{ s_mp_outlen(bits, radix) */
-
-/* 
-   Return an estimate for how long a string is needed to hold a radix
-   r representation of a number with 'bits' significant bits.
-
-   Does not include space for a sign or a NUL terminator.
- */
-static int s_mp_outlen(int bits, int r)
-{
-  return (int)((double)bits * LOG_V_2(r));
-
-} /* end s_mp_outlen() */
-
-/* }}} */
-
-/* }}} */
-
-#endif /* MPI */
-
-/*------------------------------------------------------------------------*/
-/* HERE THERE BE DRAGONS                                                  */
-
- 

+ 3 - 5
mycrypt.h

@@ -9,17 +9,15 @@
 #include <limits.h>
 
 /* if there is a custom definition header file use it */
-#ifdef HAVE_CUSTOM
-    #include "mycrypt_custom.h"
-#endif
+#include <mycrypt_custom.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /* version */
-#define CRYPT   0x0080
-#define SCRYPT  "0.80"
+#define CRYPT   0x0081
+#define SCRYPT  "0.81"
 
 /* max size of either a cipher/hash block or symmetric key [largest of the two] */
 #define MAXBLOCKSIZE           128

+ 0 - 17
mycrypt_cfg.h

@@ -80,22 +80,5 @@ extern clock_t XCLOCK(void);
     #define PACKET_SUB_ENC_KEY     3
 #endif
 
-#ifdef MPI
-   #include "mpi.h"
-#else
-   #ifdef MRSA
-      #error RSA requires the big int library 
-   #endif
-   #ifdef MECC
-      #error ECC requires the big int library 
-   #endif
-   #ifdef MDH
-      #error DH requires the big int library 
-   #endif
-   #ifdef MDSA
-      #error DSA requires the big int library 
-   #endif
-#endif /* MPI */
-
 #endif /* MYCRYPT_CFG_H */
 

+ 76 - 0
mycrypt_custom.h

@@ -0,0 +1,76 @@
+/* This header is meant to be included before mycrypt.h in projects where
+ * you don't want to throw all the defines in a makefile. 
+ */
+
+#ifndef MYCRYPT_CUSTOM_H_
+#define MYCRYPT_CUSTOM_H_
+
+#ifdef CRYPT
+	#error mycrypt_custom.h should be included before mycrypt.h
+#endif
+
+#define XMALLOC malloc
+#define XREALLOC realloc
+#define XCALLOC calloc
+#define XFREE free
+#define XCLOCK clock
+#define XCLOCKS_PER_SEC CLOCKS_PER_SEC
+#define SMALL_CODE
+#define BLOWFISH
+#define RC2
+#define RC5
+#define RC6
+#define SERPENT
+#define SAFERP
+#define SAFER
+#define RIJNDAEL
+#define XTEA
+#define TWOFISH
+#define DES
+#define CAST5
+#define NOEKEON
+#define CFB
+#define OFB
+#define ECB
+#define CBC
+#define CTR
+#define SHA512
+#define SHA384
+#define SHA256
+#define TIGER
+#define SHA1
+#define MD5
+#define MD4
+#define MD2
+#define HMAC
+#define BASE64
+#define YARROW
+#define SPRNG
+#define RC4
+#define DEVRANDOM
+#define MRSA
+#define MDH
+#define MECC
+#define KR
+#define DH768
+#define DH1024
+#define DH1280
+#define DH1536
+#define DH1792
+#define DH2048
+#define DH2560
+#define DH3072
+#define DH4096
+#define ECC160
+#define ECC192
+#define ECC224
+#define ECC256
+#define ECC384
+#define ECC521
+#define MPI
+
+
+#include <mycrypt.h>
+
+#endif
+

+ 17 - 1
mycrypt_pk.h

@@ -1,12 +1,28 @@
 /* ---- NUMBER THEORY ---- */
 #ifdef MPI
 
+#include "tommath.h"
+
 extern int is_prime(mp_int *, int *);
 extern int rand_prime(mp_int *N, long len, prng_state *prng, int wprng);
 extern mp_err mp_init_multi(mp_int* mp, ...);
 extern void mp_clear_multi(mp_int* mp, ...);
 
-#endif
+#else
+   #ifdef MRSA
+      #error RSA requires the big int library 
+   #endif
+   #ifdef MECC
+      #error ECC requires the big int library 
+   #endif
+   #ifdef MDH
+      #error DH requires the big int library 
+   #endif
+   #ifdef MDSA
+      #error DSA requires the big int library 
+   #endif
+#endif /* MPI */
+
 
 /* ---- PUBLIC KEY CRYPTO ---- */
 

+ 5 - 1
mycrypt_prng.h

@@ -56,7 +56,11 @@ extern int prng_is_valid(int idx);
 /* Slow RNG you **might** be able to use to seed a PRNG with.  Be careful as this
  * might not work on all platforms as planned
  */
-extern unsigned long rng_get_bytes(unsigned char *buf, unsigned long len, void (*callback)(void));
+/* ch2-02-1 */ 
+extern unsigned long rng_get_bytes(unsigned char *buf, 
+                                   unsigned long len, 
+                                   void (*callback)(void));
+/* ch2-02-1 */
 
 extern int rng_make_prng(int bits, int wprng, prng_state *prng, void (*callback)(void));
 

+ 2 - 3
prime.c

@@ -147,11 +147,11 @@ static int next_prime(mp_int *N, mp_digit step)
     int res;
     mp_int n1, a, y, r;
     mp_digit dist, residues[UPPER_LIMIT];
-    
+
     _ARGCHK(N != NULL);
 
     /* first find the residues */
-	for (x = 0; x < (long)UPPER_LIMIT; x++) {
+    for (x = 0; x < (long)UPPER_LIMIT; x++) {
         if (mp_mod_d(N, prime_tab[x], &residues[x]) != MP_OKAY) {
            return CRYPT_MEM;
         }
@@ -193,7 +193,6 @@ loop:
            goto error;
         }
     }
-
     for (x = 0; x < 8; x++) {
         /* choose a */
         mp_set(&a, prime_tab[x]);

+ 0 - 1
rsa.c

@@ -73,7 +73,6 @@ int rsa_make_key(prng_state *prng, int wprng, int size, long e, rsa_key *key)
 
    if (mp_copy(&p, &key->p) != MP_OKAY)                    { goto error2; }
    if (mp_copy(&q, &key->q) != MP_OKAY)                    { goto error2; }
-   
  
    /* shrink ram required  */
    if (mp_shrink(&key->e) != MP_OKAY)                      { goto error2; }

+ 352 - 0
tommath.h

@@ -0,0 +1,352 @@
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision 
+ * integer arithmetic as well as number theoretic functionality.
+ * 
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with 
+ * additional optimizations in place.  
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtommath.iahu.ca
+ */
+#ifndef BN_H_
+#define BN_H_
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+
+#undef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#undef MAX
+#define MAX(x,y) ((x)>(y)?(x):(y))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* some default configurations.  
+ *
+ * A "mp_digit" must be able to hold DIGIT_BIT + 1 bits 
+ * A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits 
+ *
+ * At the very least a mp_digit must be able to hold 7 bits 
+ * [any size beyond that is ok provided it doesn't overflow the data type]
+ */
+#ifdef MP_8BIT
+   typedef unsigned char      mp_digit;
+   typedef unsigned short     mp_word;
+#elif defined(MP_16BIT)
+   typedef unsigned short     mp_digit;
+   typedef unsigned long      mp_word;
+#else
+#ifndef CRYPT
+   #ifdef _MSC_VER
+      typedef unsigned __int64   ulong64;
+      typedef signed __int64     long64;
+   #else
+      typedef unsigned long long ulong64;
+      typedef signed long long   long64;
+   #endif   
+#endif   
+
+   /* default case */
+   typedef unsigned long      mp_digit;
+   typedef ulong64            mp_word;
+  
+   #define DIGIT_BIT          28
+#endif  
+
+#ifndef DIGIT_BIT
+   #define DIGIT_BIT     ((CHAR_BIT * sizeof(mp_digit) - 1))  /* bits per digit */
+#endif
+
+#define MP_DIGIT_BIT     DIGIT_BIT
+#define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
+#define MP_DIGIT_MAX     MP_MASK   
+
+/* equalities */
+#define MP_LT        -1   /* less than */
+#define MP_EQ         0   /* equal to */
+#define MP_GT         1   /* greater than */
+
+#define MP_ZPOS       0   /* positive integer */
+#define MP_NEG        1   /* negative */
+
+#define MP_OKAY       0   /* ok result */
+#define MP_MEM        -2  /* out of mem */
+#define MP_VAL        -3  /* invalid input */
+#define MP_RANGE      MP_VAL
+
+typedef int           mp_err;
+
+/* you'll have to tune these... */
+extern int KARATSUBA_MUL_CUTOFF,
+           KARATSUBA_SQR_CUTOFF,
+           MONTGOMERY_EXPT_CUTOFF;
+
+#define MP_PREC                 64      /* default digits of precision */
+
+typedef struct  {
+    int used, alloc, sign;
+    mp_digit *dp;
+} mp_int;
+
+#define USED(m)    ((m)->used)
+#define DIGIT(m,k) ((m)->dp[k])
+#define SIGN(m)    ((m)->sign)
+
+/* ---> init and deinit bignum functions <--- */
+
+/* init a bignum */
+int mp_init(mp_int *a);
+
+/* free a bignum */
+void mp_clear(mp_int *a);
+
+/* exchange two ints */
+void mp_exch(mp_int *a, mp_int *b);
+
+/* shrink ram required for a bignum */
+int mp_shrink(mp_int *a);
+
+/* ---> Basic Manipulations <--- */
+
+#define mp_iszero(a) (((a)->used == 0) ? 1 : 0)
+#define mp_iseven(a) (((a)->used == 0 || (((a)->dp[0] & 1) == 0)) ? 1 : 0)
+#define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? 1 : 0)
+
+/* set to zero */
+void mp_zero(mp_int *a);
+
+/* set to a digit */
+void mp_set(mp_int *a, mp_digit b);
+
+/* set a 32-bit const */
+int mp_set_int(mp_int *a, unsigned long b);
+
+/* grow an int to a given size */
+int mp_grow(mp_int *a, int size);
+
+/* init to a given number of digits */
+int mp_init_size(mp_int *a, int size);
+
+/* copy, b = a */
+int mp_copy(mp_int *a, mp_int *b);
+
+/* inits and copies, a = b */
+int mp_init_copy(mp_int *a, mp_int *b);
+
+/* trim unused digits */
+void mp_clamp(mp_int *a);
+
+/* ---> digit manipulation <--- */
+
+/* right shift by "b" digits */
+void mp_rshd(mp_int *a, int b);
+
+/* left shift by "b" digits */
+int mp_lshd(mp_int *a, int b);
+
+/* c = a / 2^b */
+int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d);
+
+/* b = a/2 */
+int mp_div_2(mp_int *a, mp_int *b);
+
+/* c = a * 2^b */
+int mp_mul_2d(mp_int *a, int b, mp_int *c);
+
+/* b = a*2 */
+int mp_mul_2(mp_int *a, mp_int *b);
+
+/* c = a mod 2^b */
+int mp_mod_2d(mp_int *a, int b, mp_int *c);
+
+/* computes a = 2^b */
+int mp_2expt(mp_int *a, int b);
+
+/* makes a pseudo-random int of a given size */
+int mp_rand(mp_int *a, int digits);
+
+/* ---> binary operations <--- */
+/* c = a XOR b  */
+int mp_xor(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a OR b */
+int mp_or(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a AND b */
+int mp_and(mp_int *a, mp_int *b, mp_int *c);
+
+/* ---> Basic arithmetic <--- */
+
+/* b = -a */
+int mp_neg(mp_int *a, mp_int *b);
+
+/* b = |a| */
+int mp_abs(mp_int *a, mp_int *b);
+
+/* compare a to b */
+int mp_cmp(mp_int *a, mp_int *b);
+
+/* compare |a| to |b| */
+int mp_cmp_mag(mp_int *a, mp_int *b);
+
+/* c = a + b */
+int mp_add(mp_int *a, mp_int *b, mp_int *c);
+
+
+/* c = a - b */
+int mp_sub(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = a * b */
+int mp_mul(mp_int *a, mp_int *b, mp_int *c);
+
+/* b = a^2 */
+int mp_sqr(mp_int *a, mp_int *b);
+
+/* a/b => cb + d == a */
+int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* c = a mod b, 0 <= c < b  */
+int mp_mod(mp_int *a, mp_int *b, mp_int *c);
+
+/* ---> single digit functions <--- */
+
+/* compare against a single digit */
+int mp_cmp_d(mp_int *a, mp_digit b);
+
+/* c = a + b */
+int mp_add_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a - b */
+int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a * b */
+int mp_mul_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* a/b => cb + d == a */
+int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d);
+
+/* c = a^b */
+int mp_expt_d(mp_int *a, mp_digit b, mp_int *c);
+
+/* c = a mod b, 0 <= c < b  */
+int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c);
+
+/* ---> number theory <--- */
+
+/* d = a + b (mod c) */
+int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* d = a - b (mod c) */
+int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* d = a * b (mod c) */
+int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* c = a * a (mod b) */
+int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = 1/a (mod b) */
+int mp_invmod(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = (a, b) */
+int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+
+/* c = [a, b] or (a*b)/(a, b) */
+int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+
+/* finds a b'th root of a, such that |c|^b <= |a| 
+ *
+ * returns error if a < 0 and b is even
+ */
+int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
+
+/* shortcut for square root */
+#define mp_sqrt(a, b) mp_n_root(a, 2, b)
+
+/* computes the jacobi c = (a | n) (or Legendre if n is prime)  */
+int mp_jacobi(mp_int *a, mp_int *n, int *c);
+
+/* used to setup the Barrett reduction for a given modulus b */
+int mp_reduce_setup(mp_int *a, mp_int *b);
+
+/* Barrett Reduction, computes a (mod b) with a precomputed value c
+ *
+ * Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely
+ * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
+ */
+int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
+
+/* sets up the Montgomery reduction */
+int mp_montgomery_setup(mp_int *a, mp_digit *mp);
+
+/* computes a = B^n mod b without division or multiplication useful for 
+ * normalizing numbers in a Montgomery system.
+ */
+int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
+
+/* computes xR^-1 == x (mod N) via Montgomery Reduction */
+int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+
+/* d = a^b (mod c) */
+int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
+
+/* ---> radix conversion <--- */
+int mp_count_bits(mp_int *a);
+
+int mp_unsigned_bin_size(mp_int *a);
+int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c);
+int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
+
+int mp_signed_bin_size(mp_int *a);
+int mp_read_signed_bin(mp_int *a, unsigned char *b, int c);
+int mp_to_signed_bin(mp_int *a, unsigned char *b);
+
+int mp_read_radix(mp_int *a, char *str, int radix);
+int mp_toradix(mp_int *a, char *str, int radix);
+int mp_radix_size(mp_int *a, int radix);
+
+#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
+#define mp_raw_size(mp)           mp_signed_bin_size(mp)
+#define mp_toraw(mp, str)         mp_to_signed_bin((mp), (str))
+#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len))
+#define mp_mag_size(mp)           mp_unsigned_bin_size(mp)
+#define mp_tomag(mp, str)         mp_to_unsigned_bin((mp), (str))
+
+#define mp_tobinary(M, S)  mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S)   mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S)     mp_toradix((M), (S), 16)
+
+/* lowlevel functions, do not call! */
+int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
+int s_mp_sub(mp_int *a, mp_int *b, mp_int *c);
+#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
+int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
+int fast_s_mp_sqr(mp_int *a, mp_int *b);
+int s_mp_sqr(mp_int *a, mp_int *b);
+int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c);
+int mp_karatsuba_sqr(mp_int *a, mp_int *b);
+int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c);
+int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y);
+void bn_reverse(unsigned char *s, int len);
+
+#ifdef __cplusplus
+   }
+#endif
+
+#endif
+

Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff