Browse Source

added libtomcrypt-0.97b

Tom St Denis 21 years ago
parent
commit
a21f63bbd8
41 changed files with 1191 additions and 325 deletions
  1. 2 2
      aes.c
  2. 23 0
      changes
  3. 0 3
      crypt.c
  4. 1 1
      crypt.tex
  5. 1 33
      crypt_cipher_descriptor.c
  6. 2 33
      crypt_hash_descriptor.c
  7. 1 34
      crypt_prng_descriptor.c
  8. 30 1
      demos/test/rsa_test.c
  9. 44 9
      demos/tv_gen.c
  10. 101 3
      demos/x86_prof.c
  11. BIN
      doc/crypt.pdf
  12. 0 18
      examples/ch1-01.c
  13. 0 25
      examples/ch1-02.c
  14. 0 29
      examples/ch1-03.c
  15. 0 35
      examples/ch2-01.c
  16. 256 0
      fortuna.c
  17. 1 0
      hmac_done.c
  18. 8 1
      hmac_init.c
  19. 1 1
      hmac_test.c
  20. 4 1
      ltc_tommath.h
  21. 3 2
      makefile
  22. 2 1
      makefile.cygwin_dll
  23. 3 2
      makefile.icc
  24. 2 1
      makefile.msvc
  25. 255 35
      mpi.c
  26. 2 2
      mycrypt.h
  27. 4 4
      mycrypt_cfg.h
  28. 13 9
      mycrypt_custom.h
  29. 1 1
      mycrypt_hash.h
  30. 25 0
      mycrypt_pk.h
  31. 40 1
      mycrypt_prng.h
  32. 2 2
      pkcs_5_2.c
  33. 21 31
      rc2.c
  34. 24 2
      rc4.c
  35. 4 1
      rsa_decrypt_key.c
  36. 63 0
      rsa_v15_decrypt_key.c
  37. 54 0
      rsa_v15_encrypt_key.c
  38. 57 0
      rsa_v15_sign_hash.c
  39. 69 0
      rsa_v15_verify_hash.c
  40. 23 1
      sprng.c
  41. 49 1
      yarrow.c

+ 2 - 2
aes.c

@@ -89,7 +89,7 @@ static ulong32 setup_mix(ulong32 temp)
 }
 
 #ifndef ENCRYPT_ONLY
-
+#ifdef SMALL_CODE
 static ulong32 setup_mix2(ulong32 temp)
 {
    return Td0(255 & Te4[byte(temp, 3)]) ^
@@ -97,7 +97,7 @@ static ulong32 setup_mix2(ulong32 temp)
           Td2(255 & Te4[byte(temp, 1)]) ^
           Td3(255 & Te4[byte(temp, 0)]);
 }
-
+#endif
 #endif
 
 int SETUP(const unsigned char *key, int keylen, int rounds, symmetric_key *skey)

+ 23 - 0
changes

@@ -1,3 +1,26 @@
+July 23rd, 2004
+v0.97b -- Added PKCS #1 v1.5 RSA encrypt/sign helpers (like rsa_sign_hash, etc...)
+       -- Added missing prng check to rsa_decrypt_key() [not critical as I don't use 
+          descriptors directly in that function]
+       -- Merged in LTM-SSE, define LTMSSE before you build and you will get SSE2 optimized math ;-)
+          (roughly 3x faster on a P4 Northwood).  By default it will compile as ISO C portable
+          code (when LTMSSE is undefined).
+       -- Fixed bug in ltc_tommath.h where I had the kara/toom cutoffs not marked as ``extern''
+          Thanks to "Stefan Arentz" <stefan at organicnetwork.net>
+       -- Steven Dake <[email protected]> and Richard Amacker <[email protected]> submitted patches to 
+          fix pkcs_5_2().  It now matches the output of another crypto library.  Whoops... hehehe
+       -- Updated PRNG api.  Added Fortuna PRNG to the list of supported PRNGs
+       -- Fixed up the descriptor tables since globals are automatically zero'ed on startup.
+       -- Changed RC4 to store its output.  If you want to encrypt with RC4
+          you'll have to do the XOR yourself.
+       -- Fixed buffer overflows/overruns in the HMAC code.  
+
+       ++ API change for the PRNGs: there is now a done() function per PRNG.  You
+          should call it when you are done with a PRNG state.  So far it's
+          not absolutely required (skipping it won't cause problems) but it is
+          a good habit to start.
+
+
 June 23rd, 2004
 v0.97a ++ Fixed several potentially crippling bugs... [read on]
        -- Fixed bug in OAEP decoder that would incorrectly report 

+ 0 - 3
crypt.c

@@ -229,9 +229,6 @@ const char *crypt_build_settings =
 #endif
 #if defined(NO_FILE)
     " NO_FILE "
-#endif
-#if defined(LTC_TEST)
-    " LTC_TEST "
 #endif
     "\n"
     "\n\n\n"

+ 1 - 1
crypt.tex

@@ -47,7 +47,7 @@
 \def\gap{\vspace{0.5ex}}
 \makeindex
 \begin{document}
-\title{LibTomCrypt \\ Version 0.97a}
+\title{LibTomCrypt \\ Version 0.97b}
 \author{Tom St Denis \\
 \\
 [email protected] \\

+ 1 - 33
crypt_cipher_descriptor.c

@@ -10,37 +10,5 @@
  */
 #include "mycrypt.h"
 
-struct _cipher_descriptor cipher_descriptor[TAB_SIZE] = {
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } };
+struct _cipher_descriptor cipher_descriptor[TAB_SIZE];
 

+ 2 - 33
crypt_hash_descriptor.c

@@ -10,36 +10,5 @@
  */
 #include "mycrypt.h"
 
-struct _hash_descriptor hash_descriptor[TAB_SIZE] = {
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL },
-{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL } };
+struct _hash_descriptor hash_descriptor[TAB_SIZE];
+

+ 1 - 34
crypt_prng_descriptor.c

@@ -10,37 +10,4 @@
  */
 #include "mycrypt.h"
 
-struct _prng_descriptor prng_descriptor[TAB_SIZE] = {
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL },
-{ NULL, NULL, NULL, NULL, NULL } };
-
+struct _prng_descriptor prng_descriptor[TAB_SIZE];

+ 30 - 1
demos/test/rsa_test.c

@@ -21,6 +21,35 @@ int rsa_test(void)
    /* make a random key */
    DO(rsa_make_key(&test_yarrow, prng_idx, 1024/8, 65537, &key));
    
+   /* test PKCS #1 v1.5 */
+   for (rsa_msgsize = 1; rsa_msgsize <= 117; rsa_msgsize++) {
+      /* make a random key/msg */
+      yarrow_read(in, rsa_msgsize, &test_yarrow);
+
+      len  = sizeof(out);
+      len2 = rsa_msgsize;
+
+      /* encrypt */
+      DO(rsa_v15_encrypt_key(in, rsa_msgsize, out, &len, &test_yarrow, prng_idx, &key));
+      DO(rsa_v15_decrypt_key(out, len, tmp, rsa_msgsize, &test_yarrow, prng_idx, &stat, &key));
+      if (stat != 1 || memcmp(tmp, in, rsa_msgsize)) {
+         printf("PKCS #1 v1.5 encrypt/decrypt failure (rsa_msgsize: %lu, stat: %d)\n", rsa_msgsize, stat);
+	 return 1;
+      }
+   }
+   
+   /* signature */
+   len = sizeof(out);
+   DO(rsa_v15_sign_hash(in, 20, out, &len, &test_yarrow, prng_idx, hash_idx, &key));
+   in[1] ^= 1;
+   DO(rsa_v15_verify_hash(out, len, in, 20, &test_yarrow, prng_idx, hash_idx, &stat, &key));
+   in[1] ^= 1;
+   DO(rsa_v15_verify_hash(out, len, in, 20, &test_yarrow, prng_idx, hash_idx, &stat2, &key));
+   if (!(stat == 0 && stat2 == 1)) {
+      printf("PKCS #1 v1.5 sign/verify failure (stat %d, stat2 %d)\n", stat, stat2);
+      return 1;
+   }
+   
    /* encrypt the key (without lparam) */
    for (rsa_msgsize = 1; rsa_msgsize <= 86; rsa_msgsize++) {
       /* make a random key/msg */
@@ -47,7 +76,7 @@ int rsa_test(void)
          return 1;
       }
       if (len2 != rsa_msgsize || memcmp(tmp, in, rsa_msgsize)) {
-         int x;
+         unsigned long x;
          printf("\nrsa_decrypt_key mismatch, len %lu (second decrypt)\n", len2);
          printf("Original contents: \n"); 
          for (x = 0; x < rsa_msgsize; ) {

+ 44 - 9
demos/tv_gen.c

@@ -86,22 +86,33 @@ void reg_algs(void)
 
 void hash_gen(void)
 {
-   unsigned char md[MAXBLOCKSIZE], buf[MAXBLOCKSIZE*2+2];
+   unsigned char md[MAXBLOCKSIZE], *buf;
    unsigned long outlen, x, y, z;
    FILE *out;
+   int   err;
    
    out = fopen("hash_tv.txt", "w");
+   if (out == NULL) {
+      perror("can't open hash_tv");
+   }
    
    fprintf(out, "Hash Test Vectors:\n\nThese are the hashes of nn bytes '00 01 02 03 .. (nn-1)'\n\n");
    for (x = 0; hash_descriptor[x].name != NULL; x++) {
+      buf = XMALLOC(2 * hash_descriptor[x].blocksize);
+      if (buf == NULL) {
+         perror("can't alloc mem");
+         exit(EXIT_FAILURE);
+      }
       fprintf(out, "Hash: %s\n", hash_descriptor[x].name);
-      
       for (y = 0; y <= (hash_descriptor[x].blocksize * 2); y++) {
          for (z = 0; z < y; z++) {
             buf[z] = (unsigned char)(z & 255);
          }
          outlen = sizeof(md);
-         hash_memory(x, buf, y, md, &outlen);
+         if ((err = hash_memory(x, buf, y, md, &outlen)) != CRYPT_OK) {
+            printf("hash_memory error: %s\n", error_to_string(err));
+            exit(EXIT_FAILURE);
+         }
          fprintf(out, "%3lu: ", y);
          for (z = 0; z < outlen; z++) {
             fprintf(out, "%02X", md[z]);
@@ -109,15 +120,16 @@ void hash_gen(void)
          fprintf(out, "\n");
       }
       fprintf(out, "\n");
+      XFREE(buf);
    }
    fclose(out);
 }
 
 void cipher_gen(void)
 {
-   unsigned char key[MAXBLOCKSIZE], pt[MAXBLOCKSIZE];
+   unsigned char *key, pt[MAXBLOCKSIZE];
    unsigned long x, y, z, w;
-   int kl, lastkl;
+   int err, kl, lastkl;
    FILE *out;
    symmetric_key skey;
    
@@ -138,15 +150,27 @@ void cipher_gen(void)
             case 1: kl = (cipher_descriptor[x].min_key_length + cipher_descriptor[x].max_key_length)/2; break;
             case 2: kl = cipher_descriptor[x].max_key_length; break;
          }
-         cipher_descriptor[x].keysize(&kl);
+         if ((err = cipher_descriptor[x].keysize(&kl)) != CRYPT_OK) {
+            printf("keysize error: %s\n", error_to_string(err));
+            exit(EXIT_FAILURE);
+         }
          if (kl == lastkl) break;
          lastkl = kl;
          fprintf(out, "Key Size: %d bytes\n", kl);
 
+         key = XMALLOC(kl);
+         if (key == NULL) {
+            perror("can't malloc memory");
+            exit(EXIT_FAILURE);
+         }
+
          for (z = 0; (int)z < kl; z++) {
              key[z] = (unsigned char)z;
          }
-         cipher_descriptor[x].setup(key, kl, 0, &skey);
+         if ((err = cipher_descriptor[x].setup(key, kl, 0, &skey)) != CRYPT_OK) {
+            printf("setup error: %s\n", error_to_string(err));
+            exit(EXIT_FAILURE);
+         }
          
          for (z = 0; (int)z < cipher_descriptor[x].block_length; z++) {
             pt[z] = (unsigned char)z;
@@ -163,9 +187,13 @@ void cipher_gen(void)
              for (z = 0; z < (unsigned long)kl; z++) {
                  key[z] = pt[z % cipher_descriptor[x].block_length];
              }
-             cipher_descriptor[x].setup(key, kl, 0, &skey);
+             if ((err = cipher_descriptor[x].setup(key, kl, 0, &skey)) != CRYPT_OK) {
+                printf("cipher setup2 error: %s\n", error_to_string(err));
+                exit(EXIT_FAILURE);
+             }
          }
          fprintf(out, "\n");
+         XFREE(key);
      }
      fprintf(out, "\n");
   }
@@ -174,7 +202,7 @@ void cipher_gen(void)
 
 void hmac_gen(void)
 {
-   unsigned char key[MAXBLOCKSIZE], output[MAXBLOCKSIZE], input[MAXBLOCKSIZE*2+2];
+   unsigned char key[MAXBLOCKSIZE], output[MAXBLOCKSIZE], *input;
    int x, y, z, kl, err;
    FILE *out;
    unsigned long len;
@@ -193,6 +221,12 @@ void hmac_gen(void)
       for (y = 0; y < (int)hash_descriptor[x].hashsize; y++) {
           key[y] = (y&255);
       }
+
+      input = XMALLOC(hash_descriptor[x].blocksize * 2);
+      if (input == NULL) {
+         perror("Can't malloc memory");
+         exit(EXIT_FAILURE);
+      }
       
       for (y = 0; y <= (int)(hash_descriptor[x].blocksize * 2); y++) {
          for (z = 0; z < y; z++) {
@@ -212,6 +246,7 @@ void hmac_gen(void)
          /* forward the key */
          memcpy(key, output, hash_descriptor[x].hashsize);
       }
+      XFREE(input);
       fprintf(out, "\n");
    }
    fclose(out);

+ 101 - 3
demos/x86_prof.c

@@ -45,9 +45,6 @@ void tally_results(int type)
    }
 }
 
-
-
-
 /* RDTSC from Scott Duplichan */
 static ulong64 rdtsc (void)
    {
@@ -195,6 +192,9 @@ void reg_algs(void)
 #endif
 
 register_prng(&yarrow_desc);
+register_prng(&fortuna_desc);
+register_prng(&rc4_desc);
+
 rng_make_prng(128, find_prng("yarrow"), &prng, NULL);
 }
 
@@ -342,6 +342,101 @@ int time_hash(void)
    return 0;
 }
 
+/* Time mp_mul() on random operands of 128/DIGIT_BIT up to 1024/DIGIT_BIT
+ * digits, stepping by 128/DIGIT_BIT digits.
+ *
+ * Every size is sampled TIMES times.  A sample times two back-to-back
+ * multiplications and halves the elapsed count; the smallest sample per
+ * size is printed.
+ */
+void time_mult(void)
+{
+   ulong64 t1, t2;
+   unsigned long x, y;
+   mp_int  a, b, c;
+
+   printf("Timing Multiplying:\n");
+   mp_init_multi(&a,&b,&c,NULL);
+   for (x = 128/DIGIT_BIT; x <= 1024/DIGIT_BIT; x += 128/DIGIT_BIT) {
+       mp_rand(&a, x);
+       mp_rand(&b, x);
+
+#define DO1 mp_mul(&a, &b, &c);
+#define DO2 DO1; DO1;
+
+       /* -1 becomes the maximum value if ulong64 is unsigned (as its name
+        * suggests), so the first sample always replaces it */
+       t2 = -1;
+       for (y = 0; y < TIMES; y++) {
+           t_start();
+           t1 = t_read();
+           DO2;
+           t1 = (t_read() - t1)>>1;
+           if (t1 < t2) t2 = t1;
+       }
+       /* x is an unsigned long, so it has to be printed with %lu, not %d */
+       printf("%3lu digits: %9llu cycles\n", x, t2);
+   }
+   mp_clear_multi(&a,&b,&c,NULL);
+
+#undef DO1
+#undef DO2
+}
+
+/* Time mp_sqr() on random operands of 128/DIGIT_BIT up to 1024/DIGIT_BIT
+ * digits, stepping by 128/DIGIT_BIT digits.
+ *
+ * Every size is sampled TIMES times.  A sample times two back-to-back
+ * squarings and halves the elapsed count; the smallest sample per size is
+ * printed.
+ */
+void time_sqr(void)
+{
+   ulong64 t1, t2;
+   unsigned long x, y;
+   mp_int  a, b;
+
+   printf("Timing Squaring:\n");
+   mp_init_multi(&a,&b,NULL);
+   for (x = 128/DIGIT_BIT; x <= 1024/DIGIT_BIT; x += 128/DIGIT_BIT) {
+       mp_rand(&a, x);
+
+#define DO1 mp_sqr(&a, &b);
+#define DO2 DO1; DO1;
+
+       /* -1 becomes the maximum value if ulong64 is unsigned (as its name
+        * suggests), so the first sample always replaces it */
+       t2 = -1;
+       for (y = 0; y < TIMES; y++) {
+           t_start();
+           t1 = t_read();
+           DO2;
+           t1 = (t_read() - t1)>>1;
+           if (t1 < t2) t2 = t1;
+       }
+       /* x is an unsigned long, so it has to be printed with %lu, not %d */
+       printf("%3lu digits: %9llu cycles\n", x, t2);
+   }
+   mp_clear_multi(&a,&b,NULL);
+
+#undef DO1
+#undef DO2
+}
+   
+void time_prng(void)
+{  /* Time the read() entry point of every PRNG in prng_descriptor[].
+    *
+    * Each PRNG is started, given 256 zero bytes through add_entropy() and
+    * made ready; then 10000 samples are taken, each timing two 4096-byte
+    * reads and halving the elapsed count.  The smallest sample, shifted
+    * right by 12 (i.e. divided by the 4096 bytes read), is printed.
+    *
+    * NOTE(review): the return values of start/add_entropy/ready are ignored;
+    * fortuna_add_entropy() rejects lengths above 32, so the 256-byte call
+    * presumably fails silently for Fortuna -- confirm.
+    */
+   ulong64 t1, t2;
+   unsigned char buf[4096];
+   prng_state prng;
+   unsigned long x, y;
+
+   printf("Timing PRNGs:\n");
+   for (x = 0; prng_descriptor[x].name != NULL; x++) {   /* table ends at the first NULL name */
+      prng_descriptor[x].start(&prng);
+      zeromem(buf, 256);
+      prng_descriptor[x].add_entropy(buf, 256, &prng);
+      prng_descriptor[x].ready(&prng);
+      t2 = -1;   /* running minimum; -1 is the maximum if ulong64 is unsigned */
+
+#define DO1 prng_descriptor[x].read(buf, 4096, &prng);
+#define DO2 DO1 DO1
+
+      for (y = 0; y < 10000; y++) {
+         t_start();
+         t1 = t_read();
+         DO2;
+         t1 = (t_read() - t1)>>1;   /* two reads were timed, keep the cost of one */
+         if (t1 < t2) t2 = t1;
+      }
+      printf("%20s: %llu\n", prng_descriptor[x].name, t2>>12);   /* >>12: per byte of the 4096 read */
+   }
+#undef DO2
+#undef DO1
+
+}
+      
+
+
 int main(void)
 {
   reg_algs();
@@ -349,6 +444,9 @@ int main(void)
   printf("Timings for ciphers and hashes.  Times are listed as cycles per byte processed.\n\n");
 
 //  init_timer();
+  time_mult();
+  time_sqr();
+  time_prng();
   time_cipher();
   time_keysched();
   time_hash();

BIN
doc/crypt.pdf


+ 0 - 18
examples/ch1-01.c

@@ -1,18 +0,0 @@
-/* 
- * Name      : ch1-01.c
- * Purpose   : Demonstration of a basic libtomcrypt program
- * Author    : Tom St Denis
- *
- * History   : v0.79 Initial release
- */
- 
-/* ch1-01-1  */
-/* Include the default headers and libtomcrypt headers */
-#include <mycrypt.h>
-
-int main(void)
-{
-   return 0;
-}
-/* ch1-01-1  */
-

+ 0 - 25
examples/ch1-02.c

@@ -1,25 +0,0 @@
-/* 
- * Name      : ch1-02.c
- * Purpose   : Demonstration of error handling
- * Author    : Tom St Denis
- *
- * History   : v0.79 Initial release
- */
- 
-/* ch1-01-1 */
-#include <mycrypt.h>
-
-int main(void)
-{
-   int errno;
-   
-   if ((errno = some_func(...)) != CRYPT_OK) {
-      printf("Error: %s\n", error_to_string(errno));
-      return EXIT_FAILURE;
-   }
-   
-   return 0;
-}
-/*ch1-01-1 */
-
-

+ 0 - 29
examples/ch1-03.c

@@ -1,29 +0,0 @@
-/* 
- * Name      : ch1-03.c
- * Purpose   : Demonstration of variable length outputs
- * Author    : Tom St Denis
- *
- * History   : v0.79 Initial release
- */
- 
- /* ch1-01-1 */
- #include <mycrypt.h>
- 
- int main(void)
- {
-    unsigned long length;
-    unsigned char buffer[512];
-    int errno;
-    
-    length = sizeof(buffer);
-    if ((errno = some_func(..., buffer, &length)) != CRYPT_OK) {
-       printf("Error: %s\n", error_to_string(errno));
-       return EXIT_FAILURE;
-    }
-    printf("Size of output is %lu bytes\n", length);
-    return 0;
-}
-/* ch1-01-1 */
-
-
-    

+ 0 - 35
examples/ch2-01.c

@@ -1,35 +0,0 @@
-/* 
- * Name      : ch2-01.c
- * Purpose   : Demonstration of reading the RNG
- * Author    : Tom St Denis
- *
- * History   : v0.81 Initial release
- */
- 
- /* ch2-02-2 */
- #include <mycrypt.h>
- 
- int main(void) 
- {
-    unsigned char buf[16];
-    unsigned long len;
-    int           ix;
-    
-    /* read the RNG */
-    len = rng_get_bytes(buf, sizeof(buf), NULL);
-    
-    /* verify return */
-    if (len != sizeof(buf)) {
-       printf("Error: Only read %lu bytes.\n", len);
-    } else {
-       printf("Read %lu bytes\n", len);
-       for (ix = 0; ix < sizeof(buf); ix++) {
-           printf("%02x ", buf[ix]);
-       }
-       printf("\n");
-    }
-    
-    return EXIT_SUCCESS;
-}
-/* ch2-02-2 */
-

+ 256 - 0
fortuna.c

@@ -0,0 +1,256 @@
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtomcrypt.org
+ */
+
+/* Implementation of Fortuna by Tom St Denis 
+
+We deviate slightly here for reasons of simplicity [and to fit in the API].  First all "sources"
+in the AddEntropy function are fixed to 0.  Second since no reliable timer is provided 
+we reseed automatically when len(pool0) >= 64 or every FORTUNA_WD calls to the read function */
+
+#include "mycrypt.h"
+
+#ifdef FORTUNA 
+
+const struct _prng_descriptor fortuna_desc = {   /* PRNG descriptor for Fortuna */
+    "fortuna",               /* descriptor name */
+    &fortuna_start,          /* initialise a state */
+    &fortuna_add_entropy,    /* feed at most 32 bytes into the current pool */
+    &fortuna_ready,          /* performs a reseed */
+    &fortuna_read,           /* generate output bytes */
+    &fortuna_done,           /* currently a no-op */
+    &fortuna_export,         /* write a 2048-byte blob */
+    &fortuna_import          /* re-create a state from a 2048-byte blob */
+
+};
+
+/* Step the 16-byte IV as a little-endian counter: add one to the lowest
+ * byte and carry into the next byte only while the current one wraps to 0. */
+static void fortuna_update_iv(prng_state *prng)
+{
+   unsigned char *ctr = prng->fortuna.IV;
+   int            pos = 0;
+
+   while (pos < 16) {
+      ctr[pos] = (ctr[pos] + 1) & 255;
+      if (ctr[pos] != 0) {
+         /* no wrap-around, so there is no carry to propagate */
+         return;
+      }
+      ++pos;
+   }
+}
+
+/* reseed the PRNG
+ *
+ * Bumps reset_cnt, derives a fresh 32-byte key K from the old key and the
+ * digests of the selected pools, re-keys the cipher with it and steps the IV.
+ * Pool 0 is always consumed; pool x (x >= 1) is consumed only while bits
+ * 0..x-1 of reset_cnt are all zero, i.e. the loop stops at the first set bit.
+ * Consumed pools are re-initialised, and pool0_len and wd are cleared.
+ *
+ * Returns CRYPT_OK, or the first error from the SHA-256 or Rijndael calls.
+ * NOTE(review): the early error returns skip the CLEAN_STACK wipe of md/tmp
+ * and leave reset_cnt already incremented.
+ */
+static int fortuna_reseed(prng_state *prng)
+{
+   unsigned char tmp[32];
+   hash_state    md;
+   int           err, x;
+
+   ++prng->fortuna.reset_cnt;
+
+   /* new K == SHA256(K || s) where s == SHA256(P0) || SHA256(P1) ... */
+   sha256_init(&md);
+   if ((err = sha256_process(&md, prng->fortuna.K, 32)) != CRYPT_OK) {
+      return err;
+   }
+
+   for (x = 0; x < 32; x++) {
+       if (x == 0 || ((prng->fortuna.reset_cnt >> (x-1)) & 1) == 0) { 
+          /* terminate this hash */
+          if ((err = sha256_done(&prng->fortuna.pool[x], tmp)) != CRYPT_OK) {
+             return err; 
+          }
+          /* add it to the string */
+          if ((err = sha256_process(&md, tmp, 32)) != CRYPT_OK) {
+             return err;
+          }
+          /* reset this pool */
+          sha256_init(&prng->fortuna.pool[x]);
+       } else {
+          break;   /* first set bit of reset_cnt: higher pools are left alone */
+       }
+   }
+
+   /* finish key */
+   if ((err = sha256_done(&md, prng->fortuna.K)) != CRYPT_OK) {
+      return err; 
+   }
+   if ((err = rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey)) != CRYPT_OK) {
+      return err;
+   }
+   fortuna_update_iv(prng);
+
+   /* reset pool len
+    * (and the read-call counter that fortuna_read compares with FORTUNA_WD) */
+   prng->fortuna.pool0_len = 0;
+   prng->fortuna.wd        = 0;
+
+
+#ifdef CLEAN_STACK
+   zeromem(&md, sizeof(md));
+   zeromem(tmp, sizeof(tmp));
+#endif
+
+   return CRYPT_OK;
+}
+
+/* Put a Fortuna state into its initial condition.
+ *
+ * All 32 pools are re-initialised, the counters are cleared, the key K is
+ * zeroed and scheduled, and finally the IV is zeroed.
+ *
+ * Returns CRYPT_OK, or the error from rijndael_setup() (in which case the
+ * IV has not been cleared yet).
+ */
+int fortuna_start(prng_state *prng)
+{
+   int rc, i;
+
+   _ARGCHK(prng != NULL);
+
+   /* fresh hash state for every pool */
+   i = 0;
+   while (i < 32) {
+       sha256_init(&prng->fortuna.pool[i]);
+       ++i;
+   }
+
+   /* clear the bookkeeping counters */
+   prng->fortuna.wd        = 0;
+   prng->fortuna.reset_cnt = 0;
+   prng->fortuna.pool0_len = 0;
+   prng->fortuna.pool_idx  = 0;
+
+   /* all-zero key, scheduled before the IV is touched */
+   zeromem(prng->fortuna.K, 32);
+   rc = rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey);
+   if (rc == CRYPT_OK) {
+      zeromem(prng->fortuna.IV, 16);
+   }
+
+   return rc;
+}
+
+/* Mix up to 32 bytes of entropy into the pool selected by pool_idx.
+ *
+ * The pool is fed a two-byte header (source id 0, then the length) followed
+ * by the data itself.  pool0_len grows only when pool 0 was the target, and
+ * pool_idx then advances round-robin over the 32 pools.
+ *
+ * Returns CRYPT_OK, CRYPT_INVALID_ARG when len exceeds 32, or the error
+ * from sha256_process().
+ */
+int fortuna_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng)
+{
+   unsigned char hdr[2];
+   int           rc;
+
+   _ARGCHK(buf  != NULL);
+   _ARGCHK(prng != NULL);
+
+   /* ensure len <= 32 */
+   if (len > 32) {
+      return CRYPT_INVALID_ARG;
+   }
+
+   /* add s || length(buf) || buf to pool[pool_idx] */
+   hdr[0] = 0;
+   hdr[1] = len;
+   rc = sha256_process(&prng->fortuna.pool[prng->fortuna.pool_idx], hdr, 2);
+   if (rc == CRYPT_OK) {
+      rc = sha256_process(&prng->fortuna.pool[prng->fortuna.pool_idx], buf, len);
+   }
+   if (rc != CRYPT_OK) {
+      return rc;
+   }
+
+   /* only pool 0 is tracked for the reseed threshold */
+   if (prng->fortuna.pool_idx == 0) {
+      prng->fortuna.pool0_len += len + 2;
+   }
+
+   /* move on to the next pool, wrapping after pool 31 */
+   prng->fortuna.pool_idx = (prng->fortuna.pool_idx + 1) & 31;
+
+   return CRYPT_OK;
+}
+
+/* Make the PRNG ready for reading by performing a reseed.
+ *
+ * Returns whatever fortuna_reseed() returns (CRYPT_OK on success).
+ */
+int fortuna_ready(prng_state *prng)
+{
+   /* same argument check as the other public entry points;
+    * fortuna_reseed() dereferences prng unconditionally */
+   _ARGCHK(prng != NULL);
+   return fortuna_reseed(prng);
+}
+
+/* Produce len bytes of PRNG output in dst.
+ *
+ * A reseed happens first when the call counter reaches FORTUNA_WD or pool 0
+ * has collected at least 64 bytes.  Output is the encryption of the running
+ * IV, one 16-byte block at a time; afterwards two more blocks are generated
+ * to replace the key K and the cipher is re-keyed with it.
+ *
+ * Returns len on success, or 0 if the reseed or the re-key step fails.
+ */
+unsigned long fortuna_read(unsigned char *dst, unsigned long len, prng_state *prng)
+{
+   unsigned char block[16];
+   unsigned long remaining;
+
+   _ARGCHK(dst  != NULL);
+   _ARGCHK(prng != NULL);
+
+   /* do we have to reseed? */
+   ++prng->fortuna.wd;
+   if (prng->fortuna.wd == FORTUNA_WD || prng->fortuna.pool0_len >= 64) {
+      if (fortuna_reseed(prng) != CRYPT_OK) {
+         return 0;
+      }
+   }
+
+   /* now generate the blocks required; the IV steps after every block */
+   for (remaining = len; remaining > 0; fortuna_update_iv(prng)) {
+       if (remaining < 16) {
+          /* final partial block goes through a scratch buffer */
+          rijndael_ecb_encrypt(prng->fortuna.IV, block, &prng->fortuna.skey);
+          XMEMCPY(dst, block, remaining);
+          remaining = 0;
+       } else {
+          /* encrypt the IV and store it */
+          rijndael_ecb_encrypt(prng->fortuna.IV, dst, &prng->fortuna.skey);
+          dst       += 16;
+          remaining -= 16;
+       }
+   }
+
+   /* generate new key */
+   rijndael_ecb_encrypt(prng->fortuna.IV, prng->fortuna.K, &prng->fortuna.skey);
+   fortuna_update_iv(prng);
+   rijndael_ecb_encrypt(prng->fortuna.IV, prng->fortuna.K+16, &prng->fortuna.skey);
+   fortuna_update_iv(prng);
+   if (rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey) != CRYPT_OK) {
+      return 0;
+   }
+
+#ifdef CLEAN_STACK
+   zeromem(block, sizeof(block));
+#endif
+   return len;
+}
+
+void fortuna_done(prng_state *prng)
+{
+   _ARGCHK(prng != NULL);
+   /* call cipher done when we invent one ;-)
+    *
+    * Until then this is a deliberate no-op: apart from the NULL check via
+    * _ARGCHK above, the state is left untouched.
+    */
+}
+
+/* Export 2048 bytes of PRNG output that fortuna_import() can consume.
+ *
+ * out     - destination buffer
+ * outlen  - in: capacity of out; out: set to 2048 on success
+ * prng    - state to read from
+ *
+ * Returns CRYPT_OK, CRYPT_BUFFER_OVERFLOW when out holds fewer than 2048
+ * bytes, or CRYPT_ERROR_READPRNG when a read comes back short.
+ */
+int fortuna_export(unsigned char *out, unsigned long *outlen, prng_state *prng)
+{
+   int chunk;
+
+   _ARGCHK(out    != NULL);
+   _ARGCHK(outlen != NULL);
+   _ARGCHK(prng   != NULL);
+
+   /* we'll write 2048 bytes for s&g's */
+   if (*outlen < 2048) {
+      return CRYPT_BUFFER_OVERFLOW;
+   }
+
+   /* 32 separate reads of 64 bytes each */
+   chunk = 0;
+   while (chunk < 32) {
+      if (fortuna_read(out + chunk*64, 64, prng) != 64) {
+         return CRYPT_ERROR_READPRNG;
+      }
+      ++chunk;
+   }
+
+   *outlen = 2048;
+   return CRYPT_OK;
+}
+ 
+/* Rebuild a Fortuna state from a blob produced by fortuna_export().
+ *
+ * in     - exported blob, exactly 2048 bytes
+ * inlen  - length of in; anything other than 2048 is rejected
+ * prng   - state to (re)initialise
+ *
+ * Returns CRYPT_OK on success, CRYPT_INVALID_ARG for a wrong length, or the
+ * error reported by fortuna_start(), fortuna_add_entropy() or
+ * fortuna_ready().
+ */
+int fortuna_import(const unsigned char *in, unsigned long inlen, prng_state *prng)
+{
+   int err, x;
+
+   _ARGCHK(in   != NULL);
+   _ARGCHK(prng != NULL);
+
+   if (inlen != 2048) {
+      return CRYPT_INVALID_ARG;
+   }
+
+   if ((err = fortuna_start(prng)) != CRYPT_OK) {
+      return err;
+   }
+
+   /* fortuna_add_entropy() refuses more than 32 bytes per call, so the blob
+    * is fed as 64 chunks of 32 bytes (a 32 x 64 split is always rejected).
+    * prng is already a pointer and is handed on as-is, not as &prng. */
+   for (x = 0; x < 64; x++) {
+      if ((err = fortuna_add_entropy(in+x*32, 32, prng)) != CRYPT_OK) {
+         return err;
+      }
+   }
+   return fortuna_ready(prng);
+}
+
+#endif
+

+ 1 - 0
hmac_done.c

@@ -94,6 +94,7 @@ int hmac_done(hmac_state *hmac, unsigned char *hashOut, unsigned long *outlen)
 
     err = CRYPT_OK;
 __ERR:
+    XFREE(hmac->key);
 #ifdef CLEAN_STACK
     zeromem(isha, hashsize);
     zeromem(buf,  hashsize);

+ 8 - 1
hmac_init.c

@@ -61,9 +61,16 @@ int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned lon
        return CRYPT_MEM;
     }
 
+    /* allocate memory for key */
+    hmac->key = XMALLOC(HMAC_BLOCKSIZE);
+    if (hmac->key == NULL) {
+       XFREE(buf);
+       return CRYPT_MEM;
+    }
+
     // (1) make sure we have a large enough key
     if(keylen > HMAC_BLOCKSIZE) {
-        z = (unsigned long)sizeof(hmac->key);
+        z = (unsigned long)HMAC_BLOCKSIZE;
         if ((err = hash_memory(hash, key, keylen, hmac->key, &z)) != CRYPT_OK) {
            goto __ERR;
         }

+ 1 - 1
hmac_test.c

@@ -285,7 +285,7 @@ Key First"
         outlen = sizeof(digest);
         if((err = hmac_memory(hash, cases[i].key, cases[i].keylen, cases[i].data, cases[i].datalen, digest, &outlen)) != CRYPT_OK) {
 #if 0
-            printf("HMAC-%s test #%d\n", cases[i].algo, cases[i].num);
+            printf("HMAC-%s test #%d, %s\n", cases[i].algo, cases[i].num, error_to_string(err));
 #endif
             return err;
         }

+ 4 - 1
ltc_tommath.h

@@ -1,3 +1,4 @@
+
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
  *
  * LibTomMath is a library that provides multiple-precision
@@ -20,6 +21,7 @@
 #include <stdlib.h>
 #include <ctype.h>
 #include <limits.h>
+#include <mycrypt_custom.h>
 
 #undef MIN
 #define MIN(x,y) ((x)<(y)?(x):(y))
@@ -147,7 +149,7 @@ extern "C" {
 typedef int           mp_err;
 
 /* you'll have to tune these... */
- int KARATSUBA_MUL_CUTOFF,
+extern int KARATSUBA_MUL_CUTOFF,
            KARATSUBA_SQR_CUTOFF,
            TOOM_MUL_CUTOFF,
            TOOM_SQR_CUTOFF;
@@ -552,6 +554,7 @@ void bn_reverse(unsigned char *s, int len);
 
  const char *mp_s_rmap;
 
+
 #ifdef __cplusplus
    }
 #endif

+ 3 - 2
makefile

@@ -4,7 +4,7 @@
 # Modified by Clay Culver
 
 # The version
-VERSION=0.97a
+VERSION=0.97b
 
 # Compiler and Linker Names
 #CC=gcc
@@ -63,7 +63,7 @@ crypt_find_cipher_id.o     crypt_find_prng.o        crypt_prng_is_valid.o      \
 crypt_unregister_cipher.o  crypt_cipher_is_valid.o  crypt_find_hash.o          \
 crypt_hash_descriptor.o    crypt_register_cipher.o  crypt_unregister_hash.o    \
 \
-sprng.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
+fortuna.o sprng.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
 \
 rand_prime.o is_prime.o \
 \
@@ -71,6 +71,7 @@ ecc.o  dh.o \
 \
 rsa_decrypt_key.o  rsa_encrypt_key.o  rsa_exptmod.o  rsa_free.o  rsa_make_key.o  \
 rsa_sign_hash.o  rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \
+rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o rsa_v15_verify_hash.o \
 \
 dsa_export.o  dsa_free.o  dsa_import.o  dsa_make_key.o  dsa_sign_hash.o  \
 dsa_verify_hash.o  dsa_verify_key.o \

+ 2 - 1
makefile.cygwin_dll

@@ -28,7 +28,7 @@ crypt_find_cipher_id.o     crypt_find_prng.o        crypt_prng_is_valid.o      \
 crypt_unregister_cipher.o  crypt_cipher_is_valid.o  crypt_find_hash.o          \
 crypt_hash_descriptor.o    crypt_register_cipher.o  crypt_unregister_hash.o    \
 \
-sprng.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
+sprng.o fortuna.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
 \
 rand_prime.o is_prime.o \
 \
@@ -36,6 +36,7 @@ ecc.o  dh.o \
 \
 rsa_decrypt_key.o  rsa_encrypt_key.o  rsa_exptmod.o  rsa_free.o  rsa_make_key.o  \
 rsa_sign_hash.o  rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \
+rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o rsa_v15_verify_hash.o \
 \
 dsa_export.o  dsa_free.o  dsa_import.o  dsa_make_key.o  dsa_sign_hash.o  \
 dsa_verify_hash.o  dsa_verify_key.o \

+ 3 - 2
makefile.icc

@@ -41,7 +41,7 @@ default:library
 #   B - Blend of P4 and PM [mobile]
 #
 # Default to just generic max opts
-CFLAGS += -O3 -xN -ip
+CFLAGS += -O3 -xN -ip 
 
 # want to see stuff?
 #CFLAGS += -opt_report
@@ -79,7 +79,7 @@ crypt_find_cipher_id.o     crypt_find_prng.o        crypt_prng_is_valid.o      \
 crypt_unregister_cipher.o  crypt_cipher_is_valid.o  crypt_find_hash.o          \
 crypt_hash_descriptor.o    crypt_register_cipher.o  crypt_unregister_hash.o    \
 \
-sprng.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
+sprng.o fortuna.o yarrow.o rc4.o rng_get_bytes.o  rng_make_prng.o \
 \
 rand_prime.o is_prime.o \
 \
@@ -87,6 +87,7 @@ ecc.o  dh.o \
 \
 rsa_decrypt_key.o  rsa_encrypt_key.o  rsa_exptmod.o  rsa_free.o  rsa_make_key.o  \
 rsa_sign_hash.o  rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \
+rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o rsa_v15_verify_hash.o \
 \
 dsa_export.o  dsa_free.o  dsa_import.o  dsa_make_key.o  dsa_sign_hash.o  \
 dsa_verify_hash.o  dsa_verify_key.o \

+ 2 - 1
makefile.msvc

@@ -18,7 +18,7 @@ crypt_find_cipher_id.obj     crypt_find_prng.obj        crypt_prng_is_valid.obj
 crypt_unregister_cipher.obj  crypt_cipher_is_valid.obj  crypt_find_hash.obj          \
 crypt_hash_descriptor.obj    crypt_register_cipher.obj  crypt_unregister_hash.obj    \
 \
-sprng.obj yarrow.obj rc4.obj rng_get_bytes.obj  rng_make_prng.obj \
+sprng.obj fortuna.obj yarrow.obj rc4.obj rng_get_bytes.obj  rng_make_prng.obj \
 \
 rand_prime.obj is_prime.obj \
 \
@@ -26,6 +26,7 @@ ecc.obj  dh.obj \
 \
 rsa_decrypt_key.obj  rsa_encrypt_key.obj  rsa_exptmod.obj  rsa_free.obj  rsa_make_key.obj  \
 rsa_sign_hash.obj  rsa_verify_hash.obj rsa_export.obj rsa_import.obj tim_exptmod.obj \
+rsa_v15_encrypt_key.obj rsa_v15_decrypt_key.obj rsa_v15_sign_hash.obj rsa_v15_verify_hash.obj \
 \
 dsa_export.obj  dsa_free.obj  dsa_import.obj  dsa_make_key.obj  dsa_sign_hash.obj  \
 dsa_verify_hash.obj  dsa_verify_key.obj \

+ 255 - 35
mpi.c

@@ -258,6 +258,15 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
   /* now we proceed to zero successive digits
    * from the least significant upwards
    */
+#ifdef LTMSSE
+  // compute globals we'd like to have in MMX registers
+  asm ("movl $268435455,%%eax   \n\t"       //mm2 == MP_MASK
+       "movd %%eax,%%mm2        \n\t"
+       "movd %0,%%mm3           \n\t"       //mm3 = rho
+       "movq (%1),%%mm0         \n\t"       // W[ix] for ix=0
+       ::"r"(rho),"r"(W):"%eax");
+#endif
+
   for (ix = 0; ix < n->used; ix++) {
     /* mu = ai * m' mod b
      *
@@ -265,9 +274,13 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
      * by casting the value down to a mp_digit.  Note this requires
      * that W[ix-1] have  the carry cleared (see after the inner loop)
      */
+#ifndef LTMSSE
     register mp_digit mu;
     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
-
+#else
+    asm("pmuludq        %mm3,%mm0   \n\t"    // multiply against rho 
+        "pand           %mm2,%mm0   \n\t");  // mu == mm0
+#endif
     /* a = a + mu * m * b**i
      *
      * This is computed in place and on the fly.  The multiplication
@@ -295,13 +308,33 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 
       /* inner loop */
       for (iy = 0; iy < n->used; iy++) {
+#ifndef LTMSSE
           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
+#else
+// SSE version
+      asm ("movd     (%0), %%mm1 \n\t"   // load right side
+           "pmuludq  %%mm0,%%mm1 \n\t"   // multiply into left side
+           "paddq    (%1),%%mm1  \n\t"   // add 64-bit result out 
+           "movq     %%mm1,(%1)"         // store result
+           :: "r"(tmpn), "r"(_W));
+      // update pointers 
+      ++tmpn; 
+      ++_W;
+#endif
       }
     }
 
     /* now fix carry for next digit, W[ix+1] */
+#ifndef LTMSSE
     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
-  }
+#else
+    asm("movq  (%0),%%mm0           \n\t"        // W[ix]
+        "psrlq $28,%%mm0            \n\t"        // W[ix]>>28
+        "paddq 8(%0),%%mm0          \n\t"        // W[ix+1] + W[ix]>>28
+        "movq  %%mm0,8(%0)              "        // store
+        ::"r"(&W[ix]));
+#endif  
+}
 
   /* now we have to propagate the carries and
    * shift the words downward [all those least
@@ -319,35 +352,36 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
     /* alias for next word, where the carry goes */
     _W = W + ++ix;
 
-    for (; ix <= n->used * 2 + 1; ix++) {
-      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
-    }
-
-    /* copy out, A = A/b**n
-     *
-     * The result is A/b**n but instead of converting from an
-     * array of mp_word to mp_digit than calling mp_rshd
-     * we just copy them in the right order
-     */
-
     /* alias for destination word */
     tmpx = x->dp;
 
-    /* alias for shifted double precision result */
-    _W = W + n->used;
-
-    for (ix = 0; ix < n->used + 1; ix++) {
-      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
+    for (; ix <= n->used * 2 + 1; ix++) {
+#ifndef LTMSSE
+      *tmpx++ = (mp_digit)(*_W1 & ((mp_word) MP_MASK));
+      *_W++  += *_W1++ >> ((mp_word) DIGIT_BIT);
+#else
+    asm("movq   %%mm0,%%mm1        \n\t" // copy of W[ix]
+        "psrlq  $28,%%mm0          \n\t" // >>28
+        "pand   %%mm2,%%mm1        \n\t" // & with MP_MASK
+        "paddq  (%0),%%mm0         \n\t" // += _W
+        "movd   %%mm1,(%1)         \n\t" // store it
+        ::"r"(_W),"r"(tmpx));
+    ++_W; ++tmpx;
+#endif
     }
 
     /* zero the old used digits: if the input x had more than
      * n->used+1 digits we'll have to clear the excess digits
      */
-    for (; ix < olduse; ix++) {
+    for (ix = n->used + 1; ix < olduse; ix++) {
       *tmpx++ = 0;
     }
   }
 
+#ifdef LTMSSE
+  asm("emms");
+#endif
+
   /* set the max used and clamp */
   x->used = n->used + 1;
   mp_clamp (x);
@@ -408,7 +442,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   }
 
   /* clear temp buf (the columns) */
-  XMEMSET (W, 0, sizeof (mp_word) * digs);
+  memset (W, 0, sizeof (mp_word) * digs);
 
   /* calculate the columns */
   pa = a->used;
@@ -423,13 +457,21 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
      * the loop without scheduling problems
      */
     {
-      register mp_digit tmpx, *tmpy;
+#ifndef LTMSSE
+      register mp_digit tmpx;
+#endif
+
+      register mp_digit *tmpy;
       register mp_word *_W;
       register int iy, pb;
 
       /* alias for the word on the left e.g. A[ix] * B[iy] */
+#ifndef LTMSSE
       tmpx = a->dp[ix];
-
+#else
+// SSE: now we load the left side in mm0 
+      asm (" movd %0, %%mm0 " :: "r"(a->dp[ix]));
+#endif
       /* alias for the right side */
       tmpy = b->dp;
 
@@ -445,7 +487,19 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       pb = MIN (b->used, digs - ix);
 
       for (iy = 0; iy < pb; iy++) {
+#ifndef LTMSSE
         *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
+#else
+// SSE version
+      asm ("movd     (%0), %%mm1 \n\t"   // load right side
+           "pmuludq  %%mm0,%%mm1 \n\t"   // multiply into left side
+           "paddq    (%1), %%mm1 \n\t"   // add 64-bit result out 
+           "movq     %%mm1,(%1)"         // store result
+           :: "r"(tmpy), "r"(_W));
+      // update pointers 
+      ++tmpy; 
+      ++_W;
+#endif
       }
     }
 
@@ -474,21 +528,56 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
      * last digit to copy
      */
     tmpc = c->dp;
+
+#ifdef LTMSSE
+    // mm2 has W[ix-1] 
+    asm("movq (%0),%%mm2"::"r"(W));
+#endif
+
     for (ix = 1; ix < digs; ix++) {
+#ifndef LTMSSE
       /* forward the carry from the previous temp */
       W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
 
       /* now extract the previous digit [below the carry] */
       *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+
+#else
+      asm(
+          "movq (%0),%%mm1         \n\t"      // W[ix]
+          "movd  %%mm2,%%eax       \n\t"      // get 32-bit version of it W[ix-1]
+          "psrlq $28,%%mm2         \n\t"      // W[ix-1] >> DIGIT_BIT ... must be 28
+          "andl  $268435455,%%eax  \n\t"      // & with MP_MASK against W[ix-1]
+          "paddq %%mm1,%%mm2       \n\t"      // add them
+          "movl  %%eax,(%1)        \n\t"      // store it
+          :: "r"(&W[ix]), "r"(tmpc) : "%eax");
+      ++tmpc;
+#endif
+
     }
+
+#ifndef LTMSSE
     /* fetch the last digit */
     *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
+#else
+    // get last since we don't store into W[ix] anymore ;-)
+    asm("movd %%mm2,%%eax         \n\t"
+        "andl  $268435455,%%eax   \n\t"      // & with MP_MASK against W[ix-1]
+        "movl  %%eax,(%0)"                   // store it
+        ::"r"(tmpc):"%eax");
+    ++tmpc;
+#endif
 
     /* clear unused digits [that existed in the old copy of c] */
     for (; ix < olduse; ix++) {
       *tmpc++ = 0;
     }
   }
+
+#ifdef LTMSSE
+  asm("emms");
+#endif
+
   mp_clamp (c);
   return MP_OKAY;
 }
@@ -538,10 +627,14 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   /* like the other comba method we compute the columns first */
   pa = a->used;
   pb = b->used;
-  XMEMSET (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word));
+  memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word));
   for (ix = 0; ix < pa; ix++) {
     {
-      register mp_digit tmpx, *tmpy;
+#ifndef LTMSSE
+      register mp_digit tmpx;
+#endif
+
+      register mp_digit *tmpy;
       register int iy;
       register mp_word *_W;
 
@@ -549,7 +642,12 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       iy = digs - ix;
 
       /* copy of word on the left of A[ix] * B[iy] */
+#ifndef LTMSSE
       tmpx = a->dp[ix];
+#else
+//SSE we load tmpx into mm0
+      asm (" movd %0, %%mm0 " :: "r"(a->dp[ix]));
+#endif
 
       /* alias for right side */
       tmpy = b->dp + iy;
@@ -569,8 +667,21 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
       /* compute column products for digits above the minimum */
       for (; iy < pb; iy++) {
+#ifndef LTMSSE
          *_W++ += ((mp_word) tmpx) * ((mp_word)*tmpy++);
+#else
+// SSE version
+      asm ("movd     (%0), %%mm1 \n\t"   // load right side
+           "pmuludq  %%mm0,%%mm1 \n\t"   // multiply into left side
+           "paddq    (%1),%%mm1 \n\t"    // add 64-bit result out 
+           "movq     %%mm1,(%1)"         // store result
+           :: "r"(tmpy), "r"(_W));
+      // update pointers 
+      ++tmpy; 
+      ++_W;
+#endif
       }
+
     }
   }
 
@@ -582,15 +693,46 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
    *
    * See comments in bn_fast_s_mp_mul_digs.c
    */
+#ifdef LTMSSE
+    // mm2 has W[ix-1] 
+    asm("movq (%0),%%mm2"::"r"(W + digs));
+#endif
+
   for (ix = digs + 1; ix < newused; ix++) {
+      /* forward the carry from the previous temp */
+#ifndef LTMSSE
     W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
     c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+#else
+      asm(
+          "movd  %%mm2,%%eax      \n\t"      // get 32-bit version of it W[ix-1]
+          "psrlq $28,%%mm2        \n\t"      // W[ix-1] >> DIGIT_BIT ... must be 28
+          "andl  $268435455,%%eax \n\t"      // & with MP_MASK against W[ix-1]
+          "paddq (%0),%%mm2       \n\t"      // add them
+          "movl  %%eax,(%1)       \n\t"      // store it
+          :: "r"(&W[ix]), "r"(&c->dp[ix-1]) : "%eax");
+#endif
+
   }
+
+#ifndef LTMSSE
   c->dp[newused - 1] = (mp_digit) (W[newused - 1] & ((mp_word) MP_MASK));
+#else
+    // get last since we don't store into W[ix] anymore ;-)
+    asm("movd %%mm2,%%eax\n\t"
+        "andl  $268435455,%%eax   \n\t"      // & with MP_MASK against W[ix-1]
+        "movl  %%eax,(%0)"                   // store it
+        ::"r"(&(c->dp[newused-1])):"%eax");
+#endif
 
   for (; ix < oldused; ix++) {
     c->dp[ix] = 0;
   }
+
+#ifdef LTMSSE
+  asm("emms");
+#endif
+
   mp_clamp (c);
   return MP_OKAY;
 }
@@ -638,7 +780,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
 
   /* calculate size of product and allocate as required */
   pa = a->used;
-  newused = pa + pa + 1;
+  newused = pa + pa;
   if (b->alloc < newused) {
     if ((res = mp_grow (b, newused)) != MP_OKAY) {
       return res;
@@ -654,12 +796,15 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
    * the inner product can be doubled using n doublings instead of
    * n**2
    */
-  XMEMSET (W,  0, newused * sizeof (mp_word));
-  XMEMSET (W2, 0, newused * sizeof (mp_word));
+  memset (W,  0, newused * sizeof (mp_word));
+#ifndef LTMSSE
+  memset (W2, 0, newused * sizeof (mp_word));
+#endif
 
   /* This computes the inner product.  To simplify the inner N**2 loop
    * the multiplication by two is done afterwards in the N loop.
    */
+
   for (ix = 0; ix < pa; ix++) {
     /* compute the outer product
      *
@@ -668,15 +813,31 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
      * there is no need to do a double precision addition
      * into the W2[] array.
      */
+#ifndef LTMSSE
     W2[ix + ix] = ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
+#else
+    asm("movd    %0,%%xmm0        \n\t" // load a->dp[ix]
+        "movdq2q %%xmm0,%%mm0     \n\t" // get 64-bit version
+        "pmuludq %%xmm0,%%xmm0    \n\t" // square it 
+        "movdqu  %%xmm0,(%1)      \n\t" // store it (8-byte result, 8-byte zero)
+        ::"r"(a->dp[ix]), "r"(&(W2[ix+ix])));
+#endif
 
     {
-      register mp_digit tmpx, *tmpy;
+#ifndef LTMSSE
+      register mp_digit tmpx;
+#endif
+      register mp_digit *tmpy;
       register mp_word *_W;
       register int iy;
 
       /* copy of left side */
+#ifndef LTMSSE
       tmpx = a->dp[ix];
+#else
+//SSE we load tmpx into mm0 [note: loaded above]
+//      asm (" movd %0, %%mm0 " :: "r"(a->dp[ix]));
+#endif
 
       /* alias for right side */
       tmpy = a->dp + (ix + 1);
@@ -686,7 +847,19 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
 
       /* inner products */
       for (iy = ix + 1; iy < pa; iy++) {
+#ifndef LTMSSE
           *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
+#else
+// SSE version
+      asm ("movd     (%0), %%mm1 \n\t"   // load right side
+           "pmuludq  %%mm0,%%mm1 \n\t"   // multiply into left side
+           "paddq    (%1),%%mm1 \n\t"    // add 64-bit result out 
+           "movq     %%mm1,(%1)"         // store result
+           :: "r"(tmpy), "r"(_W));
+      // update pointers 
+      ++tmpy; 
+      ++_W;
+#endif
       }
     }
   }
@@ -707,10 +880,19 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
     /* double first value, since the inner products are
      * half of what they should be
      */
+    tmpb = b->dp;
+#ifndef LTMSSE
     W[0] += W[0] + W2[0];
+#else
+    // mm2 has W[ix-1]
+    asm("movq    (%0),%%mm2         \n\t"       // load W[0]
+        "paddq  %%mm2,%%mm2         \n\t"       // W[0] + W[0]
+        "paddq   (%1),%%mm2         \n\t"       // W[0] + W[0] + W2[0]
+        ::"r"(W),"r"(W2));
+#endif
 
-    tmpb = b->dp;
     for (ix = 1; ix < newused; ix++) {
+#ifndef LTMSSE
       /* double/add next digit */
       W[ix] += W[ix] + W2[ix];
 
@@ -721,12 +903,34 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
        * needed
        */
       *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+#else
+      asm( "movq (%0),%%mm0          \n\t"      // load W[ix]
+           "movd %%mm2,%%eax         \n\t"      // 32-bit version of W[ix-1]
+           "paddq %%mm0,%%mm0        \n\t"      // W[ix] + W[ix]
+           "psrlq $28,%%mm2          \n\t"      // W[ix-1] >> DIGIT_BIT ... must be 28
+           "paddq (%1),%%mm0         \n\t"      // W[ix] + W[ix] + W2[ix]
+           "andl  $268435455,%%eax   \n\t"      // & with MP_MASK against W[ix-1]
+           "paddq %%mm0,%%mm2        \n\t"      // W[ix] + W[ix] + W2[ix] + W[ix-1]>>DIGIT_BIT
+           "movl  %%eax,(%2)             "      // store it
+         :: "r"(&W[ix]), "r"(&W2[ix]), "r"(tmpb):"%eax");
+      ++tmpb;
+#endif
     }
+
+#ifndef LTMSSE
     /* set the last value.  Note even if the carry is zero
      * this is required since the next step will not zero
      * it if b originally had a value at b->dp[2*a.used]
      */
     *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
+#else
+    // get last since we don't store into W[ix] anymore ;-)
+    asm("movd  %%mm2,%%eax        \n\t"
+        "andl  $268435455,%%eax   \n\t"      // & with MP_MASK against W[ix-1]
+        "movl  %%eax,(%0)             "      // store it
+        ::"r"(tmpb):"%eax");
+    ++tmpb;
+#endif
 
     /* clear high digits of b if there were any originally */
     for (; ix < olduse; ix++) {
@@ -734,6 +938,10 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
     }
   }
 
+#ifdef LTMSSE
+  asm("emms");
+#endif
+
   mp_clamp (b);
   return MP_OKAY;
 }
@@ -1142,10 +1350,14 @@ mp_clamp (mp_int * a)
 void
 mp_clear (mp_int * a)
 {
+  int i;
+
   /* only do anything if a hasn't been freed previously */
   if (a->dp != NULL) {
     /* first zero the digits */
-    XMEMSET (a->dp, 0, sizeof (mp_digit) * a->used);
+    for (i = 0; i < a->used; i++) {
+        a->dp[i] = 0;
+    }
 
     /* free ram */
     XFREE(a->dp);
@@ -3083,15 +3295,22 @@ int mp_grow (mp_int * a, int size)
  */
 #include <ltc_tommath.h>
 
-/* init a new bigint */
+/* init a new mp_int */
 int mp_init (mp_int * a)
 {
+  int i;
+
   /* allocate memory required and clear it */
-  a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC);
+  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
   if (a->dp == NULL) {
     return MP_MEM;
   }
 
+  /* set the digits to zero */
+  for (i = 0; i < MP_PREC; i++) {
+      a->dp[i] = 0;
+  }
+
   /* set the used to zero, allocated digits to the default precision
    * and sign to positive */
   a->used  = 0;
@@ -7538,7 +7757,7 @@ mp_zero (mp_int * a)
 {
   a->sign = MP_ZPOS;
   a->used = 0;
-  XMEMSET (a->dp, 0, sizeof (mp_digit) * a->alloc);
+  memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
 }
 
 /* End: bn_mp_zero.c */
@@ -8396,6 +8615,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
 
  CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
 -------------------------------------------------------------
+ Intel P4 Northwood     /GCC v3.3.3   /       121/       128/SSE patches ;-)
  Intel P4 Northwood     /GCC v3.3.3   /        59/        81/profiled build
  Intel P4 Northwood     /GCC v3.3.3   /        59/        80/profiled_single build
  Intel P4 Northwood     /ICC v8.0     /        57/        70/profiled build
@@ -8404,8 +8624,8 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
  
 */
 
-int     KARATSUBA_MUL_CUTOFF = 57,      /* Min. number of digits before Karatsuba multiplication is used. */
-        KARATSUBA_SQR_CUTOFF = 70,      /* Min. number of digits before Karatsuba squaring is used. */
+int     KARATSUBA_MUL_CUTOFF = 121,      /* Min. number of digits before Karatsuba multiplication is used. */
+        KARATSUBA_SQR_CUTOFF = 128,      /* Min. number of digits before Karatsuba squaring is used. */
         
         TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
         TOOM_SQR_CUTOFF      = 400; 

+ 2 - 2
mycrypt.h

@@ -16,8 +16,8 @@ extern "C" {
 #endif
 
 /* version */
-#define CRYPT   0x0097
-#define SCRYPT  "0.97a"
+#define CRYPT   0x0098
+#define SCRYPT  "0.98"
 
 /* max size of either a cipher/hash block or symmetric key [largest of the two] */
 #define MAXBLOCKSIZE  64

+ 4 - 4
mycrypt_cfg.h

@@ -8,10 +8,10 @@
 #define MYCRYPT_CFG_H
 
 /* you can change how memory allocation works ... */
- void *XMALLOC(size_t n);
- void *REALLOC(void *p, size_t n);
- void *XCALLOC(size_t n, size_t s);
- void XFREE(void *p);
+void *XMALLOC(size_t n);
+void *XREALLOC(void *p, size_t n);   /* named to match the XREALLOC macro from mycrypt_custom.h */
+void *XCALLOC(size_t n, size_t s);
+void XFREE(void *p);
 
 /* change the clock function too */
  clock_t XCLOCK(void);

+ 13 - 9
mycrypt_custom.h

@@ -5,10 +5,6 @@
 #ifndef MYCRYPT_CUSTOM_H_
 #define MYCRYPT_CUSTOM_H_
 
-#ifdef CRYPT
-	#error mycrypt_custom.h should be included before mycrypt.h
-#endif
-
 /* macros for various libc functions you can change for embedded targets */
 #define XMALLOC  malloc
 #define XREALLOC realloc
@@ -28,7 +24,7 @@
 #define LTC_TEST
 
 /* clean the stack of functions which put private information on stack */
-//#define CLEAN_STACK
+// #define CLEAN_STACK
 
 /* disable all file related functions */
 //#define NO_FILE
@@ -43,8 +39,8 @@
 #define XTEA
 #define TWOFISH
 #define TWOFISH_TABLES
-//#define TWOFISH_ALL_TABLES
-//#define TWOFISH_SMALL
+// #define TWOFISH_ALL_TABLES
+// #define TWOFISH_SMALL
 #define DES
 #define CAST5
 #define NOEKEON
@@ -92,6 +88,13 @@
 #define YARROW_AES 0
 #define SPRNG
 #define RC4
+
+/* Fortuna */
+#define FORTUNA
+/* reseed every N calls to the read function */
+#define FORTUNA_WD    1024
+
+
 #define DEVRANDOM
 #define TRY_URANDOM_FIRST
 
@@ -133,11 +136,12 @@
 /* Include the MPI functionality?  (required by the PK algorithms) */
 #define MPI
 
+/* Use SSE2 optimizations in LTM?  Requires GCC or ICC and a P4 or K8 processor */
+// #define LTMSSE
+
 /* PKCS #1 and #5 stuff */
 #define PKCS_1
 #define PKCS_5
 
-#include <mycrypt.h>
-
 #endif
 

+ 1 - 1
mycrypt_hash.h

@@ -276,7 +276,7 @@ typedef struct Hmac_state {
      hash_state     md;
      int            hash;
      hash_state     hashstate;
-     unsigned char  key[MAXBLOCKSIZE];
+     unsigned char  *key;
 } hmac_state;
 
  int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned long keylen);

+ 25 - 0
mycrypt_pk.h

@@ -113,6 +113,7 @@ typedef struct Rsa_key {
 
  void rsa_free(rsa_key *key);
 
+/* These use PKCS #1 v2.0 padding */
 int rsa_encrypt_key(const unsigned char *inkey,  unsigned long inlen,
                           unsigned char *outkey, unsigned long *outlen,
                     const unsigned char *lparam, unsigned long lparamlen,
@@ -137,6 +138,30 @@ int rsa_verify_hash(const unsigned char *sig,      unsigned long siglen,
                           int            hash_idx, unsigned long saltlen,
                           int           *stat,     rsa_key      *key);
 
+/* these use PKCS #1 v1.5 padding */
+int rsa_v15_encrypt_key(const unsigned char *inkey,    unsigned long  inlen,
+                              unsigned char *outkey,   unsigned long *outlen,
+                              prng_state    *prng,     int            prng_idx, 
+                              rsa_key       *key);
+			      
+int rsa_v15_decrypt_key(const unsigned char *in,     unsigned long  inlen,
+                              unsigned char *outkey, unsigned long keylen, 
+                              prng_state    *prng,   int            prng_idx,
+                              int           *res,    rsa_key       *key);
+
+int rsa_v15_sign_hash(const unsigned char *msghash,  unsigned long  msghashlen, 
+                            unsigned char *sig,      unsigned long *siglen, 
+                            prng_state    *prng,     int            prng_idx,
+                            int            hash_idx, rsa_key       *key);
+
+int rsa_v15_verify_hash(const unsigned char *sig,      unsigned long siglen,
+                        const unsigned char *msghash,  unsigned long msghashlen,
+                              prng_state    *prng,     int           prng_idx,
+                              int            hash_idx, int          *stat,     
+                              rsa_key       *key);
+
+
+/* PKCS #1 import/export */
 int rsa_export(unsigned char *out, unsigned long *outlen, int type, rsa_key *key);
 int rsa_import(const unsigned char *in, unsigned long inlen, rsa_key *key);
                         

+ 40 - 1
mycrypt_prng.h

@@ -10,9 +10,25 @@ struct rc4_prng {
     unsigned char buf[256];
 };
 
+struct fortuna_prng {        /* per-instance state for the Fortuna PRNG; a member of the prng_state union */
+    hash_state pool[32];     /* the 32 pools, each kept as a hash_state */
+
+    symmetric_key skey;      /* NOTE(review): presumably the cipher key schedule derived from K -- confirm in fortuna.c */
+
+    unsigned char K[32],      /* the current key */
+                  IV[16];     /* IV for CTR mode */
+    
+    unsigned long pool_idx,   /* current pool we will add to */
+                  pool0_len,  /* length of 0'th pool */
+                  wd;         /* NOTE(review): presumably counts reads toward the FORTUNA_WD reseed threshold -- confirm in fortuna.c */
+
+    ulong64       reset_cnt;  /* number of times we have reset */
+};
+
 typedef union Prng_state {
     struct yarrow_prng    yarrow;
     struct rc4_prng       rc4;
+    struct fortuna_prng   fortuna;
 } prng_state;
 
 extern struct _prng_descriptor {
@@ -20,7 +36,10 @@ extern struct _prng_descriptor {
     int (*start)(prng_state *);
     int (*add_entropy)(const unsigned char *, unsigned long, prng_state *);
     int (*ready)(prng_state *);
-    unsigned long (*read)(unsigned char *, unsigned long len, prng_state *);
+    unsigned long (*read)(unsigned char *, unsigned long, prng_state *);
+    void (*done)(prng_state *);
+    int (*export)(unsigned char *, unsigned long *, prng_state *);
+    int (*import)(const unsigned char *, unsigned long, prng_state *);
 } prng_descriptor[];
 
 #ifdef YARROW
@@ -28,14 +47,31 @@ extern struct _prng_descriptor {
  int yarrow_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng);
  int yarrow_ready(prng_state *prng);
  unsigned long yarrow_read(unsigned char *buf, unsigned long len, prng_state *prng);
+ void yarrow_done(prng_state *prng);
+ int  yarrow_export(unsigned char *out, unsigned long *outlen, prng_state *prng);
+ int  yarrow_import(const unsigned char *in, unsigned long inlen, prng_state *prng);
  extern const struct _prng_descriptor yarrow_desc;
 #endif
 
+#ifdef FORTUNA
+ int fortuna_start(prng_state *prng);
+ int fortuna_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng);
+ int fortuna_ready(prng_state *prng);
+ unsigned long fortuna_read(unsigned char *buf, unsigned long len, prng_state *prng);
+ void fortuna_done(prng_state *prng);
+ int  fortuna_export(unsigned char *out, unsigned long *outlen, prng_state *prng);
+ int  fortuna_import(const unsigned char *in, unsigned long inlen, prng_state *prng);
+ extern const struct _prng_descriptor fortuna_desc;
+#endif
+
 #ifdef RC4
  int rc4_start(prng_state *prng);
  int rc4_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng);
  int rc4_ready(prng_state *prng);
  unsigned long rc4_read(unsigned char *buf, unsigned long len, prng_state *prng);
+ void rc4_done(prng_state *prng);
+ int  rc4_export(unsigned char *out, unsigned long *outlen, prng_state *prng);
+ int  rc4_import(const unsigned char *in, unsigned long inlen, prng_state *prng);
  extern const struct _prng_descriptor rc4_desc;
 #endif
 
@@ -44,6 +80,9 @@ extern struct _prng_descriptor {
  int sprng_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng);
  int sprng_ready(prng_state *prng);
  unsigned long sprng_read(unsigned char *buf, unsigned long len, prng_state *prng);
+ void sprng_done(prng_state *prng);
+ int  sprng_export(unsigned char *out, unsigned long *outlen, prng_state *prng);
+ int  sprng_import(const unsigned char *in, unsigned long inlen, prng_state *prng);
  extern const struct _prng_descriptor sprng_desc;
 #endif
 

+ 2 - 2
pkcs_5_2.c

@@ -52,7 +52,7 @@ int pkcs_5_alg2(const unsigned char *password, unsigned long password_len,
    stored = 0;
    while (left != 0) {
        /* process block number blkno */
-       zeromem(buf, MAXBLOCKSIZE*2);
+       zeromem(buf[0], MAXBLOCKSIZE*2);
        
        /* store current block number and increment for next pass */
        STORE32H(blkno, buf[1]);
@@ -75,7 +75,7 @@ int pkcs_5_alg2(const unsigned char *password, unsigned long password_len,
 
        /* now compute repeated and XOR it in buf[1] */
        XMEMCPY(buf[1], buf[0], x);
-       for (itts = 2; itts < iteration_count; ++itts) {
+       for (itts = 1; itts < iteration_count; ++itts) {
            if ((err = hmac_memory(hash_idx, password, password_len, buf[0], x, buf[0], &x)) != CRYPT_OK) {
               goto __ERR;
            }

+ 21 - 31
rc2.c

@@ -33,17 +33,8 @@ const struct _cipher_descriptor rc2_desc = {
    &rc2_keysize
 };
 
-
-/**********************************************************************\
-* Expand a variable-length user key (between 1 and 128 bytes) to a     *
-* 64-short working rc2 key, of at most "bits" effective key bits.      *
-* The effective key bits parameter looks like an export control hack.  *
-* For normal use, it should always be set to 1024.  For convenience,   *
-* zero is accepted as an alias for 1024.                               *
-\**********************************************************************/
-
-   /* 256-entry permutation table, probably derived somehow from pi */
-    static const unsigned char permute[256] = {
+/* 256-entry permutation table, probably derived somehow from pi */
+static const unsigned char permute[256] = {
         217,120,249,196, 25,221,181,237, 40,233,253,121, 74,160,216,157,
         198,126, 55,131, 43,118, 83,142, 98, 76,100,136, 68,139,251,162,
          23,154, 89,245,135,179, 79, 19, 97, 69,109,141,  9,129,125, 50,
@@ -60,7 +51,7 @@ const struct _cipher_descriptor rc2_desc = {
         211,  0,230,207,225,158,168, 44, 99, 22,  1, 63, 88,226,137,169,
          13, 56, 52, 27,171, 51,255,176,187, 72, 12, 95,185,177,205, 46,
         197,243,219, 71,229,165,156,119, 10,166, 32,104,254,127,193,173
-    };
+};
 
 int rc2_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *skey)
 {
@@ -87,24 +78,23 @@ int rc2_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *s
     /* Phase 1: Expand input key to 128 bytes */
     if (keylen < 128) {
         for (i = keylen; i < 128; i++) {
-            tmp[i] = permute[(int)((tmp[i - 1] + tmp[i - keylen]) & 255)];
+            tmp[i] = permute[(tmp[i - 1] + tmp[i - keylen]) & 255];
         }
     }
     
     /* Phase 2 - reduce effective key size to "bits" */
-    bits = keylen*8;
+    bits = keylen<<3;
     T8   = (unsigned)(bits+7)>>3;
     TM   = (255 >> (unsigned)(7 & -bits));
-    tmp[128 - T8] = permute[(int)(tmp[128 - T8] & TM)];
+    tmp[128 - T8] = permute[tmp[128 - T8] & TM];
     for (i = 127 - T8; i >= 0; i--) {
-        tmp[i] = permute[(int)(tmp[i + 1] ^ tmp[i + T8])];
+        tmp[i] = permute[tmp[i + 1] ^ tmp[i + T8]];
     }
 
     /* Phase 3 - copy to xkey in little-endian order */
-    i = 63;
-    do {
+    for (i = 0; i < 64; i++) {
         xkey[i] =  (unsigned)tmp[2*i] + ((unsigned)tmp[2*i+1] << 8);
-    } while (i-- > 0);
+    }        
 
 #ifdef CLEAN_STACK
     zeromem(tmp, sizeof(tmp));
@@ -129,9 +119,9 @@ void rc2_ecb_encrypt( const unsigned char *plain,
     unsigned *xkey;
     unsigned x76, x54, x32, x10, i;
 
-    _ARGCHK(plain != NULL);
+    _ARGCHK(plain  != NULL);
     _ARGCHK(cipher != NULL);
-    _ARGCHK(skey != NULL);
+    _ARGCHK(skey   != NULL);
 
     xkey = skey->rc2.xkey;
 
@@ -142,16 +132,16 @@ void rc2_ecb_encrypt( const unsigned char *plain,
 
     for (i = 0; i < 16; i++) {
         x10 = (x10 + (x32 & ~x76) + (x54 & x76) + xkey[4*i+0]) & 0xFFFF;
-        x10 = ((x10 << 1) | (x10 >> 15)) & 0xFFFF;
+        x10 = ((x10 << 1) | (x10 >> 15));
 
         x32 = (x32 + (x54 & ~x10) + (x76 & x10) + xkey[4*i+1]) & 0xFFFF;
-        x32 = ((x32 << 2) | (x32 >> 14)) & 0xFFFF;
+        x32 = ((x32 << 2) | (x32 >> 14));
 
         x54 = (x54 + (x76 & ~x32) + (x10 & x32) + xkey[4*i+2]) & 0xFFFF;
-        x54 = ((x54 << 3) | (x54 >> 13)) & 0xFFFF;
+        x54 = ((x54 << 3) | (x54 >> 13));
 
         x76 = (x76 + (x10 & ~x54) + (x32 & x54) + xkey[4*i+3]) & 0xFFFF;
-        x76 = ((x76 << 5) | (x76 >> 11)) & 0xFFFF;
+        x76 = ((x76 << 5) | (x76 >> 11));
 
         if (i == 4 || i == 10) {
             x10 = (x10 + xkey[x76 & 63]) & 0xFFFF;
@@ -199,9 +189,9 @@ void rc2_ecb_decrypt( const unsigned char *cipher,
     unsigned *xkey;
     int i;
 
-    _ARGCHK(plain != NULL);
+    _ARGCHK(plain  != NULL);
     _ARGCHK(cipher != NULL);
-    _ARGCHK(skey != NULL);
+    _ARGCHK(skey   != NULL);
 
     xkey = skey->rc2.xkey;
 
@@ -218,16 +208,16 @@ void rc2_ecb_decrypt( const unsigned char *cipher,
             x10 = (x10 - xkey[x76 & 63]) & 0xFFFF;
         }
 
-        x76 = ((x76 << 11) | (x76 >> 5)) & 0xFFFF;
+        x76 = ((x76 << 11) | (x76 >> 5));
         x76 = (x76 - ((x10 & ~x54) + (x32 & x54) + xkey[4*i+3])) & 0xFFFF;
 
-        x54 = ((x54 << 13) | (x54 >> 3)) & 0xFFFF;
+        x54 = ((x54 << 13) | (x54 >> 3));
         x54 = (x54 - ((x76 & ~x32) + (x10 & x32) + xkey[4*i+2])) & 0xFFFF;
 
-        x32 = ((x32 << 14) | (x32 >> 2)) & 0xFFFF;
+        x32 = ((x32 << 14) | (x32 >> 2));
         x32 = (x32 - ((x54 & ~x10) + (x76 & x10) + xkey[4*i+1])) & 0xFFFF;
 
-        x10 = ((x10 << 15) | (x10 >> 1)) & 0xFFFF;
+        x10 = ((x10 << 15) | (x10 >> 1));
         x10 = (x10 - ((x32 & ~x76) + (x54 & x76) + xkey[4*i+0])) & 0xFFFF;
     }
 

+ 24 - 2
rc4.c

@@ -18,7 +18,10 @@ const struct _prng_descriptor rc4_desc =
     &rc4_start,
     &rc4_add_entropy,
     &rc4_ready,
-    &rc4_read
+    &rc4_read,
+    &rc4_done,
+    &rc4_export,
+    &rc4_import
 };
 
 int rc4_start(prng_state *prng)
@@ -96,12 +99,31 @@ unsigned long rc4_read(unsigned char *buf, unsigned long len, prng_state *prng)
       y = (y + s[x]) & 255;
       tmp = s[x]; s[x] = s[y]; s[y] = tmp;
       tmp = (s[x] + s[y]) & 255;
-      *buf++ ^= s[tmp];
+      *buf++ = s[tmp];
    }
    prng->rc4.x = x;
    prng->rc4.y = y;
    return n;
 }
 
+/* Shutdown hook for the RC4 PRNG descriptor.
+ * Only argument-checks prng; no other work is performed here.
+ */
+void rc4_done(prng_state *prng)
+{
+   _ARGCHK(prng != NULL);
+}
+
+/* Export hook for the RC4 PRNG descriptor.
+ * Reports an empty export: stores 0 in *outlen and returns CRYPT_OK.
+ * Neither out nor prng is read or written by this function.
+ */
+int rc4_export(unsigned char *out, unsigned long *outlen, prng_state *prng)
+{
+   _ARGCHK(outlen != NULL);
+
+   /* nothing is written to out, so the exported length is zero */
+   *outlen = 0;
+   return CRYPT_OK;
+}
+ 
+/* Import hook for the RC4 PRNG descriptor.
+ * Accepts any input: in, inlen and prng are ignored and CRYPT_OK is
+ * returned unconditionally.
+ */
+int rc4_import(const unsigned char *in, unsigned long inlen, prng_state *prng)
+{
+   return CRYPT_OK;
+}
+
+
 #endif
 

+ 4 - 1
rsa_decrypt_key.c

@@ -30,7 +30,10 @@ int rsa_decrypt_key(const unsigned char *in,     unsigned long inlen,
   _ARGCHK(key    != NULL);
   _ARGCHK(res    != NULL);
 
-  /* valid hash ? */
+  /* valid hash/prng ? */
+  if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) {
+     return err;
+  }
   if ((err = hash_is_valid(hash_idx)) != CRYPT_OK) {
      return err;
   }

+ 63 - 0
rsa_v15_decrypt_key.c

@@ -0,0 +1,63 @@
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtomcrypt.org
+ */
+
+#include "mycrypt.h"
+
+#ifdef MRSA
+
+/* RSA decrypt a packet, then remove the PKCS #1 v1.5 encryption padding.
+ *
+ * in, inlen       ciphertext; inlen must equal the byte length of key->N
+ * outkey, keylen  payload buffer and length, forwarded to pkcs_1_v15_es_decode()
+ * prng, prng_idx  PRNG state and descriptor index, forwarded to rsa_exptmod()
+ * res             validity flag; cleared to 0 on entry, then filled in by
+ *                 pkcs_1_v15_es_decode()
+ * key             RSA key used for the PK_PRIVATE operation
+ *
+ * Returns CRYPT_INVALID_PACKET when inlen does not match the modulus size,
+ * CRYPT_MEM when the scratch buffer cannot be allocated, otherwise the
+ * status of prng_is_valid(), rsa_exptmod() or pkcs_1_v15_es_decode().
+ */
+int rsa_v15_decrypt_key(const unsigned char *in,     unsigned long  inlen,
+                              unsigned char *outkey, unsigned long keylen,
+                              prng_state    *prng,   int            prng_idx,
+                              int           *res,    rsa_key       *key)
+{
+  unsigned long modulus_bitlen, modulus_bytelen, x;
+  int           err;
+  unsigned char *tmp;
+
+  _ARGCHK(in     != NULL);
+  _ARGCHK(outkey != NULL);
+  _ARGCHK(key    != NULL);
+  _ARGCHK(res    != NULL);
+
+  /* default to invalid so an early error return never leaves *res unset
+   * (assumes 0 means "not valid", as elsewhere in the library -- confirm) */
+  *res = 0;
+
+  /* valid prng ? */
+  if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) {
+     return err;
+  }
+
+  /* get modulus len in bits */
+  modulus_bitlen = mp_count_bits(&(key->N));
+
+  /* the ciphertext must be exactly as long as the modulus */
+  modulus_bytelen = mp_unsigned_bin_size(&(key->N));
+  if (modulus_bytelen != inlen) {
+     return CRYPT_INVALID_PACKET;
+  }
+
+  /* allocate ram for the decrypted (still padded) block */
+  tmp = XMALLOC(inlen);
+  if (tmp == NULL) {
+     return CRYPT_MEM;
+  }
+
+  /* rsa decode the packet */
+  x = inlen;
+  if ((err = rsa_exptmod(in, inlen, tmp, &x, PK_PRIVATE, prng, prng_idx, key)) != CRYPT_OK) {
+     XFREE(tmp);
+     return err;
+  }
+
+  /* PKCS #1 v1.5 depad */
+  err = pkcs_1_v15_es_decode(tmp, x, modulus_bitlen, outkey, keylen, res);
+  XFREE(tmp);
+  return err;
+}
+
+#endif

+ 54 - 0
rsa_v15_encrypt_key.c

@@ -0,0 +1,54 @@
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtomcrypt.org
+ */
+
+#include "mycrypt.h"
+
+#ifdef MRSA
+
+/* Apply PKCS #1 v1.5 encryption padding to inkey and RSA encrypt the result.
+ *
+ * inkey, inlen    payload to protect
+ * outkey, outlen  output buffer; *outlen is its capacity on entry and is
+ *                 passed to rsa_exptmod() to receive the result length
+ * prng, prng_idx  PRNG state and descriptor index used for padding and RSA
+ * key             RSA key used for the PK_PUBLIC operation
+ *
+ * Returns CRYPT_BUFFER_OVERFLOW when *outlen is smaller than the modulus,
+ * otherwise the status of prng_is_valid(), pkcs_1_v15_es_encode() or
+ * rsa_exptmod().
+ */
+int rsa_v15_encrypt_key(const unsigned char *inkey,    unsigned long  inlen,
+                              unsigned char *outkey,   unsigned long *outlen,
+                              prng_state    *prng,     int            prng_idx,
+                              rsa_key       *key)
+{
+  unsigned long nbits, nbytes, padded_len;
+  int           status;
+
+  _ARGCHK(inkey  != NULL);
+  _ARGCHK(outkey != NULL);
+  _ARGCHK(outlen != NULL);
+  _ARGCHK(key    != NULL);
+
+  /* reject an unknown PRNG descriptor before doing any work */
+  status = prng_is_valid(prng_idx);
+  if (status != CRYPT_OK) {
+     return status;
+  }
+
+  /* modulus size, in bits and in bytes */
+  nbits  = mp_count_bits(&(key->N));
+  nbytes = mp_unsigned_bin_size(&(key->N));
+
+  /* the caller's buffer has to hold a full modulus-sized block */
+  if (*outlen < nbytes) {
+     return CRYPT_BUFFER_OVERFLOW;
+  }
+
+  /* build the padded block directly in the output buffer */
+  padded_len = *outlen;
+  status = pkcs_1_v15_es_encode(inkey, inlen, nbits, prng, prng_idx, outkey, &padded_len);
+  if (status != CRYPT_OK) {
+     return status;
+  }
+
+  /* encrypt in place with the public exponent */
+  return rsa_exptmod(outkey, padded_len, outkey, outlen, PK_PUBLIC, prng, prng_idx, key);
+}
+
+#endif

+ 57 - 0
rsa_v15_sign_hash.c

@@ -0,0 +1,57 @@
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtomcrypt.org
+ */
+
+#include "mycrypt.h"
+
+#ifdef MRSA
+
+/* Apply PKCS #1 v1.5 signature padding to a message hash and RSA sign it.
+ *
+ * msghash, msghashlen  hash value to sign
+ * sig, siglen          output buffer; *siglen is its capacity on entry and
+ *                      is passed to rsa_exptmod() to receive the result length
+ * prng, prng_idx       PRNG state and descriptor index, forwarded to rsa_exptmod()
+ * hash_idx             hash descriptor index, forwarded to pkcs_1_v15_sa_encode()
+ * key                  RSA key used for the PK_PRIVATE operation
+ *
+ * Returns CRYPT_BUFFER_OVERFLOW when *siglen is smaller than the modulus,
+ * otherwise the status of prng_is_valid(), hash_is_valid(),
+ * pkcs_1_v15_sa_encode() or rsa_exptmod().
+ */
+int rsa_v15_sign_hash(const unsigned char *msghash,  unsigned long  msghashlen,
+                            unsigned char *sig,      unsigned long *siglen,
+                            prng_state    *prng,     int            prng_idx,
+                            int            hash_idx, rsa_key       *key)
+{
+  unsigned long nbits, nbytes, padded_len;
+  int           status;
+
+  _ARGCHK(msghash  != NULL);
+  _ARGCHK(sig      != NULL);
+  _ARGCHK(siglen   != NULL);
+  _ARGCHK(key      != NULL);
+
+  /* both descriptor indices must be valid: PRNG first, then hash */
+  status = prng_is_valid(prng_idx);
+  if (status != CRYPT_OK) {
+     return status;
+  }
+  status = hash_is_valid(hash_idx);
+  if (status != CRYPT_OK) {
+     return status;
+  }
+
+  /* modulus size, in bits and in bytes */
+  nbits  = mp_count_bits(&(key->N));
+  nbytes = mp_unsigned_bin_size(&(key->N));
+
+  /* the signature buffer has to hold a full modulus-sized block */
+  if (*siglen < nbytes) {
+     return CRYPT_BUFFER_OVERFLOW;
+  }
+
+  /* build the padded hash block directly in the signature buffer */
+  padded_len = *siglen;
+  status = pkcs_1_v15_sa_encode(msghash, msghashlen, hash_idx, nbits, sig, &padded_len);
+  if (status != CRYPT_OK) {
+     return status;
+  }
+
+  /* sign in place with the private exponent */
+  return rsa_exptmod(sig, padded_len, sig, siglen, PK_PRIVATE, prng, prng_idx, key);
+}
+
+#endif

+ 69 - 0
rsa_v15_verify_hash.c

@@ -0,0 +1,69 @@
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, [email protected], http://libtomcrypt.org
+ */
+
+#include "mycrypt.h"
+
+#ifdef MRSA
+
+/* RSA "de-sign" a signature with the public key, then check the PKCS #1 v1.5
+ * signature padding against the supplied message hash.
+ *
+ * sig, siglen          signature; siglen must equal the byte length of key->N
+ * msghash, msghashlen  hash value the signature is expected to cover
+ * prng, prng_idx       PRNG state and descriptor index, forwarded to rsa_exptmod()
+ * hash_idx             hash descriptor index, forwarded to pkcs_1_v15_sa_decode()
+ * stat                 verification flag; cleared to 0 on entry, then filled
+ *                      in by pkcs_1_v15_sa_decode()
+ * key                  RSA key used for the PK_PUBLIC operation
+ *
+ * Returns CRYPT_INVALID_PACKET when siglen does not match the modulus size,
+ * CRYPT_MEM when the scratch buffer cannot be allocated, otherwise the
+ * status of hash_is_valid(), prng_is_valid(), rsa_exptmod() or
+ * pkcs_1_v15_sa_decode().
+ */
+int rsa_v15_verify_hash(const unsigned char *sig,      unsigned long siglen,
+                        const unsigned char *msghash,  unsigned long msghashlen,
+                              prng_state    *prng,     int           prng_idx,
+                              int            hash_idx, int          *stat,
+                              rsa_key       *key)
+{
+  unsigned long modulus_bitlen, modulus_bytelen, x;
+  int           err;
+  unsigned char *tmpbuf;
+
+  _ARGCHK(msghash  != NULL);
+  _ARGCHK(sig      != NULL);
+  _ARGCHK(stat     != NULL);
+  _ARGCHK(key      != NULL);
+
+  /* default to invalid so an early error return never leaves *stat unset
+   * (assumes 0 means "not verified", as elsewhere in the library -- confirm) */
+  *stat = 0;
+
+  /* valid hash ? */
+  if ((err = hash_is_valid(hash_idx)) != CRYPT_OK) {
+     return err;
+  }
+
+  /* valid prng ? */
+  if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) {
+     return err;
+  }
+
+  /* get modulus len in bits */
+  modulus_bitlen = mp_count_bits(&(key->N));
+
+  /* the signature must be exactly as long as the modulus */
+  modulus_bytelen = mp_unsigned_bin_size(&(key->N));
+  if (modulus_bytelen != siglen) {
+     return CRYPT_INVALID_PACKET;
+  }
+
+  /* allocate temp buffer for decoded sig */
+  tmpbuf = XMALLOC(siglen);
+  if (tmpbuf == NULL) {
+     return CRYPT_MEM;
+  }
+
+  /* RSA decode it  */
+  x = siglen;
+  if ((err = rsa_exptmod(sig, siglen, tmpbuf, &x, PK_PUBLIC, prng, prng_idx, key)) != CRYPT_OK) {
+     XFREE(tmpbuf);
+     return err;
+  }
+
+  /* PKCS #1 v1.5 decode it */
+  err = pkcs_1_v15_sa_decode(msghash, msghashlen, tmpbuf, x, hash_idx, modulus_bitlen, stat);
+  XFREE(tmpbuf);
+  return err;
+}
+
+#endif

+ 23 - 1
sprng.c

@@ -23,7 +23,11 @@ const struct _prng_descriptor sprng_desc =
     &sprng_start,
     &sprng_add_entropy,
     &sprng_ready,
-    &sprng_read
+    &sprng_read,
+    &sprng_done,
+    &sprng_export,
+    &sprng_import
+
 };
 
 int sprng_start(prng_state *prng)
@@ -47,6 +51,24 @@ unsigned long sprng_read(unsigned char *buf, unsigned long len, prng_state *prng
    return rng_get_bytes(buf, len, NULL);
 }
 
+/* Shutdown hook for the sprng descriptor.
+ * Only argument-checks prng; no other work is performed here.
+ */
+void sprng_done(prng_state *prng)
+{
+   _ARGCHK(prng != NULL);
+}
+
+/* Export hook for the sprng descriptor.
+ * Reports an empty export: stores 0 in *outlen and returns CRYPT_OK.
+ * Neither out nor prng is read or written by this function.
+ */
+int sprng_export(unsigned char *out, unsigned long *outlen, prng_state *prng)
+{
+   _ARGCHK(outlen != NULL);
+
+   /* nothing is written to out, so the exported length is zero */
+   *outlen = 0;
+   return CRYPT_OK;
+}
+ 
+/* Import hook for the sprng descriptor.
+ * Accepts any input: in, inlen and prng are ignored and CRYPT_OK is
+ * returned unconditionally.
+ */
+int sprng_import(const unsigned char *in, unsigned long inlen, prng_state *prng)
+{
+   return CRYPT_OK;
+}
+
 #endif
 
 

+ 49 - 1
yarrow.c

@@ -19,7 +19,10 @@ const struct _prng_descriptor yarrow_desc =
     &yarrow_start,
     &yarrow_add_entropy,
     &yarrow_ready,
-    &yarrow_read
+    &yarrow_read,
+    &yarrow_done,
+    &yarrow_export,
+    &yarrow_import
 };
 
 int yarrow_start(prng_state *prng)
@@ -180,5 +183,50 @@ unsigned long yarrow_read(unsigned char *buf, unsigned long len, prng_state *prn
    return len;
 }
 
+/* Shutdown hook for the Yarrow PRNG descriptor.
+ * Currently only argument-checks prng; nothing is released or cleared.
+ */
+void yarrow_done(prng_state *prng)
+{
+   _ARGCHK(prng != NULL);
+   /* call cipher done when we invent one ;-) */
+}
+
+/* Export hook for the Yarrow PRNG descriptor.
+ * Fills out with 64 bytes obtained from yarrow_read() and stores 64 in
+ * *outlen. Returns CRYPT_BUFFER_OVERFLOW when *outlen is below 64 on entry,
+ * CRYPT_ERROR_READPRNG when the read comes up short, CRYPT_OK otherwise.
+ */
+int yarrow_export(unsigned char *out, unsigned long *outlen, prng_state *prng)
+{
+   const unsigned long blob_len = 64;
+
+   _ARGCHK(out    != NULL);
+   _ARGCHK(outlen != NULL);
+   _ARGCHK(prng   != NULL);
+
+   /* caller's buffer must be able to hold the whole blob */
+   if (blob_len > *outlen) {
+      return CRYPT_BUFFER_OVERFLOW;
+   }
+
+   /* the exported blob is simply PRNG output */
+   if (yarrow_read(out, blob_len, prng) == blob_len) {
+      *outlen = blob_len;
+      return CRYPT_OK;
+   }
+   return CRYPT_ERROR_READPRNG;
+}
+ 
+/* Import hook for the Yarrow PRNG descriptor.
+ * Restarts prng with yarrow_start(), feeds the 64-byte blob in as entropy
+ * and makes the generator ready.
+ *
+ * in, inlen  blob to import; inlen must be exactly 64
+ * prng       PRNG state to (re)initialise
+ *
+ * Returns CRYPT_INVALID_ARG when inlen is not 64, otherwise the status of
+ * yarrow_start(), yarrow_add_entropy() or yarrow_ready().
+ */
+int yarrow_import(const unsigned char *in, unsigned long inlen, prng_state *prng)
+{
+   int err;
+
+   _ARGCHK(in   != NULL);
+   _ARGCHK(prng != NULL);
+
+   if (inlen != 64) {
+      return CRYPT_INVALID_ARG;
+   }
+
+   if ((err = yarrow_start(prng)) != CRYPT_OK) {
+      return err;
+   }
+   /* prng is already a prng_state *; pass it through as-is (not &prng) */
+   if ((err = yarrow_add_entropy(in, 64, prng)) != CRYPT_OK) {
+      return err;
+   }
+   return yarrow_ready(prng);
+}
+
 #endif