4 years ago · 2092250088
--- a/src/pk/asn1/der/utf8/der_decode_utf8_string.c
+++ b/src/pk/asn1/der/utf8/der_decode_utf8_string.c
@@ -11,11 +11,11 @@
 
															 #ifdef LTC_DER
														
 
															 /**
														
 
															-  Store a UTF8 STRING
														
 
															+  Decode a UTF8 STRING and recover an array of unicode characters.
														
 
															   @param in      The DER encoded UTF8 STRING
														
 
															   @param inlen   The size of the DER UTF8 STRING
														
 
															-  @param out     [out] The array of utf8s stored (one per char)
														
 
															-  @param outlen  [in/out] The number of utf8s stored
														
 
															+  @param out     [out] The array of unicode characters (wchar_t*)
														
 
															+  @param outlen  [in/out] The number of unicode characters in the array
														
 
															   @return CRYPT_OK if successful
														
 
															 */
														
 
															 int der_decode_utf8_string(const unsigned char *in,  unsigned long inlen,
														
@@ -51,23 +51,47 @@ int der_decode_utf8_string(const unsigned char *in,  unsigned long inlen,
 
															       return CRYPT_INVALID_PACKET;
														
 
															    }
														
 
															-   /* proceed to decode */
														
 
															+   /* proceed to recover unicode characters from utf8 data.
														
 
															+      for reference see Section 3 of RFC 3629:
														
 
															+
														
 
															+        https://tools.ietf.org/html/rfc3629#section-3
														
 
															+    */
														
 
															    for (y = 0; x < inlen; ) {
														
 
															-      /* get first byte */
														
 
															+      /* read first byte */
														
 
															       tmp = in[x++];
														
 
															-      /* count number of bytes */
														
 
															+      /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
														
 
															+         the form of those bytes must match a row in the following table:
														
 
															+
														
 
															+           0xxxxxxx
														
 
															+           110xxxxx 10xxxxxx
														
 
															+           1110xxxx 10xxxxxx 10xxxxxx
														
 
															+           11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
														
 
															+
														
 
															+         the number of leading ones in the first byte (0,2,3,4) determines the
														
 
															+         number of remaining bytes to read (0,1,2,3)
														
 
															+       */
														
 
															+
														
 
															+      /* determine z, the number of leading ones.
														
 
															+         this is done by left-shifting tmp, which clears the ms-bits */
														
 
															       for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);
														
 
															-      if (z == 1 || z > 4 || (x + (z - 1) > inlen)) {
														
 
															+      /* z should be in {0,2,3,4} */
														
 
															+      if (z == 1 || z > 4) {
														
 
															          return CRYPT_INVALID_PACKET;
														
 
															       }
														
 
															-      /* decode, grab upper bits */
														
 
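															+      /* right-shift tmp by z: the marker bits were cleared by the left shift above, so this moves the payload bits back to their original least-significant positions */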
														
 
															       tmp >>= z;
														
 
															-      /* grab remaining bytes */
														
 
															-      if (z > 1) { --z; }
														
 
															+      /* now update z so it equals the number of additional bytes to read */
														
 
															+      if (z > 0) { --z; }
														
 
															+
														
 
															+      if (x + z > inlen) {
														
 
															+         return CRYPT_INVALID_PACKET;
														
 
															+      }
														
 
															+
														
 
															+      /* read remaining bytes */
														
 
															       while (z-- != 0) {
														
 
															          if ((in[x] & 0xC0) != 0x80) {
														
 
															             return CRYPT_INVALID_PACKET;
														
--- a/tests/der_test.c
+++ b/tests/der_test.c
@@ -1603,6 +1603,8 @@ int der_test(void)
 
															    static const unsigned char utf8_1_der[] = { 0x0C, 0x07, 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
														
 
															    static const wchar_t utf8_2[]           = { 0xD55C, 0xAD6D, 0xC5B4 };
														
 
															    static const unsigned char utf8_2_der[] = { 0x0C, 0x09, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4 };
														
 
															+   static const wchar_t utf8_3[]           = { 0x05E9, 0x05DC, 0x05D5, 0x05DD };
														
 
															+   static const unsigned char utf8_3_der[] = { 0x0C, 0x08, 0xD7, 0xA9, 0xD7, 0x9C, 0xD7, 0x95, 0xD7, 0x9D };
														
 
															    unsigned char utf8_buf[32];
														
 
															    wchar_t utf8_out[32];
														
@@ -1961,6 +1963,24 @@ tmp_time.off_hh);
 
															         return 1;
														
 
															      }
														
 
															+     /* encode it */
														
 
															+     x = sizeof(utf8_buf);
														
 
															+     DO(der_encode_utf8_string(utf8_3, sizeof(utf8_3) / sizeof(utf8_3[0]), utf8_buf, &x));
														
 
															+     if (x != sizeof(utf8_3_der) || memcmp(utf8_buf, utf8_3_der, x)) {
														
 
															+        fprintf(stderr, "DER UTF8_3 encoded to %lu bytes\n", x);
														
 
															+        for (y = 0; y < x; y++) fprintf(stderr, "%02x ", (unsigned)utf8_buf[y]);
														
 
															+        fprintf(stderr, "\n");
														
 
															+        return 1;
														
 
															+     }
														
 
															+     /* decode it */
														
 
															+     y = sizeof(utf8_out) / sizeof(utf8_out[0]);
														
 
															+     DO(der_decode_utf8_string(utf8_buf, x, utf8_out, &y));
														
 
															+     if (y != (sizeof(utf8_3) / sizeof(utf8_3[0])) || memcmp(utf8_3, utf8_out, y * sizeof(wchar_t))) {
														
 
															+        fprintf(stderr, "DER UTF8_3 decoded to %lu wchar_t\n", y);
														
 
															+        for (x = 0; x < y; x++) fprintf(stderr, "%04lx ", (unsigned long)utf8_out[x]);
														
 
															+        fprintf(stderr, "\n");
														
 
															+        return 1;
														
 
															+     }
														
 
															    der_set_test();
														
 
															    der_flexi_test();