Browse Source

- added fast base64 and base16 encoders/decoders along with test program and
some results

Andrei Pelinescu-Onciul 17 years ago
parent
commit
45a13a97e9
4 changed files with 1560 additions and 0 deletions
  1. 230 0
      basex.c
  2. 869 0
      basex.h
  3. 401 0
      test/basex.c
  4. 60 0
      test/basex.txt

+ 230 - 0
basex.c

@@ -0,0 +1,230 @@
+/*
+ * $Id$
+ *
+ * convert/decode to/from ascii using various bases
+ *
+ * Copyright (C) 2008 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Defines:
+ *  BASE64_LOOKUP_TABLE - use small lookup tables for conversions (faster
+ *                         in general)
+ *
+ * History:
+ * --------
+ *  2008-06-11  created by andrei
+ */
+
+#include "basex.h"
+
+#ifdef BASE16_LOOKUP_TABLE
+#ifdef BASE16_LOOKUP_LARGE
+
+/* encode table used by HEX_HI(): indexed by a whole byte value, each entry
+ * is the uppercase hex digit of the byte's high nibble (index>>4) */
+unsigned char _bx_hexdig_hi[256]={
+	'0', '0', '0', '0', '0', '0', '0', '0',
+	'0', '0', '0', '0', '0', '0', '0', '0',
+	'1', '1', '1', '1', '1', '1', '1', '1',
+	'1', '1', '1', '1', '1', '1', '1', '1',
+	'2', '2', '2', '2', '2', '2', '2', '2',
+	'2', '2', '2', '2', '2', '2', '2', '2',
+	'3', '3', '3', '3', '3', '3', '3', '3',
+	'3', '3', '3', '3', '3', '3', '3', '3',
+	'4', '4', '4', '4', '4', '4', '4', '4',
+	'4', '4', '4', '4', '4', '4', '4', '4',
+	'5', '5', '5', '5', '5', '5', '5', '5',
+	'5', '5', '5', '5', '5', '5', '5', '5',
+	'6', '6', '6', '6', '6', '6', '6', '6',
+	'6', '6', '6', '6', '6', '6', '6', '6',
+	'7', '7', '7', '7', '7', '7', '7', '7',
+	'7', '7', '7', '7', '7', '7', '7', '7',
+	'8', '8', '8', '8', '8', '8', '8', '8',
+	'8', '8', '8', '8', '8', '8', '8', '8',
+	'9', '9', '9', '9', '9', '9', '9', '9',
+	'9', '9', '9', '9', '9', '9', '9', '9',
+	'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
+	'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
+	'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
+	'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
+	'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
+	'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
+	'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D',
+	'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D',
+	'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E',
+	'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E',
+	'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
+	'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'
+};
+
+/* encode table used by HEX_LOW(): indexed by a whole byte value, each entry
+ * is the uppercase hex digit of the byte's low nibble (index&0xf) */
+unsigned char _bx_hexdig_low[256]={
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+/* decode table used by UNHEX(): indexed by the character code, holds the
+ * digit value for '0'-'9', 'A'-'F' and 'a'-'f' and 0xff for everything
+ * else (10 entries per row, so row k starts at index 10*k) */
+unsigned char _bx_unhexdig256[256]={
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 
+0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
+0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, 0x0c, 
+0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+#else /* BASE16_LOOKUP_LARGE */
+
+/* small encode table: hex digit for a 4 bit value (the 17th byte holds the
+ * string literal's terminating 0) */
+unsigned char _bx_hexdig[16+1]="0123456789ABCDEF";
+
+/* small decode table for UNHEX(), indexed by (c-'0')&0x1f:
+ *  slots 0..9 hold the values of '0'..'9', slots 17..22 the values of
+ *  'A'..'F' (the 5 bit mask maps 'a'..'f' onto the same slots) and every
+ *  other slot holds 0xff (invalid digit).
+ * All 32 slots are initialized explicitly: a missing last initializer
+ * would leave slot 31 (hit by '/', 'O' and 'o') at 0, which looks like a
+ * valid digit. */
+unsigned char _bx_unhexdig32[32]={
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, 0x0c,
+	0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff };
+
+#endif /*  BASE16_LOOKUP_LARGE */
+#endif /* BASE16_LOOKUP_TABLE */
+
+#ifdef BASE64_LOOKUP_TABLE
+
+#ifdef BASE64_LOOKUP_LARGE
+/* large lookup tables, 2.5 k */
+
+/* encode tables, one per output character of a 3 byte group; they are
+ * filled at runtime by init_basex() */
+unsigned char _bx_b64_first[256];
+unsigned char _bx_b64_second[4][256];
+unsigned char _bx_b64_third[4][256];
+unsigned char _bx_b64_fourth[256];
+
+/* decode table indexed by the character code, filled by init_basex() */
+unsigned char _bx_ub64[256];
+
+#elif defined BASE64_LOOKUP_8K
+/* encode table: two output characters (packed in a short) for each 12 bit
+ * input value; filled at runtime by init_basex() */
+unsigned short _bx_b64_12[4096];
+/* decode table indexed by the character code, filled by init_basex() */
+unsigned char _bx_ub64[256];
+
+#else /*  BASE64_LOOKUP_LARGE */
+/* very small lookup, 65 bytes */
+
+/* standard base64 alphabet: character for each 6 bit value (the 65th byte
+ * holds the string literal's terminating 0) */
+unsigned char _bx_b64[64+1]=
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+
+/* small decode table, indexed by (c&0x7f)-0x2b (see UNBASE64()): the first
+ * slot is '+' (0x2b) and the last one is 0x7f; slots for characters
+ * outside the base64 alphabet hold 0xff */
+unsigned char _bx_ub64[0x54+1]={
+		                              0x3e, 0xff, 0xff, 0xff, 0x3f,
+		0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d,
+		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02,
+		0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
+		0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+		0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1a,
+		0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
+		0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e,
+		0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+#endif /*  BASE64_LOOKUP_LARGE */
+
+#endif /* BASE64_LOOKUP_TABLE */
+
+/* short local aliases for the table-less helpers from basex.h; init_basex()
+ * uses them to compute the lookup table contents */
+#define b64_enc_char(c) base64_enc_char(c)
+#define b64_dec_char(c) base64_dec_char(c)
+
+/* fills the base64 lookup tables that are computed at runtime, i.e. the
+ * ones used with BASE64_LOOKUP_LARGE or BASE64_LOOKUP_8K; in any other
+ * configuration there is nothing to do.
+ * returns: always 0 */
+int init_basex()
+{
+#ifdef BASE64_LOOKUP_TABLE
+#if defined BASE64_LOOKUP_LARGE || defined BASE64_LOOKUP_8K
+	int r;
+#endif
+#ifdef BASE64_LOOKUP_LARGE
+	int i;
+	
+	/* encode tables */
+	/* 1st char: top 6 bits of the 1st input byte (r) */
+	for (r=0; r<256; r++)
+		_bx_b64_first[r]=b64_enc_char(((unsigned char)r)>>2);
+	/* 2nd char: i = low 2 bits of the 1st byte, r = 2nd input byte */
+	for(i=0; i<4; i++){
+		for (r=0; r<256; r++)
+			_bx_b64_second[i][r]=
+					b64_enc_char((unsigned char)((i<<4)|(r>>4)));
+	}
+	/* 3rd char: r = 2nd input byte, i = top 2 bits of the 3rd byte */
+	for(i=0; i<4; i++){
+		for (r=0; r<256; r++)
+			_bx_b64_third[i][r]=
+				b64_enc_char((unsigned char)(((r<<2)&0x3f)|i));
+	}
+	/* 4th char: low 6 bits of the 3rd input byte (r) */
+	for (r=0; r<256; r++)
+		_bx_b64_fourth[r]=b64_enc_char(((unsigned char)r&0x3f));
+	
+	/* decode */
+	for (r=0; r<256; r++)
+		_bx_ub64[r]=b64_dec_char((unsigned char)r);
+#elif defined BASE64_LOOKUP_8K
+	/* each entry packs the 2 chars encoding the 12 bit value r, ordered
+	 * so that the char for the upper 6 bits comes first in memory */
+	for (r=0; r< 4096; r++)
+#if defined __IS_LITTLE_ENDIAN
+		_bx_b64_12[r]=b64_enc_char(r>>6)|(b64_enc_char(r&0x3f)<<8);
+#elif defined __IS_BIG_ENDIAN /* __IS_LITTLE_ENDIAN */
+		_bx_b64_12[r]=(b64_enc_char(r>>6)<<8)|b64_enc_char(r&0x3f);
+#else /* __IS_LITTLE_ENDIAN */
+#error Neither __IS_LITTE_ENDIAN nor __IS_BIG_ENDIAN  defined
+#endif
+	/* decode */
+	for (r=0; r<256; r++)
+		_bx_ub64[r]=b64_dec_char((unsigned char)r);
+#endif
+#endif
+	return 0;
+}

+ 869 - 0
basex.h

@@ -0,0 +1,869 @@
+/*
+ * $Id$
+ *
+ * convert/decode to/from ascii using various bases
+ *
+ * Copyright (C) 2008 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Functions:
+ *  init_basex()                              - inits internal lookup tables
+ *  HEX_HI(unsigned char c)                   - returns the high 4 bits of
+ *                                              c converted to a hex digit
+ *  HEX_LOW(unsigned char c)                  - returns the low 4 bits of
+ *                                              c converted to a hex digit
+ *  UNHEX(unsigned char hex_digit)            - converts hex_digit to a
+ *                                              number (0..15); it might
+ *                                              return 0xff for invalid 
+ *                                              digit (but with some compile
+ *                                              option it won't check)
+ *
+ *  base16_enc(src, src_len, dst, dst_len)    - encode to standard hex
+ *  base16_dec(src, src_len, dst, dst_len)    - decode from standard hex
+ *  base16_enc_len(len)                       - length needed to encode len
+ *                                              bytes (macro)
+ *  base16_max_dec_len(len)                   - length needed to decode a 
+ *                                              string of size len
+ *
+ *  base64_enc(src, src_len, dst, dst_len)    - encode to base64, standard
+ *                                              alphabet
+ *  base64_dec(src, src_len, dst, dst_len)    - decode from base64, standard
+ *                                              alphabet
+ *  base64_enc_len(len)                       - length needed to encode
+ *                                               len bytes (macro)
+ *  base64_max_dec_len(len)                   - maximum length needed to
+ *                                               decode len bytes (macro)
+ *  base64_dec_len(str, len)                  - size of the decoded str 
+ *
+ *
+ *  q_base64_enc(src, src_len, dst, dst_len)  - encode to special base64
+ *                                              alphabet (non standard)
+ *
+ *  q_base64_dec(src, src_len, dst, dst_len)  - decode from special non-
+ *                                              standard base64 alphabet
+ *  All the above functions return the size used (in dst) on success and
+ *   0 or a negative number (which is -1*size_needed) on error.
+ *
+ * There are close to no checks for validity, an unexpected char will lead
+ * to a corrupted result, but the functions won't return error.
+ *
+ * Notes:
+ *  on a core2 duo the versions with lookup tables are way faster (see
+ *  http://www.experts-exchange.com/Programming/Languages/CPP/Q_21988706.html
+ *  for some interesting tests and ideas).
+ *
+ *  Test results for 40 bytes  (typical ser nonce) in average cpu cycles:
+ *                    lookup   lookup_large lookup8k no-lookup
+ *  base16_enc           211/231  218/199      -       1331
+ *  base16_dec           252/251  236          -       1226
+ *  base64_enc           209      186         156      1005
+ *  base64_dec           208      207         207      1242
+ *  q_base64_enc         -                              288
+ *  q_base64_dec         -                              281
+ *  (see test/basex.txt for more results)
+ *
+ * Defines:
+ *  BASE64_LOOKUP_TABLE/NO_BASE64_LOOKUP_TABLE - use (default)/don't use
+ *     small lookup tables for conversions (faster in general).
+ *  BASE64_LOOKUP_LARGE    - use large lookup tables (2560 bytes for 
+ *    encoding and 256 bytes for decoding; without it 64 bytes are used for
+ *    encoding and 85 bytes for decoding).
+ *  BASE64_LOOKUP_8K - use even larger lookup tables (8K for encoding and
+ *    256 for decoding); also try to write 2 bytes at a time (short) if
+ *    the destination is 2 byte aligned
+ *
+ *  BASE16_LOOKUP_TABLE/NO_BASE16_LOOKUP_TABLE - use (default)/don't use
+ *     small lookup tables for conversions (faster in general).
+ *  BASE16_LOOKUP_LARGE  - use large lookup tables (512 bytes for 
+ *    encoding and 256 bytes for decoding)
+ *  BASE16_READ_WHOLE_INTS - read an int at a time
+ *
+ * History:
+ * --------
+ *  2008-06-11  created by andrei
+ */
+ 
+
+
+#ifndef _basex_h
+#define _basex_h
+
+#include "compiler_opt.h"
+
+/* defaults */
+/* every feature below is on unless the matching NO_* macro is defined
+ * before this header is included */
+#ifndef NO_BASE16_LOOKUP_TABLE
+#define BASE16_LOOKUP_TABLE
+#endif
+
+#ifndef NO_BASE64_LOOKUP_TABLE
+#define BASE64_LOOKUP_TABLE
+#endif
+
+#ifndef NO_BASE64_LOOKUP_8K
+#define BASE64_LOOKUP_8K
+#endif
+
+#ifndef NO_BASE16_LOOKUP_LARGE
+#define BASE16_LOOKUP_LARGE
+#endif
+
+/* the 8K tables take precedence: BASE64_LOOKUP_LARGE is turned on only
+ * when BASE64_LOOKUP_8K is not defined */
+#if !defined NO_BASE64_LOOKUP_LARGE && !defined BASE64_LOOKUP_8K
+#define BASE64_LOOKUP_LARGE
+#endif
+
+
+
+#if defined BASE16_READ_WHOLE_INTS || defined BASE64_READ_WHOLE_INTS || \
+	defined BASE64_LOOKUP_8K
+#include "endianness.h"
+
+/* aligns p to a type* pointer, type must have a 2^k size:
+ * rounds the address up to the next multiple of sizeof(type) (an already
+ * aligned p is returned unchanged).
+ * NOTE(review): the address is converted through a long, so this assumes
+ * a pointer fits in a long - confirm for the target platforms */
+#define ALIGN_POINTER(p, type) \
+	((type*) ((long)((char*)(p)+sizeof(type)-1)&~(long)(sizeof(type)-1)))
+
+/* p rounded up to unsigned int alignment */
+#define ALIGN_UINT_POINTER(p) ALIGN_POINTER(p, unsigned int)
+
+#endif
+
+
+#ifdef BASE16_LOOKUP_TABLE
+
+#ifdef BASE16_LOOKUP_LARGE
+/* use large tables: 512 for lookup and 256 for decode */
+
+extern unsigned char _bx_hexdig_hi[256];
+extern unsigned char _bx_hexdig_low[256];
+
+/* hex digit for the high / low nibble of the byte h */
+#define HEX_HI(h)	_bx_hexdig_hi[(unsigned char)(h)]
+#define HEX_LOW(h)	_bx_hexdig_low[(unsigned char)(h)]
+
+extern unsigned char _bx_unhexdig256[256];
+
+/* value of the hex digit h; h is used directly as the table index (no
+ * cast), so it must be in the 0..255 range */
+#define UNHEX(h)	_bx_unhexdig256[(h)]
+
+#else /* BASE16_LOOKUP_LARGE */
+/* use small tables: 16 bytes for lookup and 32 for decode */
+
+extern unsigned char _bx_hexdig[16+1];
+
+/* hex digit for the 4 bit value h (h must be 0..15) */
+#define HEX_4BITS(h) _bx_hexdig[(h)]
+#define HEX_HI(h)	HEX_4BITS(((unsigned char)(h))>>4)
+#define HEX_LOW(h)	HEX_4BITS((h)&0xf)
+
+extern unsigned char _bx_unhexdig32[32];
+/* value of the hex digit h; the index is reduced to 5 bits, so characters
+ * that are not hex digits alias some table slot instead of being
+ * range-checked */
+#define UNHEX(h) _bx_unhexdig32[(((h))-'0')&0x1f]
+
+#endif /* BASE16_LOOKUP_LARGE */
+
+#else /* BASE16_LOOKUP_TABLE */
+/* no lookup tables */
+#if 0
+#define HEX_4BITS(h) (unsigned char)((unlikely((h)>=10))?((h)-10+'A'):(h)+'0')
+#define UNHEX(c) (unsigned char)((unlikely((c)>='A'))?(c)-'A'+10:(c)-'0')
+#else
+/* branchless versions: each comparison yields 0 or 1, so (cond)-1 is
+ * either an all-ones or an all-zero mask that selects one of the two
+ * candidate results.
+ * The letter candidates must carry the 10 offset ((hc)-10+'A' and
+ * (c)-'A'+10), otherwise 10..15 would be encoded as 'K'..'P' and 'A'..'F'
+ * decoded as 0..5.
+ * HEX_4BITS(hc): hc must be 0..15; UNHEX(c): c must be '0'-'9' or 'A'-'F'
+ * (no validity check, lowercase is not handled). Both evaluate their
+ * argument several times. */
+#define HEX_4BITS(hc) (unsigned char)( ((((hc)>=10)-1)&((hc)+'0')) | \
+									((((hc)<10)-1)&((hc)-10+'A')) )
+#define UNHEX(c) (unsigned char) ( ((((c)>'9')-1)& ((c)-'0')) | \
+								((((c)<='9')-1)&((c)-'A'+10)) )
+#endif 
+
+#define HEX_HI(h)	HEX_4BITS(((unsigned char)(h))>>4)
+#define HEX_LOW(h)	HEX_4BITS((h)&0xf)
+
+#endif /* BASE16_LOOKUP_TABLE */
+
+
+#ifdef BASE64_LOOKUP_TABLE
+#ifdef BASE64_LOOKUP_LARGE
+/* large lookup tables, 2.5 k */
+
+extern unsigned char _bx_b64_first[256];
+extern unsigned char _bx_b64_second[4][256];
+extern unsigned char _bx_b64_third[4][256];
+extern unsigned char _bx_b64_fourth[256];
+
+/* the 4 output chars for the input bytes a, b, c; whole bytes are used as
+ * table indexes, the bit extraction is already folded into the tables */
+#define BASE64_1(a) _bx_b64_first[(a)]
+#define BASE64_2(a,b) _bx_b64_second[(a)&0x3][(b)]
+#define BASE64_3(b,c) _bx_b64_third[(c)>>6][(b)]
+#define BASE64_4(c) _bx_b64_fourth[(c)]
+
+extern unsigned char _bx_ub64[256];
+/* 6 bit value of the base64 char v (table filled by init_basex()) */
+#define UNBASE64(v) _bx_ub64[(v)]
+
+#elif defined BASE64_LOOKUP_8K
+/* even larger encode tables: 8k */
+extern unsigned short _bx_b64_12[4096];
+
+/* return a word (16 bits) */
+/* BASE64_12: chars 1 and 2, indexed by byte a and the high nibble of b;
+ * BASE64_34: chars 3 and 4, indexed by the low nibble of b and byte c */
+#define BASE64_12(a,b)	_bx_b64_12[((a)<<4)|((b)>>4)]
+#define BASE64_34(b,c)	_bx_b64_12[(((b)&0xf)<<8)|(c)]
+/* FIRST_8B / LAST_8B: the byte of the word s that comes first / last in
+ * memory, i.e. the 1st / 2nd char of the pair */
+#ifdef __IS_LITTLE_ENDIAN
+#define FIRST_8B(s)	((unsigned char)(s))
+#define LAST_8B(s)	((s)>>8)
+#elif defined __IS_BIG_ENDIAN
+#define FIRST_8B(s)	((s)>>8)
+#define LAST_8B(s)	((unsigned char)(s))
+#else
+#error neither __IS_LITTLE_ENDIAN nor __IS_BIG_ENDIAN are defined
+#endif
+
+
+extern unsigned char _bx_ub64[256];
+/* 6 bit value of the base64 char v (table filled by init_basex()) */
+#define UNBASE64(v) _bx_ub64[(v)]
+
+#else /* BASE64_LOOKUP_LARGE */
+/* small lookup tables */
+extern unsigned char _bx_b64[64+1];
+
+/* base64 char for the 6 bit value v */
+#define BASE64_DIG(v)	_bx_b64[(v)]
+
+/* the 4 output chars for the input bytes a, b, c */
+#define BASE64_1(a)		BASE64_DIG((a)>>2)
+#define BASE64_2(a, b)	BASE64_DIG( (((a)<<4)&0x3f) | ((b)>>4))
+#define BASE64_3(b, c)	BASE64_DIG( (((b)<<2)&0x3f) | ((c)>>6))
+#define BASE64_4(c)		BASE64_DIG((c)&0x3f)
+
+extern unsigned char _bx_ub64[0x54+1];
+/* 6 bit value of the base64 char v; the table starts at '+' (0x2b).
+ * NOTE(review): for characters whose low 7 bits are below 0x2b the index
+ * is negative and there is no range check */
+#define UNBASE64(v) _bx_ub64[(((v)&0x7f)-0x2b)]
+
+#endif /* BASE64_LOOKUP_LARGE */
+
+
+#else /* BASE64_LOOKUP_TABLE */
+
+#define BASE64_DIG(v) base64_enc_char(v)
+#define BASE64_1(a)		BASE64_DIG((a)>>2)
+#define BASE64_2(a, b)	BASE64_DIG( (((a)<<4)&0x3f) | ((b)>>4))
+#define BASE64_3(b, c)	BASE64_DIG( (((b)<<2)&0x3f) | ((c)>>6))
+#define BASE64_4(c)		BASE64_DIG((c)&0x3f)
+
+#define UNBASE64(v) base64_dec_char(v)
+
+#endif /* BASE64_LOOKUP_TABLE */
+
+
+
+/* all the length macros below fully parenthesize their arguments, so
+ * expressions like base16_enc_len(a+b) expand correctly */
+
+/* length needed for encoding l bytes */
+#define base16_enc_len(l) ((l)*2)
+/* maximum length needed for decoding l bytes */
+#define base16_max_dec_len(l) ((l)/2)
+/* actual space needed for decoding a string b of size l */
+#define base16_dec_len(b, l) base16_max_dec_len(l)
+/* minimum valid source len for decoding */
+#define base16_dec_min_len() 2
+/* minimum valid source len for encoding */
+#define base16_enc_min_len() 0
+
+/* space needed for encoding l bytes */
+#define base64_enc_len(l) (((l)+2)/3*4)
+/* maximum space needed for decoding l bytes */
+#define base64_max_dec_len(l) ((l)/4*3)
+/* actual space needed for decoding a string b of size l, l>=4
+ * (the maximum minus one byte for each trailing '=') */
+#define base64_dec_len(b, l) \
+	(base64_max_dec_len(l)-((b)[(l)-2]=='=') -((b)[(l)-1]=='='))
+/* minimum valid source len for decoding */
+#define base64_dec_min_len() 4
+/* minimum valid source len for encoding */
+#define base64_enc_min_len() 0
+
+
+#ifdef BASE16_READ_WHOLE_INTS
+
+/* hex-encodes slen bytes from src into dst; the bulk of the input is read
+ * one aligned unsigned int at a time
+ * params: src  - bytes to encode
+ *         slen - number of bytes to encode
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success (2*slen),
+ *          -size_needed on error (dst too small)
+ * WARNING: the output string is not 0-term
+ */
+inline static int base16_enc(unsigned char* src, int slen,
+							 unsigned char*  dst, int dlen)
+{
+	unsigned int* p;
+	unsigned char* end;
+	int osize;
+	unsigned short us;
+	
+	osize=2*slen;
+	if (unlikely(dlen<osize))
+		return -osize;
+	end=src+slen;
+	/* first int-aligned address at or after src */
+	p=ALIGN_UINT_POINTER(src);
+	if (likely((unsigned char*)p<end)){
+		/* head: the bytes in front of the first aligned int */
+		switch((unsigned char)((unsigned char*)p-src)){
+			case 3:
+				*dst=HEX_HI(*src);
+				*(dst+1)=HEX_LOW(*src);
+				dst+=2;
+				src++;
+				/* no break */
+			case 2:
+				/* src is 2-bytes aligned here */
+				us=*(unsigned short*)(src);
+#if   defined __IS_LITTLE_ENDIAN
+				*(dst+0)=HEX_HI(us);
+				*(dst+1)=HEX_LOW(us);
+				*(dst+2)=HEX_HI(us>>8);
+				*(dst+3)=HEX_LOW(us>>8);
+#elif defined __IS_BIG_ENDIAN
+				*(dst+2)=HEX_HI(us);
+				*(dst+3)=HEX_LOW(us);
+				*(dst+0)=HEX_HI(us>>8);
+				*(dst+1)=HEX_LOW(us>>8);
+#endif
+				dst+=4;
+				/* no need to inc src */
+				break;
+			case 1:
+				*dst=HEX_HI(*src);
+				*(dst+1)=HEX_LOW(*src);
+				dst+=2;
+				/* no need to inc src */
+			case 0:
+				break;
+		}
+		/* body: whole aligned ints, 4 input bytes => 8 hex digits */
+		for(;(unsigned char*)p<=(end-4);p++,dst+=8){
+#if   defined __IS_LITTLE_ENDIAN
+			*(dst+0)=HEX_HI(*p);
+			*(dst+1)=HEX_LOW(*p);
+			*(dst+2)=HEX_HI(((*p)>>8));
+			*(dst+3)=HEX_LOW(((*p)>>8));
+			*(dst+4)=HEX_HI(((*p)>>16));
+			*(dst+5)=HEX_LOW(((*p)>>16));
+			*(dst+6)=HEX_HI(((*p)>>24));
+			*(dst+7)=HEX_LOW(((*p)>>24));
+#elif defined __IS_BIG_ENDIAN
+			*(dst+6)=HEX_HI(*p);
+			*(dst+7)=HEX_LOW(*p);
+			*(dst+4)=HEX_HI(((*p)>>8));
+			*(dst+5)=HEX_LOW(((*p)>>8));
+			*(dst+2)=HEX_HI(((*p)>>16));
+			*(dst+3)=HEX_LOW(((*p)>>16));
+			*(dst+0)=HEX_HI(((*p)>>24));
+			*(dst+1)=HEX_LOW(((*p)>>24));
+#else
+#error neither BIG ro LITTLE endian defined
+#endif /* __IS_*_ENDIAN */
+		}
+		/* tail: what is left after the last whole int */
+		src=(unsigned char*)p;
+		/* src is 2-bytes aligned (short) */
+		switch((unsigned char)((unsigned char*)end-src)){
+			case 3:
+			case 2:
+				us=*(unsigned short*)(src);
+#if   defined __IS_LITTLE_ENDIAN
+				*(dst+0)=HEX_HI(us);
+				*(dst+1)=HEX_LOW(us);
+				*(dst+2)=HEX_HI(us>>8);
+				*(dst+3)=HEX_LOW(us>>8);
+#elif defined __IS_BIG_ENDIAN
+				*(dst+2)=HEX_HI(us);
+				*(dst+3)=HEX_LOW(us);
+				*(dst+0)=HEX_HI(us>>8);
+				*(dst+1)=HEX_LOW(us>>8);
+#endif
+				if ((end-src)==3){
+					*(dst+4)=HEX_HI(*(src+2));
+					*(dst+5)=HEX_LOW(*(src+2));
+				}
+				/* no need to inc anything */
+				break;
+			case 1:
+				*dst=HEX_HI(*src);
+				*(dst+1)=HEX_LOW(*src);
+				/* no need to inc anything */
+			case 0:
+				break;
+		}
+	}else{
+		/* the input ends at or before the first aligned int, so fewer
+		 * than sizeof(unsigned int) bytes are left: encode them one by
+		 * one. This also avoids reading a short from an odd address when
+		 * src is not 2-bytes aligned. */
+		for (; src<end; src++, dst+=2){
+			*dst=HEX_HI(*src);
+			*(dst+1)=HEX_LOW(*src);
+		}
+	}
+	
+	return osize;
+}
+
+
+
+#else /* BASE16_READ_WHOLE_INTS */
+
+
+/* hex-encodes slen bytes from src into dst, one byte at a time
+ * params: src  - bytes to encode
+ *         slen - number of bytes to encode
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success (2*slen),
+ *          -size_needed on error (dst too small)
+ * WARNING: the output string is not 0-term
+ */
+inline static int base16_enc(unsigned char* src, int slen,
+							 unsigned char*  dst, int dlen)
+{
+	int needed;
+	int i;
+
+	needed=2*slen;
+	if (unlikely(dlen<needed))
+		return -needed;
+	/* each input byte produces 2 hex digits */
+	for (i=0; i<slen; i++){
+		dst[2*i]=HEX_HI(src[i]);
+		dst[2*i+1]=HEX_LOW(src[i]);
+	}
+	return needed;
+}
+
+
+#endif /* BASE16_READ_WHOLE_INTS */
+
+/* decodes slen hex digits from src into dst; a trailing unpaired digit is
+ * ignored and the digits are not validated
+ * params: src  - hex digits to decode
+ *         slen - number of hex digits
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success (slen/2),
+ *          -size_needed on error (dst too small)
+ */
+inline static int base16_dec(unsigned char* src, int slen,
+							 unsigned char* dst, int dlen)
+{
+	int nbytes;
+	int i;
+
+	nbytes=slen/2;
+	if (unlikely(dlen<nbytes))
+		return -nbytes;
+	/* 2 hex digits => 1 output byte, high nibble first */
+	for (i=0; i<nbytes; i++)
+		dst[i]=(UNHEX(src[2*i])<<4) | UNHEX(src[2*i+1]);
+	return nbytes;
+}
+
+
+
+
+
+/* helper internal function: maps a 6 bit value to its character in the
+ * standard base64 alphabet (A-Z, a-z, 0-9, '+', '/')
+ * returns the ascii char on success and 0xff on error
+ * (value out of range, i.e. v > 63) */
+inline static unsigned char base64_enc_char(unsigned char v)
+{
+	if (v<26)
+		return v+'A';
+	if (v<52)
+		return v-26+'a';
+	if (v<62)
+		return v-52+'0';
+	if (v==0x3e)
+		return '+';
+	if (v==0x3f)
+		return '/';
+	return 0xff;
+}
+
+/* helper internal function: maps a character of the standard base64
+ * alphabet back to its 6 bit value,
+ * returns the value on success (0-63) and 0xff on error (any character
+ * outside the alphabet, '=' included) */
+inline static unsigned base64_dec_char(unsigned char v)
+{
+	if ((v>='A') && (v<='Z'))
+		return v-'A';
+	if ((v>='a') && (v<='z'))
+		return v-'a'+0x1a;
+	if ((v>='0') && (v<='9'))
+		return v-'0'+0x34;
+	if (v=='+')
+		return 0x3e;
+	if (v=='/')
+		return 0x3f;
+	return 0xff;
+}
+
+
+#ifdef BASE64_LOOKUP_8K
+/* base64-encodes slen bytes from src into dst using the 8K table, which
+ * yields 2 output chars per lookup; when dst is 2-bytes aligned each pair
+ * is stored with a single 16 bit write
+ * params: src  - bytes to encode
+ *         slen - number of bytes to encode
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success ((slen+2)/3*4)
+ *          -size_needed on error
+ * WARNING: the output string is not 0-term
+ */
+inline static int base64_enc(unsigned char* src, int slen,
+							unsigned char* dst,  int dlen)
+{
+	unsigned char* end;
+	int osize;
+	
+	osize=(slen+2)/3*4;
+	if (unlikely(dlen<osize))
+		return -osize;
+	/* end of the last complete 3 byte group */
+	end=src+slen/3*3;
+	if (unlikely((long)dst%2)){
+		/* dst is at an odd address: write the chars one by one */
+		for (;src<end; src+=3,dst+=4){
+			dst[0]=FIRST_8B(BASE64_12(src[0], src[1]));
+			dst[1]=LAST_8B(BASE64_12(src[0], src[1]));
+			dst[2]=FIRST_8B(BASE64_34(src[1], src[2]));
+			dst[3]=LAST_8B(BASE64_34(src[1], src[2]));
+		}
+		/* 1 or 2 leftover bytes: missing input is taken as 0 and the
+		 * unused output positions are filled with '=' */
+		switch(slen%3){
+			case 2:
+				dst[0]=FIRST_8B(BASE64_12(src[0], src[1]));
+				dst[1]=LAST_8B(BASE64_12(src[0], src[1]));
+				dst[2]=FIRST_8B(BASE64_34(src[1], 0));
+				dst[3]='=';
+				break;
+			case 1:
+				dst[0]=FIRST_8B(BASE64_12(src[0], 0));
+				dst[1]=LAST_8B(BASE64_12(src[0], 0));
+				dst[2]='=';
+				dst[3]='=';
+				break;
+		}
+	}else{
+		/* dst is 2-bytes aligned: store each pair of chars as one short */
+		for (;src<end; src+=3,dst+=4){
+			*(unsigned short*)(dst+0)=_bx_b64_12[(src[0]<<4)|(src[1]>>4)];
+			*(unsigned short*)(dst+2)=_bx_b64_12[((src[1]&0xf)<<8)|src[2]];
+		}
+		switch(slen%3){
+			case 2:
+				*(unsigned short*)(dst+0)=_bx_b64_12[(src[0]<<4)|(src[1]>>4)];
+				*(unsigned short*)(dst+2)=_bx_b64_12[((src[1]&0xf)<<8)|0];
+				/* the 4th char written above is replaced by the padding */
+				dst[3]='=';
+				break;
+			case 1:
+				*(unsigned short*)(dst+0)=_bx_b64_12[(src[0]<<4)|0];
+				dst[2]='=';
+				dst[3]='=';
+				break;
+		}
+	}
+	return osize;
+}
+#else /*BASE64_LOOKUP_8K*/
+/* base64-encodes slen bytes from src into dst (standard alphabet, '='
+ * padding)
+ * params: src  - bytes to encode
+ *         slen - number of bytes to encode
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success ((slen+2)/3*4)
+ *          -size_needed on error
+ * WARNING: the output string is not 0-term
+ */
+inline static int base64_enc(unsigned char* src, int slen,
+							unsigned char* dst,  int dlen)
+{
+	int osize;
+	int groups;
+	int rest;
+
+	osize=(slen+2)/3*4;
+	if (unlikely(dlen<osize))
+		return -osize;
+	/* complete groups: 3 input bytes => 4 output chars */
+	for (groups=slen/3; groups>0; groups--){
+		dst[0]=BASE64_1(src[0]);
+		dst[1]=BASE64_2(src[0], src[1]);
+		dst[2]=BASE64_3(src[1], src[2]);
+		dst[3]=BASE64_4(src[2]);
+		src+=3;
+		dst+=4;
+	}
+	/* leftover bytes: missing input is taken as 0, the unused output
+	 * positions are filled with '=' */
+	rest=slen%3;
+	if (rest==2){
+		dst[0]=BASE64_1(src[0]);
+		dst[1]=BASE64_2(src[0], src[1]);
+		dst[2]=BASE64_3(src[1], 0);
+		dst[3]='=';
+	}else if (rest==1){
+		dst[0]=BASE64_1(src[0]);
+		dst[1]=BASE64_2(src[0], 0);
+		dst[2]='=';
+		dst[3]='=';
+	}
+	return osize;
+}
+#endif /*BASE64_LOOKUP_8K*/
+
+
+
+/* decodes slen base64 chars (standard alphabet) from src into dst; only
+ * the length and the position of the '=' padding are checked, the other
+ * chars are not validated
+ * params: src  - base64 string to decode
+ *         slen - its length
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success (max: slen/4*3)
+ *          -size_needed on error or 0 on bad base64 encoded string
+ * WARNING: the output string is not 0-term
+ */
+inline static int base64_dec(unsigned char* src, int slen,
+							unsigned char* dst,  int dlen)
+{
+	unsigned char* last;
+	int osize;
+	int npad;
+	register unsigned a, b, c, d; /* more registers used, but allows for
+									 parallel execution */
+
+	if (unlikely((slen<4) || (slen%4) ||
+				(src[slen-2]=='=' && src[slen-1]!='=')))
+		return 0; /* invalid base64 enc. */
+	npad=(src[slen-2]=='=')+(src[slen-1]=='=');
+	osize=(slen/4*3)-npad;
+	if (unlikely(dlen<osize))
+		return -osize;
+	/* all the groups but the last one are complete: 4 chars => 3 bytes */
+	last=src+slen-4;
+	while (src<last){
+		a=UNBASE64(src[0]);
+		b=UNBASE64(src[1]);
+		c=UNBASE64(src[2]);
+		d=UNBASE64(src[3]);
+		dst[0]=(a<<2) | (b>>4);
+		dst[1]=(b<<4) | (c>>2);
+		dst[2]=(c<<6) | d;
+		src+=4;
+		dst+=3;
+	}
+	/* last group: each '=' means one output byte less, the padding chars
+	 * themselves are never decoded */
+	a=UNBASE64(src[0]);
+	b=UNBASE64(src[1]);
+	dst[0]=(a<<2) | (b>>4);
+	if (npad<2){
+		c=UNBASE64(src[2]);
+		dst[1]=(b<<4) | (c>>2);
+		if (npad==0){
+			d=UNBASE64(src[3]);
+			dst[2]=(c<<6) | d;
+		}
+	}
+	return osize;
+}
+
+
+
+
+/*
+ * same as base64_enc but with a different alphabet, that allows simpler and
+ *  faster enc/dec: each 6 bit value is encoded as value+'0' and the
+ *  padding char is 'z'
+ * params: src  - bytes to encode
+ *         slen - number of bytes to encode
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success ((slen+2)/3*4)
+ *          -size_needed on error
+ * WARNING: the alphabet includes ":;<=>?@[\]^_`", so it might not be suited
+ *  in all cases (e.g. encoding something in a sip uri).
+ * q_b64_base and q_b64_pad stay defined after this function (they are
+ *  used by q_base64_dec).
+ */
+inline static int q_base64_enc(unsigned char* src, int slen,
+							unsigned char* dst,  int dlen)
+{
+#define q_b64_base	'0'
+#define q_b64_pad	'z'
+#define Q_BASE64(v)	(unsigned char)((v)+q_b64_base)
+	int osize;
+	int groups;
+	int rest;
+
+	osize=(slen+2)/3*4;
+	if (unlikely(dlen<osize))
+		return -osize;
+	/* complete groups: 3 input bytes => 4 output chars */
+	for (groups=slen/3; groups>0; groups--){
+		dst[0]=Q_BASE64(src[0]>>2);
+		dst[1]=Q_BASE64(((src[0]<<4)&0x3f) | (src[1]>>4));
+		dst[2]=Q_BASE64(((src[1]<<2)&0x3f) | (src[2]>>6));
+		dst[3]=Q_BASE64(src[2]&0x3f);
+		src+=3;
+		dst+=4;
+	}
+	/* leftover bytes: missing input is taken as 0, the unused output
+	 * positions are filled with the padding char */
+	rest=slen%3;
+	if (rest==2){
+		dst[0]=Q_BASE64(src[0]>>2);
+		dst[1]=Q_BASE64(((src[0]<<4)&0x3f) | (src[1]>>4));
+		dst[2]=Q_BASE64((src[1]<<2)&0x3f);
+		dst[3]=q_b64_pad;
+	}else if (rest==1){
+		dst[0]=Q_BASE64(src[0]>>2);
+		dst[1]=Q_BASE64((src[0]<<4)&0x3f);
+		dst[2]=q_b64_pad;
+		dst[3]=q_b64_pad;
+	}
+	return osize;
+#undef Q_BASE64
+}
+
+
+
+/*
+ * same as base64_dec but with a different alphabet, that allows simpler and
+ *  faster enc/dec (the counterpart of q_base64_enc): each char is decoded
+ *  as char-q_b64_base and the padding char is q_b64_pad
+ * params: src  - string to decode
+ *         slen - its length
+ *         dst  - output buffer
+ *         dlen - size of the output buffer
+ * returns: size used from the output buffer (dst) on success (max: slen/4*3)
+ *          -size_needed on error or 0 on bad base64 encoded string
+ * WARNING: the output string is not 0-term
+ * q_b64_base and q_b64_pad must already be defined when this function is
+ *  compiled; they are undefined at its end.
+ */
+inline static int q_base64_dec(unsigned char* src, int slen,
+							unsigned char* dst,  int dlen)
+{
+#define Q_UNBASE64(v) (unsigned char)((v)-q_b64_base)
+	
+	unsigned char* end;
+	int osize;
+#ifdef SINGLE_REG
+	register unsigned u;
+#else
+	register unsigned a, b, c, d; /* more registers used, but allows for
+									 parallel execution */
+#endif
+	
+	if (unlikely((slen<4) || (slen%4) || 
+				(src[slen-2]==q_b64_pad && src[slen-1]!=q_b64_pad)))
+		return 0; /* invalid base64 enc. */
+	/* each trailing padding char means one output byte less */
+	osize=(slen/4*3)-(src[slen-2]==q_b64_pad)-(src[slen-1]==q_b64_pad);
+	if (unlikely(dlen<osize))
+		return -osize;
+	/* all the groups but the last one are complete: 4 chars => 3 bytes */
+	end=src+slen-4;
+	for (;src<end; src+=4,dst+=3){
+#ifdef SINGLE_REG
+		/* gather the 4 6-bit values into one 24 bit word */
+		u=	(Q_UNBASE64(src[0])<<18) | (Q_UNBASE64(src[1])<<12) | 
+			(Q_UNBASE64(src[2])<<6)  |  Q_UNBASE64(src[3]);
+		dst[0]=u>>16;
+		dst[1]=u>>8;
+		dst[2]=u;
+#else
+		a=Q_UNBASE64(src[0]);
+		b=Q_UNBASE64(src[1]);
+		c=Q_UNBASE64(src[2]);
+		d=Q_UNBASE64(src[3]);
+		dst[0]=(a<<2) | (b>>4);
+		dst[1]=(b<<4) | (c>>2);
+		dst[2]=(c<<6) | d;
+#endif
+	}
+	/* last group, the only one that can contain padding */
+	switch(osize%3){
+		case 0: /* no padding => 3 output bytes at the end */
+#ifdef SINGLE_REG
+			u=	(Q_UNBASE64(src[0])<<18) | (Q_UNBASE64(src[1])<<12) | 
+				(Q_UNBASE64(src[2])<<6)  |  Q_UNBASE64(src[3]);
+			dst[0]=u>>16;
+			dst[1]=u>>8;
+			dst[2]=u;
+#else
+			a=Q_UNBASE64(src[0]);
+			b=Q_UNBASE64(src[1]);
+			c=Q_UNBASE64(src[2]);
+			d=Q_UNBASE64(src[3]);
+			dst[0]=(a<<2) | (b>>4);
+			dst[1]=(b<<4) | (c>>2);
+			dst[2]=(c<<6) | d;
+#endif
+			break;
+		case 2: /* 1 padding char => 2 output bytes at the end */
+#ifdef SINGLE_REG
+			/* 18 bit word, the 2 low bits are not part of the output */
+			u=	(Q_UNBASE64(src[0])<<12) | (Q_UNBASE64(src[1])<<6) | 
+				(Q_UNBASE64(src[2]));
+			dst[0]=u>>10;
+			dst[1]=u>>2;
+#else
+			a=Q_UNBASE64(src[0]);
+			b=Q_UNBASE64(src[1]);
+			c=Q_UNBASE64(src[2]);
+			dst[0]=(a<<2) | (b>>4);
+			dst[1]=(b<<4) | (c>>2);
+#endif
+			break;
+		case 1: /* 2 padding chars => 1 output byte at the end */
+#ifdef SINGLE_REG
+			dst[0]=(Q_UNBASE64(src[0])<<2) | (Q_UNBASE64(src[1])>>4); 
+#else
+			a=Q_UNBASE64(src[0]);
+			b=Q_UNBASE64(src[1]);
+			dst[0]=(a<<2) | (b>>4);
+#endif
+			break;
+	}
+	return osize;
+#undef q_b64_base
+#undef q_b64_pad
+}
+
+int init_basex();
+
+
+#endif /* _basex_h */

+ 401 - 0
test/basex.c

@@ -0,0 +1,401 @@
+/*
+ * $Id$
+ *
+ * Tests for basex.h
+ *
+ * Copyright (C) 2008 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+/*#define NO_BASE64_LOOKUP_TABLE
+ #define SINGLE_REG */
+
+#include "../basex.h"
+#include "profile.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+/* numeric tags for the codec under test, compared against BASEX below
+ * (Q_BASE64 selects the q_base64_* functions) */
+#define BASE64 64
+#define Q_BASE64 640
+#define BASE16 16
+
+/* default to the base16 codec unless BASEX was set at compile time
+ * (e.g. -DBASEX=BASE64) */
+#ifndef BASEX
+#define BASEX BASE16
+#endif
+
+/* map the generic B_ENC / B_DEC / B_ENC_LEN names onto the selected codec;
+ * B_ENC_LEN(l) is the encoded length expected for l input bytes */
+#if BASEX == Q_BASE64
+
+#warning Q_BASE64
+#define B_ENC	q_base64_enc
+#define B_DEC	q_base64_dec
+/* 4 output chars for every (started) group of 3 input bytes */
+#define B_ENC_LEN(l)	(((l)+2)/3*4)
+
+#elif BASEX == BASE16
+
+#warning BASE16
+#define B_ENC	base16_enc
+#define B_DEC	base16_dec
+/* 2 output chars for every input byte */
+#define B_ENC_LEN(l)	((l)*2)
+
+#else
+
+/* any other BASEX value, BASE64 included, ends up here */
+#warning BASE64
+#define B_ENC	base64_enc
+#define B_DEC	base64_dec
+/* 4 output chars for every (started) group of 3 input bytes */
+#define B_ENC_LEN(l)	(((l)+2)/3*4)
+
+
+#endif
+
+
+/* two-level stringification: QUOTE_MACRO(x) lets x be macro-expanded first,
+ * then QUOTEME turns the expansion into a string literal (so BASEX ends up
+ * as its numeric value and B_ENC/B_DEC as the selected function names) */
+#define QUOTE_MACRO(x) QUOTEME(x)
+#define QUOTEME(x) #x
+
+static char* id="$Id$";
+/* version banner, assembled at compile time from the selected base and from
+ * whichever lookup-table / read-width macros are defined */
+static char* version="basex test 0.1 " 
+"BASE" QUOTE_MACRO(BASEX)  ": " QUOTE_MACRO(B_ENC) ", " QUOTE_MACRO(B_DEC) ""
+#if defined BASE64_LOOKUP_TABLE 
+#ifdef BASE64_LOOKUP_LARGE
+" (large b64 lookup table)"
+#else
+" (lookup b64 table)"
+#endif
+#else
+" (no b64 lookup table)"
+#endif
+#if defined BASE16_LOOKUP_TABLE
+#ifdef BASE16_LOOKUP_LARGE
+" (large b16 lookup table)"
+#else
+" (lookup b16 table)"
+#endif
+#else
+" (no b16 lookup table)"
+#endif
+#if defined BASE64_READ_WHOLE_INTS || defined BASE16_READ_WHOLE_INTS
+" (read 4 bytes at a time)"
+#else
+" (read 1 byte at a time)"
+#endif
+;
+
+/* usage text printed by the -h option; -o is applied as an index into the
+ * input buffer, -e is added to both the encode and the decode destination
+ * buffers (see main()) */
+static char* help_msg="\
+Usage: basex  [-hv] ... [options]\n\
+Options:\n\
+    -m min        minimum length\n\
+    -M max        maximum length\n\
+    -o offset     offset from the start of the buffer (alignment tests)\n\
+    -e offset     offset from the start of the dst. buf. (alignment tests)\n\
+    -n no.        number of test loops\n\
+    -v            increase verbosity\n\
+    -V            version number\n\
+    -h            this help message\n\
+";
+
+
+/* profiling counters: main() initializes pf1 as "encode" and pf2 as
+ * "decode"; pf3..pf6 are not referenced anywhere else in this file */
+struct profile_data pf1, pf2, pf3, pf4, pf5, pf6;
+
+
+/* Print one line of profiling statistics for pd on stdout: the profile
+ * name, the entries/exits counters, then the max, average and last cycle
+ * counts, followed by the total.
+ * The average is total_cycles/entries, or 0 when entries is 0 (avoids a
+ * division by zero for a profile that was never entered).
+ * NOTE(review): the format specifiers assume entries/exits are long and the
+ * cycle fields are long long; struct profile_data is declared in profile.h,
+ * which is not visible here -- confirm. */
+void dump_profile_info(struct profile_data* pd)
+{
+	printf("profiling for %s (%ld/%ld):  %lld/%lld/%lld (max/avg/last),"
+			" total %lld\n",
+			pd->name, pd->entries, pd->exits, pd->max_cycles, 
+			pd->entries?pd->total_cycles/pd->entries:0, pd->cycles,
+			pd->total_cycles);
+}
+
+
+
+/* Seed rand() with bytes read from /dev/urandom, mixed with the pid and the
+ * current time.
+ * If /dev/urandom cannot be opened or read, a warning is printed on stderr
+ * and the seed is derived from the pid and the time only.
+ * Returns 0 always. */
+int seed_prng()
+{
+	unsigned seed; /* unsigned: the mixing below may wrap around */
+	int rfd;
+	ssize_t n;
+	
+	seed=0; /* well defined start value if urandom is unavailable or the
+			   read is short */
+	if ((rfd=open("/dev/urandom", O_RDONLY))!=-1){
+		do{
+			n=read(rfd, (void*)&seed, sizeof(seed));
+		}while((n==-1) && (errno==EINTR)); /* retry if interrupted by signal*/
+		if (n==-1){
+			fprintf(stderr, "WARNING: could not read from /dev/urandom: "
+							" %s (%d)\n", strerror(errno), errno);
+		}
+		close(rfd);
+	}else{
+		fprintf(stderr, "WARNING: could not open /dev/urandom: %s (%d)\n",
+						strerror(errno), errno);
+	}
+	seed+=(unsigned)getpid()+(unsigned)time(0);
+	srand(seed);
+	return 0;
+}
+
+
+/* Fill buf[0..len-1] with pseudo-random bytes taken from rand().
+ *
+ * Each rand() call supplies RAND_BYTES bytes (the number of whole bytes
+ * that RAND_MAX guarantees); the last len % RAND_BYTES bytes are taken from
+ * one extra rand() call.
+ * Nothing is written (and rand() is not called) if len <= 0.
+ */
+void fill_rand(unsigned char* buf, int len)
+{
+	unsigned char* end;
+	unsigned v; /* unsigned so that the shifts below are well defined */
+	int tail;
+	int i;
+
+/* find out how many random bytes we can get from rand() */
+#if RAND_MAX >= 0xffffffff
+#define RAND_BYTES 4
+#warning RAND_BYTES is 4
+#elif RAND_MAX >= 0xffffff
+#define RAND_BYTES 3
+#warning RAND_BYTES is 3
+#elif RAND_MAX >= 0xffff
+#define RAND_BYTES 2
+#warning RAND_BYTES is 2
+#else
+#define RAND_BYTES 1
+#endif
+
+	if (len<=0)
+		return;
+	/* bytes left over after the whole RAND_BYTES-sized groups; computed
+	   here because buf is advanced up to end by the loop below */
+	tail=len%RAND_BYTES;
+	end=buf+(len-tail);
+	for(;buf<end;buf+=RAND_BYTES){
+		v=(unsigned)rand();
+		buf[0]=v;
+#if RAND_BYTES > 1
+		buf[1]=v>>8;
+#endif
+#if RAND_BYTES > 2
+		buf[2]=v>>16;
+#endif
+#if RAND_BYTES > 3
+		buf[3]=v>>24;
+#endif
+	}
+	if (tail>0){
+		/* tail < RAND_BYTES, so one rand() value has enough random bytes */
+		v=(unsigned)rand();
+		for (i=0; i<tail; i++, v>>=8)
+			buf[i]=v;
+	}
+}
+
+
+
+/* Parse the argument of the numeric option -<opt> into *val.
+ * Accepts decimal, octal (0...) and hex (0x...) numbers, as strtol() with
+ * base 0 does; rejects empty strings, trailing garbage, negative values and
+ * values that do not fit into an int.
+ * Prints a "bad number" message on stderr on failure.
+ * Returns 0 on success, -1 on error (*val is left untouched). */
+static int parse_num_opt(int opt, char* arg, int* val)
+{
+	char* tmp;
+	long v;
+
+	tmp=0;
+	errno=0;
+	v=strtol(arg, &tmp, 0);
+	if ((tmp==0)||(tmp==arg)||(*tmp)||(errno!=0)||(v<0)||
+			(v!=(long)(int)v)){
+		fprintf(stderr, "bad number: -%c %s\n", opt, arg);
+		return -1;
+	}
+	*val=(int)v;
+	return 0;
+}
+
+
+
+/* Round-trip test & benchmark driver for the selected B_ENC/B_DEC pair.
+ *
+ * For each of the -n loops: picks a length between the -m and -M limits
+ * (fixed if they are equal), fills the input buffer with random data,
+ * encodes it starting at the chosen offset, decodes the result and checks
+ * both the returned lengths and the decoded bytes against the input.
+ * The encode and decode calls are profiled and the profiling data is
+ * printed at the end.
+ *
+ * Returns 0 on success; exits with -1 on bad options, allocation failures
+ * or length mismatches and calls abort() on a decoded data mismatch. */
+int main(int argc, char** argv)
+{
+
+	int loops, min_len, max_len, offset, e_offset;
+	unsigned char* ibuf;
+	unsigned char* enc_buf;
+	unsigned char* dec_buf;
+	int ibuf_len, enc_buf_len, dec_buf_len;
+	int offs, c_len, e_len, l;
+	int r;
+	int verbose;
+	int c;
+
+	verbose=0;
+	min_len=max_len=offset=-1;
+	e_offset=0;
+	loops=1024;
+	opterr=0;
+	/* the leading ':' makes getopt return ':' for a missing option
+	   argument (otherwise it would be reported as '?') */
+	while ((c=getopt(argc, argv, ":n:m:M:o:e:vhV"))!=-1){
+		switch(c){
+			case 'n':
+				if (parse_num_opt(c, optarg, &loops)<0)
+					goto error;
+				break;
+			case 'm':
+				if (parse_num_opt(c, optarg, &min_len)<0)
+					goto error;
+				break;
+			case 'M':
+				if (parse_num_opt(c, optarg, &max_len)<0)
+					goto error;
+				break;
+			case 'o':
+				if (parse_num_opt(c, optarg, &offset)<0)
+					goto error;
+				break;
+			case 'e':
+				if (parse_num_opt(c, optarg, &e_offset)<0)
+					goto error;
+				break;
+			case 'v':
+				verbose++;
+				break;
+			case 'V':
+				printf("version: %s\n", version);
+				printf("%s\n", id);
+				exit(0);
+				break;
+			case 'h':
+				printf("version: %s\n", version);
+				printf("%s", help_msg);
+				exit(0);
+				break;
+			case '?':
+				if (isprint(optopt))
+					fprintf(stderr, "Unknown option `-%c\n", optopt);
+				else
+					fprintf(stderr, "Unknown character `\\x%x\n", optopt);
+				goto error;
+			case ':':
+				fprintf(stderr, "Option `-%c requires an argument.\n",
+						optopt);
+				goto error;
+				break;
+			default:
+				abort();
+		}
+	}
+	if (min_len==-1 && max_len==-1){
+		min_len=0;
+		max_len=4*1024*1024;
+	}else if (min_len==-1)
+		min_len=0;
+	else if (max_len==-1)
+		max_len=min_len;
+	/* a reversed range would produce lengths outside the buffers below */
+	if (min_len>max_len){
+		fprintf(stderr, "bad length range: min %d > max %d\n",
+						min_len, max_len);
+		goto error;
+	}
+	/* init */
+	ibuf_len=max_len;
+	ibuf=malloc(ibuf_len);
+	if (ibuf==0){
+		fprintf(stderr, "ERROR: 1. memory allocation error (%d bytes)\n",
+						ibuf_len);
+		exit(-1);
+	}
+	enc_buf_len=B_ENC_LEN(ibuf_len);
+	enc_buf=malloc(enc_buf_len+e_offset);
+	if (enc_buf==0){
+		fprintf(stderr, "ERROR: 2. memory allocation error (%d bytes)\n",
+						enc_buf_len+e_offset);
+		exit(-1);
+	}
+	enc_buf+=e_offset; /* make sure it's off by e_offset bytes from the
+						 aligned stuff malloc returns */
+	dec_buf_len=ibuf_len;
+	dec_buf=malloc(dec_buf_len+e_offset);
+	if (dec_buf==0){
+		fprintf(stderr, "ERROR: 3. memory allocation error (%d bytes)\n",
+						dec_buf_len+e_offset);
+		exit(-1);
+	}
+	dec_buf+=e_offset; /* make sure it's off by e_offset bytes from the
+						  aligned stuff malloc returns */
+	
+	
+	seed_prng();
+	/* profile */
+	profile_init(&pf1, "encode");
+	profile_init(&pf2, "decode");
+	
+	init_basex();
+	if (verbose)
+		printf("starting (loops %d, min size %d, max size %d, offset %d,"
+				", e_offset %d, buffer sizes %d %d %d)\n",
+				loops, min_len, max_len, offset, e_offset, ibuf_len,
+				enc_buf_len, dec_buf_len);
+	
+	for (r=0; r<loops; r++){
+		if (min_len!=max_len)
+			/* test encode/decode random data w/ random length;
+			   computed in double: the range is exactly representable
+			   and the int subtraction cannot overflow */
+			c_len= min_len+(int)(((double)max_len-min_len+1)*
+										(rand()/(RAND_MAX+1.0)));
+		else 
+			/* test encode/decode random data w/ fixed length */
+			c_len=max_len;
+		if (offset==-1)
+			/* offset between 0 & MIN(clen,3) */
+			offs= (int)((double)(MIN(c_len,3)+1)*(rand()/(RAND_MAX+1.0)));
+		else if (offset>c_len)
+			offs=0;
+		else
+			offs=offset;
+		if (verbose>2)
+			printf("loop %d, current len %d, offset %d, start %p\n",
+						r, c_len-offs, offs, (void*)&ibuf[offs]);
+		else if ((verbose >1) && (r %10==0)) putchar('.');
+		
+		fill_rand(ibuf, c_len);
+		
+		/* only the part starting at offs is encoded */
+		c_len-=offs;
+		e_len=B_ENC_LEN(c_len);
+		profile_start(&pf1);
+		l=B_ENC(&ibuf[offs], c_len, enc_buf, e_len);
+		profile_end(&pf1);
+		if (l != e_len){
+			fprintf(stderr, "ERROR: invalid length for encoding: %d "
+							"instead of %d (loops=%d)\n", l, e_len, r);
+			exit(-1);
+		}
+		profile_start(&pf2);
+		l=B_DEC(enc_buf, e_len, dec_buf, c_len);
+		profile_end(&pf2);
+		if (l != c_len){
+			fprintf(stderr, "ERROR: invalid length for decoding: %d "
+							"instead of %d (loops=%d)\n", l, c_len, r);
+			exit(-1);
+		}
+		if (memcmp(&ibuf[offs], dec_buf, c_len)!=0){
+			fprintf(stderr, "ERROR: decoding mismatch "
+							"(loops=%d, c_len=%d)\n", r, c_len);
+			abort(); /* leave a core dump for inspection */
+			exit(-1);
+		}
+	}
+	if (verbose >1) putchar('\n');
+	/* print the profiling info for the encode and decode calls */
+	dump_profile_info(&pf1);
+	dump_profile_info(&pf2);
+	return 0;
+error:
+	exit(-1);
+}

+ 60 - 0
test/basex.txt

@@ -0,0 +1,60 @@
+# test results for basex.c
+
+
+pentium-m (core2): 
+gcc -g  -Wall -O9 -mtune=pentium-m -mcpu=pentium-m -D__CPU_x86  ${DEFS} \
+        ../basex.c basex.c -o basex
+
+               4		  8		 16		 32		 40		 64		 80		128
+lookup:
+base16_enc4   83		 96		125		177		205		291		345		509
+base16_dec4
+base16_enc    79		 97		150		208		240		326		390		559
+base16_dec    76		 96		152		217		249		350		402		588
+
+lookup(DEFS=-DBASEX=BASE64)::
+base16_enc4										211
+base16_dec4										252
+base16_enc										231
+base16_dec										251
+lookup_Large (DEFS=-DBASE16_LOOKUP_LARGE -DBASEX=BASE64):
+base16_enc4										218
+base16_dec4										236
+base16_enc										199
+base16_dec										236
+
+no lookup (DEFS=-DNO_BASE64_LOOKUP_TABLE -DBASEX=BASE64):
+base64_enc  156									1005						
+            181									1242
+lookup (DEFS=-DBASEX=BASE64):
+base64_enc   82			101		120		185		209		302		335		521
+base64_dec   89			103		140		198		208		289		359		546
+lookup_large (DEFS=-DBASE64_LOOKUP_LARGE -DBASEX=BASE64):
+base64_enc   79			 85		103		157		186		268		276		421
+base64_dec   93			103		126		188		207		281		348		510
+
+lookup_8k (DEFS=-DBASE64_LOOKUP_8K -DBASEX=BASE64), -e 1
+base64_enc   77			 94						177						422
+lookup_8k words (like above but -e 0)
+base64_enc   77									156						329
+
+
+ultrasparc:
+gcc -g  -Wall -O9 -mtune=ultrasparc -mcpu=ultrasparc -D__CPU_sparc64 ${DEFS} \
+     ../basex.c basex.c -o basex
+
+no lookup (DEFS=-DNO_BASE64_LOOKUP_TABLE -DBASEX=BASE64):
+base64_enc     			   						 728						  
+              									1425
+lookup (DEFS=-DBASEX=BASE64):
+base64_enc     			   						314						   
+              									260
+lookup_large (DEFS=-DBASE64_LOOKUP_LARGE -DBASEX=BASE64):
+base64_enc     			   						295						   
+
+lookup_8k (DEFS=-DBASE64_LOOKUP_8K -DBASEX=BASE64), -e 1
+base64_enc     			   						323						   
+               									250
+lookup_8k words (like above but -e 0)
+base64_enc   91			118						296						
+            101			117						249