StringUtilities.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993
  1. /*
  2. * This source file is part of libRocket, the HTML/CSS Interface Middleware
  3. *
  4. * For the latest information, see http://www.librocket.com
  5. *
  6. * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. *
  26. */
  27. #include "precompiled.h"
  28. #include <Rocket/Core/StringUtilities.h>
  29. #include <ctype.h>
  30. #include <stdio.h>
  31. namespace Rocket {
  32. namespace Core {
  33. StringUtilities::ArgumentState::ArgumentState()
  34. {
  35. index = 1;
  36. display_errors = true;
  37. }
  38. // Expands character-delimited list of values in a single string to a whitespace-trimmed list of values.
  39. void StringUtilities::ExpandString(StringList& string_list, const String& string, const char delimiter)
  40. {
  41. char quote = 0;
  42. bool last_char_delimiter = true;
  43. const char* ptr = string.CString();
  44. const char* start_ptr = NULL;
  45. const char* end_ptr = ptr;
  46. while (*ptr)
  47. {
  48. // Switch into quote mode if the last char was a delimeter ( excluding whitespace )
  49. // and we're not already in quote mode
  50. if (last_char_delimiter && !quote && (*ptr == '"' || *ptr == '\''))
  51. {
  52. quote = *ptr;
  53. }
  54. // Switch out of quote mode if we encounter a quote that hasn't been escaped
  55. else if (*ptr == quote && *(ptr-1) != '\\')
  56. {
  57. quote = 0;
  58. }
  59. // If we encouter a delimiter while not in quote mode, add the item to the list
  60. else if (*ptr == delimiter && !quote)
  61. {
  62. if (start_ptr)
  63. string_list.push_back(String(start_ptr, end_ptr + 1));
  64. else
  65. string_list.push_back("");
  66. last_char_delimiter = true;
  67. start_ptr = NULL;
  68. }
  69. // Otherwise if its not white space or we're in quote mode, advance the pointers
  70. else if (!isspace(*ptr) || quote)
  71. {
  72. if (!start_ptr)
  73. start_ptr = ptr;
  74. end_ptr = ptr;
  75. last_char_delimiter = false;
  76. }
  77. ptr++;
  78. }
  79. // If there's data pending, add it.
  80. if (start_ptr)
  81. string_list.push_back(String(start_ptr, end_ptr + 1));
  82. }
  83. // Joins a list of string values into a single string separated by a character delimiter.
  84. void StringUtilities::JoinString(String& string, const StringList& string_list, const char delimiter)
  85. {
  86. for (size_t i = 0; i < string_list.size(); i++)
  87. {
  88. string += string_list[i];
  89. if (delimiter != '\0' && i < string_list.size() - 1)
  90. string.Append(delimiter);
  91. }
  92. }
  93. // Forward declare the MD5 function
  94. String MD5String(const char* string, int length);
  95. // Hashes a string of data to an 32-character MD5 value.
  96. String StringUtilities::MD5Hash(const char* data, int length)
  97. {
  98. return MD5String(data, length);
  99. }
  100. // Hashes a string of data to an integer value using the FNV algorithm.
  101. Hash StringUtilities::FNVHash(const char *string)
  102. {
  103. // FNV-1 hash algorithm
  104. Hash hval = 0;
  105. unsigned char *bp = (unsigned char *)string; // start of buffer
  106. // FNV-1 hash each octet in the buffer
  107. while (*bp)
  108. {
  109. // multiply by the 32 bit FNV magic prime mod 2^32
  110. hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
  111. // xor the bottom with the current octet
  112. hval ^= *bp++;
  113. }
  114. return hval;
  115. }
  116. static unsigned char hexchars[] = "0123456789ABCDEF";
  117. // Encodes a string with URL-encoding.
  118. bool StringUtilities::URLEncode(const char* input, size_t input_length, String& output)
  119. {
  120. for (size_t i = 0; i < input_length; i++)
  121. {
  122. if (input[i] == ' ')
  123. {
  124. output += '+';
  125. }
  126. else if (isalnum((unsigned char)input[i]) || input[i] == '_' || input[i] == '-' || input[i] == '.')
  127. {
  128. /* Allow only alphanumeric chars and '_', '-', '.'; escape the rest */
  129. output += input[i];
  130. }
  131. else
  132. {
  133. output += '%';
  134. output += hexchars[(unsigned char) input[i] >> 4];
  135. output += hexchars[(unsigned char) input[i] & 0x0F];
  136. }
  137. }
  138. return true;
  139. }
  140. // Decodes a URL-encoded string.
  141. bool StringUtilities::URLDecode(const String& input, char* output, size_t output_length)
  142. {
  143. char* dest = output;
  144. const char* data = input.CString();
  145. int len = (int) input.Length();
  146. size_t used = 0;
  147. while (len-- && used < output_length)
  148. {
  149. if (*data == '+')
  150. {
  151. *dest = ' ';
  152. }
  153. else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1)) && isxdigit((int) *(data + 2)))
  154. {
  155. int value;
  156. int c;
  157. c = ((unsigned char*) data)[1];
  158. if (isupper(c))
  159. c = tolower(c);
  160. value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;
  161. c = ((unsigned char*) data)[2];
  162. if (isupper(c))
  163. c = tolower(c);
  164. value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;
  165. data += 2;
  166. len -= 2;
  167. *dest = (char) value;
  168. }
  169. else
  170. {
  171. *dest = *data;
  172. }
  173. data++;
  174. dest++;
  175. used++;
  176. }
  177. return true;
  178. }
  // The base64 alphabet, indexed by 6-bit value.
  static const char base64digits[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Marker stored in base64val for characters outside the base64 alphabet.
  #define BAD (-1)

  // Maps a 7-bit ASCII code to its 6-bit base64 value, or BAD. The element type is explicitly
  // signed: with plain char the BAD entries would read back as 255 on platforms where char is
  // unsigned, and comparisons against BAD would never match.
  static const signed char base64val[] = {
    BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD,
    BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD,
    BAD,BAD,BAD,BAD, BAD,BAD,BAD,BAD, BAD,BAD,BAD, 62, BAD,BAD,BAD, 63,
     52, 53, 54, 55,  56, 57, 58, 59,  60, 61,BAD,BAD, BAD,BAD,BAD,BAD,
    BAD,  0,  1,  2,   3,  4,  5,  6,   7,  8,  9, 10,  11, 12, 13, 14,
     15, 16, 17, 18,  19, 20, 21, 22,  23, 24, 25,BAD, BAD,BAD,BAD,BAD,
    BAD, 26, 27, 28,  29, 30, 31, 32,  33, 34, 35, 36,  37, 38, 39, 40,
     41, 42, 43, 44,  45, 46, 47, 48,  49, 50, 51,BAD, BAD,BAD,BAD,BAD
  };

  // Looks up the 6-bit value of character c; anything outside 7-bit ASCII is BAD.
  #define DECODE64(c) (isascii(c) ? base64val[c] : BAD)
  193. // Encodes a string with base64-encoding.
  194. bool StringUtilities::Base64Encode( const char* input, size_t input_length, String& output )
  195. {
  196. output.Clear();
  197. size_t i = input_length;
  198. for (; i >= 3; i -= 3)
  199. {
  200. output += base64digits[input[0] >> 2];
  201. output += base64digits[((input[0] << 4) & 0x30) | (input[1] >> 4)];
  202. output += base64digits[((input[1] << 2) & 0x3c) | (input[2] >> 6)];
  203. output += base64digits[input[2] & 0x3f];
  204. input += 3;
  205. }
  206. if (i > 0)
  207. {
  208. unsigned char fragment;
  209. output += base64digits[input[0] >> 2];
  210. fragment = (input[0] << 4) & 0x30;
  211. if (i > 1)
  212. fragment |= input[1] >> 4;
  213. output += base64digits[fragment];
  214. output += (i < 2) ? '=' : base64digits[(input[1] << 2) & 0x3c];
  215. output += '=';
  216. }
  217. return true;
  218. }
  219. // Decodes a base64-encoded string.
  220. bool StringUtilities::Base64Decode( const String& input, char* output, size_t output_length )
  221. {
  222. const char* in = input.CString();
  223. size_t len = 0;
  224. unsigned char digit1, digit2, digit3, digit4;
  225. if (in[0] == '+' && in[1] == ' ')
  226. in += 2;
  227. if (*in == '\r')
  228. return false;
  229. do
  230. {
  231. digit1 = in[0];
  232. if (DECODE64(digit1) == BAD)
  233. return false;
  234. digit2 = in[1];
  235. if (DECODE64(digit2) == BAD)
  236. return false;
  237. digit3 = in[2];
  238. if (digit3 != '=' && DECODE64(digit3) == BAD)
  239. return false;
  240. digit4 = in[3];
  241. if (digit4 != '=' && DECODE64(digit4) == BAD)
  242. return false;
  243. in += 4;
  244. ++len;
  245. if (output_length && len > output_length)
  246. return false;
  247. *output++ = (DECODE64(digit1) << 2) | (DECODE64(digit2) >> 4);
  248. if (digit3 != '=')
  249. {
  250. ++len;
  251. if (output_length && len > output_length)
  252. return false;
  253. *output++ = ((DECODE64(digit2) << 4) & 0xf0) | (DECODE64(digit3) >> 2);
  254. if (digit4 != '=')
  255. {
  256. ++len;
  257. if (output_length && len > output_length)
  258. return false;
  259. *output++ = ((DECODE64(digit3) << 6) & 0xc0) | DECODE64(digit4);
  260. }
  261. }
  262. }
  263. while (*in && *in != '\r' && digit4 != '=');
  264. return true;
  265. }
  // Bit patterns used by the UTF-8 / UCS-2 conversion functions below.
  #define _NXT   0x80    // marker bits of a continuation byte (10xxxxxx)
  #define _SEQ2  0xc0    // lead byte of a 2-byte sequence
  #define _SEQ3  0xe0    // lead byte of a 3-byte sequence
  #define _SEQ4  0xf0    // lead byte of a 4-byte sequence
  #define _SEQ5  0xf8    // lead byte of a 5-byte sequence
  #define _SEQ6  0xfc    // lead byte of a 6-byte sequence
  #define _BOM   0xfeff  // byte order mark code point
  // Returns -1 if 'sym' lies in the surrogate range 0xd800-0xdfff, otherwise 0.
  static int __wchar_forbidden(unsigned int sym)
  {
    const bool is_surrogate = (sym >= 0xd800 && sym <= 0xdfff);
    return is_surrogate ? -1 : 0;
  }
  // Returns -1 for the octets this converter rejects outright (0xc0, 0xc1, 0xf5 and 0xff),
  // otherwise 0.
  static int __utf8_forbidden(unsigned char octet)
  {
    if (octet == 0xc0 || octet == 0xc1 || octet == 0xf5 || octet == 0xff)
      return -1;

    return 0;
  }
  294. // Converts a character array in UTF-8 encoding to a vector of words.
  295. bool StringUtilities::UTF8toUCS2(const String& input, std::vector< word >& output)
  296. {
  297. if (input.Empty())
  298. return true;
  299. unsigned char* p = (unsigned char*) input.CString();
  300. unsigned char* lim = p + input.Length();
  301. // Skip the UTF-8 byte order marker if it exists.
  302. if (input.Substring(0, 3) == "\xEF\xBB\xBF")
  303. p += 3;
  304. int num_bytes;
  305. for (; p < lim; p += num_bytes)
  306. {
  307. if (__utf8_forbidden(*p) != 0)
  308. return false;
  309. // Get number of bytes for one wide character.
  310. word high;
  311. num_bytes = 1;
  312. if ((*p & 0x80) == 0)
  313. {
  314. high = (wchar_t)*p;
  315. }
  316. else if ((*p & 0xe0) == _SEQ2)
  317. {
  318. num_bytes = 2;
  319. high = (wchar_t)(*p & 0x1f);
  320. }
  321. else if ((*p & 0xf0) == _SEQ3)
  322. {
  323. num_bytes = 3;
  324. high = (wchar_t)(*p & 0x0f);
  325. }
  326. else if ((*p & 0xf8) == _SEQ4)
  327. {
  328. num_bytes = 4;
  329. high = (wchar_t)(*p & 0x07);
  330. }
  331. else if ((*p & 0xfc) == _SEQ5)
  332. {
  333. num_bytes = 5;
  334. high = (wchar_t)(*p & 0x03);
  335. }
  336. else if ((*p & 0xfe) == _SEQ6)
  337. {
  338. num_bytes = 6;
  339. high = (wchar_t)(*p & 0x01);
  340. }
  341. else
  342. {
  343. return false;
  344. }
  345. // Does the sequence header tell us the truth about length?
  346. if (lim - p <= num_bytes - 1)
  347. {
  348. return false;
  349. }
  350. // Validate the sequence. All symbols must have higher bits set to 10xxxxxx.
  351. if (num_bytes > 1)
  352. {
  353. int i;
  354. for (i = 1; i < num_bytes; i++)
  355. {
  356. if ((p[i] & 0xc0) != _NXT)
  357. break;
  358. }
  359. if (i != num_bytes)
  360. {
  361. return false;
  362. }
  363. }
  364. // Make up a single UCS-4 (32-bit) character from the required number of UTF-8 tokens. The first byte has
  365. // been determined earlier, the second and subsequent bytes contribute the first six of their bits into the
  366. // final character code.
  367. unsigned int ucs4_char = 0;
  368. int num_bits = 0;
  369. for (int i = 1; i < num_bytes; i++)
  370. {
  371. ucs4_char |= (word)(p[num_bytes - i] & 0x3f) << num_bits;
  372. num_bits += 6;
  373. }
  374. ucs4_char |= high << num_bits;
  375. // Check for surrogate pairs.
  376. if (__wchar_forbidden(ucs4_char) != 0)
  377. {
  378. return false;
  379. }
  380. // Only add the character to the output if it exists in the Basic Multilingual Plane (ie, fits in a single
  381. // word).
  382. if (ucs4_char <= 0xffff)
  383. output.push_back((word) ucs4_char);
  384. }
  385. output.push_back(0);
  386. return true;
  387. }
  388. // Converts a vector of words in UCS-2 encoding a character array in UTF-8 encoding.
  389. bool StringUtilities::UCS2toUTF8(const std::vector< word >& input, String& output)
  390. {
  391. return UCS2toUTF8(&input[0], input.size(), output);
  392. }
  393. // Converts an array of words in UCS-2 encoding into a character array in UTF-8 encoding.
  394. bool StringUtilities::UCS2toUTF8(const word* input, size_t input_size, String& output)
  395. {
  396. unsigned char *oc;
  397. size_t n;
  398. word* w = (word*) input;
  399. word* wlim = w + input_size;
  400. //Log::Message(LC_CORE, Log::LT_ALWAYS, "UCS2TOUTF8 size: %d", input_size);
  401. for (; w < wlim; w++)
  402. {
  403. if (__wchar_forbidden(*w) != 0)
  404. return false;
  405. if (*w == _BOM)
  406. continue;
  407. //if (*w < 0)
  408. // return false;
  409. if (*w <= 0x007f)
  410. n = 1;
  411. else if (*w <= 0x07ff)
  412. n = 2;
  413. else //if (*w <= 0x0000ffff)
  414. n = 3;
  415. /*else if (*w <= 0x001fffff)
  416. n = 4;
  417. else if (*w <= 0x03ffffff)
  418. n = 5;
  419. else // if (*w <= 0x7fffffff)
  420. n = 6;*/
  421. // Convert to little endian.
  422. word ch = (*w >> 8) & 0x00FF;
  423. ch |= (*w << 8) & 0xFF00;
  424. // word ch = EMPConvertEndian(*w, ROCKET_ENDIAN_BIG);
  425. oc = (unsigned char *)&ch;
  426. switch (n)
  427. {
  428. case 1:
  429. output += oc[1];
  430. break;
  431. case 2:
  432. output += (_SEQ2 | (oc[1] >> 6) | ((oc[0] & 0x07) << 2));
  433. output += (_NXT | oc[1] & 0x3f);
  434. break;
  435. case 3:
  436. output += (_SEQ3 | ((oc[0] & 0xf0) >> 4));
  437. output += (_NXT | (oc[1] >> 6) | ((oc[0] & 0x0f) << 2));
  438. output += (_NXT | oc[1] & 0x3f);
  439. break;
  440. case 4:
  441. break;
  442. case 5:
  443. break;
  444. case 6:
  445. break;
  446. }
  447. //Log::Message(LC_CORE, Log::LT_ALWAYS, "Converting...%c(%d) %d -> %d", *w, *w, w - input, output.Length());
  448. }
  449. return true;
  450. }
  451. // Strip whitespace characters from the beginning and end of a string.
  452. String StringUtilities::StripWhitespace(const String& string)
  453. {
  454. const char* start = string.CString();
  455. const char* end = start + string.Length();
  456. while (start < end && IsWhitespace(*start))
  457. start++;
  458. while (end > start && IsWhitespace(*(end - 1)))
  459. end--;
  460. if (start < end)
  461. return String(start, end);
  462. return String();
  463. }
  464. ////////////////////////////////////////////////////////////////////////////
  465. // GetOpt - Public Domain Software
  466. ////////////////////////////////////////////////////////////////////////////
  467. /* transcript/src/getopt.c
  468. *
  469. * public domain getopt from mod.sources
  470. * RCSID: $Header: getopt.c,v 2.1 85/11/24 11:49:10 shore Rel $
  471. */
  472. /*
  473. ** This is a public domain version of getopt(3).
  474. ** Bugs, fixes to:
  475. ** Keith Bostic
  476. ** ARPA: keith@seismo
  477. ** UUCP: seismo!keith
  478. ** Added NO_STDIO, opterr handling, Rich $alz (mirror!rs).
  479. */
  480. /*
  481. ** Error macro. Maybe we want stdio, maybe we don't.
  482. ** The (undocumented?) variable opterr tells us whether or not
  483. ** to print errors.
  484. */
  485. #define tell(s) \
  486. if (arg_state.display_errors) \
  487. { \
  488. (void)fputs(*nargv, stderr); \
  489. (void)fputs(s,stderr); \
  490. (void)fputc(arg_state.option, stderr); \
  491. (void)fputc('\n', stderr); \
  492. }
  493. /* Global variables. */
  494. static char EMSG[] = "";
  495. int StringUtilities::GetOpt( int nargc, char* nargv[], char* ostr, ArgumentState& arg_state )
  496. {
  497. static char *place = EMSG; /* option letter processing */
  498. register char *oli; /* option letter list index */
  499. if (!*place) /* update scanning pointer */
  500. {
  501. if (arg_state.index >= nargc || *(place = nargv[arg_state.index]) != '-' || !*++place)
  502. return(EOF);
  503. if (*place == '-') /* found "--" */
  504. {
  505. arg_state.index++;
  506. return(EOF);
  507. }
  508. }
  509. /* option letter okay? */
  510. if ((arg_state.option = *place++) == ':' || (oli = strchr(ostr, arg_state.option)) == NULL)
  511. {
  512. if (!*place)
  513. arg_state.index++;
  514. tell(": illegal option -- ");
  515. goto Bad;
  516. }
  517. if (*++oli != ':') /* don't need argument */
  518. {
  519. arg_state.argument = NULL;
  520. if (!*place)
  521. arg_state.index++;
  522. }
  523. else /* need an argument */
  524. {
  525. if (*place)
  526. arg_state.argument = place; /* no white space */
  527. else
  528. if (nargc <= ++arg_state.index)
  529. {
  530. place = EMSG;
  531. tell(": option requires an argument -- ");
  532. goto Bad;
  533. }
  534. else
  535. arg_state.argument = nargv[arg_state.index]; /* white space */
  536. place = EMSG;
  537. arg_state.index++;
  538. }
  539. return(arg_state.option); /* dump back option letter */
  540. Bad:
  541. return('?');
  542. }
  543. ////////////////////////////////////////////////////////////////////////////
  544. // MD5 Algorithm
  545. ////////////////////////////////////////////////////////////////////////////
  546. /* This function is the RSA Data Security, Inc. MD5 Message-Digest Algorithm
  547. BE WARNED: This code is ripped straight from the RFC, and is very ugly. Read at your own peril.
  548. The only function that is human written is the last one, right at the bottom of this file */
  // POINTER defines a generic pointer type
  typedef unsigned char *POINTER;
  // UINT2 defines a two byte word
  typedef unsigned short int UINT2;
  // UINT4 defines a four byte word. It must be exactly 32 bits wide: ROTATE_LEFT and the bit-count
  // carry in MD5Update rely on 32-bit wrap-around. 'unsigned long' is 64 bits on LP64 platforms
  // and produces wrong digests there, so 'unsigned int' is used instead.
  typedef unsigned int UINT4;
  // Fails to compile if UINT4 is not four bytes on this platform.
  typedef char MD5_UINT4_must_be_four_bytes[(sizeof(UINT4) == 4) ? 1 : -1];

  #define PROTO_LIST(list) list

  // Working state of an MD5 computation.
  typedef struct
  {
    UINT4 state[4]; // state (ABCD)
    UINT4 count[2]; // number of bits, modulo 2^64 (lsb first)
    unsigned char buffer[64]; // input buffer
  } MD5_CTX;

  void MD5Init PROTO_LIST ((MD5_CTX *));
  void MD5Update PROTO_LIST ((MD5_CTX *, unsigned char *, unsigned int));
  void MD5Final PROTO_LIST ((unsigned char [16], MD5_CTX *));
  // Left-rotation amounts used by MD5Transform: Sij is the j-th shift of round i.
  enum
  {
    S11 = 7, S12 = 12, S13 = 17, S14 = 22,
    S21 = 5, S22 = 9,  S23 = 14, S24 = 20,
    S31 = 4, S32 = 11, S33 = 16, S34 = 23,
    S41 = 6, S42 = 10, S43 = 15, S44 = 21
  };
  582. void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64]));
  583. void Encode PROTO_LIST ((unsigned char *, UINT4 *, unsigned int));
  584. void Decode PROTO_LIST ((UINT4 *, unsigned char *, unsigned int));
  585. void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
  586. void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
  587. static unsigned char MD5_PADDING[64] = {
  588. 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  589. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  590. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  591. };
  // F, G, H and I are basic MD5 functions. Every use of a parameter is parenthesised, including
  // under '~', so the macros stay correct when handed a compound expression.
  #define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
  #define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
  #define H(x, y, z) ((x) ^ (y) ^ (z))
  #define I(x, y, z) ((y) ^ ((x) | (~(z))))

  // ROTATE_LEFT rotates x left n bits. x must be a 32-bit unsigned value.
  #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))

  /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
  Rotation is separate from addition to prevent recomputation.
  Each macro is wrapped in do/while (0) so that it expands to exactly one statement. */
  #define FF(a, b, c, d, x, s, ac) \
    do \
    { \
      (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
      (a) = ROTATE_LEFT ((a), (s)); \
      (a) += (b); \
    } while (0)
  #define GG(a, b, c, d, x, s, ac) \
    do \
    { \
      (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
      (a) = ROTATE_LEFT ((a), (s)); \
      (a) += (b); \
    } while (0)
  #define HH(a, b, c, d, x, s, ac) \
    do \
    { \
      (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
      (a) = ROTATE_LEFT ((a), (s)); \
      (a) += (b); \
    } while (0)
  #define II(a, b, c, d, x, s, ac) \
    do \
    { \
      (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
      (a) = ROTATE_LEFT ((a), (s)); \
      (a) += (b); \
    } while (0)
  625. // MD5 initialization. Begins an MD5 operation, writing a new context.
  626. void MD5Init (MD5_CTX *context)
  627. {
  628. context->count[0] = context->count[1] = 0;
  629. /* Load magic initialization constants.*/
  630. context->state[0] = 0x67452301;
  631. context->state[1] = 0xefcdab89;
  632. context->state[2] = 0x98badcfe;
  633. context->state[3] = 0x10325476;
  634. }
  635. /* MD5 block update operation. Continues an MD5 message-digest
  636. operation, processing another message block, and updating the
  637. context. */
  638. void MD5Update (MD5_CTX *context, unsigned char *input, unsigned int inputLen)
  639. {
  640. unsigned int i, index, partLen;
  641. // Compute number of bytes mod 64
  642. index = (unsigned int)((context->count[0] >> 3) & 0x3F);
  643. // Update number of bits
  644. if ((context->count[0] += ((UINT4)inputLen << 3)) < ((UINT4)inputLen << 3))
  645. context->count[1]++;
  646. context->count[1] += ((UINT4)inputLen >> 29);
  647. partLen = 64 - index;
  648. // Transform as many times as possible.
  649. if (inputLen >= partLen)
  650. {
  651. MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)input, partLen);
  652. MD5Transform (context->state, context->buffer);
  653. for (i = partLen; i + 63 < inputLen; i += 64)
  654. MD5Transform (context->state, &input[i]);
  655. index = 0;
  656. }
  657. else
  658. i = 0;
  659. // Buffer remaining input
  660. MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)&input[i],
  661. inputLen-i);
  662. }
  663. /* MD5 finalization. Ends an MD5 message-digest operation, writing the
  664. the message digest and zeroizing the context. */
  665. void MD5Final (unsigned char digest[16], MD5_CTX *context)
  666. {
  667. unsigned char bits[8];
  668. unsigned int index, padLen;
  669. // Save number of bits
  670. Encode (bits, context->count, 8);
  671. // Pad out to 56 mod 64.
  672. index = (unsigned int)((context->count[0] >> 3) & 0x3f);
  673. padLen = (index < 56) ? (56 - index) : (120 - index);
  674. MD5Update (context, MD5_PADDING, padLen);
  675. // Append length (before padding)
  676. MD5Update (context, bits, 8);
  677. // Store state in digest
  678. Encode (digest, context->state, 16);
  679. // Zeroize sensitive information.
  680. MD5_memset ((POINTER)context, 0, sizeof (*context));
  681. }
  // MD5 basic transformation. Transforms state based on block.
  //
  // 'state' holds the four chaining values (A, B, C, D) and is updated in place; 'block' is the
  // 64-byte chunk to absorb. The block is first decoded into sixteen words, then mixed into local
  // copies of the state by four rounds of sixteen steps each. The numbered comment on each step
  // is its position (1-64) in the sequence.
  void MD5Transform (UINT4 state[4], unsigned char block[64])
  {
    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
    Decode (x, block, 64);
    // Round 1
    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
    FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
    FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
    FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
    FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
    FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
    FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
    FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
    FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
    FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
    FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
    // Round 2
    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
    GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
    GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
    GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
    GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
    GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
    GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
    // Round 3
    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
    HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
    HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
    HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
    HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
    HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
    HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
    HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
    // Round 4
    II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
    II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
    II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
    II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
    II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
    II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
    II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
    II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
    II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
    II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
    II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
    II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
    // Fold the mixed values back into the chaining state.
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    // Zeroize sensitive information.
    MD5_memset ((POINTER)x, 0, sizeof (x));
  }
  762. // Encodes input (UINT4) into output (unsigned char). Assumes len is a multiple of 4.
  763. void Encode (unsigned char *output, UINT4 *input, unsigned int len)
  764. {
  765. unsigned int i, j;
  766. for (i = 0, j = 0; j < len; i++, j += 4)
  767. {
  768. output[j] = (unsigned char)(input[i] & 0xff);
  769. output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
  770. output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
  771. output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
  772. }
  773. }
  774. // Decodes input (unsigned char) into output (UINT4). Assumes len is a multiple of 4.
  775. void Decode (UINT4 *output, unsigned char *input, unsigned int len)
  776. {
  777. unsigned int i, j;
  778. for (i = 0, j = 0; j < len; i++, j += 4)
  779. output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) | (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
  780. }
  781. // Note: Replace "for loop" with standard memcpy if possible.
  782. void MD5_memcpy (POINTER output, POINTER input, unsigned int len)
  783. {
  784. unsigned int i;
  785. for (i = 0; i < len; i++)
  786. output[i] = input[i];
  787. }
  788. // Note: Replace "for loop" with standard memset if possible.
  789. void MD5_memset (POINTER output, int value, unsigned int len)
  790. {
  791. unsigned int i;
  792. for (i = 0; i < len; i++)
  793. ((char *)output)[i] = (char)value;
  794. }
  795. // Length of test block, number of test blocks.
  796. #define TEST_BLOCK_LEN 1000
  797. #define TEST_BLOCK_COUNT 1000
  798. void MDString PROTO_LIST ((char *));
  799. void MDTimeTrial PROTO_LIST ((void));
  800. void MDTestSuite PROTO_LIST ((void));
  801. void MDFile PROTO_LIST ((char *));
  802. void MDFilter PROTO_LIST ((void));
  803. String MDPrint PROTO_LIST ((unsigned char [16]));
  804. // Digests a string and returns the result.
  805. String MD5String (const char *string, int length)
  806. {
  807. MD5_CTX context;
  808. unsigned char digest[16];
  809. unsigned int len;
  810. if (length < 0)
  811. len = (unsigned int) strlen (string);
  812. else
  813. len = (unsigned int) length;
  814. MD5Init (&context);
  815. MD5Update (&context, (unsigned char*)string, len);
  816. MD5Final (digest, &context);
  817. return MDPrint(digest);
  818. }
  819. // Prints a message digest in hexadecimal.
  820. String MDPrint (unsigned char digest[16])
  821. {
  822. char hex_digest[33];
  823. for (unsigned int i = 0; i < 16; i++)
  824. sprintf(&(hex_digest[i * 2]), "%02x", digest[i]);
  825. return String(hex_digest);
  826. }
  827. // Operators for STL containers using strings.
  828. bool StringUtilities::StringComparei::operator()(const String& lhs, const String& rhs) const
  829. {
  830. return strcasecmp(lhs.CString(), rhs.CString()) < 0;
  831. }
  832. }
  833. }