string_utils.h 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /*
  2. Copyright (c) 2013 Daniele Bartolini, Michele Rossi
  3. Copyright (c) 2012 Daniele Bartolini, Simone Boscaratto
  4. Permission is hereby granted, free of charge, to any person
  5. obtaining a copy of this software and associated documentation
  6. files (the "Software"), to deal in the Software without
  7. restriction, including without limitation the rights to use,
  8. copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. copies of the Software, and to permit persons to whom the
  10. Software is furnished to do so, subject to the following
  11. conditions:
  12. The above copyright notice and this permission notice shall be
  13. included in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  16. OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  19. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. OTHER DEALINGS IN THE SOFTWARE.
  22. */
  23. #pragma once
  24. #include <cstdio>
  25. #include <cstring>
  26. #include "assert.h"
  27. #include "types.h"
  28. #include "config.h"
  29. #include "macros.h"
  30. namespace crown
  31. {
  /// Returns whether @a c lies in 'a'..'z' or in 'A'..'Z'.
  inline bool is_alpha(char c)
  {
      const bool lowercase = ('a' <= c) && (c <= 'z');
      const bool uppercase = ('A' <= c) && (c <= 'Z');
      return lowercase || uppercase;
  }
  /// Returns whether @a c lies in '0'..'9'.
  inline bool is_digit(char c)
  {
      return ('0' <= c) && (c <= '9');
  }
  /// Returns whether @a c lies in 'A'..'Z'.
  inline bool is_upper(char c)
  {
      // Reject everything outside the closed range ['A', 'Z'].
      return !(c < 'A' || c > 'Z');
  }
  /// Returns whether @a c lies in 'a'..'z'.
  inline bool is_lower(char c)
  {
      if (c < 'a')
      {
          return false;
      }

      return c <= 'z';
  }
  /// Returns whether @a c is a space, horizontal tab, line feed or
  /// carriage return.
  inline bool is_whitespace(char c)
  {
      switch (c)
      {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
              return true;
          default:
              return false;
      }
  }
  /// Returns the number of characters in @a str before its NUL terminator.
  /// Forwards directly to the C library ::strlen; @a str is not checked
  /// for NULL here.
  inline size_t strlen(const char* str)
  {
      return ::strlen(str);
  }
  /// Returns a pointer to the first occurrence of @a str2 inside @a str1,
  /// or NULL when there is none. Forwards directly to the C library ::strstr.
  inline const char* strstr(const char* str1, const char* str2)
  {
      return ::strstr(str1, str2);
  }
  /// Compares @a str1 with @a str2 by forwarding to the C library ::strcmp.
  /// The result is negative, zero or positive when @a str1 orders before,
  /// equal to or after @a str2.
  inline int32_t strcmp(const char* str1, const char* str2)
  {
      return ::strcmp(str1, str2);
  }
  /// Compares at most @a len characters of @a s1 and @a s2 by forwarding
  /// to the C library ::strncmp. The result is negative, zero or positive
  /// when @a s1 orders before, equal to or after @a s2.
  inline int32_t strncmp(const char* s1, const char* s2, size_t len)
  {
      return ::strncmp(s1, s2, len);
  }
  /// Copies @a src into @a dest, where @a len is the size of @a dest in
  /// bytes, and always leaves @a dest NUL-terminated.
  ///
  /// Unlike the C library ::strncpy, the last byte of the buffer
  /// (dest[len - 1]) is forced to '\0', so a source that does not fit is
  /// truncated to len - 1 characters.
  ///
  /// When @a len is 0 nothing is written and @a dest is returned untouched.
  ///
  /// @return @a dest
  inline char* strncpy(char* dest, const char* src, size_t len)
  {
      // A zero-sized buffer has no room for the terminator: without this
      // guard 'len - 1' would wrap around and write far outside of dest.
      if (len == 0)
      {
          return dest;
      }

      char* ret = ::strncpy(dest, src, len);
      dest[len - 1] = '\0';

      return ret;
  }
  /// Appends @a src to the end of @a dest by forwarding to the C library
  /// ::strcat. No bounds are checked here: @a dest must already be large
  /// enough to hold the concatenated string and its terminator.
  inline char* strcat(char* dest, const char* src)
  {
      return ::strcat(dest, src);
  }
  /// Appends at most @a len characters of @a src to the end of @a dest by
  /// forwarding to the C library ::strncat.
  ///
  /// @note
  /// @a len bounds the number of characters taken from @a src, not the size
  /// of @a dest; ::strncat writes a terminator after the appended characters.
  inline char* strncat(char* dest, const char* src, size_t len)
  {
      return ::strncat(dest, src, len);
  }
  /// Returns a pointer to the first character of @a str, i.e. @a str itself.
  /// Asserts that @a str is not NULL.
  inline const char* begin(const char* str)
  {
      CE_ASSERT(str != NULL, "Str must be != NULL");
      return str;
  }
  87. inline const char* end(const char* str)
  88. {
  89. CE_ASSERT(str != NULL, "Str must be != NULL");
  90. return str + strlen(str) + 1;
  91. }
  92. inline const char* find_first(const char* str, char c)
  93. {
  94. CE_ASSERT(str != NULL, "Str must be != NULL");
  95. const char* str_begin = begin(str);
  96. while (str_begin != end(str))
  97. {
  98. if ((*str_begin) == c)
  99. {
  100. return str_begin;
  101. }
  102. str_begin++;
  103. }
  104. return end(str);
  105. }
  106. inline const char* find_last(const char* str, char c)
  107. {
  108. CE_ASSERT(str != NULL, "Str must be != NULL");
  109. const char* str_end = end(str) - 1;
  110. while (str_end != begin(str) - 1)
  111. {
  112. if ((*str_end) == c)
  113. {
  114. return str_end;
  115. }
  116. str_end--;
  117. }
  118. return end(str);
  119. }
  120. inline void substring(const char* begin, const char* end, char* out, size_t len)
  121. {
  122. CE_ASSERT(begin != NULL, "Begin must be != NULL");
  123. CE_ASSERT(end != NULL, "End must be != NULL");
  124. CE_ASSERT(out != NULL, "Out must be != NULL");
  125. size_t i = 0;
  126. char* out_iterator = out;
  127. while (begin != end && i < len)
  128. {
  129. (*out_iterator) = (*begin);
  130. begin++;
  131. out_iterator++;
  132. i++;
  133. }
  134. out[i] = '\0';
  135. }
  136. inline int32_t parse_int(const char* string)
  137. {
  138. int val;
  139. int ok = sscanf(string, "%d", &val);
  140. CE_ASSERT(ok == 1, "Failed to parse int: %s", string);
  141. CE_UNUSED(ok);
  142. return val;
  143. }
  144. inline uint32_t parse_uint(const char* string)
  145. {
  146. unsigned int val;
  147. int ok = sscanf(string, "%u", &val);
  148. CE_ASSERT(ok == 1, "Failed to parse uint: %s", string);
  149. CE_UNUSED(ok);
  150. return val;
  151. }
  152. inline float parse_float(const char* string)
  153. {
  154. float val;
  155. int ok = sscanf(string, "%f", &val);
  156. CE_ASSERT(ok == 1, "Failed to parse float: %s", string);
  157. CE_UNUSED(ok);
  158. return val;
  159. }
  160. inline double parse_double(const char* string)
  161. {
  162. double val;
  163. int ok = sscanf(string, "%lf", &val);
  164. CE_ASSERT(ok == 1, "Failed to parse float: %s", string);
  165. CE_UNUSED(ok);
  166. return val;
  167. }
  168. /// MurmurHash2, by Austin Appleby
  169. ///
  170. /// @note
  171. /// This code makes a few assumptions about how your machine behaves
  172. ///
  173. /// 1. We can read a 4-byte value from any address without crashing
  174. /// 2. sizeof(int) == 4
  175. ///
  176. /// And it has a few limitations -
  177. ///
  178. /// 1. It will not work incrementally.
  179. /// 2. It will not produce the same results on little-endian and big-endian
  180. /// machines.
  181. inline uint32_t murmur2_32(const void* key, size_t len, uint32_t seed = 0)
  182. {
  183. CE_ASSERT_NOT_NULL(key);
  184. // 'm' and 'r' are mixing constants generated offline.
  185. // They're not really 'magic', they just happen to work well.
  186. const unsigned int m = 0x5bd1e995;
  187. const int r = 24;
  188. // Initialize the hash to a 'random' value
  189. unsigned int h = seed ^ len;
  190. // Mix 4 bytes at a time into the hash
  191. const unsigned char * data = (const unsigned char *)key;
  192. while(len >= 4)
  193. {
  194. unsigned int k = *(unsigned int *)data;
  195. k *= m;
  196. k ^= k >> r;
  197. k *= m;
  198. h *= m;
  199. h ^= k;
  200. data += 4;
  201. len -= 4;
  202. }
  203. // Handle the last few bytes of the input array
  204. switch(len)
  205. {
  206. case 3: h ^= data[2] << 16;
  207. case 2: h ^= data[1] << 8;
  208. case 1: h ^= data[0];
  209. h *= m;
  210. };
  211. // Do a few final mixes of the hash to ensure the last few
  212. // bytes are well-incorporated.
  213. h ^= h >> 13;
  214. h *= m;
  215. h ^= h >> 15;
  216. return h;
  217. }
  /// 64-bit MurmurHash2 variant: hashes the @a len bytes starting at @a key
  /// into a 64-bit value, starting from @a seed.
  ///
  /// @note
  /// Input is consumed in 8-byte blocks that are copied out with memcpy, so
  /// @a key does not have to be aligned. Blocks are read in native byte
  /// order, so results differ between little-endian and big-endian machines.
  inline uint64_t murmur2_64(const void* key, int len, uint64_t seed = 0)
  {
      const uint64_t m = 0xc6a4a7935bd1e995ull;
      const int r = 47;

      uint64_t h = seed ^ (static_cast<uint64_t>(len) * m);

      const unsigned char* data = static_cast<const unsigned char*>(key);

      // Mix 8 bytes at a time into the hash. Counting blocks (rather than
      // comparing against an end pointer) keeps the loop bounded.
      const int block_count = len / 8;

      for (int i = 0; i < block_count; ++i)
      {
          // memcpy instead of a pointer cast: no alignment requirement and
          // no strict-aliasing violation, same value in native byte order.
          uint64_t k;
          memcpy(&k, data, sizeof(k));
          data += sizeof(k);

          k *= m;
          k ^= k >> r;
          k *= m;

          h ^= k;
          h *= m;
      }

      // Handle the last few bytes of the input array; 'data' now points at
      // the first byte after the last full block.
      switch (len & 7)
      {
          case 7: h ^= uint64_t(data[6]) << 48; // fallthrough
          case 6: h ^= uint64_t(data[5]) << 40; // fallthrough
          case 5: h ^= uint64_t(data[4]) << 32; // fallthrough
          case 4: h ^= uint64_t(data[3]) << 24; // fallthrough
          case 3: h ^= uint64_t(data[2]) << 16; // fallthrough
          case 2: h ^= uint64_t(data[1]) << 8;  // fallthrough
          case 1: h ^= uint64_t(data[0]);
                  h *= m;
                  break;
          default:
                  break;
      }

      // Final avalanche.
      h ^= h >> r;
      h *= m;
      h ^= h >> r;

      return h;
  }
  #ifdef CROWN_DEBUG
  /// Returns @a value, after asserting that it equals the murmur2_32 hash
  /// (seed 0) of the NUL-terminated string @a s.
  inline uint32_t HASH32(const char *s, uint32_t value)
  {
      CE_ASSERT(murmur2_32(s, strlen(s), 0) == value, "Hash mismatch");
      return value;
  }
  /// Returns @a value, after asserting that it equals the murmur2_64 hash
  /// (seed 0) of the NUL-terminated string @a s.
  inline uint64_t HASH64(const char* s, uint64_t value)
  {
      CE_ASSERT(murmur2_64(s, strlen(s), 0) == value, "Hash mismatch");
      return value;
  }
  #else
  // Without CROWN_DEBUG the string argument is discarded and the supplied
  // hash value is used as-is, with no verification. Being macros, these
  // names are not scoped by the enclosing namespace.
  #define HASH32(s, v) (v)
  #define HASH64(s, v) (v)
  #endif
  266. } // namespace crown