string.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. gb_global gbArena string_buffer_arena = {};
  2. gb_global gbAllocator string_buffer_allocator = {};
  3. void init_string_buffer_memory(void) {
  4. // NOTE(bill): This should be enough memory for file systems
  5. gb_arena_init_from_allocator(&string_buffer_arena, heap_allocator(), gb_megabytes(1));
  6. string_buffer_allocator = gb_arena_allocator(&string_buffer_arena);
  7. }
  8. // NOTE(bill): Used for UTF-8 strings
  9. struct String {
  10. u8 * text;
  11. isize len;
  12. u8 &operator[](isize i) {
  13. GB_ASSERT(0 <= i && i < len);
  14. return text[i];
  15. }
  16. u8 const &operator[](isize i) const {
  17. GB_ASSERT(0 <= i && i < len);
  18. return text[i];
  19. }
  20. };
  21. // NOTE(bill): used for printf style arguments
  22. #define LIT(x) ((int)(x).len), (x).text
  23. #define STR_LIT(c_str) {cast(u8 *)c_str, gb_size_of(c_str)-1}
  24. #if defined(GB_COMPILER_MSVC) && _MSC_VER < 1700
  25. #define str_lit(c_str) make_string(cast(u8 *)c_str, gb_size_of(c_str)-1)
  26. #else
  27. #define str_lit(c_str) String{cast(u8 *)c_str, gb_size_of(c_str)-1}
  28. #endif
  29. // NOTE(bill): String16 is only used for Windows due to its file directories
  30. struct String16 {
  31. wchar_t *text;
  32. isize len;
  33. wchar_t &operator[](isize i) {
  34. GB_ASSERT(0 <= i && i < len);
  35. return text[i];
  36. }
  37. wchar_t const &operator[](isize i) const {
  38. GB_ASSERT(0 <= i && i < len);
  39. return text[i];
  40. }
  41. };
  42. gb_inline String make_string(u8 *text, isize len) {
  43. String s;
  44. s.text = text;
  45. if (len < 0) {
  46. len = gb_strlen(cast(char *)text);
  47. }
  48. s.len = len;
  49. return s;
  50. }
  51. gb_inline String16 make_string16(wchar_t *text, isize len) {
  52. String16 s;
  53. s.text = text;
  54. s.len = len;
  55. return s;
  56. }
  57. isize string16_len(wchar_t *s) {
  58. if (s == NULL) {
  59. return 0;
  60. }
  61. wchar_t *p = s;
  62. while (*p) {
  63. p++;
  64. }
  65. return p - s;
  66. }
  67. gb_inline String make_string_c(char *text) {
  68. return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
  69. }
  70. gb_inline bool str_eq_ignore_case(String a, String b) {
  71. if (a.len == b.len) {
  72. isize i;
  73. for (i = 0; i < a.len; i++) {
  74. char x = cast(char)a[i];
  75. char y = cast(char)b[i];
  76. if (gb_char_to_lower(x) != gb_char_to_lower(y))
  77. return false;
  78. }
  79. return true;
  80. }
  81. return false;
  82. }
  83. int string_compare(String x, String y) {
  84. if (!(x.len == y.len &&
  85. x.text == y.text)) {
  86. isize n, fast, offset, curr_block;
  87. isize *la, *lb;
  88. isize pos;
  89. n = gb_min(x.len, y.len);
  90. fast = n/gb_size_of(isize) + 1;
  91. offset = (fast-1)*gb_size_of(isize);
  92. curr_block = 0;
  93. if (n <= gb_size_of(isize)) {
  94. fast = 0;
  95. }
  96. la = cast(isize *)x.text;
  97. lb = cast(isize *)y.text;
  98. for (; curr_block < fast; curr_block++) {
  99. if (la[curr_block] ^ lb[curr_block]) {
  100. for (pos = curr_block*gb_size_of(isize); pos < n; pos++) {
  101. if (x[pos] ^ y[pos]) {
  102. return cast(int)x[pos] - cast(int)y[pos];
  103. }
  104. }
  105. }
  106. }
  107. for (; offset < n; offset++) {
  108. if (x[offset] ^ y[offset]) {
  109. return cast(int)x[offset] - cast(int)y[offset];
  110. }
  111. }
  112. }
  113. return 0;
  114. }
  115. GB_COMPARE_PROC(string_cmp_proc) {
  116. String x = *(String *)a;
  117. String y = *(String *)b;
  118. return string_compare(x, y);
  119. }
  120. gb_inline bool str_eq(String a, String b) { return a.len == b.len ? gb_memcompare(a.text, b.text, a.len) == 0 : false; }
  121. gb_inline bool str_ne(String a, String b) { return !str_eq(a, b); }
  122. gb_inline bool str_lt(String a, String b) { return string_compare(a, b) < 0; }
  123. gb_inline bool str_gt(String a, String b) { return string_compare(a, b) > 0; }
  124. gb_inline bool str_le(String a, String b) { return string_compare(a, b) <= 0; }
  125. gb_inline bool str_ge(String a, String b) { return string_compare(a, b) >= 0; }
  126. bool operator == (String a, String b) { return str_eq(a, b); }
  127. bool operator != (String a, String b) { return str_ne(a, b); }
  128. bool operator < (String a, String b) { return str_lt(a, b); }
  129. bool operator > (String a, String b) { return str_gt(a, b); }
  130. bool operator <= (String a, String b) { return str_le(a, b); }
  131. bool operator >= (String a, String b) { return str_ge(a, b); }
  132. template <isize N> bool operator == (String a, char const (&b)[N]) { return str_eq(a, make_string(cast(u8 *)b, N-1)); }
  133. template <isize N> bool operator != (String a, char const (&b)[N]) { return str_ne(a, make_string(cast(u8 *)b, N-1)); }
  134. template <isize N> bool operator < (String a, char const (&b)[N]) { return str_lt(a, make_string(cast(u8 *)b, N-1)); }
  135. template <isize N> bool operator > (String a, char const (&b)[N]) { return str_gt(a, make_string(cast(u8 *)b, N-1)); }
  136. template <isize N> bool operator <= (String a, char const (&b)[N]) { return str_le(a, make_string(cast(u8 *)b, N-1)); }
  137. template <isize N> bool operator >= (String a, char const (&b)[N]) { return str_ge(a, make_string(cast(u8 *)b, N-1)); }
  138. gb_inline bool str_has_prefix(String s, String prefix) {
  139. isize i;
  140. if (prefix.len < s.len) {
  141. return false;
  142. }
  143. for (i = 0; i < prefix.len; i++) {
  144. if (s[i] != prefix[i]) {
  145. return false;
  146. }
  147. }
  148. return true;
  149. }
  150. gb_inline isize string_extension_position(String str) {
  151. isize dot_pos = -1;
  152. isize i = str.len;
  153. bool seen_dot = false;
  154. while (i --> 0) {
  155. if (str[i] == GB_PATH_SEPARATOR)
  156. break;
  157. if (str[i] == '.') {
  158. dot_pos = i;
  159. break;
  160. }
  161. }
  162. return dot_pos;
  163. }
  164. String string_trim_whitespace(String str) {
  165. while (str.len > 0 && rune_is_whitespace(str[str.len-1])) {
  166. str.len--;
  167. }
  168. while (str.len > 0 && rune_is_whitespace(str[0])) {
  169. str.text++;
  170. str.len--;
  171. }
  172. return str;
  173. }
  174. gb_inline bool string_has_extension(String str, String ext) {
  175. str = string_trim_whitespace(str);
  176. if (str.len <= ext.len+1) {
  177. return false;
  178. }
  179. isize len = str.len;
  180. for (isize i = len-1; i >= 0; i--) {
  181. if (str[i] == '.') {
  182. break;
  183. }
  184. len--;
  185. }
  186. if (len == 0) {
  187. return false;
  188. }
  189. u8 *s = str.text + len;
  190. return gb_memcompare(s, ext.text, ext.len) == 0;
  191. }
  192. bool string_contains_char(String s, u8 c) {
  193. isize i;
  194. for (i = 0; i < s.len; i++) {
  195. if (s[i] == c)
  196. return true;
  197. }
  198. return false;
  199. }
  200. String filename_from_path(String s) {
  201. isize i = string_extension_position(s);
  202. if (i > 0) {
  203. isize j = 0;
  204. s.len = i;
  205. for (j = i-1; j >= 0; j--) {
  206. if (s[j] == '/' ||
  207. s[j] == '\\') {
  208. break;
  209. }
  210. }
  211. s.text += j+1;
  212. s.len = i-j-1;
  213. }
  214. return make_string(NULL, 0);
  215. }
  216. #if defined(GB_SYSTEM_WINDOWS)
  217. int convert_multibyte_to_widechar(char *multibyte_input, int input_length, wchar_t *output, int output_size) {
  218. return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, multibyte_input, input_length, output, output_size);
  219. }
  220. int convert_widechar_to_multibyte(wchar_t *widechar_input, int input_length, char *output, int output_size) {
  221. return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, widechar_input, input_length, output, output_size, NULL, NULL);
  222. }
  223. #elif defined(GB_SYSTEM_UNIX) || defined(GB_SYSTEM_OSX)
  224. #include <iconv.h>
  225. int convert_multibyte_to_widechar(char *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
  226. iconv_t conv = iconv_open("WCHAR_T", "UTF-8");
  227. size_t result = iconv(conv, cast(char **)&multibyte_input, &input_length, cast(char **)&output, &output_size);
  228. iconv_close(conv);
  229. return (int) result;
  230. }
  231. int convert_widechar_to_multibyte(wchar_t* widechar_input, usize input_length, char* output, usize output_size) {
  232. iconv_t conv = iconv_open("UTF-8", "WCHAR_T");
  233. size_t result = iconv(conv, (char**) &widechar_input, &input_length, (char**) &output, &output_size);
  234. iconv_close(conv);
  235. return (int) result;
  236. }
  237. #else
  238. #error Implement system
  239. #endif
  240. // TODO(bill): Make this non-windows specific
  241. String16 string_to_string16(gbAllocator a, String s) {
  242. int len, len1;
  243. wchar_t *text;
  244. if (s.len < 1) {
  245. return make_string16(NULL, 0);
  246. }
  247. len = convert_multibyte_to_widechar(cast(char *)s.text, s.len, NULL, 0);
  248. if (len == 0) {
  249. return make_string16(NULL, 0);
  250. }
  251. text = gb_alloc_array(a, wchar_t, len+1);
  252. len1 = convert_multibyte_to_widechar(cast(char *)s.text, s.len, text, len);
  253. if (len1 == 0) {
  254. gb_free(a, text);
  255. return make_string16(NULL, 0);
  256. }
  257. text[len] = 0;
  258. return make_string16(text, len-1);
  259. }
  260. String string16_to_string(gbAllocator a, String16 s) {
  261. int len, len1;
  262. u8 *text;
  263. if (s.len < 1) {
  264. return make_string(NULL, 0);
  265. }
  266. len = convert_widechar_to_multibyte(s.text, s.len, NULL, 0);
  267. if (len == 0) {
  268. return make_string(NULL, 0);
  269. }
  270. len += 1; // NOTE(bill): It needs an extra 1 for some reason
  271. text = gb_alloc_array(a, u8, len+1);
  272. len1 = convert_widechar_to_multibyte(s.text, s.len, cast(char *)text, len);
  273. if (len1 == 0) {
  274. gb_free(a, text);
  275. return make_string(NULL, 0);
  276. }
  277. text[len] = 0;
  278. return make_string(text, len-1);
  279. }
  280. bool unquote_char(String s, u8 quote, Rune *rune, bool *multiple_bytes, String *tail_string) {
  281. u8 c;
  282. if (s[0] == quote &&
  283. (quote == '\'' || quote == '"')) {
  284. return false;
  285. } else if (s[0] >= 0x80) {
  286. Rune r = -1;
  287. isize size = gb_utf8_decode(s.text, s.len, &r);
  288. *rune = r;
  289. *multiple_bytes = true;
  290. *tail_string = make_string(s.text+size, s.len-size);
  291. return true;
  292. } else if (s[0] != '\\') {
  293. *rune = s[0];
  294. *tail_string = make_string(s.text+1, s.len-1);
  295. return true;
  296. }
  297. if (s.len <= 1) {
  298. return false;
  299. }
  300. c = s[1];
  301. s = make_string(s.text+2, s.len-2);
  302. switch (c) {
  303. default: return false;
  304. case 'a': *rune = '\a'; break;
  305. case 'b': *rune = '\b'; break;
  306. case 'f': *rune = '\f'; break;
  307. case 'n': *rune = '\n'; break;
  308. case 'r': *rune = '\r'; break;
  309. case 't': *rune = '\t'; break;
  310. case 'v': *rune = '\v'; break;
  311. case '\\': *rune = '\\'; break;
  312. case '\'':
  313. case '"':
  314. if (c != quote) {
  315. return false;
  316. }
  317. *rune = c;
  318. break;
  319. case '0':
  320. case '1':
  321. case '2':
  322. case '3':
  323. case '4':
  324. case '5':
  325. case '6':
  326. case '7': {
  327. isize i;
  328. i32 r = gb_digit_to_int(c);
  329. if (s.len < 2) {
  330. return false;
  331. }
  332. for (i = 0; i < 2; i++) {
  333. i32 d = gb_digit_to_int(s[i]);
  334. if (d < 0 || d > 7) {
  335. return false;
  336. }
  337. r = (r<<3) | d;
  338. }
  339. s = make_string(s.text+2, s.len-2);
  340. if (r > 0xff) {
  341. return false;
  342. }
  343. *rune = r;
  344. } break;
  345. case 'x':
  346. case 'u':
  347. case 'U': {
  348. Rune r = 0;
  349. isize i, count = 0;
  350. switch (c) {
  351. case 'x': count = 2; break;
  352. case 'u': count = 4; break;
  353. case 'U': count = 8; break;
  354. }
  355. if (s.len < count) {
  356. return false;
  357. }
  358. for (i = 0; i < count; i++) {
  359. i32 d = gb_hex_digit_to_int(s[i]);
  360. if (d < 0) {
  361. return false;
  362. }
  363. r = (r<<4) | d;
  364. }
  365. s = make_string(s.text+count, s.len-count);
  366. if (c == 'x') {
  367. *rune = r;
  368. break;
  369. }
  370. if (r > GB_RUNE_MAX) {
  371. return false;
  372. }
  373. *rune = r;
  374. *multiple_bytes = true;
  375. } break;
  376. }
  377. *tail_string = s;
  378. return true;
  379. }
  380. // 0 == failure
  381. // 1 == original memory
  382. // 2 == new allocation
  383. i32 unquote_string(gbAllocator a, String *s_) {
  384. String s = *s_;
  385. isize n = s.len;
  386. u8 quote;
  387. if (n < 2) {
  388. return 0;
  389. }
  390. quote = s[0];
  391. if (quote != s[n-1]) {
  392. return 0;
  393. }
  394. s.text += 1;
  395. s.len -= 2;
  396. if (quote == '`') {
  397. if (string_contains_char(s, '`')) {
  398. return 0;
  399. }
  400. *s_ = s;
  401. return 1;
  402. }
  403. if (quote != '"' && quote != '\'') {
  404. return 0;
  405. }
  406. if (string_contains_char(s, '\n')) {
  407. return 0;
  408. }
  409. if (!string_contains_char(s, '\\') && !string_contains_char(s, quote)) {
  410. if (quote == '"') {
  411. *s_ = s;
  412. return 1;
  413. } else if (quote == '\'') {
  414. Rune r = GB_RUNE_INVALID;
  415. isize size = gb_utf8_decode(s.text, s.len, &r);
  416. if ((size == s.len) && (r != -1 || size != 1)) {
  417. *s_ = s;
  418. return 1;
  419. }
  420. }
  421. }
  422. {
  423. u8 rune_temp[4] = {};
  424. isize buf_len = 3*s.len / 2;
  425. u8 *buf = gb_alloc_array(a, u8, buf_len);
  426. isize offset = 0;
  427. while (s.len > 0) {
  428. String tail_string = {};
  429. Rune r = 0;
  430. bool multiple_bytes = false;
  431. bool success = unquote_char(s, quote, &r, &multiple_bytes, &tail_string);
  432. if (!success) {
  433. gb_free(a, buf);
  434. return 0;
  435. }
  436. s = tail_string;
  437. if (r < 0x80 || !multiple_bytes) {
  438. buf[offset++] = cast(u8)r;
  439. } else {
  440. isize size = gb_utf8_encode_rune(rune_temp, r);
  441. gb_memmove(buf+offset, rune_temp, size);
  442. offset += size;
  443. }
  444. if (quote == '\'' && s.len != 0) {
  445. gb_free(a, buf);
  446. return 0;
  447. }
  448. }
  449. *s_ = make_string(buf, offset);
  450. }
  451. return 2;
  452. }