string.cpp 17 KB


  1. gb_global gbArena string_buffer_arena = {};
  2. gb_global gbAllocator string_buffer_allocator = {};
  3. gb_global gbMutex string_buffer_mutex = {};
  4. void init_string_buffer_memory(void) {
  5. // NOTE(bill): This should be enough memory for file systems
  6. gb_arena_init_from_allocator(&string_buffer_arena, heap_allocator(), gb_megabytes(1));
  7. string_buffer_allocator = gb_arena_allocator(&string_buffer_arena);
  8. gb_mutex_init(&string_buffer_mutex);
  9. }
  10. // NOTE(bill): Used for UTF-8 strings
  11. struct String {
  12. u8 * text;
  13. isize len;
  14. // u8 &operator[](isize i) {
  15. // GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  16. // return text[i];
  17. // }
  18. u8 const &operator[](isize i) const {
  19. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  20. return text[i];
  21. }
  22. };
  23. // NOTE(bill): used for printf style arguments
  //   LIT(x) expands to TWO comma-separated arguments, `(int)x.len` followed by
  //   `x.text` -- the precision/pointer pair a "%.*s" conversion consumes.
  24. #define LIT(x) ((int)(x).len), (x).text
  //   STR_LIT(c_str) expands to a brace initializer {pointer, size-minus-one}.
  //   The length comes from gb_size_of(c_str), so `c_str` has to be an array or
  //   string literal rather than a pointer (NOTE(review): assumes gb_size_of
  //   wraps sizeof -- confirm).
  25. #define STR_LIT(c_str) {cast(u8 *)c_str, gb_size_of(c_str)-1}
  //   str_lit(c_str) produces the same pair as a String value. For MSVC with
  //   _MSC_VER < 1700 it goes through make_string; otherwise it uses the
  //   String{...} brace form.
  26. #if defined(GB_COMPILER_MSVC) && _MSC_VER < 1700
  27. #define str_lit(c_str) make_string(cast(u8 *)c_str, gb_size_of(c_str)-1)
  28. #else
  29. #define str_lit(c_str) String{cast(u8 *)c_str, gb_size_of(c_str)-1}
  30. #endif
  31. // NOTE(bill): String16 is only used for Windows due to its file directories
  32. struct String16 {
  33. wchar_t *text;
  34. isize len;
  35. wchar_t &operator[](isize i) {
  36. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  37. return text[i];
  38. }
  39. wchar_t const &operator[](isize i) const {
  40. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  41. return text[i];
  42. }
  43. };
  44. gb_inline String make_string(u8 const *text, isize len) {
  45. String s;
  46. s.text = cast(u8 *)text;
  47. if (len < 0) {
  48. len = gb_strlen(cast(char const *)text);
  49. }
  50. s.len = len;
  51. return s;
  52. }
  53. gb_inline String16 make_string16(wchar_t const *text, isize len) {
  54. String16 s;
  55. s.text = cast(wchar_t *)text;
  56. s.len = len;
  57. return s;
  58. }
  59. isize string16_len(wchar_t const *s) {
  60. if (s == nullptr) {
  61. return 0;
  62. }
  63. wchar_t const *p = s;
  64. while (*p) {
  65. p++;
  66. }
  67. return p - s;
  68. }
  69. gb_inline String make_string_c(char const *text) {
  70. return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
  71. }
  72. gb_inline String16 make_string16_c(wchar_t const *text) {
  73. return make_string16(text, string16_len(text));
  74. }
  75. String substring(String const &s, isize lo, isize hi) {
  76. isize max = s.len;
  77. GB_ASSERT_MSG(lo <= hi && hi <= max, "%td..%td..%td", lo, hi, max);
  78. return make_string(s.text+lo, hi-lo);
  79. }
  80. char *alloc_cstring(gbAllocator a, String s) {
  81. char *c_str = gb_alloc_array(a, char, s.len+1);
  82. gb_memmove(c_str, s.text, s.len);
  83. c_str[s.len] = '\0';
  84. return c_str;
  85. }
  86. gb_inline bool str_eq_ignore_case(String const &a, String const &b) {
  87. if (a.len == b.len) {
  88. for (isize i = 0; i < a.len; i++) {
  89. char x = cast(char)a[i];
  90. char y = cast(char)b[i];
  91. if (gb_char_to_lower(x) != gb_char_to_lower(y)) {
  92. return false;
  93. }
  94. }
  95. return true;
  96. }
  97. return false;
  98. }
  99. int string_compare(String const &x, String const &y) {
  100. if (x.len != y.len || x.text != y.text) {
  101. isize n, fast, offset, curr_block;
  102. isize *la, *lb;
  103. isize pos;
  104. n = gb_min(x.len, y.len);
  105. fast = n/gb_size_of(isize) + 1;
  106. offset = (fast-1)*gb_size_of(isize);
  107. curr_block = 0;
  108. if (n <= gb_size_of(isize)) {
  109. fast = 0;
  110. }
  111. la = cast(isize *)x.text;
  112. lb = cast(isize *)y.text;
  113. for (; curr_block < fast; curr_block++) {
  114. if (la[curr_block] ^ lb[curr_block]) {
  115. for (pos = curr_block*gb_size_of(isize); pos < n; pos++) {
  116. if (x[pos] ^ y[pos]) {
  117. return cast(int)x[pos] - cast(int)y[pos];
  118. }
  119. }
  120. }
  121. }
  122. for (; offset < n; offset++) {
  123. if (x[offset] ^ y[offset]) {
  124. return cast(int)x[offset] - cast(int)y[offset];
  125. }
  126. }
  127. }
  128. return 0;
  129. }
  130. GB_COMPARE_PROC(string_cmp_proc) {
  131. String x = *(String *)a;
  132. String y = *(String *)b;
  133. return string_compare(x, y);
  134. }
  135. gb_inline bool str_eq(String const &a, String const &b) {
  136. if (a.len != b.len) return false;
  137. for (isize i = 0; i < a.len; i++) {
  138. if (a.text[i] != b.text[i]) {
  139. return false;
  140. }
  141. }
  142. return true;
  143. }
  144. gb_inline bool str_ne(String const &a, String const &b) { return !str_eq(a, b); }
  145. gb_inline bool str_lt(String const &a, String const &b) { return string_compare(a, b) < 0; }
  146. gb_inline bool str_gt(String const &a, String const &b) { return string_compare(a, b) > 0; }
  147. gb_inline bool str_le(String const &a, String const &b) { return string_compare(a, b) <= 0; }
  148. gb_inline bool str_ge(String const &a, String const &b) { return string_compare(a, b) >= 0; }
  149. gb_inline bool operator == (String const &a, String const &b) { return str_eq(a, b); }
  150. gb_inline bool operator != (String const &a, String const &b) { return str_ne(a, b); }
  151. gb_inline bool operator < (String const &a, String const &b) { return str_lt(a, b); }
  152. gb_inline bool operator > (String const &a, String const &b) { return str_gt(a, b); }
  153. gb_inline bool operator <= (String const &a, String const &b) { return str_le(a, b); }
  154. gb_inline bool operator >= (String const &a, String const &b) { return str_ge(a, b); }
  155. template <isize N> bool operator == (String const &a, char const (&b)[N]) { return str_eq(a, make_string(cast(u8 *)b, N-1)); }
  156. template <isize N> bool operator != (String const &a, char const (&b)[N]) { return str_ne(a, make_string(cast(u8 *)b, N-1)); }
  157. template <isize N> bool operator < (String const &a, char const (&b)[N]) { return str_lt(a, make_string(cast(u8 *)b, N-1)); }
  158. template <isize N> bool operator > (String const &a, char const (&b)[N]) { return str_gt(a, make_string(cast(u8 *)b, N-1)); }
  159. template <isize N> bool operator <= (String const &a, char const (&b)[N]) { return str_le(a, make_string(cast(u8 *)b, N-1)); }
  160. template <isize N> bool operator >= (String const &a, char const (&b)[N]) { return str_ge(a, make_string(cast(u8 *)b, N-1)); }
  161. gb_inline bool string_starts_with(String const &s, String const &prefix) {
  162. if (prefix.len > s.len) {
  163. return false;
  164. }
  165. return substring(s, 0, prefix.len) == prefix;
  166. }
  167. gb_inline bool string_ends_with(String const &s, String const &suffix) {
  168. if (suffix.len > s.len) {
  169. return false;
  170. }
  171. return substring(s, s.len-suffix.len, s.len) == suffix;
  172. }
  173. gb_inline bool string_starts_with(String const &s, u8 prefix) {
  174. if (1 > s.len) {
  175. return false;
  176. }
  177. return s[0] == prefix;
  178. }
  179. gb_inline bool string_ends_with(String const &s, u8 suffix) {
  180. if (1 > s.len) {
  181. return false;
  182. }
  183. return s[s.len-1] == suffix;
  184. }
  185. gb_inline isize string_extension_position(String const &str) {
  186. isize dot_pos = -1;
  187. isize i = str.len;
  188. while (i --> 0) {
  189. if (str[i] == GB_PATH_SEPARATOR)
  190. break;
  191. if (str[i] == '.') {
  192. dot_pos = i;
  193. break;
  194. }
  195. }
  196. return dot_pos;
  197. }
  198. String path_extension(String const &str) {
  199. isize pos = string_extension_position(str);
  200. if (pos < 0) {
  201. return make_string(nullptr, 0);
  202. }
  203. return substring(str, pos, str.len);
  204. }
  205. String string_trim_whitespace(String str) {
  206. while (str.len > 0 && rune_is_whitespace(str[str.len-1])) {
  207. str.len--;
  208. }
  209. while (str.len > 0 && str[str.len-1] == 0) {
  210. str.len--;
  211. }
  212. while (str.len > 0 && rune_is_whitespace(str[0])) {
  213. str.text++;
  214. str.len--;
  215. }
  216. return str;
  217. }
  218. bool string_contains_char(String const &s, u8 c) {
  219. isize i;
  220. for (i = 0; i < s.len; i++) {
  221. if (s[i] == c)
  222. return true;
  223. }
  224. return false;
  225. }
  226. String filename_from_path(String s) {
  227. isize i = string_extension_position(s);
  228. if (i >= 0) {
  229. s = substring(s, 0, i);
  230. return s;
  231. }
  232. if (i > 0) {
  233. isize j = 0;
  234. for (j = s.len-1; j >= 0; j--) {
  235. if (s[j] == '/' ||
  236. s[j] == '\\') {
  237. break;
  238. }
  239. }
  240. return substring(s, j+1, s.len);
  241. }
  242. return make_string(nullptr, 0);
  243. }
  244. String remove_extension_from_path(String const &s) {
  245. for (isize i = s.len-1; i >= 0; i--) {
  246. if (s[i] == '.') {
  247. return substring(s, 0, i);
  248. }
  249. }
  250. return s;
  251. }
  252. String remove_directory_from_path(String const &s) {
  253. isize len = 0;
  254. for (isize i = s.len-1; i >= 0; i--) {
  255. if (s[i] == '/' ||
  256. s[i] == '\\') {
  257. break;
  258. }
  259. len += 1;
  260. }
  261. return substring(s, s.len-len, s.len);
  262. }
  263. String directory_from_path(String const &s) {
  264. isize i = s.len-1;
  265. for (; i >= 0; i--) {
  266. if (s[i] == '/' ||
  267. s[i] == '\\') {
  268. break;
  269. }
  270. }
  271. return substring(s, 0, i);
  272. }
  273. String concatenate_strings(gbAllocator a, String const &x, String const &y) {
  274. isize len = x.len+y.len;
  275. u8 *data = gb_alloc_array(a, u8, len+1);
  276. gb_memmove(data, x.text, x.len);
  277. gb_memmove(data+x.len, y.text, y.len);
  278. data[len] = 0;
  279. return make_string(data, len);
  280. }
  281. String string_join_and_quote(gbAllocator a, Array<String> strings) {
  282. if (!strings.count) {
  283. return make_string(nullptr, 0);
  284. }
  285. isize str_len = 0;
  286. for (isize i = 0; i < strings.count; i++) {
  287. str_len += strings[i].len;
  288. }
  289. gbString s = gb_string_make_reserve(a, str_len+strings.count); // +strings.count for spaces after args.
  290. for (isize i = 0; i < strings.count; i++) {
  291. if (i > 0) {
  292. s = gb_string_append_fmt(s, " ");
  293. }
  294. s = gb_string_append_fmt(s, "\"%.*s\" ", LIT(strings[i]));
  295. }
  296. return make_string(cast(u8 *) s, gb_string_length(s));
  297. }
  298. String copy_string(gbAllocator a, String const &s) {
  299. u8 *data = gb_alloc_array(a, u8, s.len+1);
  300. gb_memmove(data, s.text, s.len);
  301. data[s.len] = 0;
  302. return make_string(data, s.len);
  303. }
  304. #if defined(GB_SYSTEM_WINDOWS)
  305. int convert_multibyte_to_widechar(char const *multibyte_input, int input_length, wchar_t *output, int output_size) {
  306. return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, multibyte_input, input_length, output, output_size);
  307. }
  308. int convert_widechar_to_multibyte(wchar_t const *widechar_input, int input_length, char *output, int output_size) {
  309. return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, widechar_input, input_length, output, output_size, nullptr, nullptr);
  310. }
  311. #elif defined(GB_SYSTEM_UNIX) || defined(GB_SYSTEM_OSX)
  312. #include <iconv.h>
  313. int convert_multibyte_to_widechar(char const *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
  314. iconv_t conv = iconv_open("WCHAR_T", "UTF-8");
  315. size_t result = iconv(conv, cast(char **)&multibyte_input, &input_length, cast(char **)&output, &output_size);
  316. iconv_close(conv);
  317. return cast(int)result;
  318. }
  319. int convert_widechar_to_multibyte(wchar_t const *widechar_input, usize input_length, char* output, usize output_size) {
  320. iconv_t conv = iconv_open("UTF-8", "WCHAR_T");
  321. size_t result = iconv(conv, cast(char**) &widechar_input, &input_length, cast(char **)&output, &output_size);
  322. iconv_close(conv);
  323. return cast(int)result;
  324. }
  325. #else
  326. #error Implement system
  327. #endif
  328. // TODO(bill): Make this non-windows specific
  329. String16 string_to_string16(gbAllocator a, String s) {
  330. int len, len1;
  331. wchar_t *text;
  332. if (s.len < 1) {
  333. return make_string16(nullptr, 0);
  334. }
  335. len = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, nullptr, 0);
  336. if (len == 0) {
  337. return make_string16(nullptr, 0);
  338. }
  339. text = gb_alloc_array(a, wchar_t, len+1);
  340. len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len);
  341. if (len1 == 0) {
  342. gb_free(a, text);
  343. return make_string16(nullptr, 0);
  344. }
  345. text[len] = 0;
  346. return make_string16(text, len);
  347. }
  348. String string16_to_string(gbAllocator a, String16 s) {
  349. int len, len1;
  350. u8 *text;
  351. if (s.len < 1) {
  352. return make_string(nullptr, 0);
  353. }
  354. len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0);
  355. if (len == 0) {
  356. return make_string(nullptr, 0);
  357. }
  358. len += 1; // NOTE(bill): It needs an extra 1 for some reason
  359. text = gb_alloc_array(a, u8, len+1);
  360. len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len);
  361. if (len1 == 0) {
  362. gb_free(a, text);
  363. return make_string(nullptr, 0);
  364. }
  365. text[len] = 0;
  366. return make_string(text, len-1);
  367. }
  368. bool is_printable(Rune r) {
  369. if (r <= 0xff) {
  370. if (0x20 <= r && r <= 0x7e) {
  371. return true;
  372. }
  373. if (0xa1 <= r && r <= 0xff) {
  374. return r != 0xad;
  375. }
  376. return false;
  377. }
  378. return false;
  379. }
  380. gb_global char const lower_hex[] = "0123456789abcdef";
  381. String quote_to_ascii(gbAllocator a, String str, u8 quote='"') {
  382. u8 *s = str.text;
  383. isize n = str.len;
  384. auto buf = array_make<u8>(a, 0, n);
  385. array_add(&buf, quote);
  386. for (isize width = 0; n > 0; s += width, n -= width) {
  387. Rune r = cast(Rune)s[0];
  388. width = 1;
  389. if (r >= 0x80) {
  390. width = gb_utf8_decode(s, n, &r);
  391. }
  392. if (width == 1 && r == GB_RUNE_INVALID) {
  393. array_add(&buf, cast(u8)'\\');
  394. array_add(&buf, cast(u8)'x');
  395. array_add(&buf, cast(u8)lower_hex[s[0]>>4]);
  396. array_add(&buf, cast(u8)lower_hex[s[0]&0xf]);
  397. continue;
  398. }
  399. if (r == quote || r == '\\') {
  400. array_add(&buf, cast(u8)'\\');
  401. array_add(&buf, u8(r));
  402. continue;
  403. }
  404. if (r < 0x80 && is_printable(r)) {
  405. array_add(&buf, u8(r));
  406. continue;
  407. }
  408. switch (r) {
  409. case '\a':
  410. case '\b':
  411. case '\f':
  412. case '\n':
  413. case '\r':
  414. case '\t':
  415. case '\v':
  416. default:
  417. if (r < ' ') {
  418. u8 b = cast(u8)r;
  419. array_add(&buf, cast(u8)'\\');
  420. array_add(&buf, cast(u8)'x');
  421. array_add(&buf, cast(u8)lower_hex[b>>4]);
  422. array_add(&buf, cast(u8)lower_hex[b&0xf]);
  423. }
  424. if (r > GB_RUNE_MAX) {
  425. r = 0XFFFD;
  426. }
  427. if (r < 0x10000) {
  428. u8 b = cast(u8)r;
  429. array_add(&buf, cast(u8)'\\');
  430. array_add(&buf, cast(u8)'u');
  431. for (isize i = 12; i >= 0; i -= 4) {
  432. array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
  433. }
  434. } else {
  435. u8 b = cast(u8)r;
  436. array_add(&buf, cast(u8)'\\');
  437. array_add(&buf, cast(u8)'U');
  438. for (isize i = 28; i >= 0; i -= 4) {
  439. array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
  440. }
  441. }
  442. }
  443. }
  444. array_add(&buf, quote);
  445. String res = {};
  446. res.text = buf.data;
  447. res.len = buf.count;
  448. return res;
  449. }
  450. bool unquote_char(String s, u8 quote, Rune *rune, bool *multiple_bytes, String *tail_string) {
  451. u8 c;
  452. if (s[0] == quote &&
  453. (quote == '\'' || quote == '"')) {
  454. return false;
  455. } else if (s[0] >= 0x80) {
  456. Rune r = -1;
  457. isize size = gb_utf8_decode(s.text, s.len, &r);
  458. *rune = r;
  459. *multiple_bytes = true;
  460. *tail_string = make_string(s.text+size, s.len-size);
  461. return true;
  462. } else if (s[0] != '\\') {
  463. *rune = s[0];
  464. *tail_string = make_string(s.text+1, s.len-1);
  465. return true;
  466. }
  467. if (s.len <= 1) {
  468. return false;
  469. }
  470. c = s[1];
  471. s = make_string(s.text+2, s.len-2);
  472. switch (c) {
  473. default: return false;
  474. case 'a': *rune = '\a'; break;
  475. case 'b': *rune = '\b'; break;
  476. case 'e': *rune = 0x1b; break;
  477. case 'f': *rune = '\f'; break;
  478. case 'n': *rune = '\n'; break;
  479. case 'r': *rune = '\r'; break;
  480. case 't': *rune = '\t'; break;
  481. case 'v': *rune = '\v'; break;
  482. case '\\': *rune = '\\'; break;
  483. case '\'':
  484. case '"':
  485. *rune = c;
  486. break;
  487. case '0':
  488. case '1':
  489. case '2':
  490. case '3':
  491. case '4':
  492. case '5':
  493. case '6':
  494. case '7': {
  495. isize i;
  496. i32 r = gb_digit_to_int(c);
  497. if (s.len < 2) {
  498. return false;
  499. }
  500. for (i = 0; i < 2; i++) {
  501. i32 d = gb_digit_to_int(s[i]);
  502. if (d < 0 || d > 7) {
  503. return false;
  504. }
  505. r = (r<<3) | d;
  506. }
  507. s = make_string(s.text+2, s.len-2);
  508. if (r > 0xff) {
  509. return false;
  510. }
  511. *rune = r;
  512. } break;
  513. case 'x':
  514. case 'u':
  515. case 'U': {
  516. Rune r = 0;
  517. isize i, count = 0;
  518. switch (c) {
  519. case 'x': count = 2; break;
  520. case 'u': count = 4; break;
  521. case 'U': count = 8; break;
  522. }
  523. if (s.len < count) {
  524. return false;
  525. }
  526. for (i = 0; i < count; i++) {
  527. i32 d = gb_hex_digit_to_int(s[i]);
  528. if (d < 0) {
  529. return false;
  530. }
  531. r = (r<<4) | d;
  532. }
  533. s = make_string(s.text+count, s.len-count);
  534. if (c == 'x') {
  535. *rune = r;
  536. break;
  537. }
  538. if (r > GB_RUNE_MAX) {
  539. return false;
  540. }
  541. *rune = r;
  542. *multiple_bytes = true;
  543. } break;
  544. }
  545. *tail_string = s;
  546. return true;
  547. }
  548. // 0 == failure
  549. // 1 == original memory
  550. // 2 == new allocation
  551. i32 unquote_string(gbAllocator a, String *s_, u8 quote=0) {
  552. String s = *s_;
  553. isize n = s.len;
  554. if (quote == 0) {
  555. if (n < 2) {
  556. return 0;
  557. }
  558. quote = s[0];
  559. if (quote != s[n-1]) {
  560. return 0;
  561. }
  562. s.text += 1;
  563. s.len -= 2;
  564. }
  565. if (quote == '`') {
  566. if (string_contains_char(s, '`')) {
  567. return 0;
  568. }
  569. *s_ = s;
  570. return 1;
  571. }
  572. if (quote != '"' && quote != '\'') {
  573. return 0;
  574. }
  575. if (string_contains_char(s, '\n')) {
  576. return 0;
  577. }
  578. if (!string_contains_char(s, '\\') && !string_contains_char(s, quote)) {
  579. if (quote == '"') {
  580. *s_ = s;
  581. return 1;
  582. } else if (quote == '\'') {
  583. Rune r = GB_RUNE_INVALID;
  584. isize size = gb_utf8_decode(s.text, s.len, &r);
  585. if ((size == s.len) && (r != -1 || size != 1)) {
  586. *s_ = s;
  587. return 1;
  588. }
  589. }
  590. }
  591. {
  592. u8 rune_temp[4] = {};
  593. isize buf_len = 3*s.len / 2;
  594. u8 *buf = gb_alloc_array(a, u8, buf_len);
  595. isize offset = 0;
  596. while (s.len > 0) {
  597. String tail_string = {};
  598. Rune r = 0;
  599. bool multiple_bytes = false;
  600. bool success = unquote_char(s, quote, &r, &multiple_bytes, &tail_string);
  601. if (!success) {
  602. gb_free(a, buf);
  603. return 0;
  604. }
  605. s = tail_string;
  606. if (r < 0x80 || !multiple_bytes) {
  607. buf[offset++] = cast(u8)r;
  608. } else {
  609. isize size = gb_utf8_encode_rune(rune_temp, r);
  610. gb_memmove(buf+offset, rune_temp, size);
  611. offset += size;
  612. }
  613. if (quote == '\'' && s.len != 0) {
  614. gb_free(a, buf);
  615. return 0;
  616. }
  617. }
  618. *s_ = make_string(buf, offset);
  619. }
  620. return 2;
  621. }