string.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  // Arena behind the shared string buffer. init_string_buffer_memory() gives it
  // 1 MB taken from heap_allocator().
  gb_global gbArena string_buffer_arena = {};
  // Allocator handle onto string_buffer_arena; assigned by init_string_buffer_memory().
  gb_global gbAllocator string_buffer_allocator = {};
  // Initialised by init_string_buffer_memory().
  // NOTE(review): presumably guards allocations from string_buffer_allocator, but no
  // lock/unlock call is visible in this part of the file -- confirm against callers.
  gb_global gbMutex string_buffer_mutex = {};
  4. void init_string_buffer_memory(void) {
  5. // NOTE(bill): This should be enough memory for file systems
  6. gb_arena_init_from_allocator(&string_buffer_arena, heap_allocator(), gb_megabytes(1));
  7. string_buffer_allocator = gb_arena_allocator(&string_buffer_arena);
  8. gb_mutex_init(&string_buffer_mutex);
  9. }
  10. // NOTE(bill): Used for UTF-8 strings
  11. struct String {
  12. u8 * text;
  13. isize len;
  14. u8 &operator[](isize i) {
  15. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  16. return text[i];
  17. }
  18. u8 const &operator[](isize i) const {
  19. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  20. return text[i];
  21. }
  22. };
  // NOTE(bill): used for printf style arguments
  // LIT(x) expands to TWO comma-separated arguments: the length of `x` cast to int,
  // then its text pointer. `x` is evaluated twice.
  // NOTE(review): presumably meant to pair with a "%.*s" conversion -- confirm at call sites.
  #define LIT(x) ((int)(x).len), (x).text
  // STR_LIT builds a brace initialiser {text, len} from a string literal; the length is
  // the literal's size minus its terminating NUL, so it must be given an array, not a pointer.
  #define STR_LIT(c_str) {cast(u8 *)c_str, gb_size_of(c_str)-1}
  // str_lit yields a String value from a string literal. Under MSVC with _MSC_VER < 1700
  // it goes through make_string(); otherwise it brace-initialises a String directly.
  #if defined(GB_COMPILER_MSVC) && _MSC_VER < 1700
  #define str_lit(c_str) make_string(cast(u8 *)c_str, gb_size_of(c_str)-1)
  #else
  #define str_lit(c_str) String{cast(u8 *)c_str, gb_size_of(c_str)-1}
  #endif
  31. // NOTE(bill): String16 is only used for Windows due to its file directories
  32. struct String16 {
  33. wchar_t *text;
  34. isize len;
  35. wchar_t &operator[](isize i) {
  36. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  37. return text[i];
  38. }
  39. wchar_t const &operator[](isize i) const {
  40. GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
  41. return text[i];
  42. }
  43. };
  44. gb_inline String make_string(u8 *text, isize len) {
  45. String s;
  46. s.text = text;
  47. if (len < 0) {
  48. len = gb_strlen(cast(char *)text);
  49. }
  50. s.len = len;
  51. return s;
  52. }
  53. gb_inline String16 make_string16(wchar_t *text, isize len) {
  54. String16 s;
  55. s.text = text;
  56. s.len = len;
  57. return s;
  58. }
  59. isize string16_len(wchar_t *s) {
  60. if (s == nullptr) {
  61. return 0;
  62. }
  63. wchar_t *p = s;
  64. while (*p) {
  65. p++;
  66. }
  67. return p - s;
  68. }
  69. gb_inline String make_string_c(char *text) {
  70. return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
  71. }
  72. gb_inline String16 make_string16_c(wchar_t *text) {
  73. return make_string16(text, string16_len(text));
  74. }
  75. String substring(String const &s, isize lo, isize hi) {
  76. isize max = s.len;
  77. GB_ASSERT_MSG(lo <= hi && hi <= max, "%td..%td..%td", lo, hi, max);
  78. return make_string(s.text+lo, hi-lo);
  79. }
  80. char *alloc_cstring(gbAllocator a, String s) {
  81. char *c_str = gb_alloc_array(a, char, s.len+1);
  82. gb_memmove(c_str, s.text, s.len);
  83. c_str[s.len] = '\0';
  84. return c_str;
  85. }
  86. gb_inline bool str_eq_ignore_case(String const &a, String const &b) {
  87. if (a.len == b.len) {
  88. for (isize i = 0; i < a.len; i++) {
  89. char x = cast(char)a[i];
  90. char y = cast(char)b[i];
  91. if (gb_char_to_lower(x) != gb_char_to_lower(y)) {
  92. return false;
  93. }
  94. }
  95. return true;
  96. }
  97. return false;
  98. }
  99. int string_compare(String const &x, String const &y) {
  100. if (x.len != y.len || x.text != y.text) {
  101. isize n, fast, offset, curr_block;
  102. isize *la, *lb;
  103. isize pos;
  104. n = gb_min(x.len, y.len);
  105. fast = n/gb_size_of(isize) + 1;
  106. offset = (fast-1)*gb_size_of(isize);
  107. curr_block = 0;
  108. if (n <= gb_size_of(isize)) {
  109. fast = 0;
  110. }
  111. la = cast(isize *)x.text;
  112. lb = cast(isize *)y.text;
  113. for (; curr_block < fast; curr_block++) {
  114. if (la[curr_block] ^ lb[curr_block]) {
  115. for (pos = curr_block*gb_size_of(isize); pos < n; pos++) {
  116. if (x[pos] ^ y[pos]) {
  117. return cast(int)x[pos] - cast(int)y[pos];
  118. }
  119. }
  120. }
  121. }
  122. for (; offset < n; offset++) {
  123. if (x[offset] ^ y[offset]) {
  124. return cast(int)x[offset] - cast(int)y[offset];
  125. }
  126. }
  127. }
  128. return 0;
  129. }
  130. GB_COMPARE_PROC(string_cmp_proc) {
  131. String x = *(String *)a;
  132. String y = *(String *)b;
  133. return string_compare(x, y);
  134. }
  135. gb_inline bool str_eq(String const &a, String const &b) {
  136. if (a.len != b.len) return false;
  137. for (isize i = 0; i < a.len; i++) {
  138. if (a.text[i] != b.text[i]) {
  139. return false;
  140. }
  141. }
  142. return true;
  143. }
  144. gb_inline bool str_ne(String const &a, String const &b) { return !str_eq(a, b); }
  145. gb_inline bool str_lt(String const &a, String const &b) { return string_compare(a, b) < 0; }
  146. gb_inline bool str_gt(String const &a, String const &b) { return string_compare(a, b) > 0; }
  147. gb_inline bool str_le(String const &a, String const &b) { return string_compare(a, b) <= 0; }
  148. gb_inline bool str_ge(String const &a, String const &b) { return string_compare(a, b) >= 0; }
  149. gb_inline bool operator == (String const &a, String const &b) { return str_eq(a, b); }
  150. gb_inline bool operator != (String const &a, String const &b) { return str_ne(a, b); }
  151. gb_inline bool operator < (String const &a, String const &b) { return str_lt(a, b); }
  152. gb_inline bool operator > (String const &a, String const &b) { return str_gt(a, b); }
  153. gb_inline bool operator <= (String const &a, String const &b) { return str_le(a, b); }
  154. gb_inline bool operator >= (String const &a, String const &b) { return str_ge(a, b); }
  155. template <isize N> bool operator == (String const &a, char const (&b)[N]) { return str_eq(a, make_string(cast(u8 *)b, N-1)); }
  156. template <isize N> bool operator != (String const &a, char const (&b)[N]) { return str_ne(a, make_string(cast(u8 *)b, N-1)); }
  157. template <isize N> bool operator < (String const &a, char const (&b)[N]) { return str_lt(a, make_string(cast(u8 *)b, N-1)); }
  158. template <isize N> bool operator > (String const &a, char const (&b)[N]) { return str_gt(a, make_string(cast(u8 *)b, N-1)); }
  159. template <isize N> bool operator <= (String const &a, char const (&b)[N]) { return str_le(a, make_string(cast(u8 *)b, N-1)); }
  160. template <isize N> bool operator >= (String const &a, char const (&b)[N]) { return str_ge(a, make_string(cast(u8 *)b, N-1)); }
  161. gb_inline bool string_starts_with(String const &s, String const &prefix) {
  162. if (prefix.len > s.len) {
  163. return false;
  164. }
  165. return substring(s, 0, prefix.len) == prefix;
  166. }
  167. gb_inline bool string_ends_with(String const &s, String const &suffix) {
  168. if (suffix.len > s.len) {
  169. return false;
  170. }
  171. return substring(s, s.len-suffix.len, s.len) == suffix;
  172. }
  173. gb_inline isize string_extension_position(String const &str) {
  174. isize dot_pos = -1;
  175. isize i = str.len;
  176. while (i --> 0) {
  177. if (str[i] == GB_PATH_SEPARATOR)
  178. break;
  179. if (str[i] == '.') {
  180. dot_pos = i;
  181. break;
  182. }
  183. }
  184. return dot_pos;
  185. }
  186. String path_extension(String const &str) {
  187. isize pos = string_extension_position(str);
  188. if (pos < 0) {
  189. return make_string(nullptr, 0);
  190. }
  191. return substring(str, pos, str.len);
  192. }
  193. String string_trim_whitespace(String str) {
  194. while (str.len > 0 && rune_is_whitespace(str[str.len-1])) {
  195. str.len--;
  196. }
  197. while (str.len > 0 && str[str.len-1] == 0) {
  198. str.len--;
  199. }
  200. while (str.len > 0 && rune_is_whitespace(str[0])) {
  201. str.text++;
  202. str.len--;
  203. }
  204. return str;
  205. }
  206. bool string_contains_char(String const &s, u8 c) {
  207. isize i;
  208. for (i = 0; i < s.len; i++) {
  209. if (s[i] == c)
  210. return true;
  211. }
  212. return false;
  213. }
  214. String filename_from_path(String s) {
  215. isize i = string_extension_position(s);
  216. if (i >= 0) {
  217. s = substring(s, 0, i);
  218. return s;
  219. }
  220. if (i > 0) {
  221. isize j = 0;
  222. for (j = s.len-1; j >= 0; j--) {
  223. if (s[j] == '/' ||
  224. s[j] == '\\') {
  225. break;
  226. }
  227. }
  228. return substring(s, j+1, s.len);
  229. }
  230. return make_string(nullptr, 0);
  231. }
  232. String remove_extension_from_path(String const &s) {
  233. for (isize i = s.len-1; i >= 0; i--) {
  234. if (s[i] == '.') {
  235. return substring(s, 0, i);
  236. }
  237. }
  238. return s;
  239. }
  240. String remove_directory_from_path(String const &s) {
  241. isize len = 0;
  242. for (isize i = s.len-1; i >= 0; i--) {
  243. if (s[i] == '/' ||
  244. s[i] == '\\') {
  245. break;
  246. }
  247. len += 1;
  248. }
  249. return substring(s, s.len-len, s.len);
  250. }
  251. String directory_from_path(String const &s) {
  252. isize i = s.len-1;
  253. for (; i >= 0; i--) {
  254. if (s[i] == '/' ||
  255. s[i] == '\\') {
  256. break;
  257. }
  258. }
  259. return substring(s, 0, i);
  260. }
  261. String concatenate_strings(gbAllocator a, String const &x, String const &y) {
  262. isize len = x.len+y.len;
  263. u8 *data = gb_alloc_array(a, u8, len+1);
  264. gb_memmove(data, x.text, x.len);
  265. gb_memmove(data+x.len, y.text, y.len);
  266. data[len] = 0;
  267. return make_string(data, len);
  268. }
  269. String copy_string(gbAllocator a, String const &s) {
  270. u8 *data = gb_alloc_array(a, u8, s.len+1);
  271. gb_memmove(data, s.text, s.len);
  272. data[s.len] = 0;
  273. return make_string(data, s.len);
  274. }
  275. #if defined(GB_SYSTEM_WINDOWS)
  276. int convert_multibyte_to_widechar(char *multibyte_input, int input_length, wchar_t *output, int output_size) {
  277. return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, multibyte_input, input_length, output, output_size);
  278. }
  279. int convert_widechar_to_multibyte(wchar_t *widechar_input, int input_length, char *output, int output_size) {
  280. return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, widechar_input, input_length, output, output_size, nullptr, nullptr);
  281. }
  282. #elif defined(GB_SYSTEM_UNIX) || defined(GB_SYSTEM_OSX)
  283. #include <iconv.h>
  284. int convert_multibyte_to_widechar(char *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
  285. iconv_t conv = iconv_open("WCHAR_T", "UTF-8");
  286. size_t result = iconv(conv, cast(char **)&multibyte_input, &input_length, cast(char **)&output, &output_size);
  287. iconv_close(conv);
  288. return (int) result;
  289. }
  290. int convert_widechar_to_multibyte(wchar_t* widechar_input, usize input_length, char* output, usize output_size) {
  291. iconv_t conv = iconv_open("UTF-8", "WCHAR_T");
  292. size_t result = iconv(conv, (char**) &widechar_input, &input_length, (char**) &output, &output_size);
  293. iconv_close(conv);
  294. return (int) result;
  295. }
  296. #else
  297. #error Implement system
  298. #endif
  299. // TODO(bill): Make this non-windows specific
  300. String16 string_to_string16(gbAllocator a, String s) {
  301. int len, len1;
  302. wchar_t *text;
  303. if (s.len < 1) {
  304. return make_string16(nullptr, 0);
  305. }
  306. len = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, nullptr, 0);
  307. if (len == 0) {
  308. return make_string16(nullptr, 0);
  309. }
  310. text = gb_alloc_array(a, wchar_t, len+1);
  311. len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len);
  312. if (len1 == 0) {
  313. gb_free(a, text);
  314. return make_string16(nullptr, 0);
  315. }
  316. text[len] = 0;
  317. return make_string16(text, len-1);
  318. }
  319. String string16_to_string(gbAllocator a, String16 s) {
  320. int len, len1;
  321. u8 *text;
  322. if (s.len < 1) {
  323. return make_string(nullptr, 0);
  324. }
  325. len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0);
  326. if (len == 0) {
  327. return make_string(nullptr, 0);
  328. }
  329. len += 1; // NOTE(bill): It needs an extra 1 for some reason
  330. text = gb_alloc_array(a, u8, len+1);
  331. len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len);
  332. if (len1 == 0) {
  333. gb_free(a, text);
  334. return make_string(nullptr, 0);
  335. }
  336. text[len] = 0;
  337. return make_string(text, len-1);
  338. }
  339. bool unquote_char(String s, u8 quote, Rune *rune, bool *multiple_bytes, String *tail_string) {
  340. u8 c;
  341. if (s[0] == quote &&
  342. (quote == '\'' || quote == '"')) {
  343. return false;
  344. } else if (s[0] >= 0x80) {
  345. Rune r = -1;
  346. isize size = gb_utf8_decode(s.text, s.len, &r);
  347. *rune = r;
  348. *multiple_bytes = true;
  349. *tail_string = make_string(s.text+size, s.len-size);
  350. return true;
  351. } else if (s[0] != '\\') {
  352. *rune = s[0];
  353. *tail_string = make_string(s.text+1, s.len-1);
  354. return true;
  355. }
  356. if (s.len <= 1) {
  357. return false;
  358. }
  359. c = s[1];
  360. s = make_string(s.text+2, s.len-2);
  361. switch (c) {
  362. default: return false;
  363. case 'a': *rune = '\a'; break;
  364. case 'b': *rune = '\b'; break;
  365. case 'e': *rune = 0x1b; break;
  366. case 'f': *rune = '\f'; break;
  367. case 'n': *rune = '\n'; break;
  368. case 'r': *rune = '\r'; break;
  369. case 't': *rune = '\t'; break;
  370. case 'v': *rune = '\v'; break;
  371. case '\\': *rune = '\\'; break;
  372. case '\'':
  373. case '"':
  374. *rune = c;
  375. break;
  376. case '0':
  377. case '1':
  378. case '2':
  379. case '3':
  380. case '4':
  381. case '5':
  382. case '6':
  383. case '7': {
  384. isize i;
  385. i32 r = gb_digit_to_int(c);
  386. if (s.len < 2) {
  387. return false;
  388. }
  389. for (i = 0; i < 2; i++) {
  390. i32 d = gb_digit_to_int(s[i]);
  391. if (d < 0 || d > 7) {
  392. return false;
  393. }
  394. r = (r<<3) | d;
  395. }
  396. s = make_string(s.text+2, s.len-2);
  397. if (r > 0xff) {
  398. return false;
  399. }
  400. *rune = r;
  401. } break;
  402. case 'x':
  403. case 'u':
  404. case 'U': {
  405. Rune r = 0;
  406. isize i, count = 0;
  407. switch (c) {
  408. case 'x': count = 2; break;
  409. case 'u': count = 4; break;
  410. case 'U': count = 8; break;
  411. }
  412. if (s.len < count) {
  413. return false;
  414. }
  415. for (i = 0; i < count; i++) {
  416. i32 d = gb_hex_digit_to_int(s[i]);
  417. if (d < 0) {
  418. return false;
  419. }
  420. r = (r<<4) | d;
  421. }
  422. s = make_string(s.text+count, s.len-count);
  423. if (c == 'x') {
  424. *rune = r;
  425. break;
  426. }
  427. if (r > GB_RUNE_MAX) {
  428. return false;
  429. }
  430. *rune = r;
  431. *multiple_bytes = true;
  432. } break;
  433. }
  434. *tail_string = s;
  435. return true;
  436. }
  437. // 0 == failure
  438. // 1 == original memory
  439. // 2 == new allocation
  440. i32 unquote_string(gbAllocator a, String *s_) {
  441. String s = *s_;
  442. isize n = s.len;
  443. u8 quote;
  444. if (n < 2) {
  445. return 0;
  446. }
  447. quote = s[0];
  448. if (quote != s[n-1]) {
  449. return 0;
  450. }
  451. s.text += 1;
  452. s.len -= 2;
  453. if (quote == '`') {
  454. if (string_contains_char(s, '`')) {
  455. return 0;
  456. }
  457. *s_ = s;
  458. return 1;
  459. }
  460. if (quote != '"' && quote != '\'') {
  461. return 0;
  462. }
  463. if (string_contains_char(s, '\n')) {
  464. return 0;
  465. }
  466. if (!string_contains_char(s, '\\') && !string_contains_char(s, quote)) {
  467. if (quote == '"') {
  468. *s_ = s;
  469. return 1;
  470. } else if (quote == '\'') {
  471. Rune r = GB_RUNE_INVALID;
  472. isize size = gb_utf8_decode(s.text, s.len, &r);
  473. if ((size == s.len) && (r != -1 || size != 1)) {
  474. *s_ = s;
  475. return 1;
  476. }
  477. }
  478. }
  479. {
  480. u8 rune_temp[4] = {};
  481. isize buf_len = 3*s.len / 2;
  482. u8 *buf = gb_alloc_array(a, u8, buf_len);
  483. isize offset = 0;
  484. while (s.len > 0) {
  485. String tail_string = {};
  486. Rune r = 0;
  487. bool multiple_bytes = false;
  488. bool success = unquote_char(s, quote, &r, &multiple_bytes, &tail_string);
  489. if (!success) {
  490. gb_free(a, buf);
  491. return 0;
  492. }
  493. s = tail_string;
  494. if (r < 0x80 || !multiple_bytes) {
  495. buf[offset++] = cast(u8)r;
  496. } else {
  497. isize size = gb_utf8_encode_rune(rune_temp, r);
  498. gb_memmove(buf+offset, rune_temp, size);
  499. offset += size;
  500. }
  501. if (quote == '\'' && s.len != 0) {
  502. gb_free(a, buf);
  503. return 0;
  504. }
  505. }
  506. *s_ = make_string(buf, offset);
  507. }
  508. return 2;
  509. }