strconv.odin 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. package strconv
  2. import "core:unicode/utf8"
  3. parse_bool :: proc(s: string) -> (result: bool = false, ok: bool) {
  4. switch s {
  5. case "1", "t", "T", "true", "TRUE", "True":
  6. return true, true;
  7. case "0", "f", "F", "false", "FALSE", "False":
  8. return false, true;
  9. }
  10. return;
  11. }
  12. _digit_value :: proc(r: rune) -> int {
  13. ri := int(r);
  14. v: int = 16;
  15. switch r {
  16. case '0'..'9': v = ri-'0';
  17. case 'a'..'z': v = ri-'a'+10;
  18. case 'A'..'Z': v = ri-'A'+10;
  19. }
  20. return v;
  21. }
  22. // Parses an integer value from a string, in the given base, without a prefix.
  23. //
  24. // Returns ok=false if no numeric value of the appropriate base could be found.
  25. //
  26. // ```
  27. // n, ok := strconv.parse_i64_of_base("-1234eeee", 10);
  28. // assert(n == -1234 && ok);
  29. // ```
  30. parse_i64_of_base :: proc(str: string, base: int) -> (value: i64, ok: bool) {
  31. assert(base <= 16, "base must be 1-16");
  32. s := str;
  33. neg := false;
  34. if len(s) > 1 {
  35. switch s[0] {
  36. case '-':
  37. neg = true;
  38. s = s[1:];
  39. case '+':
  40. s = s[1:];
  41. }
  42. }
  43. i := 0;
  44. for r in s {
  45. if r == '_' {
  46. i += 1;
  47. continue;
  48. }
  49. v := i64(_digit_value(r));
  50. if v >= i64(base) do break;
  51. value *= i64(base);
  52. value += v;
  53. i += 1;
  54. }
  55. if neg do value = -value;
  56. ok = i > 0;
  57. return;
  58. }
  59. // Parses a integer value from a string, in base 10, unless there's a prefix.
  60. //
  61. // Returns ok=false if a base 10 integer could not be found.
  62. //
  63. // ```
  64. // n, ok := strconv.parse_i64_maybe_prefixed("1234");
  65. // assert(n == 1234 && ok);
  66. //
  67. // n, ok = strconv.parse_i64_maybe_prefixed("0xeeee");
  68. // assert(n == 0xeeee && ok);
  69. // ```
  70. parse_i64_maybe_prefixed :: proc(str: string) -> (value: i64, ok: bool) {
  71. s := str;
  72. neg := false;
  73. if len(s) > 1 {
  74. switch s[0] {
  75. case '-':
  76. neg = true;
  77. s = s[1:];
  78. case '+':
  79. s = s[1:];
  80. }
  81. }
  82. base: i64 = 10;
  83. if len(s) > 2 && s[0] == '0' {
  84. switch s[1] {
  85. case 'b': base = 2; s = s[2:];
  86. case 'o': base = 8; s = s[2:];
  87. case 'd': base = 10; s = s[2:];
  88. case 'z': base = 12; s = s[2:];
  89. case 'x': base = 16; s = s[2:];
  90. }
  91. }
  92. i := 0;
  93. for r in s {
  94. if r == '_' {
  95. i += 1;
  96. continue;
  97. }
  98. v := i64(_digit_value(r));
  99. if v >= base do break;
  100. value *= base;
  101. value += v;
  102. i += 1;
  103. }
  104. if neg do value = -value;
  105. ok = i > 0;
  106. return;
  107. }
  108. parse_i64 :: proc{parse_i64_maybe_prefixed, parse_i64_of_base};
  109. // Parses an unsigned integer value from a string, in the given base, and
  110. // without a prefix.
  111. //
  112. // Returns ok=false if no numeric value of the appropriate base could be found.
  113. //
  114. // ```
  115. // n, ok := strconv.parse_u64_of_base("1234eeee", 10);
  116. // assert(n == 1234 && ok);
  117. //
  118. // n, ok = strconv.parse_u64_of_base("5678eeee", 16);
  119. // assert(n == 0x5678eeee && ok);
  120. // ```
  121. parse_u64_of_base :: proc(str: string, base: int) -> (value: u64, ok: bool) {
  122. assert(base <= 16, "base must be 1-16");
  123. s := str;
  124. if len(s) > 1 && s[0] == '+' {
  125. s = s[1:];
  126. }
  127. i := 0;
  128. for r in s {
  129. if r == '_' {
  130. i += 1;
  131. continue;
  132. }
  133. v := u64(_digit_value(r));
  134. if v >= u64(base) do break;
  135. value *= u64(base);
  136. value += v;
  137. i += 1;
  138. }
  139. ok = i > 0;
  140. return;
  141. }
  142. // Parses an unsigned integer value from a string in base 10, unless there's a prefix.
  143. //
  144. // Returns ok=false if a base 10 integer could not be found, or
  145. // if the value was negative.
  146. //
  147. // ```
  148. // n, ok := strconv.parse_u64_maybe_prefixed("1234");
  149. // assert(n == 1234 && ok);
  150. //
  151. // n, ok = strconv.parse_u64_maybe_prefixed("0xeeee");
  152. // assert(n == 0xeeee && ok);
  153. // ```
  154. parse_u64_maybe_prefixed :: proc(str: string) -> (value: u64, ok: bool) {
  155. s := str;
  156. if len(s) > 1 && s[0] == '+' {
  157. s = s[1:];
  158. }
  159. base := u64(10);
  160. if len(s) > 2 && s[0] == '0' {
  161. switch s[1] {
  162. case 'b': base = 2; s = s[2:];
  163. case 'o': base = 8; s = s[2:];
  164. case 'd': base = 10; s = s[2:];
  165. case 'z': base = 12; s = s[2:];
  166. case 'x': base = 16; s = s[2:];
  167. }
  168. }
  169. i := 0;
  170. for r in s {
  171. if r == '_' {
  172. i += 1;
  173. continue;
  174. }
  175. v := u64(_digit_value(r));
  176. if v >= base do break;
  177. value *= base;
  178. value += u64(v);
  179. i += 1;
  180. }
  181. ok = i > 0;
  182. return;
  183. }
  184. parse_u64 :: proc{parse_u64_maybe_prefixed, parse_u64_of_base};
  185. // Parses an integer value from a string in the given base, or
  186. // - if the string has a prefix (e.g: '0x') then that will determine the base;
  187. // - otherwise, assumes base 10.
  188. //
  189. // Returns ok=false if no appropriate value could be found.
  190. //
  191. // ```
  192. // n, ok := strconv.parse_int("1234"); // without prefix, inferred base 10
  193. // assert(n == 1234 && ok);
  194. //
  195. // n, ok = strconv.parse_int("ffff", 16); // without prefix, explicit base
  196. // assert(n == 0xffff && ok);
  197. //
  198. // n, ok = strconv.parse_int("0xffff"); // with prefix and inferred base
  199. // assert(n == 0xffff && ok);
  200. // ```
  201. parse_int :: proc(s: string, base := 0) -> (value: int, ok: bool) {
  202. v: i64 = ---;
  203. switch base {
  204. case 0: v, ok = parse_i64_maybe_prefixed(s);
  205. case: v, ok = parse_i64_of_base(s, base);
  206. }
  207. value = int(v);
  208. return;
  209. }
  210. // Parses an unsigned integer value from a string in the given base, or
  211. // - if the string has a prefix (e.g: '0x') then that will determine the base;
  212. // - otherwise, assumes base 10.
  213. //
  214. // Returns ok=false if:
  215. // - no appropriate value could be found; or
  216. // - the value was negative.
  217. //
  218. // ```
  219. // n, ok := strconv.parse_uint("1234"); // without prefix, inferred base 10
  220. // assert(n == 1234 && ok);
  221. //
  222. // n, ok = strconv.parse_uint("ffff", 16); // without prefix, explicit base
  223. // assert(n == 0xffff && ok);
  224. //
  225. // n, ok = strconv.parse_uint("0xffff"); // with prefix and inferred base
  226. // assert(n == 0xffff && ok);
  227. // ```
  228. parse_uint :: proc(s: string, base := 0) -> (value: uint, ok: bool) {
  229. v: u64 = ---;
  230. switch base {
  231. case 0: v, ok = parse_u64_maybe_prefixed(s);
  232. case: v, ok = parse_u64_of_base(s, base);
  233. }
  234. value = uint(v);
  235. return;
  236. }
  237. // Parses a 32-bit floating point number from a string.
  238. //
  239. // Returns ok=false if a base 10 float could not be found.
  240. //
  241. // ```
  242. // n, ok := strconv.parse_f32("12.34eee");
  243. // assert(n == 12.34 && ok);
  244. //
  245. // n, ok = strconv.parse_f32("12.34");
  246. // assert(n == 12.34 && ok);
  247. // ```
  248. parse_f32 :: proc(s: string) -> (value: f32, ok: bool) {
  249. v: f64 = ---;
  250. v, ok = parse_f64(s);
  251. return f32(v), ok;
  252. }
  253. // Parses a 64-bit floating point number from a string.
  254. //
  255. // Returns ok=false if a base 10 float could not be found.
  256. //
  257. // ```
  258. // n, ok := strconv.parse_f32("12.34eee");
  259. // assert(n == 12.34 && ok);
  260. //
  261. // n, ok = strconv.parse_f32("12.34");
  262. // assert(n == 12.34 && ok);
  263. // ```
  264. parse_f64 :: proc(s: string) -> (value: f64, ok: bool) {
  265. if s == "" {
  266. return;
  267. }
  268. i := 0;
  269. sign: f64 = 1;
  270. switch s[i] {
  271. case '-': i += 1; sign = -1;
  272. case '+': i += 1;
  273. }
  274. for ; i < len(s); i += 1 {
  275. r := rune(s[i]);
  276. if r == '_' do continue;
  277. v := _digit_value(r);
  278. if v >= 10 do break;
  279. value *= 10;
  280. value += f64(v);
  281. }
  282. if i < len(s) && s[i] == '.' {
  283. pow10: f64 = 10;
  284. i += 1;
  285. for ; i < len(s); i += 1 {
  286. r := rune(s[i]);
  287. if r == '_' do continue;
  288. v := _digit_value(r);
  289. if v >= 10 do break;
  290. value += f64(v)/pow10;
  291. pow10 *= 10;
  292. }
  293. }
  294. frac := false;
  295. scale: f64 = 1;
  296. if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
  297. i += 1;
  298. if i < len(s) {
  299. switch s[i] {
  300. case '-': i += 1; frac = true;
  301. case '+': i += 1;
  302. }
  303. exp: u32 = 0;
  304. for ; i < len(s); i += 1 {
  305. r := rune(s[i]);
  306. if r == '_' do continue;
  307. d := u32(_digit_value(r));
  308. if d >= 10 do break;
  309. exp = exp * 10 + d;
  310. }
  311. if exp > 308 { exp = 308; }
  312. for exp >= 50 { scale *= 1e50; exp -= 50; }
  313. for exp >= 8 { scale *= 1e8; exp -= 8; }
  314. for exp > 0 { scale *= 10; exp -= 1; }
  315. }
  316. }
  317. if frac {
  318. value = sign * (value/scale);
  319. } else {
  320. value = sign * (value*scale);
  321. }
  322. ok = i > 0;
  323. return;
  324. }
  325. append_bool :: proc(buf: []byte, b: bool) -> string {
  326. n := 0;
  327. if b do n = copy(buf, "true");
  328. else do n = copy(buf, "false");
  329. return string(buf[:n]);
  330. }
  331. append_uint :: proc(buf: []byte, u: u64, base: int) -> string {
  332. return append_bits(buf, u64(u), base, false, 8*size_of(uint), digits, nil);
  333. }
  334. append_int :: proc(buf: []byte, i: i64, base: int) -> string {
  335. return append_bits(buf, u64(i), base, true, 8*size_of(int), digits, nil);
  336. }
  337. itoa :: proc(buf: []byte, i: int) -> string {
  338. return append_int(buf, i64(i), 10);
  339. }
  340. atoi :: proc(s: string) -> int {
  341. v, _ := parse_int(s);
  342. return v;
  343. }
  344. atof :: proc(s: string) -> f64 {
  345. v, _ := parse_f64(s);
  346. return v;
  347. }
  348. ftoa :: append_float;
  349. append_float :: proc(buf: []byte, f: f64, fmt: byte, prec, bit_size: int) -> string {
  350. return string(generic_ftoa(buf, f, fmt, prec, bit_size));
  351. }
  352. quote :: proc(buf: []byte, str: string) -> string {
  353. write_byte :: inline proc(buf: []byte, i: ^int, bytes: ..byte) {
  354. if i^ >= len(buf) do return;
  355. n := copy(buf[i^:], bytes[:]);
  356. i^ += n;
  357. }
  358. if buf == nil {
  359. return "";
  360. }
  361. c :: '"';
  362. i := 0;
  363. s := str;
  364. write_byte(buf, &i, c);
  365. for width := 0; len(s) > 0; s = s[width:] {
  366. r := rune(s[0]);
  367. width = 1;
  368. if r >= utf8.RUNE_SELF {
  369. r, width = utf8.decode_rune_in_string(s);
  370. }
  371. if width == 1 && r == utf8.RUNE_ERROR {
  372. write_byte(buf, &i, '\\', 'x');
  373. write_byte(buf, &i, digits[s[0]>>4]);
  374. write_byte(buf, &i, digits[s[0]&0xf]);
  375. }
  376. if i < len(buf) {
  377. x := quote_rune(buf[i:], r);
  378. i += len(x);
  379. }
  380. }
  381. write_byte(buf, &i, c);
  382. return string(buf[:i]);
  383. }
  384. quote_rune :: proc(buf: []byte, r: rune) -> string {
  385. write_byte :: inline proc(buf: []byte, i: ^int, bytes: ..byte) {
  386. if i^ < len(buf) {
  387. n := copy(buf[i^:], bytes[:]);
  388. i^ += n;
  389. }
  390. }
  391. write_string :: inline proc(buf: []byte, i: ^int, s: string) {
  392. if i^ < len(buf) {
  393. n := copy(buf[i^:], s);
  394. i^ += n;
  395. }
  396. }
  397. write_rune :: inline proc(buf: []byte, i: ^int, r: rune) {
  398. if i^ < len(buf) {
  399. b, w := utf8.encode_rune(r);
  400. n := copy(buf[i^:], b[:w]);
  401. i^ += n;
  402. }
  403. }
  404. if buf == nil {
  405. return "";
  406. }
  407. i := 0;
  408. write_byte(buf, &i, '\'');
  409. switch r {
  410. case '\a': write_string(buf, &i, "\\a");
  411. case '\b': write_string(buf, &i, "\\b");
  412. case '\e': write_string(buf, &i, "\\e");
  413. case '\f': write_string(buf, &i, "\\f");
  414. case '\n': write_string(buf, &i, "\\n");
  415. case '\r': write_string(buf, &i, "\\r");
  416. case '\t': write_string(buf, &i, "\\t");
  417. case '\v': write_string(buf, &i, "\\v");
  418. case:
  419. if r < 32 {
  420. write_string(buf, &i, "\\x");
  421. b: [2]byte;
  422. s := append_bits(b[:], u64(r), 16, true, 64, digits, nil);
  423. switch len(s) {
  424. case 0: write_string(buf, &i, "00");
  425. case 1: write_rune(buf, &i, '0');
  426. case 2: write_string(buf, &i, s);
  427. }
  428. } else {
  429. write_rune(buf, &i, r);
  430. }
  431. }
  432. write_byte(buf, &i, '\'');
  433. return string(buf[:i]);
  434. }
  435. unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
  436. hex_to_int :: proc(c: byte) -> int {
  437. switch c {
  438. case '0'..'9': return int(c-'0');
  439. case 'a'..'f': return int(c-'a')+10;
  440. case 'A'..'F': return int(c-'A')+10;
  441. }
  442. return -1;
  443. }
  444. w: int;
  445. if str[0] == quote && quote == '"' {
  446. return;
  447. } else if str[0] >= 0x80 {
  448. r, w = utf8.decode_rune_in_string(str);
  449. return r, true, str[w:], true;
  450. } else if str[0] != '\\' {
  451. return rune(str[0]), false, str[1:], true;
  452. }
  453. if len(str) <= 1 {
  454. return;
  455. }
  456. s := str;
  457. c := s[1];
  458. s = s[2:];
  459. switch c {
  460. case:
  461. return;
  462. case 'a': r = '\a';
  463. case 'b': r = '\b';
  464. case 'f': r = '\f';
  465. case 'n': r = '\n';
  466. case 'r': r = '\r';
  467. case 't': r = '\t';
  468. case 'v': r = '\v';
  469. case '\\': r = '\\';
  470. case '"': r = '"';
  471. case '\'': r = '\'';
  472. case '0'..'7':
  473. v := int(c-'0');
  474. if len(s) < 2 {
  475. return;
  476. }
  477. for i in 0..<len(s) {
  478. d := int(s[i]-'0');
  479. if d < 0 || d > 7 {
  480. return;
  481. }
  482. v = (v<<3) | d;
  483. }
  484. s = s[2:];
  485. if v > 0xff {
  486. return;
  487. }
  488. r = rune(v);
  489. case 'x', 'u', 'U':
  490. count: int;
  491. switch c {
  492. case 'x': count = 2;
  493. case 'u': count = 4;
  494. case 'U': count = 8;
  495. }
  496. if len(s) < count {
  497. return;
  498. }
  499. for i in 0..<count {
  500. d := hex_to_int(s[i]);
  501. if d < 0 {
  502. return;
  503. }
  504. r = (r<<4) | rune(d);
  505. }
  506. s = s[count:];
  507. if c == 'x' {
  508. break;
  509. }
  510. if r > utf8.MAX_RUNE {
  511. return;
  512. }
  513. multiple_bytes = true;
  514. }
  515. success = true;
  516. tail_string = s;
  517. return;
  518. }
  519. unquote_string :: proc(lit: string, allocator := context.allocator) -> (res: string, allocated, success: bool) {
  520. contains_rune :: proc(s: string, r: rune) -> int {
  521. for c, offset in s {
  522. if c == r do return offset;
  523. }
  524. return -1;
  525. }
  526. assert(len(lit) >= 2);
  527. if lit[0] == '`' {
  528. return lit[1:len(lit)-1], false, true;
  529. }
  530. s := lit;
  531. quote := '"';
  532. if s == `""` {
  533. return "", false, true;
  534. }
  535. if contains_rune(s, '\n') >= 0 {
  536. return s, false, false;
  537. }
  538. if contains_rune(s, '\\') < 0 && contains_rune(s, quote) < 0 {
  539. if quote == '"' {
  540. return s, false, true;
  541. }
  542. }
  543. buf_len := 3*len(s) / 2;
  544. buf := make([]byte, buf_len, allocator);
  545. offset := 0;
  546. for len(s) > 0 {
  547. r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote));
  548. if !ok {
  549. delete(buf);
  550. return s, false, false;
  551. }
  552. s = tail_string;
  553. if r < 0x80 || !multiple_bytes {
  554. buf[offset] = byte(r);
  555. offset += 1;
  556. } else {
  557. b, w := utf8.encode_rune(r);
  558. copy(buf[offset:], b[:w]);
  559. offset += w;
  560. }
  561. }
  562. new_string := string(buf[:offset]);
  563. return new_string, true, true;
  564. }