strconv.odin 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708
  1. package strconv
  2. import "core:unicode/utf8"
  // Parses a boolean from its textual form.
  //
  // "1", "t", "T", "true", "TRUE" and "True" yield true;
  // "0", "f", "F", "false", "FALSE" and "False" yield false.
  // Any other input returns result=false with ok=false.
  parse_bool :: proc(s: string) -> (result: bool = false, ok: bool) {
      truthy := [?]string{"1", "t", "T", "true", "TRUE", "True"};
      falsy  := [?]string{"0", "f", "F", "false", "FALSE", "False"};

      for spelling in truthy {
          if s == spelling {
              return true, true;
          }
      }
      for spelling in falsy {
          if s == spelling {
              return false, true;
          }
      }
      // Unrecognised spelling: fall back to the declared defaults.
      return;
  }
  // Maps a rune to its digit value: '0'..'9' -> 0..9, and letters of either
  // case -> 10..35. Every other rune maps to 16, which the parsers in this file
  // treat as "not a digit" because they only accept values below a base of at
  // most 16.
  _digit_value :: proc(r: rune) -> int {
      code := int(r);
      if '0' <= r && r <= '9' {
          return code - '0';
      }
      if 'a' <= r && r <= 'z' {
          return code - 'a' + 10;
      }
      if 'A' <= r && r <= 'Z' {
          return code - 'A' + 10;
      }
      return 16;
  }
  // Parses an integer value from a string, in the given base, without a prefix.
  //
  // A single leading '+' or '-' is accepted, and '_' may appear between digits
  // as a separator. Overflow is not detected.
  //
  // Returns ok=false if no digit of the given base was found, or if the input
  // string contained more than just the number. `value` still holds whatever
  // was accumulated before parsing stopped.
  //
  // ```
  // n, ok := strconv.parse_i64_of_base("-1234", 10);
  // assert(n == -1234 && ok);
  //
  // n, ok = strconv.parse_i64_of_base("-1234eeee", 10); // trailing characters
  // assert(n == -1234 && !ok);
  // ```
  parse_i64_of_base :: proc(str: string, base: int) -> (value: i64, ok: bool) {
      assert(base <= 16, "base must be 1-16");

      s := str;
      if s == "" {
          return;
      }

      // A sign is only consumed when something follows it, so a lone "+" or "-"
      // is rejected below as a non-digit.
      neg := false;
      if len(s) > 1 {
          switch s[0] {
          case '-':
              neg = true;
              s = s[1:];
          case '+':
              s = s[1:];
          }
      }

      // `i` counts consumed bytes; every rune accepted here is ASCII, so the
      // rune count and the byte offset stay in step.
      i := 0;
      saw_digit := false;
      for r in s {
          if r == '_' {
              i += 1;
              continue;
          }
          v := i64(_digit_value(r));
          if v >= i64(base) {
              break;
          }
          value *= i64(base);
          value += v;
          saw_digit = true;
          i += 1;
      }
      s = s[i:];

      if neg {
          value = -value;
      }
      // Separators alone (e.g. "_") do not constitute a number.
      ok = saw_digit && len(s) == 0;
      return;
  }
  // Parses an integer value from a string, in base 10, unless there's a prefix.
  //
  // Recognised prefixes (after an optional '+' or '-') are "0b" (base 2),
  // "0o" (8), "0d" (10), "0z" (12) and "0x" (16); a prefix is only honoured
  // when at least one more character follows it. '_' may appear between digits
  // as a separator. Overflow is not detected.
  //
  // Returns ok=false if a valid integer could not be found (no digits at all),
  // or if the input string contained more than just the number.
  //
  // ```
  // n, ok := strconv.parse_i64_maybe_prefixed("1234");
  // assert(n == 1234 && ok);
  //
  // n, ok = strconv.parse_i64_maybe_prefixed("0xeeee");
  // assert(n == 0xeeee && ok);
  // ```
  parse_i64_maybe_prefixed :: proc(str: string) -> (value: i64, ok: bool) {
      s := str;
      if s == "" {
          return;
      }

      neg := false;
      if len(s) > 1 {
          switch s[0] {
          case '-':
              neg = true;
              s = s[1:];
          case '+':
              s = s[1:];
          }
      }

      base: i64 = 10;
      if len(s) > 2 && s[0] == '0' {
          switch s[1] {
          case 'b': base =  2; s = s[2:];
          case 'o': base =  8; s = s[2:];
          case 'd': base = 10; s = s[2:];
          case 'z': base = 12; s = s[2:];
          case 'x': base = 16; s = s[2:];
          }
      }

      // `i` counts consumed bytes; every rune accepted here is ASCII.
      i := 0;
      saw_digit := false;
      for r in s {
          if r == '_' {
              i += 1;
              continue;
          }
          v := i64(_digit_value(r));
          if v >= base {
              break;
          }
          value *= base;
          value += v;
          saw_digit = true;
          i += 1;
      }
      s = s[i:];

      if neg {
          value = -value;
      }
      // A prefix or separators without any digit (e.g. "0x_") is not a number.
      ok = saw_digit && len(s) == 0;
      return;
  }
  // Overloaded entry point: a call with only a string resolves to
  // parse_i64_maybe_prefixed, a call with a string and a base resolves to
  // parse_i64_of_base.
  126. parse_i64 :: proc{parse_i64_maybe_prefixed, parse_i64_of_base};
  // Parses an unsigned integer value from a string, in the given base, and
  // without a prefix.
  //
  // A single leading '+' is accepted, and '_' may appear between digits as a
  // separator. Overflow is not detected.
  //
  // Returns ok=false if no digit of the given base was found, or if the input
  // string contained more than just the number (this includes a leading '-').
  //
  // ```
  // n, ok := strconv.parse_u64_of_base("1234eeee", 10); // 'e' is not a base-10 digit
  // assert(n == 1234 && !ok);
  //
  // n, ok = strconv.parse_u64_of_base("5678eeee", 16);
  // assert(n == 0x5678eeee && ok);
  // ```
  parse_u64_of_base :: proc(str: string, base: int) -> (value: u64, ok: bool) {
      assert(base <= 16, "base must be 1-16");

      s := str;
      if s == "" {
          return;
      }

      if len(s) > 1 && s[0] == '+' {
          s = s[1:];
      }

      // `i` counts consumed bytes; every rune accepted here is ASCII.
      i := 0;
      saw_digit := false;
      for r in s {
          if r == '_' {
              i += 1;
              continue;
          }
          v := u64(_digit_value(r));
          if v >= u64(base) {
              break;
          }
          value *= u64(base);
          value += v;
          saw_digit = true;
          i += 1;
      }
      s = s[i:];

      // Separators alone (e.g. "_") do not constitute a number.
      ok = saw_digit && len(s) == 0;
      return;
  }
  // Parses an unsigned integer value from a string in base 10, unless there's a prefix.
  //
  // Recognised prefixes (after an optional '+') are "0b" (base 2), "0o" (8),
  // "0d" (10), "0z" (12) and "0x" (16); a prefix is only honoured when at least
  // one more character follows it. '_' may appear between digits as a
  // separator. Overflow is not detected.
  //
  // Returns ok=false if a valid integer could not be found (no digits at all),
  // if the value was negative, or if the input string contained more than just
  // the number.
  //
  // ```
  // n, ok := strconv.parse_u64_maybe_prefixed("1234");
  // assert(n == 1234 && ok);
  //
  // n, ok = strconv.parse_u64_maybe_prefixed("0xeeee");
  // assert(n == 0xeeee && ok);
  // ```
  parse_u64_maybe_prefixed :: proc(str: string) -> (value: u64, ok: bool) {
      s := str;
      if s == "" {
          return;
      }

      if len(s) > 1 && s[0] == '+' {
          s = s[1:];
      }

      base := u64(10);
      if len(s) > 2 && s[0] == '0' {
          switch s[1] {
          case 'b': base =  2; s = s[2:];
          case 'o': base =  8; s = s[2:];
          case 'd': base = 10; s = s[2:];
          case 'z': base = 12; s = s[2:];
          case 'x': base = 16; s = s[2:];
          }
      }

      // `i` counts consumed bytes; every rune accepted here is ASCII.
      i := 0;
      saw_digit := false;
      for r in s {
          if r == '_' {
              i += 1;
              continue;
          }
          v := u64(_digit_value(r));
          if v >= base {
              break;
          }
          value *= base;
          value += v;
          saw_digit = true;
          i += 1;
      }
      s = s[i:];

      // A prefix or separators without any digit (e.g. "0x_") is not a number.
      ok = saw_digit && len(s) == 0;
      return;
  }
  // Overloaded entry point: a call with only a string resolves to
  // parse_u64_maybe_prefixed, a call with a string and a base resolves to
  // parse_u64_of_base.
  215. parse_u64 :: proc{parse_u64_maybe_prefixed, parse_u64_of_base};
  // Parses an `int` from a string.
  //
  // With base == 0 (the default) the string is handed to
  // parse_i64_maybe_prefixed, so a prefix such as "0x" selects the base and
  // base 10 is assumed otherwise. Any other base is passed to
  // parse_i64_of_base, which does not interpret prefixes.
  //
  // The 64-bit result is converted to `int`; `ok` is whatever the underlying
  // parser reported.
  //
  // ```
  // n, ok := strconv.parse_int("1234"); // without prefix, inferred base 10
  // assert(n == 1234 && ok);
  //
  // n, ok = strconv.parse_int("ffff", 16); // without prefix, explicit base
  // assert(n == 0xffff && ok);
  //
  // n, ok = strconv.parse_int("0xffff"); // with prefix and inferred base
  // assert(n == 0xffff && ok);
  // ```
  parse_int :: proc(s: string, base := 0) -> (value: int, ok: bool) {
      wide: i64;
      if base == 0 {
          wide, ok = parse_i64_maybe_prefixed(s);
      } else {
          wide, ok = parse_i64_of_base(s, base);
      }
      return int(wide), ok;
  }
  // Parses a `uint` from a string.
  //
  // With base == 0 (the default) the string is handed to
  // parse_u64_maybe_prefixed, so a prefix such as "0x" selects the base and
  // base 10 is assumed otherwise. Any other base is passed to
  // parse_u64_of_base, which does not interpret prefixes.
  //
  // Returns ok=false if:
  // - no appropriate value could be found;
  // - the value was negative; or
  // - the input string contained more than just the number.
  //
  // ```
  // n, ok := strconv.parse_uint("1234"); // without prefix, inferred base 10
  // assert(n == 1234 && ok);
  //
  // n, ok = strconv.parse_uint("ffff", 16); // without prefix, explicit base
  // assert(n == 0xffff && ok);
  //
  // n, ok = strconv.parse_uint("0xffff"); // with prefix and inferred base
  // assert(n == 0xffff && ok);
  // ```
  parse_uint :: proc(s: string, base := 0) -> (value: uint, ok: bool) {
      wide: u64;
      if base == 0 {
          wide, ok = parse_u64_maybe_prefixed(s);
      } else {
          wide, ok = parse_u64_of_base(s, base);
      }
      return uint(wide), ok;
  }
  // Parses a 32-bit floating point number from a string.
  //
  // The string is parsed by parse_f64 and the result is narrowed to f32; `ok`
  // is exactly what parse_f64 reported, so it is false when the input contains
  // more than just the number.
  //
  // ```
  // n, ok := strconv.parse_f32("12.5");
  // assert(n == 12.5 && ok);
  //
  // n, ok = strconv.parse_f32("12.5xyz"); // trailing characters
  // assert(n == 12.5 && !ok);
  // ```
  parse_f32 :: proc(s: string) -> (value: f32, ok: bool) {
      wide, parsed := parse_f64(s);
      value = f32(wide);
      ok = parsed;
      return;
  }
  // Parses a 64-bit floating point number from a string.
  //
  // Accepted form: optional '+'/'-', decimal digits, optional '.' followed by
  // decimal digits, optional 'e'/'E' exponent with its own optional sign.
  // '_' may appear among the digits as a separator. The exponent magnitude is
  // clamped to 308.
  //
  // Returns ok=false if a base 10 float could not be found (no mantissa digit,
  // or an exponent marker without exponent digits), or if the input string
  // contained more than just the number.
  //
  // ```
  // n, ok := strconv.parse_f64("12.5");
  // assert(n == 12.5 && ok);
  //
  // n, ok = strconv.parse_f64("12.5xyz"); // trailing characters
  // assert(n == 12.5 && !ok);
  // ```
  parse_f64 :: proc(str: string) -> (value: f64, ok: bool) {
      s := str;
      if s == "" {
          return;
      }

      i := 0;
      sign: f64 = 1;
      switch s[i] {
      case '-': i += 1; sign = -1;
      case '+': i += 1;
      }

      // Integer part.
      saw_digit := false;
      for ; i < len(s); i += 1 {
          r := rune(s[i]);
          if r == '_' {
              continue;
          }
          v := _digit_value(r);
          if v >= 10 {
              break;
          }
          value *= 10;
          value += f64(v);
          saw_digit = true;
      }

      // Fractional part.
      if i < len(s) && s[i] == '.' {
          pow10: f64 = 10;
          i += 1;
          for ; i < len(s); i += 1 {
              r := rune(s[i]);
              if r == '_' {
                  continue;
              }
              v := _digit_value(r);
              if v >= 10 {
                  break;
              }
              value += f64(v)/pow10;
              pow10 *= 10;
              saw_digit = true;
          }
      }

      // Exponent.
      frac := false;
      scale: f64 = 1;
      exp_ok := true; // stays true when there is no exponent marker at all
      if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
          i += 1;
          exp_ok = false; // a marker must be followed by at least one digit
          if i < len(s) {
              switch s[i] {
              case '-': i += 1; frac = true;
              case '+': i += 1;
              }
              exp: u32 = 0;
              for ; i < len(s); i += 1 {
                  r := rune(s[i]);
                  if r == '_' {
                      continue;
                  }
                  d := u32(_digit_value(r));
                  if d >= 10 {
                      break;
                  }
                  exp_ok = true;
                  // Stop accumulating once past the clamp so that a very long
                  // exponent cannot wrap the u32 back to a small value.
                  if exp <= 308 {
                      exp = exp * 10 + d;
                  }
              }
              if exp > 308 { exp = 308; }
              for exp >= 50 { scale *= 1e50; exp -= 50; }
              for exp >=  8 { scale *=  1e8; exp -=  8; }
              for exp >   0 { scale *=   10; exp -=  1; }
          }
      }
      s = s[i:];

      if frac {
          value = sign * (value/scale);
      } else {
          value = sign * (value*scale);
      }
      ok = saw_digit && exp_ok && len(s) == 0;
      return;
  }
  // Writes "true" or "false" into `buf` and returns the written portion of
  // `buf` as a string. If `buf` is too short, only the bytes that fit are
  // written and returned.
  append_bool :: proc(buf: []byte, b: bool) -> string {
      text := b ? "true" : "false";
      written := copy(buf, text);
      return string(buf[:written]);
  }
  // Formats the unsigned value `u` in the given base into `buf` by delegating
  // to append_bits with is_signed=false and the bit width of the native `uint`.
  // Returns the string produced by append_bits.
  append_uint :: proc(buf: []byte, u: u64, base: int) -> string {
      UINT_BITS :: 8*size_of(uint);
      return append_bits(buf, u, base, false, UINT_BITS, digits, nil);
  }
  // Formats the signed value `i` in the given base into `buf` by delegating to
  // append_bits with is_signed=true and the bit width of the native `int`.
  // The value is passed as its u64 bit pattern. Returns the string produced by
  // append_bits.
  append_int :: proc(buf: []byte, i: i64, base: int) -> string {
      INT_BITS :: 8*size_of(int);
      bits := u64(i);
      return append_bits(buf, bits, base, true, INT_BITS, digits, nil);
  }
  // Formats `i` in base 10 into `buf` via append_int and returns the result.
  itoa :: proc(buf: []byte, i: int) -> string {
      DECIMAL :: 10;
      wide := i64(i);
      return append_int(buf, wide, DECIMAL);
  }
  // Convenience wrapper around parse_int (default base handling) that discards
  // the ok flag and returns only the parsed value.
  atoi :: proc(s: string) -> int {
      parsed, _ := parse_int(s);
      return parsed;
  }
  // Convenience wrapper around parse_f64 that discards the ok flag and returns
  // only the parsed value.
  atof :: proc(s: string) -> f64 {
      parsed, _ := parse_f64(s);
      return parsed;
  }
  // Alias: ftoa is another name for append_float.
  400. ftoa :: append_float;
  // Formats `f` into `buf` by forwarding every argument unchanged to
  // generic_ftoa and returning its result as a string.
  // NOTE(review): `fmt`, `prec` and `bit_size` are interpreted by
  // generic_ftoa, which is defined elsewhere - see it for their meaning.
  append_float :: proc(buf: []byte, f: f64, fmt: byte, prec, bit_size: int) -> string {
      written := generic_ftoa(buf, f, fmt, prec, bit_size);
      return string(written);
  }
  // Writes `str` into `buf` as a double-quoted, escaped string and returns the
  // written portion of `buf`.
  //
  // - '"' and '\' are backslash-escaped;
  // - \a \b \e \f \n \r \t \v use their short escapes;
  // - other control characters below 0x20 and bytes that are not valid UTF-8
  //   are written as \xNN;
  // - everything else is copied through as UTF-8.
  //
  // Returns "" when `buf` is nil. Output that does not fit in `buf` is
  // truncated.
  quote :: proc(buf: []byte, str: string) -> string {
      // Appends `bytes` at buf[i^:], copying only what fits.
      write_byte :: proc(buf: []byte, i: ^int, bytes: ..byte) {
          if i^ >= len(buf) {
              return;
          }
          n := copy(buf[i^:], bytes[:]);
          i^ += n;
      }

      if buf == nil {
          return "";
      }

      c :: '"';
      i := 0;
      s := str;

      write_byte(buf, &i, c);
      for width := 0; len(s) > 0; s = s[width:] {
          r := rune(s[0]);
          width = 1;
          if r >= utf8.RUNE_SELF {
              r, width = utf8.decode_rune_in_string(s);
          }
          if width == 1 && r == utf8.RUNE_ERROR {
              // Invalid UTF-8 byte: emit it as a hex escape and move on, so it
              // is not written a second time as a replacement character.
              write_byte(buf, &i, '\\', 'x');
              write_byte(buf, &i, digits[s[0]>>4]);
              write_byte(buf, &i, digits[s[0]&0xf]);
              continue;
          }

          switch r {
          case '\a': write_byte(buf, &i, '\\', 'a');
          case '\b': write_byte(buf, &i, '\\', 'b');
          case '\e': write_byte(buf, &i, '\\', 'e');
          case '\f': write_byte(buf, &i, '\\', 'f');
          case '\n': write_byte(buf, &i, '\\', 'n');
          case '\r': write_byte(buf, &i, '\\', 'r');
          case '\t': write_byte(buf, &i, '\\', 't');
          case '\v': write_byte(buf, &i, '\\', 'v');
          case '\\': write_byte(buf, &i, '\\', '\\');
          case '"':  write_byte(buf, &i, '\\', '"');
          case:
              if r < 32 {
                  // Remaining control characters: two-digit hex escape.
                  write_byte(buf, &i, '\\', 'x');
                  write_byte(buf, &i, digits[int(r)>>4]);
                  write_byte(buf, &i, digits[int(r)&0xf]);
              } else {
                  b, w := utf8.encode_rune(r);
                  write_byte(buf, &i, ..b[:w]);
              }
          }
      }
      write_byte(buf, &i, c);
      return string(buf[:i]);
  }
  // Writes `r` into `buf` as a single-quoted, escaped rune literal and returns
  // the written portion of `buf`.
  //
  // - '\'' and '\' are backslash-escaped;
  // - \a \b \e \f \n \r \t \v use their short escapes;
  // - other control characters below 0x20 are written as \xNN (always two
  //   hex digits);
  // - everything else is written as UTF-8.
  //
  // Returns "" when `buf` is nil. Output that does not fit in `buf` is
  // truncated.
  quote_rune :: proc(buf: []byte, r: rune) -> string {
      // Each helper appends at buf[i^:], copying only what fits.
      write_byte :: proc(buf: []byte, i: ^int, bytes: ..byte) {
          if i^ < len(buf) {
              n := copy(buf[i^:], bytes[:]);
              i^ += n;
          }
      }
      write_string :: proc(buf: []byte, i: ^int, s: string) {
          if i^ < len(buf) {
              n := copy(buf[i^:], s);
              i^ += n;
          }
      }
      write_rune :: proc(buf: []byte, i: ^int, r: rune) {
          if i^ < len(buf) {
              b, w := utf8.encode_rune(r);
              n := copy(buf[i^:], b[:w]);
              i^ += n;
          }
      }

      if buf == nil {
          return "";
      }

      i := 0;
      write_byte(buf, &i, '\'');

      switch r {
      case '\a': write_string(buf, &i, "\\a");
      case '\b': write_string(buf, &i, "\\b");
      case '\e': write_string(buf, &i, "\\e");
      case '\f': write_string(buf, &i, "\\f");
      case '\n': write_string(buf, &i, "\\n");
      case '\r': write_string(buf, &i, "\\r");
      case '\t': write_string(buf, &i, "\\t");
      case '\v': write_string(buf, &i, "\\v");
      case '\\': write_string(buf, &i, "\\\\");
      case '\'': write_string(buf, &i, "\\'");
      case:
          if 0 <= r && r < 32 {
              // Always two hex digits, so values below 0x10 keep both the
              // leading zero and the digit itself.
              write_string(buf, &i, "\\x");
              write_byte(buf, &i, digits[int(r)>>4]);
              write_byte(buf, &i, digits[int(r)&0xf]);
          } else {
              write_rune(buf, &i, r);
          }
      }
      write_byte(buf, &i, '\'');

      return string(buf[:i]);
  }
  // Decodes the first character or escape sequence of `str`.
  //
  // Returns:
  // - r:              the decoded rune (or byte value for \x and octal escapes);
  // - multiple_bytes: true when `r` must be UTF-8 encoded on output (a
  //                   multi-byte source character, or a \u / \U escape);
  // - tail_string:    the remainder of `str` after the consumed sequence;
  // - success:        false for empty input, an unescaped `quote` when quote
  //                   is '"', a truncated or unknown escape, or an
  //                   out-of-range value.
  //
  // Supported escapes: \a \b \e \f \n \r \t \v \\ \" \' , three-digit octal
  // (\NNN, at most 0xff), \xNN, \uNNNN and \UNNNNNNNN.
  unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
      hex_to_int :: proc(c: byte) -> int {
          switch c {
          case '0'..='9': return int(c-'0');
          case 'a'..='f': return int(c-'a')+10;
          case 'A'..='F': return int(c-'A')+10;
          }
          return -1;
      }

      // Nothing to decode; also protects the str[0] accesses below.
      if len(str) == 0 {
          return;
      }

      w: int;
      if str[0] == quote && quote == '"' {
          return;
      } else if str[0] >= 0x80 {
          r, w = utf8.decode_rune_in_string(str);
          return r, true, str[w:], true;
      } else if str[0] != '\\' {
          return rune(str[0]), false, str[1:], true;
      }

      // A lone trailing backslash is not a valid escape.
      if len(str) <= 1 {
          return;
      }

      s := str;
      c := s[1];
      s = s[2:];

      switch c {
      case:
          return;

      case 'a':  r = '\a';
      case 'b':  r = '\b';
      case 'e':  r = '\e';
      case 'f':  r = '\f';
      case 'n':  r = '\n';
      case 'r':  r = '\r';
      case 't':  r = '\t';
      case 'v':  r = '\v';
      case '\\': r = '\\';
      case '"':  r = '"';
      case '\'': r = '\'';

      case '0'..='7':
          // Octal escape: exactly two more digits follow the first one.
          v := int(c-'0');
          if len(s) < 2 {
              return;
          }
          for i in 0..<2 {
              d := int(s[i]) - '0';
              if d < 0 || d > 7 {
                  return;
              }
              v = (v<<3) | d;
          }
          s = s[2:];
          if v > 0xff {
              return;
          }
          r = rune(v);

      case 'x', 'u', 'U':
          count: int;
          switch c {
          case 'x': count = 2;
          case 'u': count = 4;
          case 'U': count = 8;
          }

          if len(s) < count {
              return;
          }

          for i in 0..<count {
              d := hex_to_int(s[i]);
              if d < 0 {
                  return;
              }
              r = (r<<4) | rune(d);
          }
          s = s[count:];
          if c == 'x' {
              // \xNN denotes a single byte, not a code point.
              break;
          }
          // Eight hex digits can set the sign bit, so reject negatives as
          // well as values above the Unicode range.
          if r < 0 || r > utf8.MAX_RUNE {
              return;
          }
          multiple_bytes = true;
      }

      success = true;
      tail_string = s;
      return;
  }
  // Converts a string literal to the string it denotes.
  //
  // - A literal starting with '`' is returned with its first and last byte
  //   removed and no escape processing.
  // - A literal enclosed in double quotes has the quotes removed and its
  //   escape sequences decoded with unquote_char.
  // - Input that is not enclosed in double quotes is treated as the literal's
  //   body and decoded as-is.
  //
  // Returns:
  // - res:       the unquoted string; on failure, the undecoded remainder;
  // - allocated: true when `res` was allocated with `allocator` and must be
  //              freed by the caller; false when `res` is a slice of `lit`;
  // - success:   false when `lit` is shorter than two bytes, contains a raw
  //              newline, or contains an invalid escape or an unescaped '"'.
  unquote_string :: proc(lit: string, allocator := context.allocator) -> (res: string, allocated, success: bool) {
      // Byte offset of the first occurrence of `r` in `s`, or -1.
      contains_rune :: proc(s: string, r: rune) -> int {
          for c, offset in s {
              if c == r {
                  return offset;
              }
          }
          return -1;
      }

      // Too short to carry a pair of delimiters.
      if len(lit) < 2 {
          return lit, false, false;
      }
      if lit[0] == '`' {
          return lit[1:len(lit)-1], false, true;
      }

      s := lit;
      quote := '"';

      if s == `""` {
          return "", false, true;
      }

      // Drop the enclosing quotes; otherwise the very first character would be
      // rejected by unquote_char as an unescaped quote.
      if s[0] == byte(quote) && s[len(s)-1] == byte(quote) {
          s = s[1:len(s)-1];
      }

      if contains_rune(s, '\n') >= 0 {
          return s, false, false;
      }

      // Fast path: nothing to decode, return a slice of the input.
      if contains_rune(s, '\\') < 0 && contains_rune(s, quote) < 0 {
          if quote == '"' {
              return s, false, true;
          }
      }

      // Worst case is an invalid UTF-8 byte, which is re-encoded as the
      // three-byte replacement character, so 3 bytes per input byte always
      // suffices.
      buf_len := 3*len(s);
      buf := make([]byte, buf_len, allocator);
      offset := 0;
      for len(s) > 0 {
          r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote));
          if !ok {
              // Free with the allocator that produced the buffer.
              delete(buf, allocator);
              return s, false, false;
          }
          s = tail_string;
          if r < 0x80 || !multiple_bytes {
              buf[offset] = byte(r);
              offset += 1;
          } else {
              b, w := utf8.encode_rune(r);
              copy(buf[offset:], b[:w]);
              offset += w;
          }
      }

      new_string := string(buf[:offset]);

      return new_string, true, true;
  }
  617. }
  618. new_string := string(buf[:offset]);
  619. return new_string, true, true;
  620. }