parser.odin 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. package json
  2. import "core:mem"
  3. import "core:unicode/utf8"
  4. import "core:strconv"
  // Parser carries the state shared by all parse_* procedures: the token
  // source, a one-token lookahead window, and the settings chosen by the caller.
  Parser :: struct {
      tok:            Tokenizer,     // token source; advanced by advance_token
      prev_token:     Token,         // token consumed by the most recent advance_token
      curr_token:     Token,         // lookahead token that the parse_* procedures inspect
      spec:           Specification, // dialect selector (compared against Specification.JSON5)
      allocator:      mem.Allocator, // used for strings, arrays and objects built while parsing
      unmarshal_data: any,           // not read by the code visible in this file section
      parse_integers: bool,          // integer-parsing mode requested by the caller
  }
  // make_parser builds a Parser over `data` and primes it with the first token.
  //
  // data:           raw bytes of the document to parse
  // spec:           dialect to accept (defaults to strict JSON)
  // parse_integers: forwarded to the tokenizer and recorded on the parser
  // allocator:      allocator for everything the parser creates; must have a
  //                 non-nil procedure
  //
  // NOTE: any error produced while reading the first token is discarded here
  // because the signature has no error result; the parse_* procedures will
  // reject the resulting token if it is not what they expect.
  make_parser :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> Parser {
      p: Parser;
      p.tok = make_tokenizer(data, spec, parse_integers);
      p.spec = spec;
      p.allocator = allocator;
      // Keep the parser's own flag in sync with what the tokenizer was given;
      // previously this field was left at its zero value.
      p.parse_integers = parse_integers;
      assert(p.allocator.procedure != nil);
      advance_token(&p);
      return p;
  }
  // parse is the top-level entry point: it parses `data` into a Value tree.
  //
  // `allocator` is installed as context.allocator for the duration of the call
  // and is also handed to the parser. Under JSON5 the document may be any
  // value; under every other specification the document must be an object.
  parse :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
      context.allocator = allocator;

      parser := make_parser(data, spec, parse_integers, allocator);

      if parser.spec != Specification.JSON5 {
          return parse_object(&parser);
      }
      return parse_value(&parser);
  }
  // token_end_pos returns the position just past `tok`: a copy of tok.pos whose
  // offset is advanced by the byte length of the token text. All other fields
  // of the position are copied unchanged.
  token_end_pos :: proc(tok: Token) -> Pos {
      result := tok.pos;
      result.offset = result.offset + len(tok.text);
      return result;
  }
  // advance_token shifts the lookahead window by one token: the current token
  // becomes the previous token and a new current token is read from the
  // tokenizer. Returns the token that was just consumed together with any
  // error reported while reading the new one.
  advance_token :: proc(p: ^Parser) -> (Token, Error) {
      p.prev_token = p.curr_token;

      next, next_err := get_token(&p.tok);
      p.curr_token = next;

      return p.prev_token, next_err;
  }
  // allow_token consumes the current token only if it is of the given kind.
  // Returns true when a token was consumed, false (and no state change) otherwise.
  allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
      if p.curr_token.kind != kind {
          return false;
      }
      advance_token(p);
      return true;
  }
  // expect_token requires the current token to be of the given kind.
  // The token is consumed in either case; the result is .None on a match and
  // .Unexpected_Token on a mismatch.
  expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
      found := p.curr_token.kind;
      advance_token(p);
      if found != kind {
          return .Unexpected_Token;
      }
      return .None;
  }
  // parse_value parses whatever value starts at the current token.
  //
  // Scalars consume exactly one token; '{' and '[' are delegated to
  // parse_object and parse_array. Infinity and NaN tokens are accepted only
  // under JSON5. Any other token is consumed and reported as
  // .Unexpected_Token.
  //
  // value.pos is the position of the first token; value.end is filled in on
  // exit from the end of the last token consumed.
  parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      tok := p.curr_token;
      #partial switch tok.kind {
      case .Open_Brace:
          return parse_object(p);

      case .Open_Bracket:
          return parse_array(p);

      case .Null:
          value.value = Null{};
          advance_token(p);
          return;

      case .True:
          value.value = Boolean(true);
          advance_token(p);
          return;

      case .False:
          value.value = Boolean(false);
          advance_token(p);
          return;

      case .Integer:
          // The conversion status is deliberately discarded, as before.
          number, _ := strconv.parse_i64(tok.text);
          value.value = Integer(number);
          advance_token(p);
          return;

      case .Float:
          // The conversion status is deliberately discarded, as before.
          number, _ := strconv.parse_f64(tok.text);
          value.value = Float(number);
          advance_token(p);
          return;

      case .String:
          value.value = String(unquote_string(tok, p.spec, p.allocator));
          advance_token(p);
          return;

      case .Infinity:
          // Only JSON5 knows this token; otherwise fall out to the error path.
          if p.spec == Specification.JSON5 {
              bits: u64 = 0x7ff0000000000000;
              if tok.text[0] == '-' {
                  bits = 0xfff0000000000000;
              }
              value.value = transmute(f64)bits;
              advance_token(p);
              return;
          }

      case .NaN:
          // Only JSON5 knows this token; otherwise fall out to the error path.
          if p.spec == Specification.JSON5 {
              bits: u64 = 0x7ff7ffffffffffff;
              if tok.text[0] == '-' {
                  bits = 0xfff7ffffffffffff;
              }
              value.value = transmute(f64)bits;
              advance_token(p);
              return;
          }
      }

      // Nothing above claimed the token: skip it and report the problem.
      err = .Unexpected_Token;
      advance_token(p);
      return;
  }
  // parse_array parses a '[' ... ']' sequence of comma-separated values.
  //
  // On success value.value holds the Array. On failure every element parsed so
  // far is destroyed, the backing storage is released, and err says why.
  //
  // Trailing commas ("[1, 2,]") are accepted only under JSON5; under strict
  // JSON they are reported as .Unexpected_Token.
  parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, .Open_Bracket); err != .None {
          return;
      }

      array: Array;
      array.allocator = p.allocator;

      // Runs on every exit; only cleans up when an error is being returned.
      defer if err != .None {
          for elem in array {
              destroy_value(elem);
          }
          delete(array);
      }

      for p.curr_token.kind != .Close_Bracket {
          elem, elem_err := parse_value(p);
          if elem_err != .None {
              err = elem_err;
              return;
          }
          append(&array, elem);

          if !allow_token(p, .Comma) {
              break;
          }

          // A comma immediately followed by ']' is a trailing comma. Without
          // this check the loop condition would silently accept it.
          if p.spec != Specification.JSON5 && p.curr_token.kind == .Close_Bracket {
              err = .Unexpected_Token;
              return;
          }
      }

      if err = expect_token(p, .Close_Bracket); err != .None {
          return;
      }

      value.value = array;
      return;
  }
  // clone_string returns a copy of `s` allocated with `allocator`.
  // The allocation is one byte longer than the string and that extra byte is
  // set to 0; the returned string itself does not include it.
  clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
      length := len(s);
      storage := make([]byte, length+1, allocator);
      copy(storage, s);
      storage[length] = 0;
      return string(storage[:length]);
  }
  // parse_object_key consumes the current token as an object key.
  //
  // A string token is unquoted into a newly allocated key. Under JSON5 a bare
  // identifier is also accepted and its text is cloned. Anything else is
  // consumed and reported as .Expected_String_For_Object_Key.
  parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
      tok := p.curr_token;

      // JSON5 extension: unquoted identifier keys.
      if p.spec == Specification.JSON5 && tok.kind == .Ident {
          expect_token(p, .Ident);
          key = clone_string(tok.text, p.allocator);
          return;
      }

      // Common path for every specification: the key must be a string token.
      if expect_token(p, .String) != .None {
          err = .Expected_String_For_Object_Key;
          return;
      }
      key = unquote_string(tok, p.spec, p.allocator);
      return;
  }
  // parse_object parses a '{' ... '}' collection of key/value members.
  //
  // On success value.value holds the Object. On failure value.pos is moved to
  // the current token (the error location), every key and element stored so
  // far is released, and err says why:
  //   .Unexpected_Token                missing '{' or '}', or a trailing comma in strict JSON
  //   .Expected_String_For_Object_Key  the key token was not acceptable
  //   .Expected_Colon_After_Key        no ':' after the key
  //   .Duplicate_Object_Key            the same key appeared twice
  //   (any error from parse_value)     the member value failed to parse
  //
  // Under JSON5 the comma between members is optional and a trailing comma is
  // accepted. Under strict JSON members must be comma-separated and a trailing
  // comma is rejected.
  parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, .Open_Brace); err != .None {
          value.pos = p.curr_token.pos;
          return;
      }

      obj: Object;
      obj.allocator = p.allocator;

      // Runs on every exit; only cleans up when an error is being returned.
      // It can only release members already stored in obj, so a key or element
      // that has not been inserted yet must be released at the failure site.
      defer if err != .None {
          for key, elem in obj {
              delete(key, p.allocator);
              destroy_value(elem);
          }
          delete(obj);
      }

      for p.curr_token.kind != .Close_Brace {
          key: string;
          key, err = parse_object_key(p);
          if err != .None {
              delete(key, p.allocator);
              value.pos = p.curr_token.pos;
              return;
          }

          if colon_err := expect_token(p, .Colon); colon_err != .None {
              err = .Expected_Colon_After_Key;
              value.pos = p.curr_token.pos;
              delete(key, p.allocator); // key is not in obj yet: free it here
              return;
          }

          elem, elem_err := parse_value(p);
          if elem_err != .None {
              err = elem_err;
              value.pos = p.curr_token.pos;
              delete(key, p.allocator); // key is not in obj yet: free it here
              return;
          }

          if key in obj {
              err = .Duplicate_Object_Key;
              value.pos = p.curr_token.pos;
              delete(key, p.allocator);
              destroy_value(elem); // the rejected element is not in obj either
              return;
          }

          obj[key] = elem;

          if allow_token(p, .Comma) {
              // A comma immediately followed by '}' is a trailing comma.
              // Without this check the loop condition would silently accept it.
              if p.spec != Specification.JSON5 && p.curr_token.kind == .Close_Brace {
                  err = .Unexpected_Token;
                  value.pos = p.curr_token.pos;
                  return;
              }
              continue;
          }

          // No comma: strict JSON ends the member list here, JSON5 keeps
          // going until the closing brace is seen.
          if p.spec != Specification.JSON5 {
              break;
          }
      }

      if err = expect_token(p, .Close_Brace); err != .None {
          value.pos = p.curr_token.pos;
          return;
      }

      value.value = obj;
      return;
  }
  // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
  //
  // unquote_string converts the text of a String token (including its
  // surrounding quotes) into the string it denotes, allocated with `allocator`.
  //
  // Returns "" when the token is not a String, is too short to hold any
  // content, or its first and last bytes differ. Malformed content (unknown
  // escape, bad hex digits, raw control character, unescaped quote, invalid
  // UTF-8) ends the conversion early and the text decoded so far is returned.
  //
  // Escapes: \" \' \\ \/ \b \f \n \r \t \uXXXX for every specification, plus
  // \0 \v \xXX under JSON5. A \uXXXX high surrogate directly followed by a
  // \uXXXX low surrogate is combined into a single code point.
  unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
      // Decodes "\xHH" at the start of s; returns -1 if it is not exactly that.
      get_u2_rune :: proc(s: string) -> rune {
          if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
              return -1;
          }

          r: rune;
          for c in s[2:4] {
              x: rune;
              switch c {
              case '0'..'9': x = c - '0';
              case 'a'..'f': x = c - 'a' + 10;
              case 'A'..'F': x = c - 'A' + 10;
              case: return -1;
              }
              r = r*16 + x;
          }
          return r;
      }

      // Decodes "\uHHHH" at the start of s; returns -1 if it is not exactly that.
      get_u4_rune :: proc(s: string) -> rune {
          if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
              return -1;
          }

          r: rune;
          for c in s[2:6] {
              x: rune;
              switch c {
              case '0'..'9': x = c - '0';
              case 'a'..'f': x = c - 'a' + 10;
              case 'A'..'F': x = c - 'A' + 10;
              case: return -1;
              }
              r = r*16 + x;
          }
          return r;
      }

      if token.kind != .String {
          return "";
      }
      s := token.text;
      if len(s) <= 2 {
          return "";
      }
      quote := s[0];
      if s[0] != s[len(s)-1] {
          // Invalid string
          return "";
      }
      s = s[1:len(s)-1];

      // Fast path: find the first byte that needs any processing.
      i := 0;
      for i < len(s) {
          c := s[i];
          if c == '\\' || c == quote || c < ' ' {
              break;
          }
          if c < utf8.RUNE_SELF {
              i += 1;
              continue;
          }
          // Decode at the current offset. Decoding from the start of the
          // string would apply the first rune's width here and could push i
          // past len(s).
          r, width := utf8.decode_rune_in_string(s[i:]);
          if r == utf8.RUNE_ERROR && width == 1 {
              break;
          }
          i += width;
      }

      if i == len(s) {
          // Nothing to translate: a plain copy is enough.
          return clone_string(s, allocator);
      }

      // Slow path. Every construct handled below writes no more bytes than it
      // reads, so len(s) plus a little slack is always enough room.
      b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
      w := copy(b, s[0:i]);
      loop: for i < len(s) {
          c := s[i];
          switch {
          case c == '\\':
              i += 1;
              if i >= len(s) {
                  break loop;
              }
              switch s[i] {
              case: break loop;
              case '"', '\'', '\\', '/':
                  b[w] = s[i];
                  i += 1;
                  w += 1;

              case 'b':
                  b[w] = '\b';
                  i += 1;
                  w += 1;
              case 'f':
                  b[w] = '\f';
                  i += 1;
                  w += 1;
              case 'r':
                  b[w] = '\r';
                  i += 1;
                  w += 1;
              case 't':
                  b[w] = '\t';
                  i += 1;
                  w += 1;
              case 'n':
                  b[w] = '\n';
                  i += 1;
                  w += 1;

              case 'u':
                  i -= 1; // Include the \u in the check for sanity sake
                  r := get_u4_rune(s[i:]);
                  if r < 0 {
                      break loop;
                  }
                  i += 6;

                  // Code points above U+FFFF are written as a UTF-16
                  // surrogate pair of two \u escapes; merge them into one rune.
                  if 0xd800 <= r && r <= 0xdbff {
                      r2 := get_u4_rune(s[i:]);
                      if 0xdc00 <= r2 && r2 <= 0xdfff {
                          i += 6;
                          r = 0x10000 + (r - 0xd800)*0x400 + (r2 - 0xdc00);
                      }
                  }

                  buf, buf_width := utf8.encode_rune(r);
                  copy(b[w:], buf[:buf_width]);
                  w += buf_width;

              case '0':
                  if spec == Specification.JSON5 {
                      b[w] = '\x00';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }
              case 'v':
                  if spec == Specification.JSON5 {
                      b[w] = '\v';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }

              case 'x':
                  if spec == Specification.JSON5 {
                      i -= 1; // Include the \x in the check for sanity sake
                      r := get_u2_rune(s[i:]);
                      if r < 0 {
                          break loop;
                      }
                      i += 4;

                      buf, buf_width := utf8.encode_rune(r);
                      copy(b[w:], buf[:buf_width]);
                      w += buf_width;
                  } else {
                      break loop;
                  }
              }

          case c == quote, c < ' ':
              break loop;

          case c < utf8.RUNE_SELF:
              b[w] = c;
              i += 1;
              w += 1;

          case:
              r, width := utf8.decode_rune_in_string(s[i:]);
              if r == utf8.RUNE_ERROR && width == 1 {
                  // Invalid UTF-8: stop here like every other malformed
                  // input, instead of expanding one bad byte into a
                  // three-byte replacement character.
                  break loop;
              }
              // A valid sequence is copied through verbatim.
              copy(b[w:], s[i:i+width]);
              i += width;
              w += width;
          }
      }

      return string(b[:w]);
  }