parser.odin 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. package json
  2. import "core:mem"
  3. import "core:unicode/utf8"
  4. import "core:strconv"
  // Parser holds the state threaded through every parse_* procedure.
  Parser :: struct {
      // Token source; initialised by make_tokenizer in make_parser.
      tok:            Tokenizer,
      // The token that was current before the most recent advance_token call.
      prev_token:     Token,
      // The token about to be consumed (one token of lookahead).
      curr_token:     Token,
      // Grammar in use; JSON5 enables extra syntax in several parse_* procedures.
      spec:           Specification,
      // Allocator used for strings, arrays and objects produced by the parser.
      allocator:      mem.Allocator,
      // Not read or written by any procedure in this part of the file.
      unmarshal_data: any,
  }
  // make_parser builds a Parser over `data` and primes it with the first token.
  //
  // The allocator must have a non-nil procedure; this is asserted.
  // Any error from reading the first token is discarded here (the results of
  // advance_token are not inspected).
  make_parser :: proc(data: []byte, spec := Specification.JSON, allocator := context.allocator) -> Parser {
      parser := Parser{
          tok       = make_tokenizer(data, spec),
          spec      = spec,
          allocator = allocator,
      };
      assert(parser.allocator.procedure != nil);

      // Load curr_token so the parse_* procedures have their lookahead.
      advance_token(&parser);
      return parser;
  }
  // parse is the package entry point: it parses `data` into a Value.
  //
  // `allocator` is installed as context.allocator for the duration of the call
  // and is also handed to the parser. With the JSON5 specification any value is
  // accepted at the top level; otherwise the top level must be an object.
  parse :: proc(data: []byte, spec := Specification.JSON, allocator := context.allocator) -> (Value, Error) {
      context.allocator = allocator;

      parser := make_parser(data, spec, allocator);
      if parser.spec != Specification.JSON5 {
          return parse_object(&parser);
      }
      return parse_value(&parser);
  }
  // token_end_pos returns the position just past `tok`: a copy of the token's
  // start position with `offset` advanced by the length of the token text.
  // Only `offset` is adjusted; the other fields of the position are copied as is.
  token_end_pos :: proc(tok: Token) -> Pos {
      pos := tok.pos;
      pos.offset = pos.offset + len(tok.text);
      return pos;
  }
  // advance_token shifts the lookahead by one token.
  //
  // The current token becomes prev_token, the next token from the tokenizer
  // becomes curr_token, and the (new) prev_token is returned together with any
  // error reported by get_token.
  advance_token :: proc(p: ^Parser) -> (Token, Error) {
      p.prev_token = p.curr_token;

      next, err := get_token(&p.tok);
      p.curr_token = next;

      return p.prev_token, err;
  }
  // allow_token consumes the current token only if it is of `kind`.
  // Returns true when a token was consumed, false when the parser was left
  // untouched.
  allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
      if p.curr_token.kind != kind {
          return false;
      }
      advance_token(p);
      return true;
  }
  // expect_token always consumes the current token and reports whether it was
  // of the required `kind`: .None on a match, .Unexpected_Token otherwise.
  expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
      got := p.curr_token.kind;
      advance_token(p); // consumed even on mismatch

      if got != kind {
          return .Unexpected_Token;
      }
      return .None;
  }
  // parse_value parses one value starting at the current token.
  //
  // Scalars (null, booleans, numbers, strings) consume exactly one token.
  // Objects and arrays are delegated to parse_object / parse_array.
  // Infinity and NaN tokens are only accepted under the JSON5 specification.
  // Any other token is consumed and reported as .Unexpected_Token.
  //
  // value.pos is the position of the first token; value.end is filled in on
  // exit from the end of the last consumed token.
  parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      tok   := p.curr_token;
      json5 := p.spec == Specification.JSON5;

      #partial switch tok.kind {
      // Composite values consume their own tokens.
      case .Open_Brace:
          return parse_object(p);
      case .Open_Bracket:
          return parse_array(p);

      case .Null:
          value.value = Null{};
      case .True:
          value.value = Boolean(true);
      case .False:
          value.value = Boolean(false);

      case .Integer:
          // The conversion status is not checked.
          n, _ := strconv.parse_i64(tok.text);
          value.value = Integer(n);
      case .Float:
          // The conversion status is not checked.
          x, _ := strconv.parse_f64(tok.text);
          value.value = Float(x);

      case .String:
          value.value = String(unquote_string(tok, p.spec, p.allocator));

      case .Infinity:
          if json5 {
              // Bit pattern of +Inf; the sign bit is set for a leading '-'.
              bits: u64 = 0x7ff0000000000000;
              if tok.text[0] == '-' {
                  bits = 0xfff0000000000000;
              }
              value.value = transmute(f64)bits;
          } else {
              err = .Unexpected_Token;
          }

      case .NaN:
          if json5 {
              // NaN bit pattern; the sign bit is set for a leading '-'.
              bits: u64 = 0x7ff7ffffffffffff;
              if tok.text[0] == '-' {
                  bits = 0xfff7ffffffffffff;
              }
              value.value = transmute(f64)bits;
          } else {
              err = .Unexpected_Token;
          }

      case:
          err = .Unexpected_Token;
      }

      // Every non-composite path, including the error path, consumes one token.
      advance_token(p);
      return;
  }
  // parse_array parses `[ value, value, ... ]` starting at the current token.
  //
  // Elements are appended to an Array backed by p.allocator. On any error the
  // elements parsed so far are destroyed and the array storage is released.
  //
  // A trailing comma before `]` is accepted only under the JSON5
  // specification; otherwise it is reported as .Unexpected_Token.
  //
  // value.pos is the position of `[`; value.end is filled in on exit from the
  // end of the last consumed token.
  parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, .Open_Bracket); err != .None {
          return;
      }

      array: Array;
      array.allocator = p.allocator;

      // Release everything built so far if we leave with an error.
      defer if err != .None {
          for elem in array {
              destroy_value(elem);
          }
          delete(array);
      }

      for p.curr_token.kind != .Close_Bracket {
          elem, elem_err := parse_value(p);
          if elem_err != .None {
              err = elem_err;
              return;
          }
          append(&array, elem);

          // No comma: the element list is over; `]` is checked below.
          if !allow_token(p, .Comma) {
              break;
          }

          // A comma directly followed by `]` is a trailing comma. Without this
          // check the loop condition would silently accept it.
          if p.spec != Specification.JSON5 && p.curr_token.kind == .Close_Bracket {
              err = .Unexpected_Token;
              return;
          }
      }

      if err = expect_token(p, .Close_Bracket); err != .None {
          return;
      }

      value.value = array;
      return;
  }
  // clone_string copies `s` into memory obtained from `allocator`.
  //
  // One extra byte is allocated and set to 0 after the text, but the returned
  // string has the same length as `s` (the terminator is not part of it).
  clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
      length := len(s);

      buf := make([]byte, length+1, allocator);
      copy(buf, s);
      buf[length] = 0;

      return string(buf[:length]);
  }
  // parse_object_key consumes one token and interprets it as an object key.
  //
  // A string token is unquoted. Under the JSON5 specification an identifier
  // token is also accepted and its text is cloned. Anything else yields
  // .Expected_String_For_Object_Key. The returned key is allocated with
  // p.allocator by unquote_string / clone_string.
  parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
      tok := p.curr_token;

      // Every outcome, including the error, consumes exactly one token.
      advance_token(p);

      switch {
      case tok.kind == .String:
          key = unquote_string(tok, p.spec, p.allocator);
      case tok.kind == .Ident && p.spec == Specification.JSON5:
          key = clone_string(tok.text, p.allocator);
      case:
          err = .Expected_String_For_Object_Key;
      }
      return;
  }
  // parse_object parses `{ key: value, ... }` starting at the current token.
  //
  // Members are stored in an Object backed by p.allocator. Duplicate keys are
  // rejected with .Duplicate_Object_Key. On any error every key and value
  // built so far, including the member being parsed, is released.
  //
  // Members must be separated by commas. A trailing comma before `}` is
  // accepted only under the JSON5 specification; otherwise it is reported as
  // .Unexpected_Token.
  //
  // On success value.pos is the position of `{`. On error value.pos is moved
  // to the current token so the caller can locate the problem. value.end is
  // filled in on exit from the end of the last consumed token.
  parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, .Open_Brace); err != .None {
          value.pos = p.curr_token.pos;
          return;
      }

      obj: Object;
      obj.allocator = p.allocator;

      // Release all stored members if we leave with an error.
      defer if err != .None {
          for key, elem in obj {
              delete(key, p.allocator);
              destroy_value(elem);
          }
          delete(obj);
      }

      for p.curr_token.kind != .Close_Brace {
          key: string;
          key, err = parse_object_key(p);
          if err != .None {
              delete(key, p.allocator);
              value.pos = p.curr_token.pos;
              return;
          }

          if colon_err := expect_token(p, .Colon); colon_err != .None {
              err = .Expected_Colon_After_Key;
              value.pos = p.curr_token.pos;
              // The key is not in `obj` yet, so the deferred cleanup will not free it.
              delete(key, p.allocator);
              return;
          }

          elem, elem_err := parse_value(p);
          if elem_err != .None {
              err = elem_err;
              value.pos = p.curr_token.pos;
              // The key is not in `obj` yet, so the deferred cleanup will not free it.
              delete(key, p.allocator);
              return;
          }

          if key in obj {
              err = .Duplicate_Object_Key;
              value.pos = p.curr_token.pos;
              // Neither the duplicate key nor its value was stored; free both here.
              delete(key, p.allocator);
              destroy_value(elem);
              return;
          }

          obj[key] = elem;

          // No comma: the member list is over; `}` is checked below. This
          // applies to every specification, so members cannot be juxtaposed
          // without a separator.
          if !allow_token(p, .Comma) {
              break;
          }

          // A comma directly followed by `}` is a trailing comma. Without this
          // check the loop condition would silently accept it.
          if p.spec != Specification.JSON5 && p.curr_token.kind == .Close_Brace {
              err = .Unexpected_Token;
              value.pos = p.curr_token.pos;
              return;
          }
      }

      if err = expect_token(p, .Close_Brace); err != .None {
          value.pos = p.curr_token.pos;
          return;
      }

      value.value = obj;
      return;
  }
  // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
  //
  // unquote_string strips the surrounding quotes from a string token and
  // resolves its escape sequences, returning text allocated with `allocator`.
  //
  // Returns "" (without allocating) when the token is not a string, when its
  // text is two bytes or shorter, or when the first and last bytes differ.
  //
  // Supported escapes: \" \' \\ \/ \b \f \r \t \n \uXXXX, plus \0 \v \xXX under
  // the JSON5 specification. A \uXXXX high surrogate immediately followed by a
  // \uXXXX low surrogate is combined into a single code point.
  //
  // Processing stops at the first malformed escape, unescaped quote, or byte
  // below ' '; the text decoded up to that point is returned. Bytes >= 0x80 are
  // copied through unchanged, including bytes that are not valid UTF-8.
  unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
      // Reads `digits` hex digits after a `\<marker>` prefix at the start of `s`.
      // Returns the numeric value, or -1 if the prefix or any digit is wrong.
      get_hex_rune :: proc(s: string, marker: byte, digits: int) -> rune {
          if len(s) < 2+digits || s[0] != '\\' || s[1] != marker {
              return -1;
          }

          r: rune;
          for c in s[2:2+digits] {
              x: rune;
              switch c {
              case '0'..'9': x = c - '0';
              case 'a'..'f': x = c - 'a' + 10;
              case 'A'..'F': x = c - 'A' + 10;
              case: return -1;
              }
              r = r*16 + x;
          }
          return r;
      }

      if token.kind != .String {
          return "";
      }

      s := token.text;
      if len(s) <= 2 {
          return "";
      }

      quote := s[0];
      if s[0] != s[len(s)-1] {
          // Invalid string
          return "";
      }
      s = s[1:len(s)-1];

      // Fast path: find the first byte that needs any processing. If there is
      // none, the text can be cloned as is.
      i := 0;
      for i < len(s) {
          c := s[i];
          if c == '\\' || c == quote || c < ' ' {
              break;
          }
          if c < utf8.RUNE_SELF {
              i += 1;
              continue;
          }
          // Decode at the current position, not at the start of the string.
          r, width := utf8.decode_rune_in_string(s[i:]);
          if r == utf8.RUNE_ERROR && width == 1 {
              break;
          }
          i += width;
      }

      if i == len(s) {
          return clone_string(s, allocator);
      }

      // Slow path. No branch below writes more bytes than it consumes, so the
      // output never outgrows this buffer.
      b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
      w := copy(b, s[0:i]);

      loop: for i < len(s) {
          c := s[i];
          switch {
          case c == '\\':
              i += 1;
              if i >= len(s) {
                  break loop;
              }
              switch s[i] {
              case: break loop;
              case '"', '\'', '\\', '/':
                  b[w] = s[i];
                  i += 1;
                  w += 1;
              case 'b':
                  b[w] = '\b';
                  i += 1;
                  w += 1;
              case 'f':
                  b[w] = '\f';
                  i += 1;
                  w += 1;
              case 'r':
                  b[w] = '\r';
                  i += 1;
                  w += 1;
              case 't':
                  b[w] = '\t';
                  i += 1;
                  w += 1;
              case 'n':
                  b[w] = '\n';
                  i += 1;
                  w += 1;

              case 'u':
                  i -= 1; // Include the \u in the check for sanity sake
                  r := get_hex_rune(s[i:], 'u', 4);
                  if r < 0 {
                      break loop;
                  }
                  i += 6;

                  // A high surrogate followed by an escaped low surrogate
                  // encodes one code point above U+FFFF (UTF-16 pair).
                  if r >= 0xD800 && r <= 0xDBFF {
                      lo := get_hex_rune(s[i:], 'u', 4);
                      if lo >= 0xDC00 && lo <= 0xDFFF {
                          r = (r - 0xD800)*0x400 + (lo - 0xDC00) + 0x10000;
                          i += 6;
                      }
                  }

                  buf, buf_width := utf8.encode_rune(r);
                  copy(b[w:], buf[:buf_width]);
                  w += buf_width;

              case '0':
                  if spec == Specification.JSON5 {
                      b[w] = '\x00';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }
              case 'v':
                  if spec == Specification.JSON5 {
                      b[w] = '\v';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }

              case 'x':
                  if spec == Specification.JSON5 {
                      i -= 1; // Include the \x in the check for sanity sake
                      r := get_hex_rune(s[i:], 'x', 2);
                      if r < 0 {
                          break loop;
                      }
                      i += 4;

                      buf, buf_width := utf8.encode_rune(r);
                      copy(b[w:], buf[:buf_width]);
                      w += buf_width;
                  } else {
                      break loop;
                  }
              }

          case c == quote, c < ' ':
              break loop;

          case c < utf8.RUNE_SELF:
              b[w] = c;
              i += 1;
              w += 1;

          case:
              // Copy the source bytes of this rune verbatim. For a valid
              // sequence this equals re-encoding it; for an invalid byte it
              // passes that single byte through instead of expanding it to a
              // wider replacement character.
              _, width := utf8.decode_rune_in_string(s[i:]);
              if width < 1 {
                  // NOTE(review): not expected for non-empty input; guards
                  // against a stalled loop.
                  width = 1;
              }
              w += copy(b[w:], s[i:i+width]);
              i += width;
          }
      }

      return string(b[:w]);
  }