parser.odin 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. package json
  2. import "core:mem"
  3. import "core:unicode/utf8"
  4. import "core:strconv"
  // Parser carries the state needed to walk a token stream one token at a time.
  Parser :: struct {
      tok:                    Tokenizer,     // token source, created in make_parser
      prev_token, curr_token: Token,         // most recently consumed token / current lookahead token
      spec:                   Specification, // dialect being parsed (JSON or JSON5)
      allocator:              mem.Allocator, // used for every string, array and object the parser produces
  }
  // make_parser builds a Parser over `data` for the given `spec` and primes it
  // by reading the first token, so `curr_token` is ready to inspect on return.
  // The allocator must have a non-nil procedure; this is asserted.
  // NOTE: the results of the priming advance_token call are discarded.
  make_parser :: proc(data: []byte, spec := Specification.JSON, allocator := context.allocator) -> Parser {
      parser := Parser{
          tok       = make_tokenizer(data, spec),
          spec      = spec,
          allocator = allocator,
      };
      assert(parser.allocator.procedure != nil);
      advance_token(&parser);
      return parser;
  }
  // parse is the package entry point: it parses `data` and returns the root
  // Value together with an Error.
  //
  // `allocator` is installed as context.allocator for the duration of the call
  // and is also handed to the parser.
  // With Specification.JSON5 the root may be any value; otherwise the root is
  // parsed as an object.
  parse :: proc(data: []byte, spec := Specification.JSON, allocator := context.allocator) -> (Value, Error) {
      context.allocator = allocator;

      parser := make_parser(data, spec, allocator);
      if parser.spec != Specification.JSON5 {
          return parse_object(&parser);
      }
      return parse_value(&parser);
  }
  // token_end_pos returns the token's position with `offset` moved past the
  // token's text. Only `offset` is adjusted; every other field of the Pos is
  // copied unchanged from `tok.pos`.
  token_end_pos :: proc(tok: Token) -> Pos {
      result := tok.pos;
      result.offset = result.offset + len(tok.text);
      return result;
  }
  // advance_token shifts the parser forward by one token: the current token
  // becomes `prev_token` and a fresh token is read into `curr_token`.
  // Returns the token that was just consumed (the new `prev_token`) and the
  // error reported by get_token.
  advance_token :: proc(p: ^Parser) -> (Token, Error) {
      p.prev_token = p.curr_token;

      next, err := get_token(&p.tok);
      p.curr_token = next;

      return p.prev_token, err;
  }
  // allow_token consumes the current token only if it has the requested kind.
  // Returns true when a token was consumed; otherwise the parser is untouched.
  allow_token :: proc(p: ^Parser, kind: Kind) -> bool {
      if p.curr_token.kind != kind {
          return false;
      }
      advance_token(p);
      return true;
  }
  // expect_token always consumes the current token, then reports whether it
  // had the requested kind: Error.None on a match, Error.Unexpected_Token
  // otherwise. Any error reported by advance_token itself is not propagated.
  expect_token :: proc(p: ^Parser, kind: Kind) -> Error {
      matched := p.curr_token.kind == kind;
      advance_token(p);
      if !matched {
          return Error.Unexpected_Token;
      }
      return Error.None;
  }
  // parse_value parses a single value starting at the current token.
  //
  // Objects and arrays are delegated to parse_object / parse_array. Every
  // other token kind is handled here and consumes exactly one token, including
  // the error case. `Infinity` and `NaN` tokens are accepted only under
  // Specification.JSON5; any unrecognised token yields Error.Unexpected_Token.
  //
  // `value.pos` is the position of the first token; `value.end` is filled in
  // on exit from the end of the last consumed token.
  parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      token := p.curr_token;
      json5 := p.spec == Specification.JSON5;

      switch token.kind {
      // Composite values consume their own tokens.
      case Kind.Open_Brace:
          return parse_object(p);
      case Kind.Open_Bracket:
          return parse_array(p);

      case Kind.Null:
          value.value = Null{};
      case Kind.False:
          value.value = Boolean(false);
      case Kind.True:
          value.value = Boolean(true);
      case Kind.Integer:
          value.value = Integer(strconv.parse_i64(token.text));
      case Kind.Float:
          value.value = Float(strconv.parse_f64(token.text));
      case Kind.String:
          value.value = String(unquote_string(token, p.spec, p.allocator));

      case Kind.Infinity:
          if json5 {
              // Bit pattern is chosen by the sign character at the start of the token text.
              bits: u64 = 0x7ff0000000000000;
              if token.text[0] == '-' {
                  bits = 0xfff0000000000000;
              }
              value.value = transmute(f64)bits;
          } else {
              err = Error.Unexpected_Token;
          }
      case Kind.NaN:
          if json5 {
              bits: u64 = 0x7ff7ffffffffffff;
              if token.text[0] == '-' {
                  bits = 0xfff7ffffffffffff;
              }
              value.value = transmute(f64)bits;
          } else {
              err = Error.Unexpected_Token;
          }

      case:
          err = Error.Unexpected_Token;
      }

      // Every non-composite path, success or failure, consumes one token.
      advance_token(p);
      return;
  }
  // parse_array parses `[ value, value, ... ]` starting at the current token.
  //
  // Returns the array wrapped in a Value, or an error:
  //   - Error.Unexpected_Token if the opening or closing bracket is missing,
  //     or if strict JSON input has a trailing comma before `]`;
  //   - whatever error parse_value reports for an element.
  //
  // On any error, every element parsed so far is destroyed and the backing
  // array is freed, so nothing allocated here is returned to the caller.
  // `value.pos` is the position of the first token; `value.end` is filled in
  // on exit from the end of the last consumed token.
  parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, Kind.Open_Bracket); err != Error.None {
          return;
      }

      array: Array;
      array.allocator = p.allocator;
      // Clean up partially built state if anything below fails.
      defer if err != Error.None {
          for elem in array {
              destroy_value(elem);
          }
          delete(array);
      }

      for p.curr_token.kind != Kind.Close_Bracket {
          elem, elem_err := parse_value(p);
          if elem_err != Error.None {
              err = elem_err;
              return;
          }
          append(&array, elem);

          // No comma means the element list is over.
          if !allow_token(p, Kind.Comma) {
              break;
          }

          // A comma directly followed by `]` is a trailing comma. The loop
          // condition alone would silently accept it, so strict JSON has to
          // reject it explicitly here; JSON5 keeps accepting it.
          if p.spec == Specification.JSON && p.curr_token.kind == Kind.Close_Bracket {
              err = Error.Unexpected_Token;
              return;
          }
      }

      if err = expect_token(p, Kind.Close_Bracket); err != Error.None {
          return;
      }

      value.value = array;
      return;
  }
  // clone_string copies `s` into memory obtained from `allocator` and returns
  // the copy. One extra byte is allocated and set to 0 directly after the
  // string data; that byte is not part of the returned string's length.
  clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
      size := len(s);
      buf := make([]byte, size+1, allocator);
      copy(buf, cast([]byte)s);
      buf[size] = 0;
      return string(buf[0:size]);
  }
  // parse_object_key parses one object key and always consumes the current token.
  //
  // A String token is accepted under every specification and is unquoted.
  // Under Specification.JSON5 a bare Ident token is accepted as well and its
  // text is copied verbatim. Anything else yields
  // Error.Expected_String_For_Object_Key.
  // A key returned without error is produced using the parser's allocator.
  parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
      tok := p.curr_token;

      // JSON5 only: unquoted identifier keys.
      if p.spec == Specification.JSON5 && tok.kind == Kind.Ident {
          advance_token(p);
          key = clone_string(tok.text, p.allocator);
          return;
      }

      // Quoted keys, valid under both specifications.
      if expect_token(p, Kind.String) != Error.None {
          err = Error.Expected_String_For_Object_Key;
          return;
      }
      key = unquote_string(tok, p.spec, p.allocator);
      return;
  }
  // parse_object parses `{ key: value, ... }` starting at the current token.
  //
  // Returns the object wrapped in a Value, or an error:
  //   - Error.Unexpected_Token if the opening or closing brace is missing, or
  //     if strict JSON input has a trailing comma before `}`;
  //   - Error.Expected_String_For_Object_Key / Error.Expected_Colon_After_Key
  //     for a malformed member;
  //   - Error.Duplicate_Object_Key if a key appears twice;
  //   - whatever error parse_value reports for a member value.
  //
  // On any error, everything allocated here is released: the keys and values
  // already stored in the object, the object itself, and the key/value of the
  // member that was being parsed when the error occurred.
  // On error `value.pos` is moved to the current token's position; on every
  // exit `value.end` is set from the end of the last consumed token.
  parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
      value.pos = p.curr_token.pos;
      defer value.end = token_end_pos(p.prev_token);

      if err = expect_token(p, Kind.Open_Brace); err != Error.None {
          value.pos = p.curr_token.pos;
          return;
      }

      obj: Object;
      obj.allocator = p.allocator;
      // Clean up members that were already stored if anything below fails.
      defer if err != Error.None {
          for key, elem in obj {
              delete(key, p.allocator);
              destroy_value(elem);
          }
          delete(obj);
      }

      for p.curr_token.kind != Kind.Close_Brace {
          key: string;
          key, err = parse_object_key(p);
          if err != Error.None {
              delete(key, p.allocator);
              value.pos = p.curr_token.pos;
              return;
          }

          if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
              err = Error.Expected_Colon_After_Key;
              value.pos = p.curr_token.pos;
              // The key is not in `obj` yet, so the deferred cleanup cannot see it.
              delete(key, p.allocator);
              return;
          }

          elem, elem_err := parse_value(p);
          if elem_err != Error.None {
              err = elem_err;
              value.pos = p.curr_token.pos;
              // Same as above: the pending key must be released here.
              delete(key, p.allocator);
              return;
          }

          if key in obj {
              err = Error.Duplicate_Object_Key;
              value.pos = p.curr_token.pos;
              // Neither the duplicate key nor its value was stored; free both.
              delete(key, p.allocator);
              destroy_value(elem);
              return;
          }

          obj[key] = elem;

          if allow_token(p, Kind.Comma) {
              // A comma directly followed by `}` is a trailing comma. The loop
              // condition alone would silently accept it, so strict JSON has
              // to reject it explicitly here; JSON5 keeps accepting it.
              if p.spec == Specification.JSON && p.curr_token.kind == Kind.Close_Brace {
                  err = Error.Unexpected_Token;
                  value.pos = p.curr_token.pos;
                  return;
              }
              continue;
          }

          // No comma: outside JSON5 the member list is over. Under JSON5 the
          // loop condition decides, as in the original implementation.
          if p.spec != Specification.JSON5 {
              break;
          }
      }

      if err = expect_token(p, Kind.Close_Brace); err != Error.None {
          value.pos = p.curr_token.pos;
          return;
      }

      value.value = obj;
      return;
  }
  // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
  //
  // unquote_string strips the surrounding quotes from a String token and
  // decodes its escape sequences.
  //
  // Returns "" when the token is not a String, when its text is two bytes or
  // shorter, or when the first and last bytes (the quotes) differ.
  // Decoding stops at the first malformed escape, unescaped quote character or
  // control character (< ' '), and whatever was decoded up to that point is
  // returned. The escapes `\0`, `\v` and `\xHH` are honoured only under
  // Specification.JSON5. `\uXXXX` is decoded as a single code point.
  unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
      // Decodes a `\xHH` escape at the start of `s`; returns -1 if malformed.
      get_u2_rune :: proc(s: string) -> rune {
          if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
              return -1;
          }

          r: rune;
          for c in s[2:4] {
              x: rune;
              switch c {
              case '0'..'9': x = c - '0';
              case 'a'..'f': x = c - 'a' + 10;
              case 'A'..'F': x = c - 'A' + 10;
              case: return -1;
              }
              r = r*16 + x;
          }
          return r;
      }

      // Decodes a `\uXXXX` escape at the start of `s`; returns -1 if malformed.
      get_u4_rune :: proc(s: string) -> rune {
          if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
              return -1;
          }

          r: rune;
          for c in s[2:6] {
              x: rune;
              switch c {
              case '0'..'9': x = c - '0';
              case 'a'..'f': x = c - 'a' + 10;
              case 'A'..'F': x = c - 'A' + 10;
              case: return -1;
              }
              r = r*16 + x;
          }
          return r;
      }

      if token.kind != Kind.String {
          return "";
      }
      s := token.text;
      if len(s) <= 2 {
          return "";
      }
      quote := s[0];
      if s[0] != s[len(s)-1] {
          // Invalid string
          return "";
      }
      s = s[1:len(s)-1];

      // Fast path: scan for the first byte that needs special handling.
      // If there is none, the contents can be copied as-is.
      i := 0;
      for i < len(s) {
          c := s[i];
          if c == '\\' || c == quote || c < ' ' {
              break;
          }
          if c < utf8.RUNE_SELF {
              i += 1;
              continue;
          }
          // Decode the rune at the current position. Decoding from the start
          // of `s` would reuse the width of the first rune every time and
          // could step over a backslash, leaving escapes undecoded.
          r, width := utf8.decode_rune_in_string(s[i:]);
          if r == utf8.RUNE_ERROR && width == 1 {
              break;
          }
          i += width;
      }

      if i == len(s) {
          return clone_string(s, allocator);
      }

      // Slow path: copy the clean prefix, then decode the rest piece by piece.
      b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
      w := copy(b, cast([]byte)s[0:i]);
      loop: for i < len(s) {
          c := s[i];
          switch {
          case c == '\\':
              i += 1;
              if i >= len(s) {
                  break loop;
              }
              switch s[i] {
              case: break loop;
              case '"', '\'', '\\', '/':
                  b[w] = s[i];
                  i += 1;
                  w += 1;

              case 'b':
                  b[w] = '\b';
                  i += 1;
                  w += 1;
              case 'f':
                  b[w] = '\f';
                  i += 1;
                  w += 1;
              case 'r':
                  b[w] = '\r';
                  i += 1;
                  w += 1;
              case 't':
                  b[w] = '\t';
                  i += 1;
                  w += 1;
              case 'n':
                  b[w] = '\n';
                  i += 1;
                  w += 1;
              case 'u':
                  i -= 1; // Include the \u in the check for sanity sake
                  r := get_u4_rune(s[i:]);
                  if r < 0 {
                      break loop;
                  }
                  i += 6;

                  buf, buf_width := utf8.encode_rune(r);
                  copy(b[w:], buf[:buf_width]);
                  w += buf_width;

              case '0':
                  if spec == Specification.JSON5 {
                      b[w] = '\x00';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }
              case 'v':
                  if spec == Specification.JSON5 {
                      b[w] = '\v';
                      i += 1;
                      w += 1;
                  } else {
                      break loop;
                  }

              case 'x':
                  if spec == Specification.JSON5 {
                      i -= 1; // Include the \x in the check for sanity sake
                      r := get_u2_rune(s[i:]);
                      if r < 0 {
                          break loop;
                      }
                      i += 4;

                      buf, buf_width := utf8.encode_rune(r);
                      copy(b[w:], buf[:buf_width]);
                      w += buf_width;
                  } else {
                      break loop;
                  }
              }

          case c == quote, c < ' ':
              break loop;

          case c < utf8.RUNE_SELF:
              b[w] = c;
              i += 1;
              w += 1;

          case:
              r, width := utf8.decode_rune_in_string(s[i:]);
              i += width;

              buf, buf_width := utf8.encode_rune(r);
              // The output must never outgrow the input it replaces.
              assert(buf_width <= width);
              copy(b[w:], buf[:buf_width]);
              w += buf_width;
          }
      }

      return string(b[:w]);
  }