parser.odin 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. package json
  2. import "core:mem"
  3. import "core:unicode/utf8"
  4. import "core:strconv"
  5. Parser :: struct {
  6. tok: Tokenizer,
  7. prev_token: Token,
  8. curr_token: Token,
  9. spec: Specification,
  10. allocator: mem.Allocator,
  11. unmarshal_data: any,
  12. parse_integers: bool,
  13. }
  14. make_parser :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> Parser {
  15. p: Parser;
  16. p.tok = make_tokenizer(data, spec, parse_integers);
  17. p.spec = spec;
  18. p.allocator = allocator;
  19. assert(p.allocator.procedure != nil);
  20. advance_token(&p);
  21. return p;
  22. }
  23. parse :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
  24. context.allocator = allocator;
  25. p := make_parser(data, spec, parse_integers, allocator);
  26. if p.spec == Specification.JSON5 {
  27. return parse_value(&p);
  28. }
  29. return parse_object(&p);
  30. }
  31. token_end_pos :: proc(tok: Token) -> Pos {
  32. end := tok.pos;
  33. end.offset += len(tok.text);
  34. return end;
  35. }
  36. advance_token :: proc(p: ^Parser) -> (Token, Error) {
  37. err: Error;
  38. p.prev_token = p.curr_token;
  39. p.curr_token, err = get_token(&p.tok);
  40. return p.prev_token, err;
  41. }
  42. allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
  43. if p.curr_token.kind == kind {
  44. advance_token(p);
  45. return true;
  46. }
  47. return false;
  48. }
  49. expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
  50. prev := p.curr_token;
  51. advance_token(p);
  52. if prev.kind == kind {
  53. return .None;
  54. }
  55. return .Unexpected_Token;
  56. }
  57. parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
  58. token := p.curr_token;
  59. #partial switch token.kind {
  60. case .Null:
  61. value = Null{};
  62. advance_token(p);
  63. return;
  64. case .False:
  65. value = Boolean(false);
  66. advance_token(p);
  67. return;
  68. case .True:
  69. value = Boolean(true);
  70. advance_token(p);
  71. return;
  72. case .Integer:
  73. i, _ := strconv.parse_i64(token.text);
  74. value = Integer(i);
  75. advance_token(p);
  76. return;
  77. case .Float:
  78. f, _ := strconv.parse_f64(token.text);
  79. value = Float(f);
  80. advance_token(p);
  81. return;
  82. case .String:
  83. value = String(unquote_string(token, p.spec, p.allocator));
  84. advance_token(p);
  85. return;
  86. case .Open_Brace:
  87. return parse_object(p);
  88. case .Open_Bracket:
  89. return parse_array(p);
  90. case:
  91. if p.spec == Specification.JSON5 {
  92. #partial switch token.kind {
  93. case .Infinity:
  94. inf: u64 = 0x7ff0000000000000;
  95. if token.text[0] == '-' {
  96. inf = 0xfff0000000000000;
  97. }
  98. value = transmute(f64)inf;
  99. advance_token(p);
  100. return;
  101. case .NaN:
  102. nan: u64 = 0x7ff7ffffffffffff;
  103. if token.text[0] == '-' {
  104. nan = 0xfff7ffffffffffff;
  105. }
  106. value = transmute(f64)nan;
  107. advance_token(p);
  108. return;
  109. }
  110. }
  111. }
  112. err = .Unexpected_Token;
  113. advance_token(p);
  114. return;
  115. }
  116. parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
  117. expect_token(p, .Open_Bracket) or_return;
  118. array: Array;
  119. array.allocator = p.allocator;
  120. defer if err != .None {
  121. for elem in array {
  122. destroy_value(elem);
  123. }
  124. delete(array);
  125. }
  126. for p.curr_token.kind != .Close_Bracket {
  127. elem := parse_value(p) or_return;
  128. append(&array, elem);
  129. // Disallow trailing commas for the time being
  130. if allow_token(p, .Comma) {
  131. continue;
  132. } else {
  133. break;
  134. }
  135. }
  136. expect_token(p, .Close_Bracket) or_return;
  137. value = array;
  138. return;
  139. }
  140. clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
  141. n := len(s);
  142. b := make([]byte, n+1, allocator);
  143. copy(b, s);
  144. b[n] = 0;
  145. return string(b[:n]);
  146. }
  147. parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
  148. tok := p.curr_token;
  149. if p.spec == Specification.JSON5 {
  150. if tok.kind == .String {
  151. expect_token(p, .String);
  152. key = unquote_string(tok, p.spec, p.allocator);
  153. return;
  154. } else if tok.kind == .Ident {
  155. expect_token(p, .Ident);
  156. key = clone_string(tok.text, p.allocator);
  157. return;
  158. }
  159. }
  160. if tok_err := expect_token(p, .String); tok_err != .None {
  161. err = .Expected_String_For_Object_Key;
  162. return;
  163. }
  164. key = unquote_string(tok, p.spec, p.allocator);
  165. return;
  166. }
  167. parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
  168. expect_token(p, .Open_Brace) or_return;
  169. obj: Object;
  170. obj.allocator = p.allocator;
  171. defer if err != .None {
  172. for key, elem in obj {
  173. delete(key, p.allocator);
  174. destroy_value(elem);
  175. }
  176. delete(obj);
  177. }
  178. for p.curr_token.kind != .Close_Brace {
  179. key: string;
  180. key, err = parse_object_key(p);
  181. if err != .None {
  182. delete(key, p.allocator);
  183. return;
  184. }
  185. if colon_err := expect_token(p, .Colon); colon_err != .None {
  186. err = .Expected_Colon_After_Key;
  187. return;
  188. }
  189. elem := parse_value(p) or_return;
  190. if key in obj {
  191. err = .Duplicate_Object_Key;
  192. delete(key, p.allocator);
  193. return;
  194. }
  195. obj[key] = elem;
  196. if p.spec == Specification.JSON5 {
  197. // Allow trailing commas
  198. if allow_token(p, .Comma) {
  199. continue;
  200. }
  201. } else {
  202. // Disallow trailing commas
  203. if allow_token(p, .Comma) {
  204. continue;
  205. } else {
  206. break;
  207. }
  208. }
  209. }
  210. expect_token(p, .Close_Brace) or_return;
  211. value = obj;
  212. return;
  213. }
  214. // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
  215. unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
  216. get_u2_rune :: proc(s: string) -> rune {
  217. if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
  218. return -1;
  219. }
  220. r: rune;
  221. for c in s[2:4] {
  222. x: rune;
  223. switch c {
  224. case '0'..='9': x = c - '0';
  225. case 'a'..='f': x = c - 'a' + 10;
  226. case 'A'..='F': x = c - 'A' + 10;
  227. case: return -1;
  228. }
  229. r = r*16 + x;
  230. }
  231. return r;
  232. }
  233. get_u4_rune :: proc(s: string) -> rune {
  234. if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
  235. return -1;
  236. }
  237. r: rune;
  238. for c in s[2:6] {
  239. x: rune;
  240. switch c {
  241. case '0'..='9': x = c - '0';
  242. case 'a'..='f': x = c - 'a' + 10;
  243. case 'A'..='F': x = c - 'A' + 10;
  244. case: return -1;
  245. }
  246. r = r*16 + x;
  247. }
  248. return r;
  249. }
  250. if token.kind != .String {
  251. return "";
  252. }
  253. s := token.text;
  254. if len(s) <= 2 {
  255. return "";
  256. }
  257. quote := s[0];
  258. if s[0] != s[len(s)-1] {
  259. // Invalid string
  260. return "";
  261. }
  262. s = s[1:len(s)-1];
  263. i := 0;
  264. for i < len(s) {
  265. c := s[i];
  266. if c == '\\' || c == quote || c < ' ' {
  267. break;
  268. }
  269. if c < utf8.RUNE_SELF {
  270. i += 1;
  271. continue;
  272. }
  273. r, w := utf8.decode_rune_in_string(s);
  274. if r == utf8.RUNE_ERROR && w == 1 {
  275. break;
  276. }
  277. i += w;
  278. }
  279. if i == len(s) {
  280. return clone_string(s, allocator);
  281. }
  282. b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
  283. w := copy(b, s[0:i]);
  284. loop: for i < len(s) {
  285. c := s[i];
  286. switch {
  287. case c == '\\':
  288. i += 1;
  289. if i >= len(s) {
  290. break loop;
  291. }
  292. switch s[i] {
  293. case: break loop;
  294. case '"', '\'', '\\', '/':
  295. b[w] = s[i];
  296. i += 1;
  297. w += 1;
  298. case 'b':
  299. b[w] = '\b';
  300. i += 1;
  301. w += 1;
  302. case 'f':
  303. b[w] = '\f';
  304. i += 1;
  305. w += 1;
  306. case 'r':
  307. b[w] = '\r';
  308. i += 1;
  309. w += 1;
  310. case 't':
  311. b[w] = '\t';
  312. i += 1;
  313. w += 1;
  314. case 'n':
  315. b[w] = '\n';
  316. i += 1;
  317. w += 1;
  318. case 'u':
  319. i -= 1; // Include the \u in the check for sanity sake
  320. r := get_u4_rune(s[i:]);
  321. if r < 0 {
  322. break loop;
  323. }
  324. i += 6;
  325. buf, buf_width := utf8.encode_rune(r);
  326. copy(b[w:], buf[:buf_width]);
  327. w += buf_width;
  328. case '0':
  329. if spec == Specification.JSON5 {
  330. b[w] = '\x00';
  331. i += 1;
  332. w += 1;
  333. } else {
  334. break loop;
  335. }
  336. case 'v':
  337. if spec == Specification.JSON5 {
  338. b[w] = '\v';
  339. i += 1;
  340. w += 1;
  341. } else {
  342. break loop;
  343. }
  344. case 'x':
  345. if spec == Specification.JSON5 {
  346. i -= 1; // Include the \x in the check for sanity sake
  347. r := get_u2_rune(s[i:]);
  348. if r < 0 {
  349. break loop;
  350. }
  351. i += 4;
  352. buf, buf_width := utf8.encode_rune(r);
  353. copy(b[w:], buf[:buf_width]);
  354. w += buf_width;
  355. } else {
  356. break loop;
  357. }
  358. }
  359. case c == quote, c < ' ':
  360. break loop;
  361. case c < utf8.RUNE_SELF:
  362. b[w] = c;
  363. i += 1;
  364. w += 1;
  365. case:
  366. r, width := utf8.decode_rune_in_string(s[i:]);
  367. i += width;
  368. buf, buf_width := utf8.encode_rune(r);
  369. assert(buf_width <= width);
  370. copy(b[w:], buf[:buf_width]);
  371. w += buf_width;
  372. }
  373. }
  374. return string(b[:w]);
  375. }