parser.odin

package json

import "core:mem"
import "core:unicode/utf8"
import "core:strconv"

Parser :: struct {
	tok:            Tokenizer,
	prev_token:     Token,
	curr_token:     Token,
	spec:           Specification,
	allocator:      mem.Allocator,
	parse_integers: bool, // when true, whole-number literals become Integer values instead of Float
}

make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	return make_parser_from_string(string(data), spec, parse_integers, allocator)
}

make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	p: Parser
	p.tok = make_tokenizer(data, spec, parse_integers)
	p.spec = spec
	p.allocator = allocator
	assert(p.allocator.procedure != nil)
	advance_token(&p)
	return p
}

parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	return parse_string(string(data), spec, parse_integers, allocator)
}

parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	context.allocator = allocator
	p := make_parser_from_string(data, spec, parse_integers, allocator)

	switch p.spec {
	case .JSON:
		return parse_object(&p)
	case .JSON5:
		return parse_value(&p)
	case .SJSON:
		#partial switch p.curr_token.kind {
		case .Ident, .String:
			return parse_object_body(&p, .EOF)
		}
		return parse_value(&p)
	}
	return parse_object(&p)
}

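/*
	Usage sketch (illustrative values only; `parse_string` is defined above and
	`destroy_value` is the same procedure used by parse_array/parse_object_body
	below):

		value, err := parse_string(`{"name": "odin", "answer": 42}`)
		if err != nil {
			// handle the parse error
		}
		defer destroy_value(value)
*/
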
token_end_pos :: proc(tok: Token) -> Pos {
	end := tok.pos
	end.offset += len(tok.text)
	return end
}

advance_token :: proc(p: ^Parser) -> (Token, Error) {
	err: Error
	p.prev_token = p.curr_token
	p.curr_token, err = get_token(&p.tok)
	return p.prev_token, err
}

allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
	if p.curr_token.kind == kind {
		advance_token(p)
		return true
	}
	return false
}

expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
	prev := p.curr_token
	advance_token(p)
	if prev.kind == kind {
		return nil
	}
	return .Unexpected_Token
}

parse_colon :: proc(p: ^Parser) -> (err: Error) {
	colon_err := expect_token(p, .Colon)
	if colon_err == nil {
		return nil
	}
	return .Expected_Colon_After_Key
}

parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	switch p.spec {
	case .JSON5, .MJSON:
		// Consume the comma if present; a missing comma never ends the element list
		if allow_token(p, .Comma) {
			return false
		}
		return false
	case .JSON:
		// Strict JSON: a missing comma ends the element list
		if !allow_token(p, .Comma) {
			return true
		}
	}
	return false
}

parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
	err = .None
	token := p.curr_token
	#partial switch token.kind {
	case .Null:
		advance_token(p)
		value = Null{}
		return
	case .False:
		advance_token(p)
		value = Boolean(false)
		return
	case .True:
		advance_token(p)
		value = Boolean(true)
		return

	case .Integer:
		advance_token(p)
		i, _ := strconv.parse_i64(token.text)
		value = Integer(i)
		return
	case .Float:
		advance_token(p)
		f, _ := strconv.parse_f64(token.text)
		value = Float(f)
		return

	case .Ident:
		if p.spec == .MJSON {
			advance_token(p)
			// Clone so the returned value owns its memory, matching the .String case
			return clone_string(token.text, p.allocator)
		}

	case .String:
		advance_token(p)
		return unquote_string(token, p.spec, p.allocator)

	case .Open_Brace:
		return parse_object(p)
	case .Open_Bracket:
		return parse_array(p)

	case:
		if p.spec != .JSON {
			switch {
			case allow_token(p, .Infinity):
				inf: u64 = 0x7ff0000000000000
				if token.text[0] == '-' {
					inf = 0xfff0000000000000
				}
				value = transmute(f64)inf
				return
			case allow_token(p, .NaN):
				nan: u64 = 0x7ff7ffffffffffff
				if token.text[0] == '-' {
					nan = 0xfff7ffffffffffff
				}
				value = transmute(f64)nan
				return
			}
		}
	}

	err = .Unexpected_Token
	advance_token(p)
	return
}

parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
	err = .None
	expect_token(p, .Open_Bracket) or_return

	array: Array
	array.allocator = p.allocator
	defer if err != nil {
		for elem in array {
			destroy_value(elem)
		}
		delete(array)
	}

	for p.curr_token.kind != .Close_Bracket {
		elem := parse_value(p) or_return
		append(&array, elem)

		if parse_comma(p) {
			break
		}
	}

	expect_token(p, .Close_Bracket) or_return
	value = array
	return
}

@(private)
bytes_make :: proc(size, alignment: int, allocator: mem.Allocator) -> (bytes: []byte, err: Error) {
	b, berr := mem.alloc_bytes(size, alignment, allocator)
	if berr != nil {
		if berr == .Out_Of_Memory {
			err = .Out_Of_Memory
		} else {
			err = .Invalid_Allocator
		}
	}
	bytes = b
	return
}

clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err: Error) {
	n := len(s)
	b := bytes_make(n+1, 1, allocator) or_return
	copy(b, s)
	if len(b) > n {
		b[n] = 0 // NUL-terminate the backing buffer; the returned string excludes it
		str = string(b[:n])
	}
	return
}

parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) {
	tok := p.curr_token
	if p.spec != .JSON {
		if allow_token(p, .Ident) {
			return clone_string(tok.text, key_allocator)
		}
	}
	if tok_err := expect_token(p, .String); tok_err != nil {
		err = .Expected_String_For_Object_Key
		return
	}
	return unquote_string(tok, p.spec, key_allocator)
}

parse_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> (obj: Object, err: Error) {
	obj.allocator = p.allocator
	defer if err != nil {
		for key, elem in obj {
			delete(key, p.allocator)
			destroy_value(elem)
		}
		delete(obj)
	}

	for p.curr_token.kind != end_token {
		key := parse_object_key(p, p.allocator) or_return
		parse_colon(p) or_return
		elem := parse_value(p) or_return

		if key in obj {
			err = .Duplicate_Object_Key
			delete(key, p.allocator)
			return
		}

		obj[key] = elem

		if parse_comma(p) {
			break
		}
	}
	return obj, .None
}

parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
	expect_token(p, .Open_Brace) or_return
	obj := parse_object_body(p, .Close_Brace) or_return
	expect_token(p, .Close_Brace) or_return
	return obj, .None
}

// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> (value: string, err: Error) {
	get_u2_rune :: proc(s: string) -> rune {
		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
			return -1
		}

		r: rune
		for c in s[2:4] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1
		}

		r: rune
		for c in s[2:6] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}

	if token.kind != .String {
		return "", nil
	}
	s := token.text
	if len(s) <= 2 {
		return "", nil
	}
	quote := s[0]
	if s[0] != s[len(s)-1] {
		// Invalid string
		return "", nil
	}
	s = s[1:len(s)-1]

	i := 0
	for i < len(s) {
		c := s[i]
		if c == '\\' || c == quote || c < ' ' {
			break
		}
		if c < utf8.RUNE_SELF {
			i += 1
			continue
		}
		r, w := utf8.decode_rune_in_string(s[i:]) // decode at the current offset, not the start of the string
		if r == utf8.RUNE_ERROR && w == 1 {
			break
		}
		i += w
	}
	if i == len(s) {
		return clone_string(s, allocator)
	}

	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return
	w := copy(b, s[0:i])

	if len(b) == 0 && allocator.data == nil {
		// `unmarshal_count_array` calls us with a nil allocator
		return string(b[:w]), nil
	}

	loop: for i < len(s) {
		c := s[i]
		switch {
		case c == '\\':
			i += 1
			if i >= len(s) {
				break loop
			}
			switch s[i] {
			case: break loop
			case '"', '\'', '\\', '/':
				b[w] = s[i]
				i += 1
				w += 1
			case 'b':
				b[w] = '\b'
				i += 1
				w += 1
			case 'f':
				b[w] = '\f'
				i += 1
				w += 1
			case 'r':
				b[w] = '\r'
				i += 1
				w += 1
			case 't':
				b[w] = '\t'
				i += 1
				w += 1
			case 'n':
				b[w] = '\n'
				i += 1
				w += 1
			case 'u':
				i -= 1 // Include the \u in the check for sanity sake
				r := get_u4_rune(s[i:])
				if r < 0 {
					break loop
				}
				i += 6

				buf, buf_width := utf8.encode_rune(r)
				copy(b[w:], buf[:buf_width])
				w += buf_width

			case '0':
				if spec != .JSON {
					b[w] = '\x00'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'v':
				if spec != .JSON {
					b[w] = '\v'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'x':
				if spec != .JSON {
					i -= 1 // Include the \x in the check for sanity sake
					r := get_u2_rune(s[i:])
					if r < 0 {
						break loop
					}
					i += 4

					buf, buf_width := utf8.encode_rune(r)
					copy(b[w:], buf[:buf_width])
					w += buf_width
				} else {
					break loop
				}
			}

		case c == quote, c < ' ':
			break loop

		case c < utf8.RUNE_SELF:
			b[w] = c
			i += 1
			w += 1

		case:
			r, width := utf8.decode_rune_in_string(s[i:])
			i += width

			buf, buf_width := utf8.encode_rune(r)
			assert(buf_width <= width)
			copy(b[w:], buf[:buf_width])
			w += buf_width
		}
	}

	return string(b[:w]), nil
}
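
/*
	Illustrative behaviour of the escape handling above (a sketch derived from
	the loop, with example escape sequences chosen here for clarity):

	  - `\u00e9` decodes to the two-byte UTF-8 encoding of 'é' under every spec.
	  - `\x41`, `\0`, and `\v` are only decoded when spec != .JSON; under strict
	    .JSON the loop breaks and the result is truncated at that escape.
*/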