parser.odin

package json

import "core:mem"
import "core:unicode/utf8"
import "core:strconv"
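
// Parser carries the Tokenizer, a one-token lookahead (prev_token/curr_token),
// the Specification being parsed (JSON, JSON5, or MJSON), and the allocator
// used for every Value produced while parsing. parse_integers is passed
// through to the tokenizer (see make_parser_from_string).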
Parser :: struct {
	tok:            Tokenizer,
	prev_token:     Token,
	curr_token:     Token,
	spec:           Specification,
	allocator:      mem.Allocator,
	parse_integers: bool,
}

make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	return make_parser_from_string(string(data), spec, parse_integers, allocator)
}

make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	p: Parser
	p.tok = make_tokenizer(data, spec, parse_integers)
	p.spec = spec
	p.allocator = allocator
	assert(p.allocator.procedure != nil)
	advance_token(&p)
	return p
}

parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	return parse_string(string(data), spec, parse_integers, allocator)
}

parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	context.allocator = allocator
	p := make_parser_from_string(data, spec, parse_integers, allocator)

	switch p.spec {
	case .JSON:
		return parse_object(&p)
	case .JSON5:
		return parse_value(&p)
	case .MJSON:
		#partial switch p.curr_token.kind {
		case .Ident, .String:
			return parse_object_body(&p, .EOF)
		}
		return parse_value(&p)
	}
	return parse_object(&p)
}
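
// A minimal usage sketch (not part of this file's API); the document literal
// and variable names are illustrative only. parse_string returns the root
// Value, which the caller releases with destroy_value when done:
//
//	value, err := parse_string(`{"name": "Odin", "kind": "language"}`)
//	if err != nil {
//		// handle the parse error
//	}
//	defer destroy_value(value)
//	root := value.(Object) // this document parses to an Object at the top level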

token_end_pos :: proc(tok: Token) -> Pos {
	end := tok.pos
	end.offset += len(tok.text)
	return end
}

advance_token :: proc(p: ^Parser) -> (Token, Error) {
	err: Error
	p.prev_token = p.curr_token
	p.curr_token, err = get_token(&p.tok)
	return p.prev_token, err
}

allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
	if p.curr_token.kind == kind {
		advance_token(p)
		return true
	}
	return false
}

expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
	prev := p.curr_token
	advance_token(p)
	if prev.kind == kind {
		return nil
	}
	return .Unexpected_Token
}

parse_colon :: proc(p: ^Parser) -> (err: Error) {
	colon_err := expect_token(p, .Colon)
	if colon_err == nil {
		return nil
	}
	return .Expected_Colon_After_Key
}
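
// parse_comma reports whether the enclosing element loop should stop.
// Under JSON a missing comma terminates the list; under JSON5 and MJSON
// commas are optional, so the loop only stops at the closing token.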
parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	switch p.spec {
	case .JSON5, .MJSON:
		if allow_token(p, .Comma) {
			return false
		}
		return false
	case .JSON:
		if !allow_token(p, .Comma) {
			return true
		}
	}
	return false
}

parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
	token := p.curr_token
	#partial switch token.kind {
	case .Null:
		advance_token(p)
		value = Null{}
		return
	case .False:
		advance_token(p)
		value = Boolean(false)
		return
	case .True:
		advance_token(p)
		value = Boolean(true)
		return

	case .Integer:
		advance_token(p)
		i, _ := strconv.parse_i64(token.text)
		value = Integer(i)
		return
	case .Float:
		advance_token(p)
		f, _ := strconv.parse_f64(token.text)
		value = Float(f)
		return

	case .Ident:
		if p.spec == .MJSON {
			advance_token(p)
			return string(token.text), nil
		}

	case .String:
		advance_token(p)
		return unquote_string(token, p.spec, p.allocator)

	case .Open_Brace:
		return parse_object(p)
	case .Open_Bracket:
		return parse_array(p)

	case:
		if p.spec != .JSON {
			switch {
			case allow_token(p, .Infinity):
				// IEEE-754 bit pattern for infinity, sign taken from the token
				inf: u64 = 0x7ff0000000000000
				if token.text[0] == '-' {
					inf = 0xfff0000000000000
				}
				value = transmute(f64)inf
				return
			case allow_token(p, .NaN):
				// IEEE-754 NaN bit pattern, sign taken from the token
				nan: u64 = 0x7ff7ffffffffffff
				if token.text[0] == '-' {
					nan = 0xfff7ffffffffffff
				}
				value = transmute(f64)nan
				return
			}
		}
	}

	err = .Unexpected_Token
	advance_token(p)
	return
}

parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
	expect_token(p, .Open_Bracket) or_return

	array: Array
	array.allocator = p.allocator
	defer if err != nil {
		for elem in array {
			destroy_value(elem)
		}
		delete(array)
	}

	for p.curr_token.kind != .Close_Bracket {
		elem := parse_value(p) or_return
		append(&array, elem)

		if parse_comma(p) {
			break
		}
	}

	expect_token(p, .Close_Bracket) or_return
	value = array
	return
}
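
// bytes_make maps allocator failures onto this package's Error type so callers
// can use `or_return`; clone_string reserves one extra byte for a NUL
// terminator while returning a slice that excludes it.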
@(private)
bytes_make :: proc(size, alignment: int, allocator: mem.Allocator) -> (bytes: []byte, err: Error) {
	b, berr := mem.alloc_bytes(size, alignment, allocator)
	if berr != nil {
		if berr == .Out_Of_Memory {
			err = .Out_Of_Memory
		} else {
			err = .Invalid_Allocator
		}
	}
	bytes = b
	return
}

clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err: Error) {
	n := len(s)
	b := bytes_make(n+1, 1, allocator) or_return
	copy(b, s)
	if len(b) > n {
		b[n] = 0
		str = string(b[:n])
	}
	return
}

parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) {
	tok := p.curr_token
	if p.spec != .JSON {
		if allow_token(p, .Ident) {
			return clone_string(tok.text, key_allocator)
		}
	}
	if tok_err := expect_token(p, .String); tok_err != nil {
		err = .Expected_String_For_Object_Key
		return
	}
	return unquote_string(tok, p.spec, key_allocator)
}
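
// parse_object_body parses key/value pairs until end_token is reached:
// .Close_Brace for a braced object, or .EOF for a top-level MJSON document
// (see parse_string above). On error, any keys and values already inserted
// are freed before returning.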
parse_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> (obj: Object, err: Error) {
	obj.allocator = p.allocator
	defer if err != nil {
		for key, elem in obj {
			delete(key, p.allocator)
			destroy_value(elem)
		}
		delete(obj)
	}

	for p.curr_token.kind != end_token {
		key := parse_object_key(p, p.allocator) or_return
		parse_colon(p) or_return
		elem := parse_value(p) or_return

		if key in obj {
			err = .Duplicate_Object_Key
			delete(key, p.allocator)
			return
		}

		obj[key] = elem

		if parse_comma(p) {
			break
		}
	}
	return
}

parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
	expect_token(p, .Open_Brace) or_return
	obj := parse_object_body(p, .Close_Brace) or_return
	expect_token(p, .Close_Brace) or_return
	value = obj
	return
}

// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> (value: string, err: Error) {
	get_u2_rune :: proc(s: string) -> rune {
		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
			return -1
		}

		r: rune
		for c in s[2:4] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1
		}

		r: rune
		for c in s[2:6] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}

	if token.kind != .String {
		return "", nil
	}
	s := token.text
	if len(s) <= 2 {
		return "", nil
	}
	quote := s[0]
	if s[0] != s[len(s)-1] {
		// Invalid string
		return "", nil
	}
	s = s[1:len(s)-1]

	// Fast path: scan for the first byte that needs unescaping; if none is
	// found, the string can be cloned verbatim.
	i := 0
	for i < len(s) {
		c := s[i]
		if c == '\\' || c == quote || c < ' ' {
			break
		}
		if c < utf8.RUNE_SELF {
			i += 1
			continue
		}
		r, w := utf8.decode_rune_in_string(s[i:])
		if r == utf8.RUNE_ERROR && w == 1 {
			break
		}
		i += w
	}
	if i == len(s) {
		return clone_string(s, allocator)
	}

	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return
	w := copy(b, s[0:i])

	loop: for i < len(s) {
		c := s[i]
		switch {
		case c == '\\':
			i += 1
			if i >= len(s) {
				break loop
			}
			switch s[i] {
			case: break loop
			case '"', '\'', '\\', '/':
				b[w] = s[i]
				i += 1
				w += 1

			case 'b':
				b[w] = '\b'
				i += 1
				w += 1
			case 'f':
				b[w] = '\f'
				i += 1
				w += 1
			case 'r':
				b[w] = '\r'
				i += 1
				w += 1
			case 't':
				b[w] = '\t'
				i += 1
				w += 1
			case 'n':
				b[w] = '\n'
				i += 1
				w += 1

			case 'u':
				i -= 1 // Include the \u in the check for sanity's sake
				r := get_u4_rune(s[i:])
				if r < 0 {
					break loop
				}
				i += 6

				buf, buf_width := utf8.encode_rune(r)
				copy(b[w:], buf[:buf_width])
				w += buf_width

			case '0':
				if spec != .JSON {
					b[w] = '\x00'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'v':
				if spec != .JSON {
					b[w] = '\v'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'x':
				if spec != .JSON {
					i -= 1 // Include the \x in the check for sanity's sake
					r := get_u2_rune(s[i:])
					if r < 0 {
						break loop
					}
					i += 4

					buf, buf_width := utf8.encode_rune(r)
					copy(b[w:], buf[:buf_width])
					w += buf_width
				} else {
					break loop
				}
			}

		case c == quote, c < ' ':
			break loop

		case c < utf8.RUNE_SELF:
			b[w] = c
			i += 1
			w += 1

		case:
			r, width := utf8.decode_rune_in_string(s[i:])
			i += width

			buf, buf_width := utf8.encode_rune(r)
			assert(buf_width <= width)
			copy(b[w:], buf[:buf_width])
			w += buf_width
		}
	}

	return string(b[:w]), nil
}