parser.odin

package json

import "core:mem"
import "core:unicode/utf8"
import "core:unicode/utf16"
import "core:strconv"

Parser :: struct {
	tok:            Tokenizer,
	prev_token:     Token,
	curr_token:     Token,
	spec:           Specification,
	allocator:      mem.Allocator,
	parse_integers: bool,
}
make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	return make_parser_from_string(string(data), spec, parse_integers, allocator)
}

make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	p: Parser
	p.tok = make_tokenizer(data, spec, parse_integers)
	p.spec = spec
	p.allocator = allocator
	assert(p.allocator.procedure != nil)
	advance_token(&p)
	return p
}
parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	return parse_string(string(data), spec, parse_integers, allocator)
}

parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
	context.allocator = allocator
	p := make_parser_from_string(data, spec, parse_integers, allocator)

	switch p.spec {
	case .JSON:
		return parse_object(&p)
	case .JSON5:
		return parse_value(&p)
	case .SJSON:
		#partial switch p.curr_token.kind {
		case .Ident, .String:
			// SJSON allows a top-level object body without surrounding braces.
			return parse_object_body(&p, .EOF)
		}
		return parse_value(&p)
	}
	return parse_object(&p)
}
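
// Example usage (a sketch, not part of the original file; it assumes the
// package-level `parse_string` and `destroy_value` procedures and the `Object`
// variant of `Value` defined in this package, and follows the `err != nil`
// convention used throughout this file):
//
//	import "core:encoding/json"
//	import "core:fmt"
//
//	example :: proc() {
//		value, err := json.parse_string(`{"name": "odin"}`)
//		if err != nil {
//			return
//		}
//		defer json.destroy_value(value)
//		if obj, ok := value.(json.Object); ok {
//			fmt.println(obj["name"])
//		}
//	}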
token_end_pos :: proc(tok: Token) -> Pos {
	end := tok.pos
	end.offset += len(tok.text)
	return end
}

advance_token :: proc(p: ^Parser) -> (Token, Error) {
	err: Error
	p.prev_token = p.curr_token
	p.curr_token, err = get_token(&p.tok)
	return p.prev_token, err
}
allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
	if p.curr_token.kind == kind {
		advance_token(p)
		return true
	}
	return false
}

expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
	prev := p.curr_token
	advance_token(p)
	if prev.kind == kind {
		return nil
	}
	return .Unexpected_Token
}

parse_colon :: proc(p: ^Parser) -> (err: Error) {
	colon_err := expect_token(p, .Colon)
	if colon_err == nil {
		return nil
	}
	return .Expected_Colon_After_Key
}
parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	switch p.spec {
	case .JSON5, .MJSON:
		// JSON5 and MJSON allow commas to be omitted (and allow trailing commas),
		// so a missing comma never terminates the enclosing object or array.
		if allow_token(p, .Comma) {
			return false
		}
		return false
	case .JSON:
		// Strict JSON requires a comma between elements; if there is none,
		// tell the caller to stop and expect the closing token.
		if !allow_token(p, .Comma) {
			return true
		}
	}
	return false
}
parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
	err = .None
	token := p.curr_token
	#partial switch token.kind {
	case .Null:
		advance_token(p)
		value = Null{}
		return
	case .False:
		advance_token(p)
		value = Boolean(false)
		return
	case .True:
		advance_token(p)
		value = Boolean(true)
		return

	case .Integer:
		advance_token(p)
		i, _ := strconv.parse_i64(token.text)
		value = Integer(i)
		return
	case .Float:
		advance_token(p)
		f, _ := strconv.parse_f64(token.text)
		value = Float(f)
		return

	case .Ident:
		if p.spec == .MJSON {
			advance_token(p)
			return string(token.text), nil
		}

	case .String:
		advance_token(p)
		return unquote_string(token, p.spec, p.allocator)

	case .Open_Brace:
		return parse_object(p)
	case .Open_Bracket:
		return parse_array(p)

	case:
		if p.spec != .JSON {
			switch {
			case allow_token(p, .Infinity):
				inf: u64 = 0x7ff0000000000000
				if token.text[0] == '-' {
					inf = 0xfff0000000000000
				}
				value = transmute(f64)inf
				return
			case allow_token(p, .NaN):
				nan: u64 = 0x7ff7ffffffffffff
				if token.text[0] == '-' {
					nan = 0xfff7ffffffffffff
				}
				value = transmute(f64)nan
				return
			}
		}
	}

	err = .Unexpected_Token
	advance_token(p)
	return
}
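
// For reference (a sketch of the IEEE-754 binary64 bit patterns used in the
// Infinity/NaN branches above; `transmute` reinterprets the bits exactly as
// parse_value does):
//
//	pos_inf := transmute(f64)u64(0x7ff0000000000000) // +Inf: exponent all ones, zero mantissa
//	neg_inf := transmute(f64)u64(0xfff0000000000000) // -Inf: same, with the sign bit set
//	a_nan   := transmute(f64)u64(0x7ff7ffffffffffff) // NaN: exponent all ones, non-zero mantissa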
parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
	err = .None
	expect_token(p, .Open_Bracket) or_return

	array: Array
	array.allocator = p.allocator
	defer if err != nil {
		for elem in array {
			destroy_value(elem)
		}
		delete(array)
	}

	for p.curr_token.kind != .Close_Bracket {
		elem := parse_value(p) or_return
		append(&array, elem)

		if parse_comma(p) {
			break
		}
	}

	expect_token(p, .Close_Bracket) or_return
	value = array
	return
}
@(private)
bytes_make :: proc(size, alignment: int, allocator: mem.Allocator) -> (bytes: []byte, err: Error) {
	b, berr := mem.alloc_bytes(size, alignment, allocator)
	if berr != nil {
		if berr == .Out_Of_Memory {
			err = .Out_Of_Memory
		} else {
			err = .Invalid_Allocator
		}
	}
	bytes = b
	return
}

clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err: Error) {
	n := len(s)
	b := bytes_make(n+1, 1, allocator) or_return
	copy(b, s)
	if len(b) > n {
		// NUL-terminate the copy, but return a view that excludes the terminator.
		b[n] = 0
		str = string(b[:n])
	}
	return
}
parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) {
	tok := p.curr_token
	if p.spec != .JSON {
		// Non-strict specifications allow unquoted identifiers as object keys.
		if allow_token(p, .Ident) {
			return clone_string(tok.text, key_allocator)
		}
	}
	if tok_err := expect_token(p, .String); tok_err != nil {
		err = .Expected_String_For_Object_Key
		return
	}
	return unquote_string(tok, p.spec, key_allocator)
}
parse_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> (obj: Object, err: Error) {
	obj.allocator = p.allocator
	defer if err != nil {
		for key, elem in obj {
			delete(key, p.allocator)
			destroy_value(elem)
		}
		delete(obj)
	}

	for p.curr_token.kind != end_token {
		key := parse_object_key(p, p.allocator) or_return
		parse_colon(p) or_return
		elem := parse_value(p) or_return

		if key in obj {
			err = .Duplicate_Object_Key
			delete(key, p.allocator)
			return
		}
		obj[key] = elem

		if parse_comma(p) {
			break
		}
	}
	return obj, .None
}
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
	expect_token(p, .Open_Brace) or_return
	obj := parse_object_body(p, .Close_Brace) or_return
	expect_token(p, .Close_Brace) or_return
	return obj, .None
}
// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> (value: string, err: Error) {
	// Decode a `\xHH` escape (two hex digits) into a rune; returns -1 if malformed.
	get_u2_rune :: proc(s: string) -> rune {
		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
			return -1
		}

		r: rune
		for c in s[2:4] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	// Decode a `\uHHHH` escape (four hex digits) into a rune; returns -1 if malformed.
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1
		}

		r: rune
		for c in s[2:6] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	if token.kind != .String {
		return "", nil
	}

	s := token.text
	if len(s) <= 2 {
		return "", nil
	}
	quote := s[0]
	if s[0] != s[len(s)-1] {
		// Invalid string
		return "", nil
	}
	s = s[1:len(s)-1]

	i := 0
	for i < len(s) {
		c := s[i]
		if c == '\\' || c == quote || c < ' ' {
			break
		}
		if c < utf8.RUNE_SELF {
			i += 1
			continue
		}
		r, w := utf8.decode_rune_in_string(s[i:])
		if r == utf8.RUNE_ERROR && w == 1 {
			break
		}
		i += w
	}
	if i == len(s) {
		// Fast path: no escapes, quotes, control characters, or invalid UTF-8,
		// so the contents can be cloned verbatim.
		return clone_string(s, allocator)
	}

	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return
	w := copy(b, s[0:i])

	if len(b) == 0 && allocator.data == nil {
		// `unmarshal_count_array` calls us with a nil allocator
		return string(b[:w]), nil
	}
	loop: for i < len(s) {
		c := s[i]
		switch {
		case c == '\\':
			i += 1
			if i >= len(s) {
				break loop
			}
			switch s[i] {
			case: break loop
			case '"', '\'', '\\', '/':
				b[w] = s[i]
				i += 1
				w += 1

			case 'b':
				b[w] = '\b'
				i += 1
				w += 1
			case 'f':
				b[w] = '\f'
				i += 1
				w += 1
			case 'r':
				b[w] = '\r'
				i += 1
				w += 1
			case 't':
				b[w] = '\t'
				i += 1
				w += 1
			case 'n':
				b[w] = '\n'
				i += 1
				w += 1
			case 'u':
				i -= 1 // Include the \u in the check, for sanity's sake
				r := get_u4_rune(s[i:])
				if r < 0 {
					break loop
				}
				i += 6

				// If this is a surrogate pair, decode it as such by taking the next rune too.
				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
					r2 := get_u4_rune(s[i:])
					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
						i += 6
						r = utf16.decode_surrogate_pair(r, r2)
					}
				}

				buf, buf_width := utf8.encode_rune(r)
				copy(b[w:], buf[:buf_width])
				w += buf_width

			case '0':
				if spec != .JSON {
					b[w] = '\x00'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'v':
				if spec != .JSON {
					b[w] = '\v'
					i += 1
					w += 1
				} else {
					break loop
				}

			case 'x':
				if spec != .JSON {
					i -= 1 // Include the \x in the check, for sanity's sake
					r := get_u2_rune(s[i:])
					if r < 0 {
						break loop
					}
					i += 4

					buf, buf_width := utf8.encode_rune(r)
					copy(b[w:], buf[:buf_width])
					w += buf_width
				} else {
					break loop
				}
			}

		case c == quote, c < ' ':
			break loop

		case c < utf8.RUNE_SELF:
			b[w] = c
			i += 1
			w += 1

		case:
			r, width := utf8.decode_rune_in_string(s[i:])
			i += width

			buf, buf_width := utf8.encode_rune(r)
			assert(buf_width <= width)
			copy(b[w:], buf[:buf_width])
			w += buf_width
		}
	}

	return string(b[:w]), nil
}
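
// Illustrative inputs and outputs for unquote_string (a sketch, not part of the
// original file; it assumes `Token.text` still carries the surrounding quotes,
// which the checks on `s[0]` and `s[len(s)-1]` above rely on):
//
//	`"caf\u00e9"`    -> "café"        (single \uHHHH escape)
//	`"\ud83d\ude00"` -> "😀"          (UTF-16 surrogate pair decoded to one rune)
//	`"tab\there"`    -> "tab\there"   (single-character escape copied as '\t')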