123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484 |
- package encoding_json
- import "core:mem"
- import "core:unicode/utf8"
- import "core:unicode/utf16"
- import "core:strconv"
// Parser holds the state needed to turn a token stream into a `Value` tree.
Parser :: struct {
	// Tokenizer state; created by `make_tokenizer` in `make_parser_from_string`.
	tok:            Tokenizer,
	// Token that was current before the most recent `advance_token` call.
	prev_token:     Token,
	// Token the parser is currently looking at (one token of lookahead).
	curr_token:     Token,
	// Grammar variant being parsed; checked throughout to enable or disable
	// the non-strict extensions.
	spec:           Specification,
	// Allocator used for every string, array and object the parser produces.
	allocator:      mem.Allocator,
	// NOTE(review): not read by any procedure visible in this file section;
	// presumably mirrors the `parse_integers` option given to the tokenizer.
	parse_integers: bool,
}
// make_parser builds a Parser over raw bytes.
// The bytes are reinterpreted as a string and handed to
// `make_parser_from_string`; all options are forwarded unchanged.
make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	text := string(data)
	return make_parser_from_string(text, spec, parse_integers, allocator)
}
// make_parser_from_string builds a Parser over `data` and primes it with the
// first token.
//
// Inputs:
// - data:           the text to parse
// - spec:           grammar variant to accept
// - parse_integers: forwarded to the tokenizer and recorded on the Parser
// - allocator:      allocator used for every value the parser produces;
//                   must have a non-nil procedure (asserted)
//
// Returns the initialised Parser with `curr_token` set to the first token.
make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	p: Parser
	p.tok            = make_tokenizer(data, spec, parse_integers)
	p.spec           = spec
	p.allocator      = allocator
	// Keep the Parser's own copy of the option in sync with what the tokenizer
	// was given; previously this field was left at its zero value.
	p.parse_integers = parse_integers
	assert(p.allocator.procedure != nil)

	// Load the first token into `curr_token`. Any tokenizer error for it is
	// discarded here; this procedure has no error result to report it through.
	advance_token(&p)
	return p
}
// parse parses a byte slice into a `Value`.
// It is a thin wrapper that views the bytes as a string and defers to
// `parse_string` with the same options and caller location.
parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator, loc := #caller_location) -> (Value, Error) {
	text := string(data)
	return parse_string(text, spec, parse_integers, allocator, loc)
}
// parse_string parses `data` into a `Value` according to `spec`.
//
// The top-level production depends on the specification:
// - .JSON:  the document must be an object
// - .JSON5: any value is accepted
// - .SJSON: if the first token is an identifier or a string, the input is
//           treated as an object body without surrounding braces that runs
//           until end of input; otherwise any value is accepted
//
// `allocator` is installed as `context.allocator` for the duration of the call
// and is also the allocator stored on the parser.
parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator, loc := #caller_location) -> (Value, Error) {
	context.allocator = allocator
	p := make_parser_from_string(data, spec, parse_integers, allocator)

	if p.spec == .JSON5 {
		return parse_value(&p, loc)
	}

	if p.spec == .SJSON {
		first := p.curr_token.kind
		if first == .Ident || first == .String {
			// Brace-less top-level object: read key/value pairs until EOF.
			return parse_object_body(&p, .EOF, loc)
		}
		return parse_value(&p, loc)
	}

	// Strict JSON (and the fallback): the root must be an object.
	return parse_object(&p, loc)
}
// token_end_pos returns the position just past `tok`.
// Only the `offset` field is moved forward (by the byte length of the token
// text); every other field of the position is copied from the token's start.
token_end_pos :: proc(tok: Token) -> Pos {
	pos := tok.pos
	pos.offset = pos.offset + len(tok.text)
	return pos
}
// advance_token moves the parser forward by one token.
// The current token becomes `prev_token`, the next token from the tokenizer
// becomes `curr_token`, and the token that was just consumed is returned
// together with any error reported by `get_token`.
advance_token :: proc(p: ^Parser) -> (Token, Error) {
	consumed := p.curr_token
	next, err := get_token(&p.tok)

	p.prev_token = consumed
	p.curr_token = next
	return consumed, err
}
// allow_token consumes the current token only if it is of the given kind.
// Returns true when a token was consumed, false when the parser was left
// untouched.
allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
	if p.curr_token.kind != kind {
		return false
	}
	advance_token(p)
	return true
}
// expect_token consumes the current token unconditionally and reports whether
// it was of the expected kind.
// Returns nil on a match and .Unexpected_Token otherwise; note that the token
// is consumed in both cases.
expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
	found := p.curr_token.kind
	advance_token(p)
	if found != kind {
		return .Unexpected_Token
	}
	return nil
}
// parse_colon consumes the token that should separate an object key from its
// value. Any failure from `expect_token` is reported as the more specific
// .Expected_Colon_After_Key.
parse_colon :: proc(p: ^Parser) -> (err: Error) {
	if expect_token(p, .Colon) != nil {
		return .Expected_Colon_After_Key
	}
	return nil
}
// parse_comma consumes an element separator if one is present and tells the
// caller whether to stop reading elements.
//
// - .JSON:          a comma is required between elements, so its absence
//                   returns true (stop)
// - .JSON5, .MJSON: a comma is optional; it is consumed when present and the
//                   result is always false (keep going)
parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	had_comma := allow_token(p, .Comma)
	do_break = p.spec == .JSON && !had_comma
	return
}
// parse_value parses a single value starting at the current token.
//
// Handles null, booleans, integers, floats, strings, objects and arrays.
// Outside strict JSON it also accepts Infinity and NaN tokens, and for .MJSON
// a bare identifier is accepted as a string value.
//
// Every string value returned is allocated with `p.allocator`, including the
// MJSON identifier case, so the result never aliases the input text.
//
// Returns .Unexpected_Token (after consuming the offending token) when the
// current token cannot start a value.
parse_value :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	err = .None
	token := p.curr_token
	#partial switch token.kind {
	case .Null:
		advance_token(p)
		value = Null{}
		return
	case .False:
		advance_token(p)
		value = Boolean(false)
		return
	case .True:
		advance_token(p)
		value = Boolean(true)
		return

	case .Integer:
		advance_token(p)
		// The conversion result flag is ignored: the text is used as the
		// tokenizer produced it.
		i, _ := strconv.parse_i64(token.text)
		value = Integer(i)
		return
	case .Float:
		advance_token(p)
		f, _ := strconv.parse_f64(token.text)
		value = Float(f)
		return

	case .Ident:
		if p.spec == .MJSON {
			advance_token(p)
			// Copy the identifier instead of returning a view into the input:
			// quoted strings and identifier keys are allocator-owned, and the
			// cleanup paths in this file pass values to `destroy_value`.
			return clone_string(token.text, p.allocator, loc)
		}
		// Any other spec: fall out of the switch to the error below.

	case .String:
		advance_token(p)
		return unquote_string(token, p.spec, p.allocator, loc)

	case .Open_Brace:
		return parse_object(p, loc)

	case .Open_Bracket:
		return parse_array(p, loc)

	case:
		if p.spec != .JSON {
			switch {
			case allow_token(p, .Infinity):
				// IEEE 754 bit patterns for +Inf / -Inf; the sign comes from
				// the first byte of the token text.
				inf: u64 = 0x7ff0000000000000
				if token.text[0] == '-' {
					inf = 0xfff0000000000000
				}
				value = transmute(f64)inf
				return
			case allow_token(p, .NaN):
				// NaN bit patterns, again signed by the token's first byte.
				nan: u64 = 0x7ff7ffffffffffff
				if token.text[0] == '-' {
					nan = 0xfff7ffffffffffff
				}
				value = transmute(f64)nan
				return
			}
		}
	}

	err = .Unexpected_Token
	advance_token(p)
	return
}
// parse_array parses `[ value, value, ... ]` starting at the opening bracket.
//
// The array is backed by `p.allocator`. On any error every element parsed so
// far is destroyed and the array storage is released before returning.
//
// Returns the array as a `Value`, or an error from the bracket checks, from
// element parsing, or .Out_Of_Memory when the array cannot grow.
parse_array :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	err = .None
	expect_token(p, .Open_Bracket) or_return

	array: Array
	array.allocator = p.allocator

	// Runs on every exit path; only cleans up when an error is being returned.
	defer if err != nil {
		for elem in array {
			destroy_value(elem, loc=loc)
		}
		delete(array, loc)
	}

	for p.curr_token.kind != .Close_Bracket {
		elem := parse_value(p, loc) or_return

		// Previously the result of `append` was ignored, so an allocation
		// failure silently dropped (and leaked) the element. Only
		// .Out_Of_Memory is treated as fatal, matching the check that
		// parse_object_body applies to `reserve`.
		if _, append_err := append(&array, elem, loc); append_err == .Out_Of_Memory {
			// `elem` never made it into the array, so the deferred cleanup
			// will not see it; release it here.
			destroy_value(elem, loc=loc)
			err = .Out_Of_Memory
			return
		}

		if parse_comma(p) {
			break
		}
	}

	expect_token(p, .Close_Bracket) or_return
	value = array
	return
}
// bytes_make allocates `size` bytes with the given alignment and translates
// the allocator's error into this package's `Error`:
// .Out_Of_Memory is passed through, any other allocator error becomes
// .Invalid_Allocator. Whatever slice the allocator returned is handed back
// even when an error is reported.
@(private)
bytes_make :: proc(size, alignment: int, allocator: mem.Allocator, loc := #caller_location) -> (bytes: []byte, err: Error) {
	data, alloc_err := mem.alloc_bytes(size, alignment, allocator, loc)
	bytes = data

	if alloc_err == .Out_Of_Memory {
		err = .Out_Of_Memory
	} else if alloc_err != nil {
		err = .Invalid_Allocator
	}
	return
}
// clone_string copies `s` into freshly allocated memory.
// One extra byte is allocated and set to 0 directly after the copied text; the
// returned string does not include that byte. If the allocation fails the
// error from `bytes_make` is returned.
clone_string :: proc(s: string, allocator: mem.Allocator, loc := #caller_location) -> (str: string, err: Error) {
	length := len(s)
	buf := bytes_make(length + 1, 1, allocator, loc) or_return
	copy(buf, s)

	// Only touch the terminator slot if the buffer really has room for it;
	// otherwise the result stays the empty string.
	if len(buf) <= length {
		return
	}
	buf[length] = 0
	str = string(buf[:length])
	return
}
// parse_object_key parses one object key and returns it as a string allocated
// with `key_allocator`.
//
// Outside strict JSON a bare identifier is accepted and cloned as-is.
// Otherwise the key must be a string token, which is unquoted. The current
// token is consumed in every case; a non-string token yields
// .Expected_String_For_Object_Key.
parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator, loc := #caller_location) -> (key: string, err: Error) {
	key_tok := p.curr_token

	if p.spec != .JSON && allow_token(p, .Ident) {
		return clone_string(key_tok.text, key_allocator, loc)
	}

	if expect_token(p, .String) != nil {
		err = .Expected_String_For_Object_Key
		return
	}
	return unquote_string(key_tok, p.spec, key_allocator, loc)
}
// parse_object_body parses `key: value` pairs until `end_token` is the current
// token (the end token itself is not consumed).
//
// Inputs:
// - p:         the parser; keys, values and the map use `p.allocator`
// - end_token: token kind that terminates the body (.Close_Brace for a braced
//              object, .EOF for a brace-less top-level SJSON object)
//
// Returns the populated object, or an error:
// - errors from key, colon or value parsing are passed through
// - .Duplicate_Object_Key when a key appears twice
// - .Out_Of_Memory when the map cannot grow
//
// On error everything allocated so far is released: the entries already in
// the map, the map itself, and the key/value currently being processed. The
// returned Object has been freed in that case and must not be used.
parse_object_body :: proc(p: ^Parser, end_token: Token_Kind, loc := #caller_location) -> (obj: Object, err: Error) {
	obj = make(Object, allocator=p.allocator, loc=loc)

	// Releases the entries that made it into the map. The key/value that is
	// still "in flight" when an error occurs is released at the failure site.
	defer if err != nil {
		for key, elem in obj {
			delete(key, p.allocator, loc)
			destroy_value(elem, loc=loc)
		}
		delete(obj, loc)
	}

	for p.curr_token.kind != end_token {
		key := parse_object_key(p, p.allocator, loc) or_return

		// From here on `key` is owned by this iteration until it is stored in
		// the map, so every early exit has to free it.
		if colon_err := parse_colon(p); colon_err != nil {
			delete(key, p.allocator, loc)
			err = colon_err
			return
		}

		elem, elem_err := parse_value(p, loc)
		if elem_err != nil {
			delete(key, p.allocator, loc)
			err = elem_err
			return
		}

		if key in obj {
			// Neither the duplicate key nor its value will be stored.
			delete(key, p.allocator, loc)
			destroy_value(elem, loc=loc)
			err = .Duplicate_Object_Key
			return
		}

		// NOTE(gonz): There are code paths for which this traversal ends up
		// inserting empty key/values into the object and for those we do not
		// want to allocate anything
		if key != "" {
			reserve_error := reserve(&obj, len(obj) + 1, loc)
			// Only .Out_Of_Memory is treated as fatal, as before.
			if reserve_error == mem.Allocator_Error.Out_Of_Memory {
				// Do not overwrite `obj` here: the deferred cleanup needs it
				// to release the entries that were already inserted.
				delete(key, p.allocator, loc)
				destroy_value(elem, loc=loc)
				err = .Out_Of_Memory
				return
			}
			obj[key] = elem
		}

		if parse_comma(p) {
			break
		}
	}
	return obj, .None
}
// parse_object parses `{ key: value, ... }` starting at the opening brace.
//
// Returns the object as a `Value`. Errors from the opening brace, the body, or
// the closing brace are passed through. If the body parsed successfully but
// the closing brace is missing, the object is released before the error is
// returned, so nothing is leaked.
parse_object :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	expect_token(p, .Open_Brace) or_return
	obj := parse_object_body(p, .Close_Brace, loc) or_return

	if close_err := expect_token(p, .Close_Brace); close_err != nil {
		// The body can end without a closing brace (e.g. a missing comma in
		// strict JSON stops the element loop). Release the object in the same
		// way parse_object_body does on its own error paths.
		for key, elem in obj {
			delete(key, p.allocator, loc)
			destroy_value(elem, loc=loc)
		}
		delete(obj, loc)
		err = close_err
		return
	}
	return obj, .None
}
// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
//
// unquote_string strips the surrounding quotes from a string token and decodes
// its escape sequences into a newly allocated string.
//
// Inputs:
// - token:     the token to decode; anything other than a .String token yields ""
// - spec:      outside strict JSON the escapes \0, \v and \xHH are also accepted
// - allocator: allocator for the result
//
// Returns:
// - "" with no error when the token is not a string, when its text is two
//   bytes or shorter, or when its first and last bytes differ
// - a copy of the contents when nothing needs decoding
// - otherwise the decoded text; decoding stops without an error at the first
//   malformed escape, unescaped quote, control character or invalid UTF-8
//   byte, and whatever was decoded up to that point is returned
// - an allocation error from `bytes_make` / `clone_string`
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator, loc := #caller_location) -> (value: string, err: Error) {
	// Decodes `\xHH` at the start of `s`; returns -1 if it is not well formed.
	get_u2_rune :: proc(s: string) -> rune {
		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
			return -1
		}

		r: rune
		for c in s[2:4] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	// Decodes `\uHHHH` at the start of `s`; returns -1 if it is not well formed.
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1
		}

		r: rune
		for c in s[2:6] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}

	if token.kind != .String {
		return "", nil
	}
	s := token.text
	if len(s) <= 2 {
		return "", nil
	}
	quote := s[0]
	if s[0] != s[len(s)-1] {
		// Invalid string
		return "", nil
	}
	s = s[1:len(s)-1]

	// Fast path: find the first byte that needs special handling.
	i := 0
	for i < len(s) {
		c := s[i]
		if c == '\\' || c == quote || c < ' ' {
			break
		}
		if c < utf8.RUNE_SELF {
			i += 1
			continue
		}
		r, w := utf8.decode_rune_in_string(s[i:])
		if r == utf8.RUNE_ERROR && w == 1 {
			break
		}
		i += w
	}
	if i == len(s) {
		// Nothing to decode: a plain copy is enough.
		return clone_string(s, allocator, loc)
	}

	// Slow path. Forward `loc` so this allocation is attributed to the caller
	// in the same way as the clone_string path above.
	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator, loc) or_return
	w := copy(b, s[0:i])

	if len(b) == 0 && allocator.data == nil {
		// `unmarshal_count_array` calls us with a nil allocator
		return string(b[:w]), nil
	}

	loop: for i < len(s) {
		c := s[i]
		switch {
		case c == '\\':
			i += 1
			if i >= len(s) {
				break loop
			}
			switch s[i] {
			case: break loop
			case '"', '\'', '\\', '/':
				b[w] = s[i]
				i += 1
				w += 1

			case 'b':
				b[w] = '\b'
				i += 1
				w += 1
			case 'f':
				b[w] = '\f'
				i += 1
				w += 1
			case 'r':
				b[w] = '\r'
				i += 1
				w += 1
			case 't':
				b[w] = '\t'
				i += 1
				w += 1
			case 'n':
				b[w] = '\n'
				i += 1
				w += 1
			case 'u':
				i -= 1 // Include the \u in the check for sanity sake
				r := get_u4_rune(s[i:])
				if r < 0 {
					break loop
				}
				i += 6

				// If this is a surrogate pair, decode as such by taking the next rune too.
				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
					r2 := get_u4_rune(s[i:])
					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
						i += 6
						r = utf16.decode_surrogate_pair(r, r2)
					}
				}

				buf, buf_width := utf8.encode_rune(r)
				copy(b[w:], buf[:buf_width])
				w += buf_width

			case '0':
				if spec != .JSON {
					b[w] = '\x00'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'v':
				if spec != .JSON {
					b[w] = '\v'
					i += 1
					w += 1
				} else {
					break loop
				}

			case 'x':
				if spec != .JSON {
					i -= 1 // Include the \x in the check for sanity sake
					r := get_u2_rune(s[i:])
					if r < 0 {
						break loop
					}
					i += 4

					buf, buf_width := utf8.encode_rune(r)
					copy(b[w:], buf[:buf_width])
					w += buf_width
				} else {
					break loop
				}
			}

		case c == quote, c < ' ':
			break loop

		case c < utf8.RUNE_SELF:
			b[w] = c
			i += 1
			w += 1

		case:
			r, width := utf8.decode_rune_in_string(s[i:])
			if r == utf8.RUNE_ERROR && width == 1 {
				// Invalid UTF-8 byte. Stop here like every other malformed
				// input; re-encoding it would turn one byte into a longer
				// replacement sequence and trip the assertion below.
				break loop
			}
			i += width

			buf, buf_width := utf8.encode_rune(r)
			// A rune never re-encodes to more bytes than it was decoded from,
			// which keeps writes within the buffer sized above.
			assert(buf_width <= width)
			copy(b[w:], buf[:buf_width])
			w += buf_width
		}
	}

	return string(b[:w]), nil
}
|