Browse Source

Add `json.unmarshal`

gingerBill 3 years ago
parent
commit
6a271355a6

+ 35 - 19
core/encoding/json/parser.odin

@@ -10,11 +10,10 @@ Parser :: struct {
 	curr_token:     Token,
 	spec:           Specification,
 	allocator:      mem.Allocator,
-	unmarshal_data: any,
 	parse_integers: bool,
 }
 
-make_parser :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> Parser {
+make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
 	p: Parser
 	p.tok = make_tokenizer(data, spec, parse_integers)
 	p.spec = spec
@@ -24,7 +23,7 @@ make_parser :: proc(data: []byte, spec := Specification.JSON, parse_integers :=
 	return p
 }
 
-parse :: proc(data: []byte, spec := Specification.JSON, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
+parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
 	context.allocator = allocator
 	p := make_parser(data, spec, parse_integers, allocator)
 
@@ -94,7 +93,7 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
 		advance_token(p)
 		return
 	case .String:
-		value = String(unquote_string(token, p.spec, p.allocator))
+		value = unquote_string(token, p.spec, p.allocator) or_return
 		advance_token(p)
 		return
 
@@ -161,24 +160,41 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
 	return
 }
 
-clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
+// bytes_make allocates `size` bytes with the given alignment from `allocator`
+// and translates the allocator's error into this package's `Error`:
+// `.Out_Of_Memory` is passed through, any other failure becomes `.Invalid_Allocator`.
+// Whatever `mem.alloc_bytes` returned is handed back as `bytes`, even on error.
+@(private)
+bytes_make :: proc(size, alignment: int, allocator: mem.Allocator) -> (bytes: []byte, err: Error) {
+	alloc_err: mem.Allocator_Error
+	bytes, alloc_err = mem.alloc_bytes(size, alignment, allocator)
+	if alloc_err == nil {
+		return
+	}
+	if alloc_err == .Out_Of_Memory {
+		err = .Out_Of_Memory
+		return
+	}
+	err = .Invalid_Allocator
+	return
+}
+
+clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err: Error) {
 	n := len(s)
-	b := make([]byte, n+1, allocator)
+	b := bytes_make(n+1, 1, allocator) or_return
 	copy(b, s)
-	b[n] = 0
-	return string(b[:n])
+	if len(b) > n {
+		b[n] = 0
+		str = string(b[:n])
+	}
+	return
 }
 
-parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
+parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) {
 	tok := p.curr_token
 	if p.spec == Specification.JSON5 {
 		if tok.kind == .String {
 			expect_token(p, .String)
-			key = unquote_string(tok, p.spec, p.allocator)
+			key = unquote_string(tok, p.spec, key_allocator) or_return
 			return
 		} else if tok.kind == .Ident {
 			expect_token(p, .Ident)
-			key = clone_string(tok.text, p.allocator)
+			key = clone_string(tok.text, key_allocator) or_return
 			return
 		}
 	}
@@ -186,7 +202,7 @@ parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
 		err = .Expected_String_For_Object_Key
 		return
 	}
-	key = unquote_string(tok, p.spec, p.allocator)
+	key = unquote_string(tok, p.spec, key_allocator) or_return
 	return
 }
 
@@ -205,7 +221,7 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
 
 	for p.curr_token.kind != .Close_Brace {
 		key: string
-		key, err = parse_object_key(p)
+		key, err = parse_object_key(p, p.allocator)
 		if err != .None {
 			delete(key, p.allocator)
 			return
@@ -248,7 +264,7 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
 
 
 // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
-unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
+unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> (value: string, err: Error) {
 	get_u2_rune :: proc(s: string) -> rune {
 		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
 			return -1
@@ -287,16 +303,16 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 	}
 
 	if token.kind != .String {
-		return ""
+		return "", nil
 	}
 	s := token.text
 	if len(s) <= 2 {
-		return ""
+		return "", nil
 	}
 	quote := s[0]
 	if s[0] != s[len(s)-1] {
 		// Invalid string
-		return ""
+		return "", nil
 	}
 	s = s[1:len(s)-1]
 
@@ -320,7 +336,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 		return clone_string(s, allocator)
 	}
 
-	b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator)
+	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return
 	w := copy(b, s[0:i])
 	loop: for i < len(s) {
 		c := s[i]
@@ -423,5 +439,5 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 		}
 	}
 
-	return string(b[:w])
+	return string(b[:w]), nil
 }

+ 1 - 1
core/encoding/json/tokenizer.odin

@@ -53,7 +53,7 @@ Tokenizer :: struct {
 
 
 
-make_tokenizer :: proc(data: []byte, spec := Specification.JSON, parse_integers := false) -> Tokenizer {
+make_tokenizer :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> Tokenizer {
 	t := Tokenizer{pos = {line=1}, data = data, spec = spec, parse_integers = parse_integers}
 	next_rune(&t)
 	if t.r == utf8.RUNE_BOM {

+ 6 - 0
core/encoding/json/types.odin

@@ -6,6 +6,8 @@ Specification :: enum {
 	// MJSON, // http://bitsquid.blogspot.com/2009/09/json-configuration-data.html
 }
 
+// DEFAULT_SPECIFICATION is the specification used as the default `spec` argument
+// by make_parser, parse, make_tokenizer, is_valid and the unmarshal procedures.
+DEFAULT_SPECIFICATION :: Specification.JSON5
+
 Null    :: distinct rawptr
 Integer :: i64
 Float   :: f64
@@ -41,6 +43,10 @@ Error :: enum {
 	Expected_String_For_Object_Key,
 	Duplicate_Object_Key,
 	Expected_Colon_After_Key,
+	
+	// Allocating Errors
+	Invalid_Allocator,
+	Out_Of_Memory,
 }
 
 

+ 562 - 0
core/encoding/json/unmarshal.odin

@@ -0,0 +1,562 @@
+package json
+
+import "core:fmt"
+import "core:mem"
+import "core:math"
+import "core:reflect"
+import "core:strconv"
+import "core:strings"
+import "core:runtime"
+
+// Unmarshal_Data_Error lists the failures that concern the arguments passed to the
+// unmarshal procedures or the way the data maps onto the destination.
+Unmarshal_Data_Error :: enum {
+	// Returned by unmarshal_any when `is_valid` rejects the input bytes.
+	Invalid_Data,
+	// Returned by unmarshal_any when the destination `any` is nil, has no type id,
+	// or has nil data.
+	Invalid_Parameter,
+	// Returned by unmarshal_any when the destination is not a typed pointer
+	// (a `rawptr` is rejected as well).
+	Non_Pointer_Parameter,
+	// Returned by unmarsal_object when a struct field is flagged as already used.
+	Multiple_Use_Field,
+}
+
+// Unsupported_Type_Error reports that a JSON token could not be stored into the
+// destination: `id` is the destination's typeid and `token` is the parser's
+// current token captured when the unmarshal procedure was entered.
+Unsupported_Type_Error :: struct {
+	id:    typeid,
+	token: Token,
+}
+
+// Unmarshal_Error is the error type returned by every unmarshal procedure.
+// It holds either a parser/allocation `Error`, an `Unmarshal_Data_Error`,
+// or an `Unsupported_Type_Error`; the procedures return `nil` on success.
+Unmarshal_Error :: union {
+	Error,
+	Unmarshal_Data_Error,
+	Unsupported_Type_Error,
+}
+
+// unmarshal_any validates `data` against `spec` and then decodes it into the value
+// that `v` points to.
+//
+// `v` must wrap a typed, non-nil pointer:
+//   - a nil `any`, a nil typeid, nil `any` data, or a nil destination pointer
+//     yields `.Invalid_Parameter`;
+//   - a non-pointer (or `rawptr`) destination yields `.Non_Pointer_Parameter`;
+//   - input rejected by `is_valid` yields `.Invalid_Data`.
+// Integers are tokenized as integers (parse_integers = true). All allocations made
+// while decoding go through `allocator`.
+unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
+	v := v
+	if v == nil || v.id == nil {
+		return .Invalid_Parameter
+	}
+	v = reflect.any_base(v)
+	ti := type_info_of(v.id)
+	if !reflect.is_pointer(ti) || ti.id == rawptr {
+		return .Non_Pointer_Parameter
+	}
+	
+	
+	if !is_valid(data, spec, true) {
+		return .Invalid_Data
+	}
+	p := make_parser(data, spec, true, allocator)
+	
+	// NOTE: `v.data` is the address of the pointer variable itself, so it has to be
+	// checked before it is dereferenced to obtain the destination address.
+	if v.data == nil {
+		return .Invalid_Parameter
+	}
+	dst_ptr := (^rawptr)(v.data)^
+	if dst_ptr == nil {
+		// A nil destination pointer cannot be written through
+		return .Invalid_Parameter
+	}
+	
+	dst := any{dst_ptr, ti.variant.(reflect.Type_Info_Pointer).elem.id}
+	return unmarsal_value(&p, dst)
+}
+
+
+// unmarshal decodes the JSON bytes in `data` into the value pointed to by `ptr`.
+// It is a typed convenience wrapper that forwards every argument to unmarshal_any.
+unmarshal :: proc(data: []byte, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
+	return unmarshal_any(data, ptr, spec, allocator)
+}
+
+// unmarshal_string is the `string` counterpart of unmarshal: the string is
+// reinterpreted as a byte slice (no copy is made here) and passed to unmarshal_any.
+unmarshal_string :: proc(data: string, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
+	return unmarshal_any(transmute([]byte)data, ptr, spec, allocator)
+}
+
+
+// assign_bool stores `b` into the value referenced by `val` when its core type
+// (as reported by reflect.any_core) is one of the boolean types.
+// Returns false, leaving the destination untouched, for any other type.
+@(private)
+assign_bool :: proc(val: any, b: bool) -> bool {
+	v := reflect.any_core(val)
+	// Switching on `&v` makes `dst` addressable, so the assignment writes
+	// through to the memory referenced by `val`.
+	switch dst in &v {
+	case bool: dst = auto_cast b
+	case b8:   dst = auto_cast b
+	case b16:  dst = auto_cast b
+	case b32:  dst = auto_cast b
+	case b64:  dst = auto_cast b
+	case: return false
+	}
+	return true
+}
+// assign_int stores `i` (converted with auto_cast) into the value referenced by
+// `val` when its core type is one of the integer types listed below, including
+// the explicit-endian variants. Returns false for any other destination type.
+// NOTE(review): no range check is visible here, so a value that does not fit the
+// destination is presumably truncated by the cast — confirm.
+@(private)
+assign_int :: proc(val: any, i: $T) -> bool {
+	v := reflect.any_core(val)
+	// Switching on `&v` makes `dst` addressable, so the assignment writes
+	// through to the memory referenced by `val`.
+	switch dst in &v {
+	case i8:      dst = auto_cast i
+	case i16:     dst = auto_cast i
+	case i16le:   dst = auto_cast i
+	case i16be:   dst = auto_cast i
+	case i32:     dst = auto_cast i
+	case i32le:   dst = auto_cast i
+	case i32be:   dst = auto_cast i
+	case i64:     dst = auto_cast i
+	case i64le:   dst = auto_cast i
+	case i64be:   dst = auto_cast i
+	case i128:    dst = auto_cast i
+	case i128le:  dst = auto_cast i
+	case i128be:  dst = auto_cast i
+	case u8:      dst = auto_cast i
+	case u16:     dst = auto_cast i
+	case u16le:   dst = auto_cast i
+	case u16be:   dst = auto_cast i
+	case u32:     dst = auto_cast i
+	case u32le:   dst = auto_cast i
+	case u32be:   dst = auto_cast i
+	case u64:     dst = auto_cast i
+	case u64le:   dst = auto_cast i
+	case u64be:   dst = auto_cast i
+	case u128:    dst = auto_cast i
+	case u128le:  dst = auto_cast i
+	case u128be:  dst = auto_cast i
+	case int:     dst = auto_cast i
+	case uint:    dst = auto_cast i
+	case uintptr: dst = auto_cast i
+	case: return false
+	}
+	return true
+}
+// assign_float stores `i` (converted with auto_cast) into the value referenced by
+// `val` when its core type is one of the floating-point types listed below,
+// including the explicit-endian variants. Returns false for any other type.
+@(private)
+assign_float :: proc(val: any, i: $T) -> bool {
+	v := reflect.any_core(val)
+	// Switching on `&v` makes `dst` addressable, so the assignment writes
+	// through to the memory referenced by `val`.
+	switch dst in &v {
+	case f16:     dst = auto_cast i
+	case f16le:   dst = auto_cast i
+	case f16be:   dst = auto_cast i
+	case f32:     dst = auto_cast i
+	case f32le:   dst = auto_cast i
+	case f32be:   dst = auto_cast i
+	case f64:     dst = auto_cast i
+	case f64le:   dst = auto_cast i
+	case f64be:   dst = auto_cast i
+	case: return false
+	}
+	return true
+}
+
+
+
+// unmarsal_value decodes the value starting at the parser's current token into
+// the destination described by `v` and consumes the tokens of that value.
+//
+//   - null zeroes the destination;
+//   - booleans require a boolean destination;
+//   - integers/floats are stored into integer or float destinations;
+//   - strings are stored into `string`/`cstring`, or converted for enum
+//     (matched by name), integer and float destinations;
+//   - objects and arrays are delegated to unmarsal_object / unmarsal_array;
+//   - in JSON5 mode Infinity and NaN are stored into float destinations.
+//
+// An `Unsupported_Type_Error` is returned when the token cannot be stored into the
+// destination type; allocation failures are returned as `Error`.
+@(private)
+unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
+	UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
+	
+	token := p.curr_token
+	#partial switch token.kind {
+	case .Null:
+		ti := type_info_of(v.id)
+		mem.zero(v.data, ti.size)
+		advance_token(p)
+		return
+	case .False, .True:
+		// The literal must be consumed like every other scalar, otherwise the
+		// caller would see it again as the next token.
+		advance_token(p)
+		if assign_bool(v, token.kind == .True) {
+			return
+		}
+		return UNSUPPORTED_TYPE
+
+	case .Integer:
+		advance_token(p)
+		i, _ := strconv.parse_i128(token.text)
+		if assign_int(v, i) {
+			return
+		}
+		if assign_float(v, i) {
+			return
+		}
+		// Neither an integer nor a float destination: report it instead of
+		// silently dropping the value (consistent with the .Float case).
+		return UNSUPPORTED_TYPE
+	case .Float:
+		advance_token(p)
+		f, _ := strconv.parse_f64(token.text)
+		if assign_float(v, f) {
+			return
+		}
+		// A float without a fractional part may still be stored in an integer
+		if i, fract := math.modf(f); fract == 0 {
+			if assign_int(v, i) {
+				return
+			}
+			if assign_float(v, i) {
+				return
+			}
+		}
+		return UNSUPPORTED_TYPE
+	case .String:
+		advance_token(p)
+		str := unquote_string(token, p.spec, p.allocator) or_return
+		val := reflect.any_base(v)
+		switch dst in &val {
+		case string:
+			// Ownership of `str` moves to the destination
+			dst = str
+			return
+		case cstring:
+			if str == "" {
+				dst = strings.clone_to_cstring("", p.allocator)
+			} else {
+				// NOTE: This is valid because 'clone_string' appends a NUL terminator
+				dst = cstring(raw_data(str))
+			}
+			return
+		}
+		// Every remaining destination only reads `str`, so release it on exit
+		defer delete(str, p.allocator)
+		
+		ti := type_info_of(val.id)
+		#partial switch variant in ti.variant {
+		case reflect.Type_Info_Enum:
+			for name, i in variant.names {
+				if name == str {
+					assign_int(val, variant.values[i])
+					return nil
+				}
+			}
+			// TODO(bill): should this be an error or not?
+			return nil
+			
+		case reflect.Type_Info_Integer:
+			// Parse the unquoted contents; `token.text` still carries the quotes
+			// and could therefore never be parsed as a number.
+			i, ok := strconv.parse_i128(str)
+			if !ok {
+				return UNSUPPORTED_TYPE
+			}
+			if assign_int(val, i) {
+				return
+			}
+			if assign_float(val, i) {
+				return
+			}
+		case reflect.Type_Info_Float:
+			// Same as above: parse the unquoted contents
+			f, ok := strconv.parse_f64(str)
+			if !ok {
+				return UNSUPPORTED_TYPE
+			}
+			if assign_int(val, f) {
+				return
+			}
+			if assign_float(val, f) {
+				return
+			}
+		}
+		
+		return UNSUPPORTED_TYPE
+
+
+	case .Open_Brace:
+		return unmarsal_object(p, v)
+
+	case .Open_Bracket:
+		return unmarsal_array(p, v)
+
+	case:
+		if p.spec == Specification.JSON5 {
+			#partial switch token.kind {
+			case .Infinity:
+				advance_token(p)
+				f: f64 = 0h7ff0000000000000
+				if token.text[0] == '-' {
+					f = 0hfff0000000000000
+				}
+				if assign_float(v, f) {
+					return
+				}
+				return UNSUPPORTED_TYPE
+			case .NaN:
+				advance_token(p)
+				f: f64 = 0h7ff7ffffffffffff
+				if token.text[0] == '-' {
+					f = 0hfff7ffffffffffff
+				}
+				if assign_float(v, f) {
+					return
+				}
+				return UNSUPPORTED_TYPE
+			}
+		}
+	}
+
+	// Any other token is skipped without touching the destination
+	advance_token(p)
+	return
+
+}
+
+
+// unmarsal_expect_token consumes the current token via `expect_token`, asserting
+// that it is of the given `kind`, and returns the token that was current before
+// the call. A mismatch trips the assertion, reported at the caller's location.
+@(private)
+unmarsal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token {
+	prev := p.curr_token
+	err := expect_token(p, kind)
+	assert(err == nil, "unmarsal_expect_token", loc)
+	return prev
+}
+
+
+// unmarsal_object decodes a JSON object (`{ ... }`) into the destination `v`,
+// consuming everything from the opening to the closing brace.
+//
+// Supported destinations:
+//   - structs (not raw unions): a key is matched against the `json` struct tag
+//     first and against the field name second; a key that matches no field yields
+//     an `Unsupported_Type_Error`, a field that is set twice yields
+//     `.Multiple_Use_Field`;
+//   - maps with string keys: entries are inserted through the runtime map API,
+//     using the parser's allocator if the map has none yet;
+//   - enumerated arrays: a key must be the name of a value of the index enum.
+// Any other destination yields an `Unsupported_Type_Error`.
+@(private)
+unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
+	UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
+	
+	// NOTE: the token must be consumed outside of `assert`, otherwise it would not
+	// be consumed at all when assertions are compiled out.
+	unmarsal_expect_token(p, .Open_Brace)
+
+	v := v
+	v = reflect.any_base(v)
+	ti := type_info_of(v.id)
+	
+	#partial switch t in ti.variant {
+	case reflect.Type_Info_Struct:
+		if t.is_raw_union {
+			return UNSUPPORTED_TYPE
+		}
+		
+		fields := reflect.struct_fields_zipped(ti.id)
+		
+		// One flag per field, shared by all keys of this object, so that a field
+		// which is assigned a second time can be detected.
+		field_used := make([]bool, len(fields), context.temp_allocator)
+	
+		struct_loop: for p.curr_token.kind != .Close_Brace {
+			key, _ := parse_object_key(p, p.allocator)
+			defer delete(key, p.allocator)
+			
+			unmarsal_expect_token(p, .Colon)
+			
+			use_field_idx := -1
+			
+			// The `json:"..."` struct tag takes precedence over the field name
+			for field, field_idx in fields {
+				tag_value := string(reflect.struct_tag_get(field.tag, "json"))
+				if key == tag_value {
+					use_field_idx = field_idx
+					break
+				}
+			}
+			
+			if use_field_idx < 0 {
+				for field, field_idx in fields {
+					if key == field.name {
+						use_field_idx = field_idx
+						break
+					}
+				}
+			}
+			
+			if use_field_idx >= 0 {
+				if field_used[use_field_idx] {
+					return .Multiple_Use_Field
+				}
+				field_used[use_field_idx] = true
+				offset := fields[use_field_idx].offset
+				type := fields[use_field_idx].type
+				
+
+				field_ptr := rawptr(uintptr(v.data) + offset)
+				field := any{field_ptr, type.id}
+				unmarsal_value(p, field) or_return
+				
+				if p.spec == Specification.JSON5 {
+					// Allow trailing commas
+					if allow_token(p, .Comma) {
+						continue struct_loop
+					}
+				} else {
+					// Disallow trailing commas
+					if allow_token(p, .Comma) {
+						continue struct_loop
+					} else {
+						break struct_loop
+					}
+				}
+				
+				continue struct_loop
+			}
+			
+			// The key does not correspond to any field of the struct
+			return Unsupported_Type_Error{v.id, p.curr_token}
+		}
+		
+	case reflect.Type_Info_Map:
+		if !reflect.is_string(t.key) {
+			return UNSUPPORTED_TYPE
+		}
+		raw_map := (^mem.Raw_Map)(v.data)
+		if raw_map.entries.allocator.procedure == nil {
+			raw_map.entries.allocator = p.allocator
+		}
+		
+		header := runtime.__get_map_header_runtime(raw_map, t)
+		
+		// Scratch storage for a single map value; every entry is decoded into it
+		// and then handed to the map
+		elem_backing := bytes_make(t.value.size, t.value.align, p.allocator) or_return
+		defer delete(elem_backing, p.allocator)
+		
+		map_backing_value := any{raw_data(elem_backing), t.value.id}
+		
+		pass := 0
+		
+		map_loop: for p.curr_token.kind != .Close_Brace {
+			defer pass += 1
+			
+			key, _ := parse_object_key(p, p.allocator)
+			unmarsal_expect_token(p, .Colon)
+			
+			
+			mem.zero_slice(elem_backing)
+			if err := unmarsal_value(p, map_backing_value); err != nil {
+				delete(key, p.allocator)
+				return err
+			}
+			
+			hash := runtime.Map_Hash {
+				hash = runtime.default_hasher_string(&key, 0),
+				key_ptr = &key,
+			}
+			
+			key_cstr: cstring
+			if reflect.is_cstring(t.key) {
+				key_cstr = cstring(raw_data(key))
+				hash.key_ptr = &key_cstr
+			}
+			
+			set_ptr := runtime.__dynamic_map_set(header, hash, map_backing_value.data)
+			if set_ptr == nil {
+				delete(key, p.allocator)
+			} 
+		
+			if p.spec == Specification.JSON5 {
+				// Allow trailing commas
+				if allow_token(p, .Comma) {
+					continue map_loop
+				}
+			} else {
+				// Disallow trailing commas
+				if allow_token(p, .Comma) {
+					continue map_loop
+				} else {
+					break map_loop
+				}
+			}
+		}
+		
+	case reflect.Type_Info_Enumerated_Array:
+		index_type := reflect.type_info_base(t.index)
+		enum_type := index_type.variant.(reflect.Type_Info_Enum)
+	
+		enumerated_array_loop: for p.curr_token.kind != .Close_Brace {
+			key, _ := parse_object_key(p, p.allocator)
+			unmarsal_expect_token(p, .Colon)
+			defer delete(key, p.allocator)
+
+			// Translate the enum value name into a zero-based element index
+			index := -1
+			for name, i in enum_type.names {
+				if key == name {
+					index = int(enum_type.values[i] - t.min_value)
+					break
+				}
+			}
+			if index < 0 || index >= t.count {
+				return UNSUPPORTED_TYPE
+			}
+						
+			index_ptr := rawptr(uintptr(v.data) + uintptr(index*t.elem_size))
+			index_any := any{index_ptr, t.elem.id}
+			
+			unmarsal_value(p, index_any) or_return
+		
+			if p.spec == Specification.JSON5 {
+				// Allow trailing commas
+				if allow_token(p, .Comma) {
+					continue enumerated_array_loop
+				}
+			} else {
+				// Disallow trailing commas
+				if allow_token(p, .Comma) {
+					continue enumerated_array_loop
+				} else {
+					break enumerated_array_loop
+				}
+			}
+		}
+		// NOTE: no early return here; the closing brace still has to be consumed
+		// below, exactly as for structs and maps.
+	
+	case:
+		return UNSUPPORTED_TYPE
+	}
+	
+	unmarsal_expect_token(p, .Close_Brace)
+	return
+}
+
+
+// unmarsal_count_array reports how many elements the array at the parser's
+// current position contains. The elements are parsed with a nil allocator and
+// their results discarded; the parser state is put back afterwards, so the
+// caller still sees the opening bracket as the current token.
+@(private)
+unmarsal_count_array :: proc(p: ^Parser) -> (length: uintptr) {
+	saved_state := p^
+	defer p^ = saved_state
+
+	// Counting must not allocate anything
+	p.allocator = mem.nil_allocator()
+
+	unmarsal_expect_token(p, .Open_Bracket)
+	for p.curr_token.kind != .Close_Bracket {
+		_, _ = parse_value(p)
+		length += 1
+
+		// Without a separating comma there cannot be another element
+		if !allow_token(p, .Comma) {
+			break
+		}
+	}
+	return
+}
+
+// unmarsal_array decodes a JSON array (`[ ... ]`) into the destination `v`.
+//
+// The element count is determined up front with unmarsal_count_array:
+//   - slices and dynamic arrays get freshly allocated backing memory of exactly
+//     that many elements from the parser's allocator;
+//   - fixed and enumerated arrays are filled in place and may receive fewer
+//     elements than they hold, but never more.
+// Any other destination yields an `Unsupported_Type_Error`.
+@(private)
+unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
+	// Decodes the elements one after another into consecutive slots starting at `base`
+	assign_array :: proc(p: ^Parser, base: rawptr, elem: ^reflect.Type_Info, length: uintptr) -> Unmarshal_Error {
+		unmarsal_expect_token(p, .Open_Bracket)
+		
+		stride := uintptr(elem.size)
+		for idx: uintptr = 0; p.curr_token.kind != .Close_Bracket; idx += 1 {
+			assert(idx < length)
+			
+			slot := any{rawptr(uintptr(base) + idx*stride), elem.id}
+			unmarsal_value(p, slot) or_return
+			
+			// Without a separating comma there cannot be another element
+			if !allow_token(p, .Comma) {
+				break
+			}
+		}
+		
+		unmarsal_expect_token(p, .Close_Bracket)
+		return nil
+	}
+
+	UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
+	
+	base_info := reflect.type_info_base(type_info_of(v.id))
+	
+	length := unmarsal_count_array(p)
+	count := int(length)
+	
+	#partial switch t in base_info.variant {
+	case reflect.Type_Info_Slice:
+		backing := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
+		slice_header := (^mem.Raw_Slice)(v.data)
+		slice_header.data = raw_data(backing)
+		slice_header.len = count
+		
+		return assign_array(p, slice_header.data, t.elem, length)
+		
+	case reflect.Type_Info_Dynamic_Array:
+		backing := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
+		array_header := (^mem.Raw_Dynamic_Array)(v.data)
+		array_header.data = raw_data(backing)
+		array_header.len = count
+		array_header.cap = count
+		array_header.allocator = p.allocator
+		
+		return assign_array(p, array_header.data, t.elem, length)
+		
+	case reflect.Type_Info_Array:
+		// NOTE(bill): Allow lengths which are less than the dst array
+		if count > t.count {
+			return UNSUPPORTED_TYPE
+		}
+		
+		return assign_array(p, v.data, t.elem, length)
+		
+	case reflect.Type_Info_Enumerated_Array:
+		// NOTE(bill): Allow lengths which are less than the dst array
+		if count > t.count {
+			return UNSUPPORTED_TYPE
+		}
+		
+		return assign_array(p, v.data, t.elem, length)
+	}
+		
+	return UNSUPPORTED_TYPE
+}

+ 1 - 1
core/encoding/json/validator.odin

@@ -3,7 +3,7 @@ package json
 import "core:mem"
 
 // NOTE(bill): is_valid will not check for duplicate keys
-is_valid :: proc(data: []byte, spec := Specification.JSON, parse_integers := false) -> bool {
+is_valid :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> bool {
 	p := make_parser(data, spec, parse_integers, mem.nil_allocator())
 	if p.spec == Specification.JSON5 {
 		return validate_value(&p)