Browse Source

Merge branch 'master' of https://github.com/odin-lang/Odin

gingerBill 2 years ago
parent
commit
e3360a0e5d

+ 3 - 3
core/encoding/json/marshal.odin

@@ -153,7 +153,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 		case complex128: r, i = f64(real(z)), f64(imag(z))
 		case: return .Unsupported_Type
 		}
-	
+
 		io.write_byte(w, '[')    or_return
 		io.write_f64(w, r)       or_return
 		io.write_string(w, ", ") or_return
@@ -165,8 +165,8 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 
 	case runtime.Type_Info_String:
 		switch s in a {
-		case string:  io.write_quoted_string(w, s)         or_return
-		case cstring: io.write_quoted_string(w, string(s)) or_return
+		case string:  io.write_quoted_string(w, s, '"', nil, true)         or_return
+		case cstring: io.write_quoted_string(w, string(s), '"', nil, true) or_return
 		}
 
 	case runtime.Type_Info_Boolean:

+ 10 - 1
core/encoding/json/parser.odin

@@ -2,6 +2,7 @@ package json
 
 import "core:mem"
 import "core:unicode/utf8"
+import "core:unicode/utf16"
 import "core:strconv"
 
 Parser :: struct {
@@ -403,11 +404,19 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 				}
 				i += 6
 
+				// If this is a surrogate pair, decode as such by taking the next rune too.
+				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
+					r2 := get_u4_rune(s[i:])
+					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
+						i += 6
+						r = utf16.decode_surrogate_pair(r, r2)
+					}
+				}
+
 				buf, buf_width := utf8.encode_rune(r)
 				copy(b[w:], buf[:buf_width])
 				w += buf_width
 
-
 			case '0':
 				if spec != .JSON {
 					b[w] = '\x00'

+ 21 - 8
core/io/util.odin

@@ -2,6 +2,7 @@ package io
 
 import "core:strconv"
 import "core:unicode/utf8"
+import "core:unicode/utf16"
 
 read_ptr :: proc(r: Reader, p: rawptr, byte_size: int, n_read: ^int = nil) -> (n: int, err: Error) {
 	return read(r, ([^]byte)(p)[:byte_size], n_read)
@@ -146,7 +147,7 @@ write_encoded_rune :: proc(w: Writer, r: rune, write_quote := true, n_written: ^
 	return
 }
 
-write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil) -> (n: int, err: Error) {
+write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
 	is_printable :: proc(r: rune) -> bool {
 		if r <= 0xff {
 			switch r {
@@ -163,7 +164,7 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
 	defer if n_written != nil {
 		n_written^ += n
 	}
-	
+
 	if html_safe {
 		switch r {
 		case '<', '>', '&':
@@ -211,17 +212,29 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
 				write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
 			}
 		case:
-			write_byte(w, '\\', &n) or_return
-			write_byte(w, 'U', &n)  or_return
-			for s := 28; s >= 0; s -= 4 {
-				write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+			if for_json {
+				buf: [2]u16
+				utf16.encode(buf[:], []rune{c})
+				for bc in buf {
+					write_byte(w, '\\', &n) or_return
+					write_byte(w, 'u', &n)  or_return
+					for s := 12; s >= 0; s -= 4 {
+						write_byte(w, DIGITS_LOWER[bc>>uint(s) & 0xf], &n) or_return
+					}
+				}
+			} else {
+				write_byte(w, '\\', &n) or_return
+				write_byte(w, 'U', &n)  or_return
+				for s := 24; s >= 0; s -= 4 {
+					write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+				}
 			}
 		}
 	}
 	return
 }
 
-write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil) -> (n: int, err: Error) {
+write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
 	defer if n_written != nil {
 		n_written^ += n
 	}
@@ -240,7 +253,7 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written
 			continue
 		}
 
-		n_wrapper(write_escaped_rune(w, r, quote), &n) or_return
+		n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return
 
 	}
 	write_byte(w, quote, &n) or_return

+ 5 - 0
core/unicode/utf8/utf8.odin

@@ -10,6 +10,11 @@ UTF_MAX    :: 4
 SURROGATE_MIN :: 0xd800
 SURROGATE_MAX :: 0xdfff
 
+// A high/leading surrogate is in the range SURROGATE_MIN..SURROGATE_HIGH_MAX;
+// a low/trailing surrogate is in the range SURROGATE_LOW_MIN..SURROGATE_MAX.
+SURROGATE_HIGH_MAX :: 0xdbff
+SURROGATE_LOW_MIN  :: 0xdc00
+
 T1 :: 0b0000_0000
 TX :: 0b1000_0000
 T2 :: 0b1100_0000

+ 15 - 1
tests/core/encoding/json/test_core_json.odin

@@ -32,6 +32,7 @@ main :: proc() {
 	parse_json(&t)
 	marshal_json(&t)
 	unmarshal_json(&t)
+	surrogate(&t)
 
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
@@ -344,4 +345,17 @@ unmarshal_json :: proc(t: ^testing.T) {
 	for p, i in g.products {
 		expect(t, p == original_data.products[i], "Producted unmarshaled improperly")
 	}
-}
+}
+
+// Round-trip test: marshals a string containing a code point outside the
+// Basic Multilingual Plane (the emoji below) to JSON, unmarshals the result,
+// and expects to get the original string back.
+@test
+surrogate :: proc(t: ^testing.T) {
+	input := `+ + * 😃 - /`
+
+	out, err := json.marshal(input)
+	// The marshalled bytes are only needed for the duration of this test.
+	defer delete(out)
+	expect(t, err == nil, fmt.tprintf("Expected `json.marshal(%q)` to return a nil error, got %v", input, err))
+
+	back: string
+	uerr := json.unmarshal(out, &back)
+	expect(t, uerr == nil, fmt.tprintf("Expected `json.unmarshal(%q)` to return a nil error, got %v", string(out), uerr))
+	// On a mismatch, report the value that was actually decoded rather than the error.
+	expect(t, back == input, fmt.tprintf("Expected `json.unmarshal(%q)` to return %q, got %q", string(out), input, back))
+}