Browse Source

Merge pull request #2533 from laytan/add-hex-and-expand-percent-decoding

add encoding/hex and use it to expand the percent decoding chars
Jeroen van Rijn 2 years ago
parent
commit
1896ae5d15
2 changed files with 78 additions and 39 deletions
  1. 73 0
      core/encoding/hex/hex.odin
  2. 5 39
      core/net/url.odin

+ 73 - 0
core/encoding/hex/hex.odin

@@ -0,0 +1,73 @@
+package hex
+
+import "core:strings"
+
+// Encodes `src` as hexadecimal text using the lowercase digits in HEXTABLE.
+// Every input byte becomes two output bytes (high nibble first), so the result
+// has length `len(src) * 2`. The result is allocated with `allocator`.
+encode :: proc(src: []byte, allocator := context.allocator) -> []byte #no_bounds_check {
+	dst := make([]byte, len(src) * 2, allocator)
+	// The input index `i` and the output offset `2*i` must advance independently:
+	// sharing a single counter skips input bytes and writes to the wrong slots.
+	for v, i in src {
+		dst[2*i]     = HEXTABLE[v>>4]
+		dst[2*i + 1] = HEXTABLE[v&0x0f]
+	}
+
+	return dst
+}
+
+
+// Decodes hexadecimal text in `src` (digits may be upper- or lowercase) into
+// raw bytes, two input characters per output byte.
+// `ok` is false when `src` has an odd length or contains a non-hex character.
+// In the latter case the buffer already allocated for `dst` is not freed here.
+decode :: proc(src: []byte, allocator := context.allocator) -> (dst: []byte, ok: bool) #no_bounds_check {
+	// An odd number of digits cannot form whole bytes.
+	if len(src) % 2 != 0 {
+		return
+	}
+
+	dst = make([]byte, len(src) / 2, allocator)
+	for out in 0..<len(dst) {
+		// Each output byte is built from the digit pair at 2*out and 2*out + 1.
+		hi := hex_digit(src[2*out]) or_return
+		lo := hex_digit(src[2*out + 1]) or_return
+
+		dst[out] = (hi << 4) | lo
+	}
+
+	ok = true
+	return
+}
+
+// Decodes a two-digit hex sequence into a single byte, eg: "23" or "0x23" -> '#'.
+// A leading "0x" or "0X" is skipped if present. `ok` is false unless exactly
+// two valid hex digits remain after that.
+decode_sequence :: proc(str: string) -> (res: byte, ok: bool) {
+	digits := str
+	if strings.has_prefix(digits, "0x") || strings.has_prefix(digits, "0X") {
+		digits = digits[2:]
+	}
+
+	// Exactly one byte worth of digits is required.
+	if len(digits) != 2 {
+		return
+	}
+
+	hi := hex_digit(digits[0]) or_return
+	lo := hex_digit(digits[1]) or_return
+
+	res = (hi << 4) | lo
+	ok = true
+	return
+}
+
+// Lookup table mapping a nibble value (0..15) to its lowercase ASCII hex digit.
+@(private)
+HEXTABLE := [16]byte {
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+}
+
+// Converts one ASCII hex digit ('0'-'9', 'a'-'f' or 'A'-'F') to its value 0..15.
+// The second result is false (with value 0) for any other character.
+@(private)
+hex_digit :: proc(char: byte) -> (u8, bool) {
+	if '0' <= char && char <= '9' {
+		return char - '0', true
+	}
+	if 'a' <= char && char <= 'f' {
+		return char - 'a' + 10, true
+	}
+	if 'A' <= char && char <= 'F' {
+		return char - 'A' + 10, true
+	}
+	return 0, false
+}
+

+ 5 - 39
core/net/url.odin

@@ -19,7 +19,7 @@ package net
 import "core:strings"
 import "core:strconv"
 import "core:unicode/utf8"
-import "core:mem"
+import "core:encoding/hex"
 
 split_url :: proc(url: string, allocator := context.allocator) -> (scheme, host, path: string, queries: map[string]string) {
 	s := url
@@ -127,12 +127,10 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 	builder_grow(&b, len(encoded_string))
 	defer if !ok do builder_destroy(&b)
 
-	stack_buf: [4]u8
-	pending := mem.buffer_from_slice(stack_buf[:])
 	s := encoded_string
 
 	for len(s) > 0 {
-		i := index_rune(s, '%')
+		i := index_byte(s, '%')
 		if i == -1 {
 			write_string(&b, s) // no '%'s; the string is already decoded
 			break
@@ -145,47 +143,15 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 		s = s[1:]
 
 		if s[0] == '%' {
-			write_rune(&b, '%')
+			write_byte(&b, '%')
 			s = s[1:]
 			continue
 		}
 
 		if len(s) < 2 do return // percent without encoded value
 
-		n: int
-		n, _ = strconv.parse_int(s[:2], 16)
-		switch n {
-		case 0x20:  write_rune(&b, ' ')
-		case 0x21:  write_rune(&b, '!')
-		case 0x23:  write_rune(&b, '#')
-		case 0x24:  write_rune(&b, '$')
-		case 0x25:  write_rune(&b, '%')
-		case 0x26:  write_rune(&b, '&')
-		case 0x27:  write_rune(&b, '\'')
-		case 0x28:  write_rune(&b, '(')
-		case 0x29:  write_rune(&b, ')')
-		case 0x2A:  write_rune(&b, '*')
-		case 0x2B:  write_rune(&b, '+')
-		case 0x2C:  write_rune(&b, ',')
-		case 0x2F:  write_rune(&b, '/')
-		case 0x3A:  write_rune(&b, ':')
-		case 0x3B:  write_rune(&b, ';')
-		case 0x3D:  write_rune(&b, '=')
-		case 0x3F:  write_rune(&b, '?')
-		case 0x40:  write_rune(&b, '@')
-		case 0x5B:  write_rune(&b, '[')
-		case 0x5D:  write_rune(&b, ']')
-		case:
-			// utf-8 bytes
-			// TODO(tetra): Audit this - 4 bytes???
-			append(&pending, s[0])
-			append(&pending, s[1])
-			if len(pending) == 4 {
-				r, _ := utf8.decode_rune(pending[:])
-				write_rune(&b, r)
-				clear(&pending)
-			}
-		}
+		val := hex.decode_sequence(s[:2]) or_return
+		write_byte(&b, val)
 		s = s[2:]
 	}