3 years ago · e76a5d8e12
--- a/core/encoding/varint/leb128.odin
+++ b/core/encoding/varint/leb128.odin
@@ -6,21 +6,31 @@
 
				 		Jeroen van Rijn: Initial implementation.
			
 
				 */
			
 
				 
			
 
				-// package varint implements variable length integer encoding and decoding
			
 
				-// using the LEB128 format as used by DWARF debug and other file formats
			
 
				+// package varint implements variable length integer encoding and decoding using
			
 
				+// the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
			
 
				 package varint
			
 
				 
			
 
				-// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
			
 
				-// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes.
			
 
				 // In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
			
 
				-decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) {
			
 
				+// Instead we'll set limits on the values we'll encode/decode
			
 
				+// 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`.
			
 
				+LEB128_MAX_BYTES    :: 19
			
 
				+
			
 
				+Error :: enum {
			
 
				+	None             = 0,
			
 
				+	Buffer_Too_Small = 1,
			
 
				+	Value_Too_Large  = 2,
			
 
				+}
			
 
				+
			
 
				+// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
			
 
				+// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
			
 
				+decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
			
 
				 	more := true
			
 
				 
			
 
				 	for v, i in buf {
			
 
				 		size = i + 1
			
 
				 
			
 
				-		if size > size_of(u128) {
			
 
				-			return
			
 
				+		if size == LEB128_MAX_BYTES && v > 0b0000_0011 {
			
 
				+			return 0, 0, .Value_Too_Large
			
 
				 		}
			
 
				 
			
 
				 		val |= u128(v & 0x7f) << uint(i * 7)
			
@@ -33,25 +43,26 @@ decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) {
 
				 
			
 
				 	// If the buffer runs out before the number ends, return an error.
			
 
				 	if more {
			
 
				-		return 0, 0
			
 
				+		return 0, 0, .Buffer_Too_Small
			
 
				 	}
			
 
				 	return
			
 
				 }
			
 
				 
			
 
				 // Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used.
			
 
				-// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes.
			
 
				-// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
			
 
				-decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) {
			
 
				+// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
			
 
				+decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int, err: Error) {
			
 
				 	shift: uint
			
 
				 
			
 
				 	if len(buf) == 0 {
			
 
				-		return
			
 
				+		return 0, 0, .Buffer_Too_Small
			
 
				 	}
			
 
				 
			
 
				 	for v in buf {
			
 
				 		size += 1
			
 
				-		if size > size_of(i128) {
			
 
				-			return
			
 
				+
			
 
				+		// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
			
 
				+		if size == LEB128_MAX_BYTES && v > 0b0000_0011 {
			
 
				+			return 0, 0, .Value_Too_Large
			
 
				 		}
			
 
				 
			
 
				 		val |= i128(v & 0x7f) << shift
			
@@ -64,4 +75,61 @@ decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) {
 
				 		val |= max(i128) << shift
			
 
				 	}
			
 
				 	return
			
 
				+}
			
 
				+
			
 
				+// Encode `val` into `buf` as an unsigned LEB128 encoded series of bytes.
			
 
				+// `buf` must be appropriately sized.
			
 
				+encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) {
			
 
				+	val := val
			
 
				+
			
 
				+	for {
			
 
				+		size += 1
			
 
				+
			
 
				+		if size > len(buf) {
			
 
				+			return 0, .Buffer_Too_Small
			
 
				+		}
			
 
				+
			
 
				+		low := val & 0x7f
			
 
				+		val >>= 7
			
 
				+
			
 
				+		if val > 0 {
			
 
				+			low |= 0x80 // more bytes to follow
			
 
				+		}
			
 
				+		buf[size - 1] = u8(low)
			
 
				+
			
 
				+		if val == 0 { break }
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+@(private)
			
 
				+SIGN_MASK :: (i128(1) << 121) // sign extend mask
			
 
				+
			
 
				+// Encode `val` into `buf` as a signed LEB128 encoded series of bytes.
			
 
				+// `buf` must be appropriately sized.
			
 
				+encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) {
			
 
				+	val      := val
			
 
				+	more     := true
			
 
				+
			
 
				+	for more {
			
 
				+		size += 1
			
 
				+
			
 
				+		if size > len(buf) {
			
 
				+			return 0, .Buffer_Too_Small
			
 
				+		}
			
 
				+
			
 
				+		low := val & 0x7f
			
 
				+		val >>= 7
			
 
				+
			
 
				+		low = (low ~ SIGN_MASK) - SIGN_MASK
			
 
				+
			
 
				+		if (val == 0 && low & 0x40 != 0x40) || (val == -1 && low & 0x40 == 0x40) {
			
 
				+			more = false
			
 
				+		} else {
			
 
				+			low |= 0x80
			
 
				+		}
			
 
				+
			
 
				+		buf[size - 1] = u8(low)
			
 
				+	}
			
 
				+	return
			
 
				 }
			
--- a/tests/core/encoding/varint/test_core_varint.odin
+++ b/tests/core/encoding/varint/test_core_varint.odin
@@ -4,6 +4,7 @@ import "core:encoding/varint"
 
				 import "core:testing"
			
 
				 import "core:fmt"
			
 
				 import "core:os"
			
 
				+import "core:slice"
			
 
				 
			
 
				 TEST_count := 0
			
 
				 TEST_fail  := 0
			
@@ -39,18 +40,40 @@ main :: proc() {
 
				 
			
 
				 @(test)
			
 
				 test_dwarf :: proc(t: ^testing.T) {
			
 
				+	buf: [varint.LEB128_MAX_BYTES]u8
			
 
				+
			
 
				 	for vector in ULEB_Vectors {
			
 
				-		val, size := varint.decode_uleb128(vector.encoded)
			
 
				+		val, size, err := varint.decode_uleb128(vector.encoded)
			
 
				 
			
 
				 		msg := fmt.tprintf("Expected %02x to decode to %v consuming %v bytes, got %v and %v", vector.encoded, vector.value, vector.size, val, size)
			
 
				 		expect(t, size == vector.size && val == vector.value, msg)
			
 
				+
			
 
				+		msg  = fmt.tprintf("Expected decoder to return error %v, got %v", vector.error, err)
			
 
				+		expect(t, err == vector.error, msg)
			
 
				+
			
 
				+		if err == .None { // Try to roundtrip
			
 
				+			size, err = varint.encode_uleb128(buf[:], vector.value)
			
 
				+
			
 
				+			msg = fmt.tprintf("Expected %v to encode to %02x, got %02x", vector.value, vector.encoded, buf[:size])
			
 
				+			expect(t, size == vector.size && slice.simple_equal(vector.encoded, buf[:size]), msg)
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	for vector in ILEB_Vectors {
			
 
				-		val, size := varint.decode_ileb128(vector.encoded)
			
 
				+		val, size, err := varint.decode_ileb128(vector.encoded)
			
 
				 
			
 
				 		msg := fmt.tprintf("Expected %02x to decode to %v consuming %v bytes, got %v and %v", vector.encoded, vector.value, vector.size, val, size)
			
 
				 		expect(t, size == vector.size && val == vector.value, msg)
			
 
				+
			
 
				+		msg  = fmt.tprintf("Expected decoder to return error %v, got %v", vector.error, err)
			
 
				+		expect(t, err == vector.error, msg)
			
 
				+
			
 
				+		if err == .None { // Try to roundtrip
			
 
				+			size, err = varint.encode_ileb128(buf[:], vector.value)
			
 
				+
			
 
				+			msg = fmt.tprintf("Expected %v to encode to %02x, got %02x", vector.value, vector.encoded, buf[:size])
			
 
				+			expect(t, size == vector.size && slice.simple_equal(vector.encoded, buf[:size]), msg)
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -58,24 +81,28 @@ ULEB_Test_Vector :: struct {
 
				 	encoded: []u8,
			
 
				 	value:   u128,
			
 
				 	size:    int,
			
 
				+	error:   varint.Error,
			
 
				 }
			
 
				 
			
 
				 ULEB_Vectors :: []ULEB_Test_Vector{
			
 
				-	{ []u8{0x00},             0,      1 },
			
 
				-    { []u8{0x7f},             127,    1 },
			
 
				-	{ []u8{0xE5, 0x8E, 0x26}, 624485, 3 },
			
 
				-    { []u8{0x80},             0,      0 },
			
 
				-    { []u8{},                 0,      0 },
			
 
				+	{ []u8{0x00},             0,      1, .None },
			
 
				+    { []u8{0x7f},             127,    1, .None },
			
 
				+	{ []u8{0xE5, 0x8E, 0x26}, 624485, 3, .None },
			
 
				+    { []u8{0x80},             0,      0, .Buffer_Too_Small },
			
 
				+    { []u8{},                 0,      0, .Buffer_Too_Small },
			
 
				 }
			
 
				 
			
 
				 ILEB_Test_Vector :: struct {
			
 
				 	encoded: []u8,
			
 
				 	value:   i128,
			
 
				 	size:    int,
			
 
				+	error:   varint.Error,
			
 
				 }
			
 
				 
			
 
				 ILEB_Vectors :: []ILEB_Test_Vector{
			
 
				-	{ []u8{0x00},             0,       1 },
			
 
				-	{ []u8{0xC0, 0xBB, 0x78}, -123456, 3 },
			
 
				-    { []u8{},                 0,       0 },
			
 
				+	{ []u8{0x00},             0,       1, .None },
			
 
				+	{ []u8{0x3f},             63,      1, .None },
			
 
				+	{ []u8{0x40},             -64,     1, .None },
			
 
				+	{ []u8{0xC0, 0xBB, 0x78}, -123456, 3, .None },
			
 
				+    { []u8{},                 0,       0, .Buffer_Too_Small },
			
 
				 }