hace 4 años · 1cfe226686
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -131,13 +131,16 @@ Code_Buffer :: struct #packed {
 
				 	This simplifies end-of-stream handling where bits may be left in the bit buffer.
			
 
				 */
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Slice"); }
			
 
				 
			
 
				-	if len(z.input_data) >= size {
			
 
				-		res = z.input_data[:size];
			
 
				-		z.input_data = z.input_data[size:];
			
 
				-		return res, .None;
			
 
				+	#no_bounds_check {
			
 
				+		if len(z.input_data) >= size {
			
 
				+			res = z.input_data[:size];
			
 
				+			z.input_data = z.input_data[size:];
			
 
				+			return res, .None;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if z.input_fully_in_memory {
			
@@ -160,6 +163,7 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 
				 	return []u8{}, e;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
			
 
				 
			
@@ -171,9 +175,18 @@ read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
				 	return T{}, e;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
			
 
				 
			
 
				+	#no_bounds_check {
			
 
				+		if len(z.input_data) >= 1 {
			
 
				+			res = z.input_data[0];
			
 
				+			z.input_data = z.input_data[1:];
			
 
				+			return res, .None;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	b, e := read_slice(z, 1);
			
 
				 	if e == .None {
			
 
				 		return b[0], .None;
			
@@ -182,14 +195,17 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 
				 	return 0, e;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
			
 
				 
			
 
				 	size :: size_of(T);
			
 
				 
			
 
				-	if len(z.input_data) >= size {
			
 
				-		buf := z.input_data[:size];
			
 
				-		return (^T)(&buf[0])^, .None;
			
 
				+	#no_bounds_check {
			
 
				+		if len(z.input_data) >= size {
			
 
				+			buf := z.input_data[:size];
			
 
				+			return (^T)(&buf[0])^, .None;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if z.input_fully_in_memory {
			
@@ -224,12 +240,14 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
				 }
			
 
				 
			
 
				 // Sliding window read back
			
 
				+@(optimization_mode="speed")
			
 
				 peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
			
 
				 	// Look back into the sliding window.
				 	// Reads a single byte from `cb.last` at index `offset & cb.window_mask`.
				 	// The error result is always `.None`; no failure path exists in this proc.
				 	// NOTE(review): presumably `window_mask` is (window size - 1) for a
				 	// power-of-two window, so the AND wraps `offset` into the window - confirm.
			
 
				 	return cb.last[offset & cb.window_mask], .None;
			
 
				 }
			
 
				 
			
 
				 // Generalized bit reader LSB
			
 
				+@(optimization_mode="speed")
			
 
				 refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
			
 
				 	for {
			
@@ -254,11 +272,13 @@ refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
			
 
				 	// Discards `width` bits from the low end of the bit buffer: the buffer is
				 	// shifted right by `width` and the buffered-bit count is reduced to match.
				 	// There is no guard here against `width` exceeding `cb.num_bits`.
				 	// NOTE(review): callers presumably ensure enough bits are buffered first - confirm.
				 	cb.code_buffer >>= width;
			
 
				 	cb.num_bits -= u64(width);
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
			
 
				 	if cb.num_bits < u64(width) {
			
 
				 		refill_lsb(z, cb);
			
@@ -267,23 +287,27 @@ peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) ->
 
				 	return u32(cb.code_buffer & ~(~u64(0) << width));
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
			
 
				 	// Returns the lowest `width` bits of `cb.code_buffer` without consuming them
				 	// and without attempting a refill. `z` is not used in this body.
				 	// The assert documents the precondition: the buffer must already hold at
				 	// least `width` bits.
				 	assert(cb.num_bits >= u64(width));
			
 
				 	// `~(~u64(0) << width)` is a mask with only the low `width` bits set.
				 	return u32(cb.code_buffer & ~(~u64(0) << width));
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
			
 
				 	// Reads `width` bits: peeks them via `peek_bits_lsb` (which calls
				 	// `refill_lsb` when fewer than `width` bits are buffered), then removes
				 	// them from the buffer with `consume_bits_lsb`. Returns the peeked value.
				 	k := peek_bits_lsb(z, cb, width);
			
 
				 	consume_bits_lsb(cb, width);
			
 
				 	return k;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
			
 
				 	// Same as reading `width` bits, but never refills: the peek step asserts
				 	// that `cb.num_bits >= width`, so the caller must have buffered enough
				 	// bits beforehand. The bits are then consumed and the value returned.
				 	k := peek_bits_no_refill_lsb(z, cb, width);
			
 
				 	consume_bits_lsb(cb, width);
			
 
				 	return k;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
			
 
				 	discard := u8(cb.num_bits & 7);
			
 
				 	consume_bits_lsb(cb, discard);
			
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -115,7 +115,7 @@ Huffman_Table :: struct {
 
				 };
			
 
				 
			
 
				 // Implementation starts here
			
 
				-
			
 
				+@(optimization_mode="speed")
			
 
				 z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
			
 
				 	assert(bits <= 16);
			
 
				 	// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
			
@@ -130,6 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
			
 
				 	c := c;
			
@@ -146,6 +147,7 @@ write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err:
 
				 	return .None;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
			
 
				 	/*
			
@@ -168,6 +170,7 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 
				 	return .None;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }
			
 
				 	/*
			
@@ -199,6 +202,7 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
 
				 	return new(Huffman_Table, allocator), nil;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
			
 
				 	sizes:     [HUFFMAN_MAX_BITS+1]int;
			
@@ -258,6 +262,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 
				 	return nil;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
			
 
				 	code := u16(compress.peek_bits_lsb(z, cb, 16));
			
@@ -289,6 +294,7 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
 
				 	return r, nil;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
			
 
				 	if cb.num_bits < 16 {
			
@@ -309,6 +315,7 @@ decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r:
 
				 	return decode_huffman_slowpath(z, cb, t);
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
			
 
				 	#no_bounds_check for {
			
@@ -379,6 +386,7 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 
				 	}
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
			
 
				 	/*
			
 
				 		ctx.input must be an io.Stream backed by an implementation that supports:
			
@@ -459,7 +467,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 
				 	return nil;
			
 
				 }
			
 
				 
			
 
				-// @(optimization_mode="speed")
			
 
				+@(optimization_mode="speed")
			
 
				 inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check {
			
 
				 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
			
 
				 	final := u32(0);
			
--- a/core/hash/crc.odin
+++ b/core/hash/crc.odin
@@ -1,15 +1,18 @@
 
				 package hash
			
 
				 
			
 
				-crc32 :: proc(data: []byte, seed := u32(0)) -> u32 #no_bounds_check {
			
 
				+@(optimization_mode="speed")
			
 
				+crc32 :: proc(data: []byte, seed := u32(0)) -> u32 {
			
 
				 	// Table-driven CRC over `data`, one byte per iteration.
				 	// The seed is bitwise-inverted on entry and the accumulator is inverted
				 	// again on return.
				 	result := ~u32(seed);
			
 
				-	for b in data {
			
 
				+	 #no_bounds_check for b in data {
			
 
				 		// The table index is masked with 0xff, so it is always in 0..255.
				 		// NOTE(review): assumes `_crc32_table` has at least 256 entries, which
				 		// is what makes skipping the bounds check safe - confirm.
				 		result = result>>8 ~ _crc32_table[(result ~ u32(b)) & 0xff];
			
 
				 	}
			
 
				 	return ~result;
			
 
				 }
			
 
				+
			
 
				+@(optimization_mode="speed")
			
 
				 crc64 :: proc(data: []byte, seed := u32(0)) -> u64 #no_bounds_check {
			
 
				 	result := ~u64(seed);
			
 
				-	for b in data {
			
 
				+	 #no_bounds_check for b in data {
			
 
				 		result = result>>8 ~ _crc64_table[(result ~ u64(b)) & 0xff];
			
 
				 	}
			
 
				 	return ~result;
			
--- a/core/hash/hash.odin
+++ b/core/hash/hash.odin
@@ -2,16 +2,18 @@ package hash
 
				 
			
 
				 import "core:mem"
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 adler32 :: proc(data: []byte, seed := u32(1)) -> u32 {
			
 
				 	// Computes a two-part running checksum over `data`, continuing from `seed`.
				 	// Both sums are reduced modulo ADLER_CONST after every byte.
				 	ADLER_CONST :: 65521;
			
 
				 	// `a` starts from the low 16 bits of the seed, `b` from the high 16 bits.
				 	a, b: u32 = seed & 0xFFFF, seed >> 16;
			
 
				-	for x in data {
			
 
				+	#no_bounds_check for x in data {
			
 
				 		a = (a + u32(x)) % ADLER_CONST;
			
 
				 		b = (b + a) % ADLER_CONST;
			
 
				 	}
			
 
				 	// Result packs `b` into the upper 16 bits and `a` into the lower 16 bits.
				 	return (b << 16) | a;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 djb2 :: proc(data: []byte) -> u32 {
			
 
				 	hash: u32 = 5381;
			
 
				 	for b in data {
			
@@ -20,6 +22,7 @@ djb2 :: proc(data: []byte) -> u32 {
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 fnv32 :: proc(data: []byte) -> u32 {
			
 
				 	h: u32 = 0x811c9dc5;
			
 
				 	for b in data {
			
@@ -28,6 +31,7 @@ fnv32 :: proc(data: []byte) -> u32 {
 
				 	return h;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 fnv64 :: proc(data: []byte) -> u64 {
			
 
				 	h: u64 = 0xcbf29ce484222325;
			
 
				 	for b in data {
			
@@ -36,6 +40,7 @@ fnv64 :: proc(data: []byte) -> u64 {
 
				 	return h;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 fnv32a :: proc(data: []byte) -> u32 {
			
 
				 	h: u32 = 0x811c9dc5;
			
 
				 	for b in data {
			
@@ -44,6 +49,7 @@ fnv32a :: proc(data: []byte) -> u32 {
 
				 	return h;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 fnv64a :: proc(data: []byte) -> u64 {
			
 
				 	h: u64 = 0xcbf29ce484222325;
			
 
				 	for b in data {
			
@@ -52,6 +58,7 @@ fnv64a :: proc(data: []byte) -> u64 {
 
				 	return h;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 jenkins :: proc(data: []byte) -> u32 {
			
 
				 	hash: u32 = 0;
			
 
				 	for b in data {
			
@@ -65,6 +72,7 @@ jenkins :: proc(data: []byte) -> u32 {
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 murmur32 :: proc(data: []byte) -> u32 {
			
 
				 	c1_32: u32 : 0xcc9e2d51;
			
 
				 	c2_32: u32 : 0x1b873593;
			
@@ -114,6 +122,7 @@ murmur32 :: proc(data: []byte) -> u32 {
 
				 	return h1;
			
 
				 }
			
 
				 
			
 
				+@(optimization_mode="speed")
			
 
				 murmur64 :: proc(data: []byte) -> u64 {
			
 
				 	SEED :: 0x9747b28c;
			
 
				 
			
@@ -219,7 +228,7 @@ murmur64 :: proc(data: []byte) -> u64 {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-
			
 
				+@(optimization_mode="speed")
			
 
				 sdbm :: proc(data: []byte) -> u32 {
			
 
				 	hash: u32 = 0;
			
 
				 	for b in data {