
ZLIB: Moar faster.

Jeroen van Rijn 4 years ago
parent
commit
30a5808460
3 changed files with 109 additions and 159 deletions
  1. + 34 - 60  core/compress/common.odin
  2. + 19 - 22  core/compress/gzip/gzip.odin
  3. + 56 - 77  core/compress/zlib/zlib.odin
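
In short, the speed-up comes from three structural changes, all visible in the diffs below: the former Code_Buffer struct is folded into Context, so the bit reader state (code_buffer, num_bits) travels with the I/O context and every helper takes one pointer instead of two; the separate sliding-window ring buffer (last, indexed through window_mask) is gone, and LZ77 back-references are now read straight out of the already-written output buffer; and the incrementally updated Adler32 (rolling_hash) is replaced by a single hash.adler32 pass over the finished output.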

+ 34 - 60
core/compress/common.odin

@@ -127,10 +127,9 @@ Deflate_Error :: enum {
 
 
 // General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
-	input:             io.Stream,
+Context :: struct {
 	input_data:        []u8,
-
+	input:             io.Stream,
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
@@ -140,14 +139,9 @@ Context :: struct #packed {
 	size_packed:   i64,
 	size_unpacked: i64,
 
-	/*
-		Used to update hash as we write instead of all at once.
-	*/
-	rolling_hash:  u32,
-	/*
-		Reserved
-	*/
-	reserved:      [2]u32,
+	code_buffer: u64,
+	num_bits:    u64,
+
 	/*
 		Flags:
 			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
@@ -155,28 +149,8 @@ Context :: struct #packed {
 	*/
 	input_fully_in_memory: b8,
 	input_refills_from_stream: b8,
-	output_to_stream: b8,
-	reserved_flag: b8,
-
-	bit_buffer_stuff: [3]u64,
-
-
 }
-// #assert(size_of(Context) == 128);
 
-/*
-	Compression algorithm context
-*/
-Code_Buffer :: struct #packed {
-	code_buffer: u64,
-	num_bits:    u64,
-	/*
-		Sliding window buffer. Size must be a power of two.
-	*/
-	window_mask: i64,
-	last:        [dynamic]u8,
-}
-#assert(size_of(Code_Buffer) == 64);
 
 // Stream helpers
 /*
@@ -290,26 +264,26 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
 // Sliding window read back
 @(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^Context, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
-	return cb.last[offset & cb.window_mask], .None;
+	return z.output.buf[z.bytes_written - offset], .None;
 }
 
 // Generalized bit reader LSB
 @(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
 	refill := u64(width);
 
 	for {
-		if cb.num_bits > refill {
+		if z.num_bits > refill {
 			break;
 		}
-		if cb.code_buffer == 0 && cb.num_bits > 63 {
-			cb.num_bits = 0;
+		if z.code_buffer == 0 && z.num_bits > 63 {
+			z.num_bits = 0;
 		}
-		if cb.code_buffer >= 1 << uint(cb.num_bits) {
+		if z.code_buffer >= 1 << uint(z.num_bits) {
 			// Code buffer is malformed.
-			cb.num_bits = max(u64);
+			z.num_bits = max(u64);
 			return;
 		}
 		b, err := read_u8(z);
@@ -317,48 +291,48 @@ refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
 			// This is fine at the end of the file.
 			return;
 		}
-		cb.code_buffer |= (u64(b) << u8(cb.num_bits));
-		cb.num_bits += 8;
+		z.code_buffer |= (u64(b) << u8(z.num_bits));
+		z.num_bits += 8;
 	}
 }
 
 @(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
-	cb.code_buffer >>= width;
-	cb.num_bits -= u64(width);
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= u64(width);
 }
 
 @(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	if cb.num_bits < u64(width) {
-		refill_lsb(z, cb);
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	if z.num_bits < u64(width) {
+		refill_lsb(z);
 	}
 	// assert(z.num_bits >= i8(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	assert(cb.num_bits >= u64(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	assert(z.num_bits >= u64(width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_no_refill_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
-	discard := u8(cb.num_bits & 7);
-	consume_bits_lsb(cb, discard);
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
 }
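
The merged bit reader is easiest to see in isolation. Below is a minimal, self-contained Odin sketch of the same LSB-first refill/read pattern that now lives on Context; the struct and procedure names are illustrative stand-ins, not the core:compress API.

package bit_reader_sketch

import "core:fmt"

// Stand-in for the merged Context: the bit cache and bit count now
// live directly on the decompression context instead of a Code_Buffer.
Sketch_Context :: struct {
	input:       []u8,
	code_buffer: u64,
	num_bits:    u64,
}

// Top up the LSB-first bit cache until at least `width` bits are present.
refill_lsb :: proc(z: ^Sketch_Context, width := u64(24)) {
	for z.num_bits <= width && len(z.input) > 0 {
		z.code_buffer |= u64(z.input[0]) << z.num_bits;
		z.input        = z.input[1:];
		z.num_bits    += 8;
	}
}

// Read `width` bits, least significant bits first.
read_bits_lsb :: proc(z: ^Sketch_Context, width: u8) -> u32 {
	if z.num_bits < u64(width) {
		refill_lsb(z);
	}
	k := u32(z.code_buffer & ~(~u64(0) << width));
	z.code_buffer >>= width;
	z.num_bits     -= u64(width);
	return k;
}

main :: proc() {
	z := Sketch_Context{input = []u8{0b1010_1101, 0xff}};
	fmt.println(read_bits_lsb(&z, 3)); // low 3 bits of 0xAD: 0b101 = 5
	fmt.println(read_bits_lsb(&z, 5)); // next 5 bits: 0b10101 = 21
}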

+ 19 - 22
core/compress/gzip/gzip.odin

@@ -133,13 +133,13 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz
 	return;
 }
 
-load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	expected_output_size := expected_output_size;
 
 	input_data_consumed := 0;
 
-	ctx.output = buf;
+	z.output = buf;
 
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
 		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
@@ -151,7 +151,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	b: []u8;
 
-	header, e := compress.read_data(ctx, Header);
+	header, e := compress.read_data(z, Header);
 	if e != .None {
 		return E_General.File_Too_Short;
 	}
@@ -180,7 +180,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	// printf("os: %v\n", OS_Name[header.os]);
 
 	if .extra in header.flags {
-		xlen, e_extra := compress.read_data(ctx, u16le);
+		xlen, e_extra := compress.read_data(z, u16le);
 		input_data_consumed += 2;
 
 		if e_extra != .None {
@@ -198,7 +198,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 		for xlen >= 4 {
 			// println("Parsing Extra field(s).");
-			field_id, field_error = compress.read_data(ctx, [2]u8);
+			field_id, field_error = compress.read_data(z, [2]u8);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
@@ -206,7 +206,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			xlen -= 2;
 			input_data_consumed += 2;
 
-			field_length, field_error = compress.read_data(ctx, u16le);
+			field_length, field_error = compress.read_data(z, u16le);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
@@ -222,7 +222,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
 			if field_length > 0 {
-				b, field_error = compress.read_slice(ctx, int(field_length));
+				b, field_error = compress.read_slice(z, int(field_length));
 				if field_error != .None {
 					// printf("Parsing Extra returned: %v\n", field_error);
 					return E_General.Stream_Too_Short;
@@ -246,7 +246,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		name_error: io.Error;
 
 		for i < len(name) {
-			b, name_error = compress.read_slice(ctx, 1);
+			b, name_error = compress.read_slice(z, 1);
 			if name_error != .None {
 				return E_General.Stream_Too_Short;
 			}
@@ -270,7 +270,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		comment_error: io.Error;
 
 		for i < len(comment) {
-			b, comment_error = compress.read_slice(ctx, 1);
+			b, comment_error = compress.read_slice(z, 1);
 			if comment_error != .None {
 				return E_General.Stream_Too_Short;
 			}
@@ -289,7 +289,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	if .header_crc in header.flags {
 		crc_error: io.Error;
-		_, crc_error = compress.read_slice(ctx, 2);
+		_, crc_error = compress.read_slice(z, 2);
 		input_data_consumed += 2;
 		if crc_error != .None {
 			return E_General.Stream_Too_Short;
@@ -303,9 +303,6 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	/*
 		We should have arrived at the ZLIB payload.
 	*/
-	code_buffer := compress.Code_Buffer{};
-	cb := &code_buffer;
-
 	payload_u32le: u32le;
 
 	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
@@ -325,10 +322,10 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
 
 		*/
-		if ctx.input_fully_in_memory && known_gzip_size > -1 {
+		if z.input_fully_in_memory && known_gzip_size > -1 {
 			offset := known_gzip_size - input_data_consumed - 4;
-			if len(ctx.input_data) >= offset + 4 {
-				length_bytes         := ctx.input_data[offset:][:4];
+			if len(z.input_data) >= offset + 4 {
+				length_bytes         := z.input_data[offset:][:4];
 				payload_u32le         = (^u32le)(&length_bytes[0])^;
 				expected_output_size = int(payload_u32le);
 			}
@@ -342,27 +339,27 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
 
-	zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size);
+	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
 	if zlib_error != nil {
 		return zlib_error;
 	}
 	/*
 		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
 	*/
-	compress.discard_to_next_byte_lsb(cb);
+	compress.discard_to_next_byte_lsb(z);
 
 	footer_error: io.Error;
 
 	payload_crc_b: [4]u8;
 	for _, i in payload_crc_b {
-		if cb.num_bits >= 8 {
-			payload_crc_b[i] = u8(compress.read_bits_lsb(ctx, cb, 8));
+		if z.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
 		} else {
-			payload_crc_b[i], footer_error = compress.read_u8(ctx);
+			payload_crc_b[i], footer_error = compress.read_u8(z);
 		}
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
-	payload_u32le, footer_error = compress.read_data(ctx, u32le);
+	payload_u32le, footer_error = compress.read_data(z, u32le);
 
 	payload := bytes.buffer_to_bytes(buf);
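
A subtlety the refactor keeps intact: after inflate_raw returns, the bit reader (now part of Context) may still hold footer bytes in its 64-bit cache, which is why the CRC32 read above drains whole bytes via num_bits before falling back to read_u8. A self-contained sketch of that drain order, with illustrative names:

package footer_drain_sketch

import "core:fmt"

// Illustrative stand-in for the bit reader state after inflate finishes.
Bit_Cache :: struct {
	code_buffer: u64,  // bits already pulled from the input
	num_bits:    u64,
	rest:        []u8, // input bytes never pulled into the cache
}

// Take the next footer byte from the cache if one is buffered there,
// otherwise from the remaining input.
next_byte :: proc(c: ^Bit_Cache) -> u8 {
	if c.num_bits >= 8 {
		b := u8(c.code_buffer & 0xff);
		c.code_buffer >>= 8;
		c.num_bits    -= 8;
		return b;
	}
	b := c.rest[0];
	c.rest = c.rest[1:];
	return b;
}

main :: proc() {
	// Three footer bytes were already swallowed by the bit reader...
	c := Bit_Cache{code_buffer = 0xCCBBAA, num_bits = 24, rest = []u8{0xDD}};
	crc: [4]u8;
	for _, i in crc {
		crc[i] = next_byte(&c);
	}
	fmt.println(crc); // [170, 187, 204, 221]: cache first, then the stream
}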
 

+ 56 - 77
core/compress/zlib/zlib.odin

@@ -30,8 +30,7 @@ import "core:bytes"
 	`Context.rolling_hash` if not inlining it is still faster.
 
 */
-Context     :: compress.Context;
-Code_Buffer :: compress.Code_Buffer;
+Context :: compress.Context;
 
 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
@@ -166,7 +165,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 */
 
 @(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		Resize if needed.
 	*/
@@ -179,14 +178,13 @@ write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err:
 
 	#no_bounds_check {
 		z.output.buf[z.bytes_written] = c;
-		cb.last[z.bytes_written & cb.window_mask] = c;
 	}
 	z.bytes_written += 1;
 	return .None;
 }
 
 @(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
+repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -206,7 +204,6 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 	#no_bounds_check {
 		for _ in 0..<count {
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
 			z.bytes_written += 1;
 		}
 	}
@@ -215,14 +212,14 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 }
 
 @(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 	*/
 
-	offset := z.bytes_written - i64(distance);
+	offset := i64(distance);
 
 	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
 		e := grow_buffer(&z.output.buf);
@@ -233,10 +230,9 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) ->
 
 	#no_bounds_check {
 		for _ in 0..<count {
-			c := cb.last[offset & cb.window_mask];
+			c := z.output.buf[z.bytes_written - offset];
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
-			z.bytes_written += 1; offset += 1;
+			z.bytes_written += 1;
 		}
 	}
 
@@ -308,8 +304,8 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }
 
 @(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	code := u16(compress.peek_bits_lsb(z, cb, 16));
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	code := u16(compress.peek_bits_lsb(z, 16));
 
 	k := int(z_bit_reverse(code, 16));
 	s: u8;
@@ -332,41 +328,41 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
 		return 0, E_Deflate.Bad_Huffman_Code;
 	}
 
-	compress.consume_bits_lsb(cb, s);
+	compress.consume_bits_lsb(z, s);
 
 	r = t.value[b];
 	return r, nil;
 }
 
 @(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	if cb.num_bits < 16 {
-		if cb.num_bits > 63 {
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	if z.num_bits < 16 {
+		if z.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
 		}
-		compress.refill_lsb(z, cb);
-		if cb.num_bits > 63 {
+		compress.refill_lsb(z);
+		if z.num_bits > 63 {
 			return 0, E_General.Stream_Too_Short;
 		}
 	}
-	#no_bounds_check b := t.fast[cb.code_buffer & ZFAST_MASK];
+	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
 	if b != 0 {
 		s := u8(b >> ZFAST_BITS);
-		compress.consume_bits_lsb(cb, s);
+		compress.consume_bits_lsb(z, s);
 		return b & 511, nil;
 	}
-	return decode_huffman_slowpath(z, cb, t);
+	return decode_huffman_slowpath(z, t);
 }
 
 @(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
-		value, e := decode_huffman(z, cb, z_repeat);
+		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
 			return e;
 		}
 		if value < 256 {
-			e := write_byte(z, cb, u8(value));
+			e := write_byte(z, u8(value));
 			if e != .None {
 				return E_General.Output_Too_Short;
 			}
@@ -379,17 +375,17 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 			value -= 257;
 			length := Z_LENGTH_BASE[value];
 			if Z_LENGTH_EXTRA[value] > 0 {
-				length += u16(compress.read_bits_lsb(z, cb, Z_LENGTH_EXTRA[value]));
+				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
 			}
 
-			value, e = decode_huffman(z, cb, z_offset);
+			value, e = decode_huffman(z, z_offset);
 			if e != nil {
 				return E_Deflate.Bad_Huffman_Code;
 			}
 
 			distance := Z_DIST_BASE[value];
 			if Z_DIST_EXTRA[value] > 0 {
-				distance += u16(compress.read_bits_lsb(z, cb, Z_DIST_EXTRA[value]));
+				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
 			}
 
 			if z.bytes_written < i64(distance) {
@@ -397,7 +393,6 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 				return E_Deflate.Bad_Distance;
 			}
 
-			offset := i64(z.bytes_written - i64(distance));
 			/*
 				These might be sped up with a repl_byte call that copies
 				from the already written output more directly, and that
@@ -410,15 +405,15 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 					Replicate the last outputted byte, length times.
 				*/
 				if length > 0 {
-					c := cb.last[offset & cb.window_mask];
-					e := repl_byte(z, cb, length, c);
+					c := z.output.buf[z.bytes_written - i64(distance)];
+					e := repl_byte(z, length, c);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
 				}
 			} else {
 				if length > 0 {
-					e := repl_bytes(z, cb, length, distance);
+					e := repl_bytes(z, length, distance);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
@@ -442,9 +437,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		DEFLATE stream.
 	*/
 
-	code_buffer := Code_Buffer{};
-	cb := &code_buffer;
-
 	if !raw {
 		data_size := io.size(ctx.input);
 		if data_size < 6 {
@@ -462,8 +454,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		if cinfo > 7 {
 			return E_ZLIB.Unsupported_Window_Size;
 		}
-		cb.window_mask = i64((1 << (cinfo + 8) - 1));
-
 		flg, _ := compress.read_u8(ctx);
 
 		fcheck  := flg & 0x1f;
@@ -488,23 +478,21 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 			at the end to compare checksums.
 		*/
 
-		// Seed the Adler32 rolling checksum.
-		ctx.rolling_hash = 1;
 	}
 
 	// Parse ZLIB stream without header.
-	err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size);
+	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
 	if err != nil {
 		return err;
 	}
 
 	if !raw {
-		compress.discard_to_next_byte_lsb(cb);
-		adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
+		compress.discard_to_next_byte_lsb(ctx);
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
 
-		ctx.rolling_hash = hash.adler32(ctx.output.buf[:]);
+		output_hash := hash.adler32(ctx.output.buf[:]);
 
-		if ctx.rolling_hash != u32(adler32) {
+		if output_hash != u32(adler32) {
 			return E_General.Checksum_Failed;
 		}
 	}
@@ -512,7 +500,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 }
 
 @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;
 
 	if expected_output_size <= 0 {
@@ -536,8 +524,8 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 		return .Resize_Failed;
 	}
 
-	cb.num_bits    = 0;
-	cb.code_buffer = 0;
+	z.num_bits    = 0;
+	z.code_buffer = 0;
 
 	z_repeat:      ^Huffman_Table;
 	z_offset:      ^Huffman_Table;
@@ -559,21 +547,12 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 	defer free(z_offset);
 	defer free(codelength_ht);
 
-	if cb.window_mask == 0 {
-		cb.window_mask = DEFLATE_MAX_DISTANCE - 1;
-	}
-
-	// Allocate rolling window buffer.
-	cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
-	defer delete(cb.last);
-
-
 	final := u32(0);
 	type  := u32(0);
 
 	for {
-		final = compress.read_bits_lsb(z, cb, 1);
-		type  = compress.read_bits_lsb(z, cb, 2);
+		final = compress.read_bits_lsb(z, 1);
+		type  = compress.read_bits_lsb(z, 2);
 
 		// fmt.printf("Final: %v | Type: %v\n", final, type);
 
@@ -582,10 +561,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 			// Uncompressed block
 
 			// Discard bits until next byte boundary
-			compress.discard_to_next_byte_lsb(cb);
+			compress.discard_to_next_byte_lsb(z);
 
-			uncompressed_len  := i16(compress.read_bits_lsb(z, cb, 16));
-			length_check      := i16(compress.read_bits_lsb(z, cb, 16));
+			uncompressed_len  := i16(compress.read_bits_lsb(z, 16));
+			length_check      := i16(compress.read_bits_lsb(z, 16));
 
 			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
 
@@ -599,9 +578,9 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				and a single Adler32 update after.
 			*/
 			#no_bounds_check for uncompressed_len > 0 {
-				compress.refill_lsb(z, cb);
-				lit := compress.read_bits_lsb(z, cb, 8);
-				write_byte(z, cb, u8(lit));
+				compress.refill_lsb(z);
+				lit := compress.read_bits_lsb(z, 8);
+				write_byte(z, u8(lit));
 				uncompressed_len -= 1;
 			}
 		case 3:
@@ -625,14 +604,14 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				//i: u32;
 				n: u32;
 
-				compress.refill_lsb(z, cb, 14);
-				hlit  := compress.read_bits_no_refill_lsb(z, cb, 5) + 257;
-				hdist := compress.read_bits_no_refill_lsb(z, cb, 5) + 1;
-				hclen := compress.read_bits_no_refill_lsb(z, cb, 4) + 4;
+				compress.refill_lsb(z, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
 				ntot  := hlit + hdist;
 
 				#no_bounds_check for i in 0..<hclen {
-					s := compress.read_bits_lsb(z, cb, 3);
+					s := compress.read_bits_lsb(z, 3);
 					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
 				}
 				err = build_huffman(codelength_ht, codelength_sizes[:]);
@@ -644,7 +623,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				c: u16;
 
 				for n < ntot {
-					c, err = decode_huffman(z, cb, codelength_ht);
+					c, err = decode_huffman(z, codelength_ht);
 					if err != nil {
 						return err;
 					}
@@ -657,18 +636,18 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 						n += 1;
 					} else {
 						fill := u8(0);
-						compress.refill_lsb(z, cb, 7);
+						compress.refill_lsb(z, 7);
 						switch c {
 						case 16:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 2) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
 							if n == 0 {
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 							}
 							fill = lencodes[n - 1];
 						case 17:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 3) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
 						case 18:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 7) + 11);
+							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
 						case:
 							return E_Deflate.Huffman_Bad_Code_Lengths;
 						}
@@ -698,7 +677,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 					return err;
 				}
 			}
-			err = parse_huffman_block(z, cb, z_repeat, z_offset);
+			err = parse_huffman_block(z, z_repeat, z_offset);
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if err != nil {
 				return err;
@@ -734,7 +713,7 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, e
 	return err;
 }
 
-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
 	ctx := Context{};
 
 	r := bytes.Reader{};
@@ -746,7 +725,7 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
 
 	ctx.output = buf;
 
-	return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
+	return inflate_from_stream_raw(z=&ctx, expected_output_size=expected_output_size);
 }
 
 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
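
Net effect on the public API: Code_Buffer is gone from every signature, so decompressing a byte array is a single call. A usage sketch under that assumption; the compressed constant is a precomputed zlib stream for "hello":

package inflate_usage_sketch

import "core:bytes"
import "core:compress/zlib"
import "core:fmt"

main :: proc() {
	// zlib-wrapped DEFLATE of "hello": 0x78 0x9C header, Adler32 trailer.
	compressed := []u8{
		0x78, 0x9c, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00,
		0x06, 0x2c, 0x02, 0x15,
	};

	buf := bytes.Buffer{};
	defer bytes.buffer_destroy(&buf);

	// No Code_Buffer to set up anymore; the bit reader lives in Context.
	err := zlib.inflate(compressed, &buf);
	if err != nil {
		fmt.println("inflate failed:", err);
		return;
	}
	fmt.println(string(bytes.buffer_to_bytes(&buf))); // -> hello
}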