Browse Source

ZLIB: Moar faster.

Jeroen van Rijn 4 years ago
parent
commit
30a5808460
3 changed files with 109 additions and 159 deletions
  1. 34 60
      core/compress/common.odin
  2. 19 22
      core/compress/gzip/gzip.odin
  3. 56 77
      core/compress/zlib/zlib.odin

+ 34 - 60
core/compress/common.odin

@@ -127,10 +127,9 @@ Deflate_Error :: enum {
 
 
 
 
 // General I/O context for ZLIB, LZW, etc.
 // General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
-	input:             io.Stream,
+Context :: struct {
 	input_data:        []u8,
 	input_data:        []u8,
-
+	input:             io.Stream,
 	output:            ^bytes.Buffer,
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 	bytes_written:     i64,
 
 
@@ -140,14 +139,9 @@ Context :: struct #packed {
 	size_packed:   i64,
 	size_packed:   i64,
 	size_unpacked: i64,
 	size_unpacked: i64,
 
 
-	/*
-		Used to update hash as we write instead of all at once.
-	*/
-	rolling_hash:  u32,
-	/*
-		Reserved
-	*/
-	reserved:      [2]u32,
+	code_buffer: u64,
+	num_bits:    u64,
+
 	/*
 	/*
 		Flags:
 		Flags:
 			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
 			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
@@ -155,28 +149,8 @@ Context :: struct #packed {
 	*/
 	*/
 	input_fully_in_memory: b8,
 	input_fully_in_memory: b8,
 	input_refills_from_stream: b8,
 	input_refills_from_stream: b8,
-	output_to_stream: b8,
-	reserved_flag: b8,
-
-	bit_buffer_stuff: [3]u64,
-
-
 }
 }
-// #assert(size_of(Context) == 128);
 
 
-/*
-	Compression algorithm context
-*/
-Code_Buffer :: struct #packed {
-	code_buffer: u64,
-	num_bits:    u64,
-	/*
-		Sliding window buffer. Size must be a power of two.
-	*/
-	window_mask: i64,
-	last:        [dynamic]u8,
-}
-#assert(size_of(Code_Buffer) == 64);
 
 
 // Stream helpers
 // Stream helpers
 /*
 /*
@@ -290,26 +264,26 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
 
 // Sliding window read back
 // Sliding window read back
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^Context, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
 	// Look back into the sliding window.
-	return cb.last[offset & cb.window_mask], .None;
+	return z.output.buf[z.bytes_written - offset], .None;
 }
 }
 
 
 // Generalized bit reader LSB
 // Generalized bit reader LSB
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
 	refill := u64(width);
 	refill := u64(width);
 
 
 	for {
 	for {
-		if cb.num_bits > refill {
+		if z.num_bits > refill {
 			break;
 			break;
 		}
 		}
-		if cb.code_buffer == 0 && cb.num_bits > 63 {
-			cb.num_bits = 0;
+		if z.code_buffer == 0 && z.num_bits > 63 {
+			z.num_bits = 0;
 		}
 		}
-		if cb.code_buffer >= 1 << uint(cb.num_bits) {
+		if z.code_buffer >= 1 << uint(z.num_bits) {
 			// Code buffer is malformed.
 			// Code buffer is malformed.
-			cb.num_bits = max(u64);
+			z.num_bits = max(u64);
 			return;
 			return;
 		}
 		}
 		b, err := read_u8(z);
 		b, err := read_u8(z);
@@ -317,48 +291,48 @@ refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
 			// This is fine at the end of the file.
 			// This is fine at the end of the file.
 			return;
 			return;
 		}
 		}
-		cb.code_buffer |= (u64(b) << u8(cb.num_bits));
-		cb.num_bits += 8;
+		z.code_buffer |= (u64(b) << u8(z.num_bits));
+		z.num_bits += 8;
 	}
 	}
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
-	cb.code_buffer >>= width;
-	cb.num_bits -= u64(width);
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= u64(width);
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	if cb.num_bits < u64(width) {
-		refill_lsb(z, cb);
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	if z.num_bits < u64(width) {
+		refill_lsb(z);
 	}
 	}
 	// assert(z.num_bits >= i8(width));
 	// assert(z.num_bits >= i8(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	assert(cb.num_bits >= u64(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	assert(z.num_bits >= u64(width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 	return k;
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_no_refill_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 	return k;
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
-	discard := u8(cb.num_bits & 7);
-	consume_bits_lsb(cb, discard);
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
 }
 }

+ 19 - 22
core/compress/gzip/gzip.odin

@@ -133,13 +133,13 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz
 	return;
 	return;
 }
 }
 
 
-load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	buf := buf;
 	expected_output_size := expected_output_size;
 	expected_output_size := expected_output_size;
 
 
 	input_data_consumed := 0;
 	input_data_consumed := 0;
 
 
-	ctx.output = buf;
+	z.output = buf;
 
 
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
 		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
 		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
@@ -151,7 +151,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 
 	b: []u8;
 	b: []u8;
 
 
-	header, e := compress.read_data(ctx, Header);
+	header, e := compress.read_data(z, Header);
 	if e != .None {
 	if e != .None {
 		return E_General.File_Too_Short;
 		return E_General.File_Too_Short;
 	}
 	}
@@ -180,7 +180,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	// printf("os: %v\n", OS_Name[header.os]);
 	// printf("os: %v\n", OS_Name[header.os]);
 
 
 	if .extra in header.flags {
 	if .extra in header.flags {
-		xlen, e_extra := compress.read_data(ctx, u16le);
+		xlen, e_extra := compress.read_data(z, u16le);
 		input_data_consumed += 2;
 		input_data_consumed += 2;
 
 
 		if e_extra != .None {
 		if e_extra != .None {
@@ -198,7 +198,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 
 		for xlen >= 4 {
 		for xlen >= 4 {
 			// println("Parsing Extra field(s).");
 			// println("Parsing Extra field(s).");
-			field_id, field_error = compress.read_data(ctx, [2]u8);
+			field_id, field_error = compress.read_data(z, [2]u8);
 			if field_error != .None {
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
 				return E_General.Stream_Too_Short;
@@ -206,7 +206,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			xlen -= 2;
 			xlen -= 2;
 			input_data_consumed += 2;
 			input_data_consumed += 2;
 
 
-			field_length, field_error = compress.read_data(ctx, u16le);
+			field_length, field_error = compress.read_data(z, u16le);
 			if field_error != .None {
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
 				return E_General.Stream_Too_Short;
@@ -222,7 +222,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 
 			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
 			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
 			if field_length > 0 {
 			if field_length > 0 {
-				b, field_error = compress.read_slice(ctx, int(field_length));
+				b, field_error = compress.read_slice(z, int(field_length));
 				if field_error != .None {
 				if field_error != .None {
 					// printf("Parsing Extra returned: %v\n", field_error);
 					// printf("Parsing Extra returned: %v\n", field_error);
 					return E_General.Stream_Too_Short;
 					return E_General.Stream_Too_Short;
@@ -246,7 +246,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		name_error: io.Error;
 		name_error: io.Error;
 
 
 		for i < len(name) {
 		for i < len(name) {
-			b, name_error = compress.read_slice(ctx, 1);
+			b, name_error = compress.read_slice(z, 1);
 			if name_error != .None {
 			if name_error != .None {
 				return E_General.Stream_Too_Short;
 				return E_General.Stream_Too_Short;
 			}
 			}
@@ -270,7 +270,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		comment_error: io.Error;
 		comment_error: io.Error;
 
 
 		for i < len(comment) {
 		for i < len(comment) {
-			b, comment_error = compress.read_slice(ctx, 1);
+			b, comment_error = compress.read_slice(z, 1);
 			if comment_error != .None {
 			if comment_error != .None {
 				return E_General.Stream_Too_Short;
 				return E_General.Stream_Too_Short;
 			}
 			}
@@ -289,7 +289,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 
 	if .header_crc in header.flags {
 	if .header_crc in header.flags {
 		crc_error: io.Error;
 		crc_error: io.Error;
-		_, crc_error = compress.read_slice(ctx, 2);
+		_, crc_error = compress.read_slice(z, 2);
 		input_data_consumed += 2;
 		input_data_consumed += 2;
 		if crc_error != .None {
 		if crc_error != .None {
 			return E_General.Stream_Too_Short;
 			return E_General.Stream_Too_Short;
@@ -303,9 +303,6 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	/*
 	/*
 		We should have arrived at the ZLIB payload.
 		We should have arrived at the ZLIB payload.
 	*/
 	*/
-	code_buffer := compress.Code_Buffer{};
-	cb := &code_buffer;
-
 	payload_u32le: u32le;
 	payload_u32le: u32le;
 
 
 	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
 	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
@@ -325,10 +322,10 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
 			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
 
 
 		*/
 		*/
-		if ctx.input_fully_in_memory && known_gzip_size > -1 {
+		if z.input_fully_in_memory && known_gzip_size > -1 {
 			offset := known_gzip_size - input_data_consumed - 4;
 			offset := known_gzip_size - input_data_consumed - 4;
-			if len(ctx.input_data) >= offset + 4 {
-				length_bytes         := ctx.input_data[offset:][:4];
+			if len(z.input_data) >= offset + 4 {
+				length_bytes         := z.input_data[offset:][:4];
 				payload_u32le         = (^u32le)(&length_bytes[0])^;
 				payload_u32le         = (^u32le)(&length_bytes[0])^;
 				expected_output_size = int(payload_u32le);
 				expected_output_size = int(payload_u32le);
 			}
 			}
@@ -342,27 +339,27 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 
 	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
 	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
 
 
-	zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size);
+	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
 	if zlib_error != nil {
 	if zlib_error != nil {
 		return zlib_error;
 		return zlib_error;
 	}
 	}
 	/*
 	/*
 		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
 		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
 	*/
 	*/
-	compress.discard_to_next_byte_lsb(cb);
+	compress.discard_to_next_byte_lsb(z);
 
 
 	footer_error: io.Error;
 	footer_error: io.Error;
 
 
 	payload_crc_b: [4]u8;
 	payload_crc_b: [4]u8;
 	for _, i in payload_crc_b {
 	for _, i in payload_crc_b {
-		if cb.num_bits >= 8 {
-			payload_crc_b[i] = u8(compress.read_bits_lsb(ctx, cb, 8));
+		if z.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
 		} else {
 		} else {
-			payload_crc_b[i], footer_error = compress.read_u8(ctx);
+			payload_crc_b[i], footer_error = compress.read_u8(z);
 		}
 		}
 	}
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
 	payload_crc := transmute(u32le)payload_crc_b;
-	payload_u32le, footer_error = compress.read_data(ctx, u32le);
+	payload_u32le, footer_error = compress.read_data(z, u32le);
 
 
 	payload := bytes.buffer_to_bytes(buf);
 	payload := bytes.buffer_to_bytes(buf);
 
 

+ 56 - 77
core/compress/zlib/zlib.odin

@@ -30,8 +30,7 @@ import "core:bytes"
 	`Context.rolling_hash` if not inlining it is still faster.
 	`Context.rolling_hash` if not inlining it is still faster.
 
 
 */
 */
-Context     :: compress.Context;
-Code_Buffer :: compress.Code_Buffer;
+Context :: compress.Context;
 
 
 Compression_Method :: enum u8 {
 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
 	DEFLATE  = 8,
@@ -166,7 +165,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 */
 */
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 	/*
 		Resize if needed.
 		Resize if needed.
 	*/
 	*/
@@ -179,14 +178,13 @@ write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err:
 
 
 	#no_bounds_check {
 	#no_bounds_check {
 		z.output.buf[z.bytes_written] = c;
 		z.output.buf[z.bytes_written] = c;
-		cb.last[z.bytes_written & cb.window_mask] = c;
 	}
 	}
 	z.bytes_written += 1;
 	z.bytes_written += 1;
 	return .None;
 	return .None;
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
+repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
 	/*
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -206,7 +204,6 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 	#no_bounds_check {
 	#no_bounds_check {
 		for _ in 0..<count {
 		for _ in 0..<count {
 			z.output.buf[z.bytes_written] = c;
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
 			z.bytes_written += 1;
 			z.bytes_written += 1;
 		}
 		}
 	}
 	}
@@ -215,14 +212,14 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 		the output stream, just give it _that_ slice.
 	*/
 	*/
 
 
-	offset := z.bytes_written - i64(distance);
+	offset := i64(distance);
 
 
 	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
 	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
 		e := grow_buffer(&z.output.buf);
 		e := grow_buffer(&z.output.buf);
@@ -233,10 +230,9 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) ->
 
 
 	#no_bounds_check {
 	#no_bounds_check {
 		for _ in 0..<count {
 		for _ in 0..<count {
-			c := cb.last[offset & cb.window_mask];
+			c := z.output.buf[z.bytes_written - offset];
 			z.output.buf[z.bytes_written] = c;
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
-			z.bytes_written += 1; offset += 1;
+			z.bytes_written += 1;
 		}
 		}
 	}
 	}
 
 
@@ -308,8 +304,8 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	code := u16(compress.peek_bits_lsb(z, cb, 16));
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	code := u16(compress.peek_bits_lsb(z,16));
 
 
 	k := int(z_bit_reverse(code, 16));
 	k := int(z_bit_reverse(code, 16));
 	s: u8;
 	s: u8;
@@ -332,41 +328,41 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
 		return 0, E_Deflate.Bad_Huffman_Code;
 		return 0, E_Deflate.Bad_Huffman_Code;
 	}
 	}
 
 
-	compress.consume_bits_lsb(cb, s);
+	compress.consume_bits_lsb(z, s);
 
 
 	r = t.value[b];
 	r = t.value[b];
 	return r, nil;
 	return r, nil;
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	if cb.num_bits < 16 {
-		if cb.num_bits > 63 {
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	if z.num_bits < 16 {
+		if z.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
 			return 0, E_ZLIB.Code_Buffer_Malformed;
 		}
 		}
-		compress.refill_lsb(z, cb);
-		if cb.num_bits > 63 {
+		compress.refill_lsb(z);
+		if z.num_bits > 63 {
 			return 0, E_General.Stream_Too_Short;
 			return 0, E_General.Stream_Too_Short;
 		}
 		}
 	}
 	}
-	#no_bounds_check b := t.fast[cb.code_buffer & ZFAST_MASK];
+	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
 	if b != 0 {
 	if b != 0 {
 		s := u8(b >> ZFAST_BITS);
 		s := u8(b >> ZFAST_BITS);
-		compress.consume_bits_lsb(cb, s);
+		compress.consume_bits_lsb(z, s);
 		return b & 511, nil;
 		return b & 511, nil;
 	}
 	}
-	return decode_huffman_slowpath(z, cb, t);
+	return decode_huffman_slowpath(z, t);
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
 	#no_bounds_check for {
-		value, e := decode_huffman(z, cb, z_repeat);
+		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
 		if e != nil {
 			return err;
 			return err;
 		}
 		}
 		if value < 256 {
 		if value < 256 {
-			e := write_byte(z, cb, u8(value));
+			e := write_byte(z, u8(value));
 			if e != .None {
 			if e != .None {
 				return E_General.Output_Too_Short;
 				return E_General.Output_Too_Short;
 			}
 			}
@@ -379,17 +375,17 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 			value -= 257;
 			value -= 257;
 			length := Z_LENGTH_BASE[value];
 			length := Z_LENGTH_BASE[value];
 			if Z_LENGTH_EXTRA[value] > 0 {
 			if Z_LENGTH_EXTRA[value] > 0 {
-				length += u16(compress.read_bits_lsb(z, cb, Z_LENGTH_EXTRA[value]));
+				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
 			}
 			}
 
 
-			value, e = decode_huffman(z, cb, z_offset);
+			value, e = decode_huffman(z, z_offset);
 			if e != nil {
 			if e != nil {
 				return E_Deflate.Bad_Huffman_Code;
 				return E_Deflate.Bad_Huffman_Code;
 			}
 			}
 
 
 			distance := Z_DIST_BASE[value];
 			distance := Z_DIST_BASE[value];
 			if Z_DIST_EXTRA[value] > 0 {
 			if Z_DIST_EXTRA[value] > 0 {
-				distance += u16(compress.read_bits_lsb(z, cb, Z_DIST_EXTRA[value]));
+				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
 			}
 			}
 
 
 			if z.bytes_written < i64(distance) {
 			if z.bytes_written < i64(distance) {
@@ -397,7 +393,6 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 				return E_Deflate.Bad_Distance;
 				return E_Deflate.Bad_Distance;
 			}
 			}
 
 
-			offset := i64(z.bytes_written - i64(distance));
 			/*
 			/*
 				These might be sped up with a repl_byte call that copies
 				These might be sped up with a repl_byte call that copies
 				from the already written output more directly, and that
 				from the already written output more directly, and that
@@ -410,15 +405,15 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 					Replicate the last outputted byte, length times.
 					Replicate the last outputted byte, length times.
 				*/
 				*/
 				if length > 0 {
 				if length > 0 {
-					c := cb.last[offset & cb.window_mask];
-					e := repl_byte(z, cb, length, c);
+					c := z.output.buf[z.bytes_written - i64(distance)];
+					e := repl_byte(z, length, c);
 					if e != .None {
 					if e != .None {
 						return E_General.Output_Too_Short;
 						return E_General.Output_Too_Short;
 					}
 					}
 				}
 				}
 			} else {
 			} else {
 				if length > 0 {
 				if length > 0 {
-					e := repl_bytes(z, cb, length, distance);
+					e := repl_bytes(z, length, distance);
 					if e != .None {
 					if e != .None {
 						return E_General.Output_Too_Short;
 						return E_General.Output_Too_Short;
 					}
 					}
@@ -442,9 +437,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		DEFLATE stream.
 		DEFLATE stream.
 	*/
 	*/
 
 
-	code_buffer := Code_Buffer{};
-	cb := &code_buffer;
-
 	if !raw {
 	if !raw {
 		data_size := io.size(ctx.input);
 		data_size := io.size(ctx.input);
 		if data_size < 6 {
 		if data_size < 6 {
@@ -462,8 +454,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		if cinfo > 7 {
 		if cinfo > 7 {
 			return E_ZLIB.Unsupported_Window_Size;
 			return E_ZLIB.Unsupported_Window_Size;
 		}
 		}
-		cb.window_mask = i64((1 << (cinfo + 8) - 1));
-
 		flg, _ := compress.read_u8(ctx);
 		flg, _ := compress.read_u8(ctx);
 
 
 		fcheck  := flg & 0x1f;
 		fcheck  := flg & 0x1f;
@@ -488,23 +478,21 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 			at the end to compare checksums.
 			at the end to compare checksums.
 		*/
 		*/
 
 
-		// Seed the Adler32 rolling checksum.
-		ctx.rolling_hash = 1;
 	}
 	}
 
 
 	// Parse ZLIB stream without header.
 	// Parse ZLIB stream without header.
-	err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size);
+	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
 	if err != nil {
 	if err != nil {
 		return err;
 		return err;
 	}
 	}
 
 
 	if !raw {
 	if !raw {
-		compress.discard_to_next_byte_lsb(cb);
-		adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
+		compress.discard_to_next_byte_lsb(ctx);
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
 
 
-		ctx.rolling_hash = hash.adler32(ctx.output.buf[:]);
+		output_hash := hash.adler32(ctx.output.buf[:]);
 
 
-		if ctx.rolling_hash != u32(adler32) {
+		if output_hash != u32(adler32) {
 			return E_General.Checksum_Failed;
 			return E_General.Checksum_Failed;
 		}
 		}
 	}
 	}
@@ -512,7 +500,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 }
 }
 
 
 @(optimization_mode="speed")
 @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;
 	expected_output_size := expected_output_size;
 
 
 	if expected_output_size <= 0 {
 	if expected_output_size <= 0 {
@@ -536,8 +524,8 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 		return .Resize_Failed;
 		return .Resize_Failed;
 	}
 	}
 
 
-	cb.num_bits    = 0;
-	cb.code_buffer = 0;
+	z.num_bits    = 0;
+	z.code_buffer = 0;
 
 
 	z_repeat:      ^Huffman_Table;
 	z_repeat:      ^Huffman_Table;
 	z_offset:      ^Huffman_Table;
 	z_offset:      ^Huffman_Table;
@@ -559,21 +547,12 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 	defer free(z_offset);
 	defer free(z_offset);
 	defer free(codelength_ht);
 	defer free(codelength_ht);
 
 
-	if cb.window_mask == 0 {
-		cb.window_mask = DEFLATE_MAX_DISTANCE - 1;
-	}
-
-	// Allocate rolling window buffer.
-	cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
-	defer delete(cb.last);
-
-
 	final := u32(0);
 	final := u32(0);
 	type  := u32(0);
 	type  := u32(0);
 
 
 	for {
 	for {
-		final = compress.read_bits_lsb(z, cb, 1);
-		type  = compress.read_bits_lsb(z, cb, 2);
+		final = compress.read_bits_lsb(z, 1);
+		type  = compress.read_bits_lsb(z, 2);
 
 
 		// fmt.printf("Final: %v | Type: %v\n", final, type);
 		// fmt.printf("Final: %v | Type: %v\n", final, type);
 
 
@@ -582,10 +561,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 			// Uncompressed block
 			// Uncompressed block
 
 
 			// Discard bits until next byte boundary
 			// Discard bits until next byte boundary
-			compress.discard_to_next_byte_lsb(cb);
+			compress.discard_to_next_byte_lsb(z);
 
 
-			uncompressed_len  := i16(compress.read_bits_lsb(z, cb, 16));
-			length_check      := i16(compress.read_bits_lsb(z, cb, 16));
+			uncompressed_len  := i16(compress.read_bits_lsb(z, 16));
+			length_check      := i16(compress.read_bits_lsb(z, 16));
 
 
 			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
 			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
 
 
@@ -599,9 +578,9 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				and a single Adler32 update after.
 				and a single Adler32 update after.
 			*/
 			*/
 			#no_bounds_check for uncompressed_len > 0 {
 			#no_bounds_check for uncompressed_len > 0 {
-				compress.refill_lsb(z, cb);
-				lit := compress.read_bits_lsb(z, cb, 8);
-				write_byte(z, cb, u8(lit));
+				compress.refill_lsb(z);
+				lit := compress.read_bits_lsb(z, 8);
+				write_byte(z, u8(lit));
 				uncompressed_len -= 1;
 				uncompressed_len -= 1;
 			}
 			}
 		case 3:
 		case 3:
@@ -625,14 +604,14 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				//i: u32;
 				//i: u32;
 				n: u32;
 				n: u32;
 
 
-				compress.refill_lsb(z, cb, 14);
-				hlit  := compress.read_bits_no_refill_lsb(z, cb, 5) + 257;
-				hdist := compress.read_bits_no_refill_lsb(z, cb, 5) + 1;
-				hclen := compress.read_bits_no_refill_lsb(z, cb, 4) + 4;
+				compress.refill_lsb(z, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
 				ntot  := hlit + hdist;
 				ntot  := hlit + hdist;
 
 
 				#no_bounds_check for i in 0..<hclen {
 				#no_bounds_check for i in 0..<hclen {
-					s := compress.read_bits_lsb(z, cb, 3);
+					s := compress.read_bits_lsb(z, 3);
 					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
 					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
 				}
 				}
 				err = build_huffman(codelength_ht, codelength_sizes[:]);
 				err = build_huffman(codelength_ht, codelength_sizes[:]);
@@ -644,7 +623,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				c: u16;
 				c: u16;
 
 
 				for n < ntot {
 				for n < ntot {
-					c, err = decode_huffman(z, cb, codelength_ht);
+					c, err = decode_huffman(z, codelength_ht);
 					if err != nil {
 					if err != nil {
 						return err;
 						return err;
 					}
 					}
@@ -657,18 +636,18 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 						n += 1;
 						n += 1;
 					} else {
 					} else {
 						fill := u8(0);
 						fill := u8(0);
-						compress.refill_lsb(z, cb, 7);
+						compress.refill_lsb(z, 7);
 						switch c {
 						switch c {
 						case 16:
 						case 16:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 2) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
 							if n == 0 {
 							if n == 0 {
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 							}
 							}
 							fill = lencodes[n - 1];
 							fill = lencodes[n - 1];
 						case 17:
 						case 17:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 3) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
 						case 18:
 						case 18:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 7) + 11);
+							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
 						case:
 						case:
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 						}
 						}
@@ -698,7 +677,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 					return err;
 					return err;
 				}
 				}
 			}
 			}
-			err = parse_huffman_block(z, cb, z_repeat, z_offset);
+			err = parse_huffman_block(z, z_repeat, z_offset);
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if err != nil {
 			if err != nil {
 				return err;
 				return err;
@@ -734,7 +713,7 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, e
 	return err;
 	return err;
 }
 }
 
 
-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
 	ctx := Context{};
 	ctx := Context{};
 
 
 	r := bytes.Reader{};
 	r := bytes.Reader{};
@@ -746,7 +725,7 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
 
 
 	ctx.output = buf;
 	ctx.output = buf;
 
 
-	return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
+	return inflate_from_stream_raw(z=&ctx, expected_output_size=expected_output_size);
 }
 }
 
 
 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};