
ZLIB: Start optimization.

Jeroen van Rijn 4 years ago
commit 352494cbb4

+ 10 - 4
core/bytes/util.odin

@@ -1,12 +1,18 @@
 package bytes

-import "core:intrinsics"
-import "core:mem"
-
 /*
-	Buffer type helpers
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+
+	`bytes.Buffer` type conversion helpers.
 */

+import "core:intrinsics"
+import "core:mem"
+
 need_endian_conversion :: proc($FT: typeid, $TT: typeid) -> (res: bool) {

 	// true if platform endian

+ 28 - 3
core/compress/common.odin

@@ -1,8 +1,20 @@
 package compress

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation, optimization.
+*/
+
 import "core:io"
 import "core:image"

+when #config(TRACY_ENABLE, false) {
+	import tracy "shared:odin-tracy"
+}
+
 Error :: union {
 	General_Error,
 	Deflate_Error,
@@ -71,15 +83,24 @@ Context :: struct {
 	*/
 	eof: b8,

-	input: io.Stream,
+	input:  io.Stream,
 	output: io.Stream,
 	bytes_written: i64,
-	// Used to update hash as we write instead of all at once
+	/*
+		Used to update hash as we write instead of all at once.
+	*/
 	rolling_hash: u32,

 	// Sliding window buffer. Size must be a power of two.
 	window_size: i64,
+	window_mask: i64,
 	last: ^[dynamic]byte,
+
+	/*
+		If we know the raw data size, we can optimize the reads.
+	*/
+	uncompressed_size: i64,
+	input_data: []u8,
 }

 // Stream helpers
@@ -93,6 +114,7 @@ Context :: struct {
 */

 read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
 	b := make([]u8, size_of(T), context.temp_allocator);
 	r, e1 := io.to_reader(c.input);
 	_, e2 := io.read(r, b);
@@ -105,10 +127,12 @@ read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Err
 }

 read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
 	return read_data(z, u8);
 }

 peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
 	// Get current position to read from.
 	curr, e1 := c.input->impl_seek(0, .Current);
 	if e1 != .None {
@@ -136,6 +160,7 @@ peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {

 // Generalized bit reader LSB
 refill_lsb :: proc(z: ^Context, width := i8(24)) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
 	for {
 		if z.num_bits > width {
 			break;
@@ -146,7 +171,7 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) {
 		if z.code_buffer >= 1 << uint(z.num_bits) {
 			// Code buffer is malformed.
 			z.num_bits = -100;
-        	return;
+			return;
 		}
 		c, err := read_u8(z);
 		if err != .None {
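Side note on the new window_mask field in Context above: window_size is always a power of two (DEFLATE_MAX_DISTANCE, 32768, by default), so masking with window_size - 1 selects the same slot as the modulo it replaces in write_byte, without a division. A minimal standalone sketch of that equivalence, with illustrative names that are not part of this commit:

package mask_equivalence_demo

import "core:fmt"

main :: proc() {
	window_size := i64(32768);       // a power of two, like DEFLATE_MAX_DISTANCE
	window_mask := window_size - 1;  // 0x7fff

	pos := i64(100_123);
	// For power-of-two sizes, masking and modulo pick the same index.
	assert(pos % window_size == pos & window_mask);
	fmt.println(pos & window_mask);  // prints 1819
}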

+ 11 - 0
core/compress/gzip/example.odin

@@ -1,6 +1,17 @@
 //+ignore
 package gzip

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+		Ginger Bill:     Cosmetic changes.
+
+	A small GZIP implementation as an example.
+*/
+
 import "core:compress/gzip"
 import "core:bytes"
 import "core:os"

+ 15 - 6
core/compress/gzip/gzip.odin

@@ -1,5 +1,19 @@
 package gzip

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+
+	This package implements support for the GZIP file format v4.3,
+	as specified in RFC 1952.
+
+	It is implemented in such a way that it lends itself naturally
+	to be the input to a complementary TAR implementation.
+*/
+
 import "core:compress/zlib"
 import "core:compress"
 import "core:os"
@@ -9,11 +23,6 @@ import "core:hash"

 /*

-	This package implements support for the GZIP file format v4.3,
-	as specified in RFC 1952.
-
-	It is implemented in such a way that it lends itself naturally
-	to be the input to a complementary TAR implementation.

 */

@@ -200,7 +209,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 				xlen -= field_length;

 				// printf("%v\n", string(field_data));
-	 		}
+			}

 			if xlen != 0 {
 				return E_GZIP.Invalid_Extra_Data;

+ 10 - 0
core/compress/zlib/example.odin

@@ -1,6 +1,16 @@
 //+ignore
 package zlib

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+
+	An example of how to use `zlib.inflate`.
+*/
+
 import "core:compress/zlib"
 import "core:bytes"
 import "core:fmt"
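The Context.rolling_hash field is what lets the zlib code below update the Adler-32 checksum as each byte is written instead of hashing the finished output in one go (it is seeded with 1 in inflate_from_stream). A standalone sketch of why chunked updates give the same answer, illustrative only and not part of this commit:

package rolling_adler_demo

import "core:fmt"
import "core:hash"

main :: proc() {
	msg  := "hello, zlib";
	data := transmute([]u8)msg;

	// One-shot hash of the whole buffer...
	whole := hash.adler32(data, 1);

	// ...equals feeding the same bytes in two chunks, carrying the previous
	// result forward as the seed, which is what Context.rolling_hash does.
	rolling := hash.adler32(data[:5], 1);
	rolling  = hash.adler32(data[5:], rolling);

	fmt.println(whole == rolling);  // prints true
}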

+ 93 - 17
core/compress/zlib/zlib.odin

@@ -1,11 +1,23 @@
 package zlib

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation, optimization.
+		Ginger Bill:     Cosmetic changes.
+*/
+
 import "core:compress"

 import "core:mem"
 import "core:io"
 import "core:bytes"
 import "core:hash"
+
+when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
+
 /*
 	zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
 	Returns: Error.
@@ -118,6 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 }

 write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
 	c := c;
 	buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
 	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
@@ -126,17 +139,67 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
 	if e != .None {
 		return e;
 	}
-	z.last[z.bytes_written % z.window_size] = c;
+	z.last[z.bytes_written & z.window_mask] = c;

 	z.bytes_written += 1;
 	return .None;
 }

+repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
+	/*
+		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
+		without having to worry about wrapping, so no need for a temp allocation to give to
+		the output stream, just give it _that_ slice.
+	*/
+	buf := make([]u8, count, context.temp_allocator);
+	#no_bounds_check for i in 0..<count {
+		buf[i] = c;
+		z.last[z.bytes_written & z.window_mask] = c;
+		z.bytes_written += 1;
+	}
+	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+
+	_, e := z.output->impl_write(buf);
+	if e != .None {
+		return e;
+	}
+	return .None;
+}
+
+repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }
+	/*
+		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
+		without having to worry about wrapping, so no need for a temp allocation to give to
+		the output stream, just give it _that_ slice.
+	*/
+	buf := make([]u8, count, context.temp_allocator);
+
+	offset := z.bytes_written - i64(distance);
+	#no_bounds_check for i in 0..<count {
+		c := z.last[offset & z.window_mask];
+
+		z.last[z.bytes_written & z.window_mask] = c;
+		buf[i] = c;
+		z.bytes_written += 1; offset += 1;
+	}
+	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+
+	_, e := z.output->impl_write(buf);
+	if e != .None {
+		return e;
+	}
+	return .None;
+}
+
+
 allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
 allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
 	return new(Huffman_Table, allocator), nil;
 }

 build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
 	sizes:     [HUFFMAN_MAX_BITS+1]int;
 	next_code: [HUFFMAN_MAX_BITS]int;

@@ -195,6 +258,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }

 decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
 	code := u16(compress.peek_bits_lsb(z, 16));

 	k := int(z_bit_reverse(code, 16));
@@ -225,6 +289,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
 }

 decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
 	if z.num_bits < 16 {
 		if z.num_bits == -100 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -244,6 +309,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #
 }

 parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
 	#no_bounds_check for {
 		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
@@ -256,8 +322,8 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 			}
 		} else {
 			if value == 256 {
-      				// End of block
-      				return nil;
+					// End of block
+					return nil;
 			}

 			value -= 257;
@@ -294,24 +360,30 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 					Replicate the last outputted byte, length times.
 				*/
 				if length > 0 {
-					b, e := compress.peek_back_byte(z, offset);
-					if e != .None {
+					if offset >= 0 && offset < z.window_size {
+						c := z.last[offset];
+						e := repl_byte(z, length, c);
+						if e != .None {
+							return E_General.Output_Too_Short;
+						}
+					} else {
 						return E_General.Output_Too_Short;
 					}
-					#no_bounds_check for _ in 0..<length {
-						write_byte(z, b);
-					}
 				}
 			} else {
 				if length > 0 {
-					#no_bounds_check for _ in 0..<length {
-						b, e := compress.peek_back_byte(z, offset);
-						if e != .None {
-							return E_General.Output_Too_Short;
-						}
-						write_byte(z, b);
-						offset += 1;
+					e := repl_bytes(z, length, distance);
+					if e != .None {
+						return E_General.Output_Too_Short;
 					}
+					// #no_bounds_check for _ in 0..<length {
+					// 	b, e := compress.peek_back_byte(z, offset);
+					// 	if e != .None {
+					// 		return E_General.Output_Too_Short;
+					// 	}
+					// 	write_byte(z, b);
+					// 	offset += 1;
+					// }
 				}
 			}
 		}
@@ -378,7 +450,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 		ctx.rolling_hash = 1;
 	}

- 	// Parse ZLIB stream without header.
+	// Parse ZLIB stream without header.
 	err = inflate_raw(ctx);
 	if err != nil {
 		return err;
@@ -397,6 +469,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont

 // @(optimization_mode="speed")
 inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
 	final := u32(0);
 	type := u32(0);

@@ -426,6 +499,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 	if z.window_size == 0 {
 		z.window_size = DEFLATE_MAX_DISTANCE;
 	}
+	z.window_mask = z.window_size - 1;

 	// Allocate rolling window buffer.
 	last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
@@ -440,6 +514,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->

 		switch type {
 		case 0:
+			when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); }
 			// Uncompressed block

 			// Discard bits until next byte boundary
@@ -468,6 +543,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 		case 3:
 			return E_Deflate.BType_3;
 		case:
+			when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); }
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if type == 1 {
 				// Use fixed code lengths.
@@ -531,7 +607,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 						case 18:
 							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
 						case:
-					         	return E_Deflate.Huffman_Bad_Code_Lengths;
+								return E_Deflate.Huffman_Bad_Code_Lengths;
 						}

 						if ntot - n < u32(c) {
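The new repl_byte and repl_bytes procedures above batch the LZ77 back-reference copies that parse_huffman_block previously performed one write_byte call at a time. A standalone sketch of the same ring-buffer copy, writing into a plain dynamic array instead of the package's io.Stream output; replicate, window, written and mask are illustrative names, not this package's API:

package repl_demo

import "core:fmt"

// Copy `count` bytes starting `distance` back in a power-of-two ring buffer,
// appending them to `out`. Overlapping copies (distance < count) repeat the pattern.
replicate :: proc(window: []u8, written: ^i64, mask: i64, count: u16, distance: u16, out: ^[dynamic]u8) {
	offset := written^ - i64(distance);
	for _ in 0..<count {
		c := window[offset & mask];
		window[written^ & mask] = c;
		append(out, c);
		written^ += 1; offset += 1;
	}
}

main :: proc() {
	window  := make([]u8, 8);           // ring buffer, size is a power of two
	mask    := i64(len(window) - 1);
	out:       [dynamic]u8;
	written:   i64;

	seed := "ab";
	for i in 0..<len(seed) {            // emit two literals first
		window[written & mask] = seed[i];
		append(&out, seed[i]);
		written += 1;
	}
	replicate(window, &written, mask, 6, 2, &out);  // back-reference: length 6, distance 2
	fmt.println(string(out[:]));                    // prints "abababab"
}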

+ 13 - 4
core/image/common.odin

@@ -1,5 +1,14 @@
 package image

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation, optimization.
+		Ginger Bill:     Cosmetic changes.
+*/
+
 import "core:bytes"
 import "core:mem"

@@ -66,10 +75,10 @@ Image_Option:
 		If the image has an alpha channel, drop it.
 		You may want to use `.alpha_premultiply` in this case.

-        NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
-        unless you select `alpha_premultiply`.
-        In this case it'll premultiply the specified pixels in question only,
-        as the others are implicitly fully opaque.	
+		NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
+		unless you select `alpha_premultiply`.
+		In this case it'll premultiply the specified pixels in question only,
+		as the others are implicitly fully opaque.

 	`.alpha_premultiply`
 		If the image has an alpha channel, returns image data as follows:
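For context on the `.alpha_premultiply` option documented in the hunk above: premultiplying a straight-alpha pixel just scales each color channel by its alpha. A tiny standalone illustration of that arithmetic for an 8-bit pixel, not taken from this commit:

package premultiply_demo

import "core:fmt"

main :: proc() {
	// Straight-alpha 8-bit pixel, premultiplied as channel * alpha / 255.
	r, g, b, a: u16 = 200, 100, 50, 128;
	fmt.println(r * a / 255, g * a / 255, b * a / 255, a);  // prints 100 50 25 128
}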

+ 11 - 0
core/image/png/example.odin

@@ -1,6 +1,17 @@
 //+ignore
 package png

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+		Ginger Bill:     Cosmetic changes.
+
+	An example of how to use `png.load`.
+*/
+
 import "core:compress"
 import "core:image"
 import "core:image/png"

+ 11 - 4
core/image/png/helpers.odin

@@ -1,5 +1,16 @@
 package png

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+		Ginger Bill:     Cosmetic changes.
+
+	These are a few useful utility functions to work with PNG images.
+*/
+
 import "core:image"
 import "core:compress/zlib"
 import coretime "core:time"
@@ -7,10 +18,6 @@ import "core:strings"
 import "core:bytes"
 import "core:mem"

-/*
-	These are a few useful utility functions to work with PNG images.
-*/
-
 /*
 	Cleanup of image-specific data.
 	There are other helpers for cleanup of PNG-specific data.

+ 9 - 0
core/image/png/png.odin

@@ -1,5 +1,14 @@
 package png

+/*
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-2 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+		Ginger Bill:     Cosmetic changes.
+*/
+
 import "core:compress"
 import "core:compress/zlib"
 import "core:image"