Browse Source

Merge pull request #1741 from Kelimion/shoco

Add Shoco short string compression.
Jeroen van Rijn 3 years ago
parent
commit
849efff070

+ 148 - 0
core/compress/shoco/model.odin

@@ -0,0 +1,148 @@
+/*
+	This file was generated, so don't edit this by hand.
+	Transliterated from https://github.com/Ed-von-Schleck/shoco/blob/master/shoco_model.h,
+	which is an English word model.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+DEFAULT_MODEL :: Shoco_Model {
+	min_char = 39,
+	max_char = 122,
+	characters_by_id = {
+		'e', 'a', 'i', 'o', 't', 'h', 'n', 'r', 's', 'l', 'u', 'c', 'w', 'm', 'd', 'b', 'p', 'f', 'g', 'v', 'y', 'k', '-', 'H', 'M', 'T', '\'', 'B', 'x', 'I', 'W', 'L',
+	},
+	ids_by_character = {
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 27, -1, -1, -1, -1, -1, 23, 29, -1, -1, 31, 24, -1, -1, -1, -1, -1, -1, 25, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 15, 11, 14, 0, 17, 18, 5, 2, -1, 21, 9, 13, 6, 3, 16, -1, 7, 8, 4, 10, 19, 12, 28, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	},
+	successors_by_bigram = {
+		7, 4, 12, -1, 6, -1, 1, 0, 3, 5, -1, 9, -1, 8, 2, -1, 15, 14, -1, 10, 11, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1,
+		1, -1, 6, -1, 1, -1, 0, 3, 2, 4, 15, 11, -1, 9, 5, 10, 13, -1, 12, 8, 7, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		9, 11, -1, 4, 2, -1, 0, 8, 1, 5, -1, 6, -1, 3, 7, 15, -1, 12, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		-1, -1, 14, 7, 5, -1, 1, 2, 8, 9, 0, 15, 6, 4, 11, -1, 12, 3, -1, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		2, 4, 3, 1, 5, 0, -1, 6, 10, 9, 7, 12, 11, -1, -1, -1, -1, 13, -1, -1, 8, -1, 15, -1, -1, -1, 14, -1, -1, -1, -1, -1,
+		0, 1, 2, 3, 4, -1, -1, 5, 9, 10, 6, -1, -1, 8, 15, 11, -1, 14, -1, -1, 7, -1, 13, -1, -1, -1, 12, -1, -1, -1, -1, -1,
+		2, 8, 7, 4, 3, -1, 9, -1, 6, 11, -1, 5, -1, -1, 0, -1, -1, 14, 1, 15, 10, 12, -1, -1, -1, -1, 13, -1, -1, -1, -1, -1,
+		0, 3, 1, 2, 6, -1, 9, 8, 4, 12, 13, 10, -1, 11, 7, -1, -1, 15, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 6, 3, 4, 1, 2, -1, -1, 5, 10, 7, 9, 11, 12, -1, -1, 8, 14, -1, -1, 15, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 6, 2, 5, 9, -1, -1, -1, 10, 1, 8, -1, 12, 14, 4, -1, 15, 7, -1, 13, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		8, 10, 9, 15, 1, -1, 4, 0, 3, 2, -1, 6, -1, 12, 11, 13, 7, 14, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		1, 3, 6, 0, 4, 2, -1, 7, 13, 8, 9, 11, -1, -1, 15, -1, -1, -1, -1, -1, 10, 5, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		3, 0, 1, 4, -1, 2, 5, 6, 7, 8, -1, 14, -1, -1, 9, 15, -1, 12, -1, -1, -1, 10, 11, -1, -1, -1, 13, -1, -1, -1, -1, -1,
+		0, 1, 3, 2, 15, -1, 12, -1, 7, 14, 4, -1, -1, 9, -1, 8, 5, 10, -1, -1, 6, -1, 13, -1, -1, -1, 11, -1, -1, -1, -1, -1,
+		0, 3, 1, 2, -1, -1, 12, 6, 4, 9, 7, -1, -1, 14, 8, -1, -1, 15, 11, 13, 5, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 5, 7, 2, 10, 13, -1, 6, 8, 1, 3, -1, -1, 14, 15, 11, -1, -1, -1, 12, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 2, 6, 3, 7, 10, -1, 1, 9, 4, 8, -1, -1, 15, -1, 12, 5, -1, -1, -1, 11, -1, 13, -1, -1, -1, 14, -1, -1, -1, -1, -1,
+		1, 3, 4, 0, 7, -1, 12, 2, 11, 8, 6, 13, -1, -1, -1, -1, -1, 5, -1, -1, 10, 15, 9, -1, -1, -1, 14, -1, -1, -1, -1, -1,
+		1, 3, 5, 2, 13, 0, 9, 4, 7, 6, 8, -1, -1, 15, -1, 11, -1, -1, 10, -1, 14, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 2, 1, 3, -1, -1, -1, 6, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		1, 11, 4, 0, 3, -1, 13, 12, 2, 7, -1, -1, 15, 10, 5, 8, 14, -1, -1, -1, -1, -1, 9, -1, -1, -1, 6, -1, -1, -1, -1, -1,
+		0, 9, 2, 14, 15, 4, 1, 13, 3, 5, -1, -1, 10, -1, -1, -1, -1, 6, 12, -1, 7, -1, 8, -1, -1, -1, 11, -1, -1, -1, -1, -1,
+		-1, 2, 14, -1, 1, 5, 8, 7, 4, 12, -1, 6, 9, 11, 13, 3, 10, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		0, 1, 3, 2, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		4, 3, 1, 5, -1, -1, -1, 0, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		2, 8, 4, 1, -1, 0, -1, 6, -1, -1, 5, -1, 7, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1,
+		12, 5, -1, -1, 1, -1, -1, 7, 0, 3, -1, 2, -1, 4, 6, -1, -1, -1, -1, 8, -1, -1, 15, -1, 13, 9, -1, -1, -1, -1, -1, 11,
+		1, 3, 2, 4, -1, -1, -1, 5, -1, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1,
+		5, 3, 4, 12, 1, 6, -1, -1, -1, -1, 8, 2, -1, -1, -1, -1, 0, 9, -1, -1, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		-1, -1, -1, -1, 0, -1, 1, 12, 3, -1, -1, -1, -1, 5, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, 6, -1, 10,
+		2, 3, 1, 4, -1, 0, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1,
+		5, 1, 3, 0, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, 6, -1, 7,
+	},
+	successors_reversed = {
+		's', 't', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', 'e', 'M', 'Y', '-',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'-', 't', 'a', 'b', 's', 'h', 'c', 'r', 'n', 'w', 'p', 'm', 'l', 'd', 'i', 'f',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'u', 'e', 'i', 'a', 'o', 'r', 'y', 'l', 'I', 'E', 'R', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		't', 'n', 'f', 's', '\'', 'm', 'I', 'N', 'A', 'E', 'L', 'Z', 'r', 'V', 'R', 'C',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'o', 'a', 'y', 'i', 'u', 'e', 'I', 'L', 'D', '\'', 'E', 'Y', '\x00', '\x00', '\x00', '\x00',
+		'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', 'H', 'y', 'R', 'Z', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'h', 'i', 'e', 'a', 'o', 'r', 'I', 'y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'n', 't', 's', 'r', 'l', 'd', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k', 'u',
+		'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', 'v', 'h', 'm', 'd',
+		'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', 'q', 's', '-', 'd',
+		'e', 'i', 'o', 'a', 's', 'y', 'r', 'u', 'd', 'l', '-', 'g', 'n', 'v', 'm', 'f',
+		'r', 'n', 'd', 's', 'a', 'l', 't', 'e', 'm', 'c', 'v', 'y', 'i', 'x', 'f', 'p',
+		'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', '-', 'y', 's', 'n', 'c', '\'', 'k',
+		'h', 'e', 'o', 'a', 'r', 'i', 'l', 's', 'u', 'n', 'g', 'b', '-', 't', 'y', 'm',
+		'e', 'a', 'i', 'o', 't', 'r', 'u', 'y', 'm', 's', 'l', 'b', '\'', '-', 'f', 'd',
+		'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'e', 'n', 'i', 's', 'h', 'l', 'f', 'y', '-', 'a', 'w', '\'', 'g', 'r', 'o', 't',
+		'e', 'l', 'i', 'y', 'd', 'o', 'a', 'f', 'u', 't', 's', 'k', 'w', 'v', 'm', 'p',
+		'e', 'a', 'o', 'i', 'u', 'p', 'y', 's', 'b', 'm', 'f', '\'', 'n', '-', 'l', 't',
+		'd', 'g', 'e', 't', 'o', 'c', 's', 'i', 'a', 'n', 'y', 'l', 'k', '\'', 'f', 'v',
+		'u', 'n', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i', 'c',
+		'e', 'r', 'a', 'o', 'l', 'p', 'i', 't', 'u', 's', 'h', 'y', 'b', '-', '\'', 'm',
+		'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'e', 'i', 'o', 'a', 's', 'y', 't', 'd', 'r', 'n', 'c', 'm', 'l', 'u', 'g', 'f',
+		'e', 't', 'h', 'i', 'o', 's', 'a', 'u', 'p', 'c', 'l', 'w', 'm', 'k', 'f', 'y',
+		'h', 'o', 'e', 'i', 'a', 't', 'r', 'u', 'y', 'l', 's', 'w', 'c', 'f', '\'', '-',
+		'r', 't', 'l', 's', 'n', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', 'f', 'o',
+		'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+		'a', 'i', 'h', 'e', 'o', 'n', 'r', 's', 'l', 'd', 'k', '-', 'f', '\'', 'c', 'b',
+		'p', 't', 'c', 'a', 'i', 'e', 'h', 'q', 'u', 'f', '-', 'y', 'o', '\x00', '\x00', '\x00',
+		'o', 'e', 's', 't', 'i', 'd', '\'', 'l', 'b', '-', 'm', 'a', 'r', 'n', 'p', 'w',
+	},
+
+	character_count = 32,
+	successor_count = 16,
+
+	max_successor_n = 7,
+	packs = {
+		{ 0x80000000, 1, 2, { 26, 24, 24, 24, 24, 24, 24, 24 }, { 15,  3,  0,  0, 0, 0, 0, 0 }, 0xc0, 0x80 },
+		{ 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15,  7,  7,  7, 0, 0, 0, 0 }, 0xe0, 0xc0 },
+		{ 0xe0000000, 4, 8, { 23, 19, 15, 11,  8,  5,  2,  0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 },
+	},
+}

+ 318 - 0
core/compress/shoco/shoco.odin

@@ -0,0 +1,318 @@
+/*
+	Copyright 2022 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+
+	An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+import "core:intrinsics"
+import "core:compress"
+
+Shoco_Pack :: struct {
+	word:           u32,
+	bytes_packed:   i8,
+	bytes_unpacked: i8,
+	offsets:        [8]u16,
+	masks:          [8]i16,
+	header_mask:    u8,
+	header:         u8,
+}
+
+Shoco_Model :: struct {
+	min_char:             u8,
+	max_char:             u8,
+	characters_by_id:     []u8,
+	ids_by_character:     [256]i16,
+	successors_by_bigram: []i8,
+	successors_reversed:  []u8,
+
+	character_count:      u8,
+	successor_count:      u8,
+	max_successor_n:      i8,
+	packs:                []Shoco_Pack,
+}
+
+compress_bound :: proc(uncompressed_size: int) -> (worst_case_compressed_size: int) {
+	// Worst case compression happens when input is non-ASCII (128-255)
+	// Encoded as 0x00 + the byte in question.
+	return uncompressed_size * 2
+}
+
+decompress_bound :: proc(compressed_size: int, model := DEFAULT_MODEL) -> (maximum_decompressed_size: int) {
+	// Best case compression is 2:1
+	most: f64
+	for pack in model.packs {
+		val := f64(compressed_size) / f64(pack.bytes_packed) * f64(pack.bytes_unpacked)
+		most = max(most, val)
+	}
+	return int(most)
+}
+
+find_best_encoding :: proc(indices: []i16, n_consecutive: i8, model := DEFAULT_MODEL) -> (res: int) {
+	for p := len(model.packs); p > 0; p -= 1 {
+		pack := model.packs[p - 1]
+		if n_consecutive >= pack.bytes_unpacked {
+			have_index := true
+			for i := 0; i < int(pack.bytes_unpacked); i += 1 {
+				if indices[i] > pack.masks[i] {
+					have_index = false
+					break
+				}
+			}
+			if have_index {
+				return p - 1
+			}
+		}
+	}
+	return -1
+}
+
+validate_model :: proc(model: Shoco_Model) -> (int, compress.Error) {
+	if len(model.characters_by_id) != int(model.character_count) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	if len(model.successors_by_bigram) != int(model.character_count) * int(model.character_count) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	if len(model.successors_reversed) != int(model.successor_count) * int(model.max_char - model.min_char) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	// Model seems legit.
+	return 0, nil
+}
+
+// Decompresses into provided buffer.
+decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DEFAULT_MODEL) -> (size: int, err: compress.Error) {
+	inp, inp_end := 0, len(input)
+	out, out_end := 0, len(output)
+
+	validate_model(model) or_return
+
+	for inp < inp_end {
+		val  := transmute(i8)input[inp]
+		mark := int(-1)
+
+		for val < 0 {
+			val <<= 1
+			mark += 1
+		}
+
+		if mark > len(model.packs) {
+			return out, .Unknown_Compression_Method
+		}
+
+		if mark < 0 {
+			if out >= out_end {
+				return out, .Output_Too_Short
+			}
+
+			// Ignore the sentinel value for non-ASCII chars
+			if input[inp] == 0x00 {
+				inp += 1
+				if inp >= inp_end {
+					return out, .Stream_Too_Short
+				}
+			}
+			output[out] = input[inp]
+			inp, out = inp + 1, out + 1
+
+		} else {
+			pack := model.packs[mark]
+
+			if out + int(pack.bytes_unpacked) > out_end {
+				return out, .Output_Too_Short
+			} else if inp + int(pack.bytes_packed) > inp_end {
+				return out, .Stream_Too_Short
+			}
+
+			code := intrinsics.unaligned_load((^u32)(&input[inp]))
+			when ODIN_ENDIAN == .Little {
+				code = intrinsics.byte_swap(code)
+			}
+
+			// Unpack the leading char
+			offset := pack.offsets[0]
+			mask   := pack.masks[0]
+
+			last_chr := model.characters_by_id[(code >> offset) & u32(mask)]
+			output[out] = last_chr
+
+			// Unpack the successor chars
+			for i := 1; i < int(pack.bytes_unpacked); i += 1 {
+				offset = pack.offsets[i]
+				mask   = pack.masks[i]
+
+				index_major := u32(last_chr - model.min_char) * u32(model.successor_count)
+				index_minor := (code >> offset) & u32(mask)
+
+				last_chr = model.successors_reversed[index_major + index_minor]
+
+				output[out + i] = last_chr
+			}
+
+			out += int(pack.bytes_unpacked)
+			inp += int(pack.bytes_packed)
+		}
+	}
+
+	return out, nil
+}
+
+decompress_slice_to_string :: proc(input: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (res: string, err: compress.Error) {
+	context.allocator = allocator
+
+	if len(input) == 0 {
+		return "", .Stream_Too_Short
+	}
+
+	max_output_size := decompress_bound(len(input), model)
+
+	buf: [dynamic]u8
+	if !resize(&buf, max_output_size) {
+		return "", .Out_Of_Memory
+	}
+
+	length, result := decompress_slice_to_output_buffer(input, buf[:])
+	resize(&buf, length)
+	return string(buf[:]), result
+}
+decompress :: proc{decompress_slice_to_output_buffer, decompress_slice_to_string}
+
+compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (size: int, err: compress.Error) {
+	inp, inp_end := 0, len(input)
+	out, out_end := 0, len(output)
+	output := output
+
+	validate_model(model) or_return
+
+	indices := make([]i16, model.max_successor_n + 1)
+	defer delete(indices)
+
+	last_resort := false
+
+	encode: for inp < inp_end {
+		if last_resort {
+			last_resort = false
+
+			if input[inp] & 0x80 == 0x80 {
+				// Non-ASCII case
+				if out + 2 > out_end {
+					return out, .Output_Too_Short
+				}
+
+				// Put in a sentinel byte
+				output[out] = 0x00
+				out += 1
+			} else {
+				// An ASCII byte
+				if out + 1 > out_end {
+					return out, .Output_Too_Short
+				}
+			}
+			output[out] = input[inp]
+			out, inp = out + 1, inp + 1
+		} else {
+			// Find the longest string of known successors
+			indices[0] = model.ids_by_character[input[inp]]
+			last_chr_index := indices[0]
+
+			if last_chr_index < 0 {
+				last_resort = true
+				continue encode
+			}
+
+			rest := inp_end - inp
+			n_consecutive: i8 = 1
+			for ; n_consecutive <= model.max_successor_n; n_consecutive += 1 {
+				if inp_end > 0 && int(n_consecutive) == rest {
+					break
+				}
+
+				current_index := model.ids_by_character[input[inp + int(n_consecutive)]]
+				if current_index < 0 { // '\0' is always -1
+					break
+				}
+
+				successor_index := model.successors_by_bigram[last_chr_index * i16(model.character_count) + current_index]
+				if successor_index < 0 {
+					break
+				}
+
+				indices[n_consecutive] = i16(successor_index)
+				last_chr_index = current_index
+			}
+
+			if n_consecutive < 2 {
+				last_resort = true
+				continue encode
+			}
+
+			pack_n := find_best_encoding(indices, n_consecutive)
+			if pack_n >= 0 {
+				if out + int(model.packs[pack_n].bytes_packed) > out_end {
+					return out, .Output_Too_Short
+				}
+
+				pack := model.packs[pack_n]
+				code := pack.word
+
+				for i := 0; i < int(pack.bytes_unpacked); i += 1 {
+					code |= u32(indices[i]) << pack.offsets[i]
+				}
+
+				// In the little-endian world, we need to swap what's in the register to match the memory representation.
+				when ODIN_ENDIAN == .Little {
+					code = intrinsics.byte_swap(code)
+				}
+				out_ptr := raw_data(output[out:])
+
+				switch pack.bytes_packed {
+				case 4:
+					intrinsics.unaligned_store(transmute(^u32)out_ptr, code)
+				case 2:
+					intrinsics.unaligned_store(transmute(^u16)out_ptr, u16(code))
+				case 1:
+					intrinsics.unaligned_store(transmute(^u8)out_ptr,  u8(code))
+				case:
+					return out, .Unknown_Compression_Method
+				}
+
+				out += int(pack.bytes_packed)
+				inp += int(pack.bytes_unpacked)
+			} else {
+				last_resort = true
+				continue encode
+			}
+		}
+	}
+	return out, nil
+}
+
+compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := context.allocator) -> (output: []u8, err: compress.Error) {
+	context.allocator = allocator
+
+	if len(input) == 0 {
+		return {}, .Stream_Too_Short
+	}
+
+	max_output_size := compress_bound(len(input))
+
+	buf: [dynamic]u8
+	if !resize(&buf, max_output_size) {
+		return {}, .Out_Of_Memory
+	}
+
+	length, result := compress_string_to_buffer(input, buf[:])
+	resize(&buf, length)
+	return buf[:length], result
+}
+compress :: proc{compress_string_to_buffer, compress_string}

+ 2 - 0
examples/all/all_main.odin

@@ -10,6 +10,7 @@ import c              "core:c"
 import libc           "core:c/libc"
 
 import compress       "core:compress"
+import shoco          "core:compress/shoco"
 import gzip           "core:compress/gzip"
 import zlib           "core:compress/zlib"
 
@@ -115,6 +116,7 @@ _ :: bytes
 _ :: c
 _ :: libc
 _ :: compress
+_ :: shoco
 _ :: gzip
 _ :: zlib
 _ :: bit_array

+ 26 - 0
tests/core/assets/Shoco/LICENSE

@@ -0,0 +1,26 @@
+Copyright (c) 2016-2021 Ginger Bill. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

BIN
tests/core/assets/Shoco/LICENSE.shoco


+ 95 - 0
tests/core/assets/Shoco/README.md

@@ -0,0 +1,95 @@
+<p align="center">
+    <img src="misc/logo-slim.png" alt="Odin logo" style="width:65%">
+    <br/>
+   The Data-Oriented Language for Sane Software Development.
+    <br/>
+    <br/>
+    <a href="https://github.com/odin-lang/odin/releases/latest">
+        <img src="https://img.shields.io/github/release/odin-lang/odin.svg">
+    </a>
+    <a href="https://github.com/odin-lang/odin/releases/latest">
+        <img src="https://img.shields.io/badge/platforms-Windows%20|%20Linux%20|%20macOS-green.svg">
+    </a>
+    <br>
+    <a href="https://discord.gg/odinlang">
+        <img src="https://img.shields.io/discord/568138951836172421?logo=discord">
+    </a>
+    <a href="https://github.com/odin-lang/odin/actions">
+        <img src="https://github.com/odin-lang/odin/workflows/CI/badge.svg?branch=master&event=push">
+    </a>
+</p>
+
+# The Odin Programming Language
+
+
+Odin is a general-purpose programming language with distinct typing, built for high performance, modern systems, and built-in data-oriented data types. The Odin Programming Language, the C alternative for the joy of programming.
+
+Website: [https://odin-lang.org/](https://odin-lang.org/)
+
+```odin
+package main
+
+import "core:fmt"
+
+main :: proc() {
+	program := "+ + * 😃 - /"
+	accumulator := 0
+
+	for token in program {
+		switch token {
+		case '+': accumulator += 1
+		case '-': accumulator -= 1
+		case '*': accumulator *= 2
+		case '/': accumulator /= 2
+		case '😃': accumulator *= accumulator
+		case: // Ignore everything else
+		}
+	}
+
+	fmt.printf("The program \"%s\" calculates the value %d\n",
+	           program, accumulator)
+}
+
+```
+
+## Documentation
+
+#### [Getting Started](https://odin-lang.org/docs/install)
+
+Instructions for downloading and installing the Odin compiler and libraries.
+
+#### [Nightly Builds](https://odin-lang.org/docs/nightly/)
+
+Get the latest nightly builds of Odin.
+
+### Learning Odin
+
+#### [Overview of Odin](https://odin-lang.org/docs/overview)
+
+An overview of the Odin programming language.
+
+#### [Frequently Asked Questions (FAQ)](https://odin-lang.org/docs/faq)
+
+Answers to common questions about Odin.
+
+#### [Packages](https://pkg.odin-lang.org/)
+
+Documentation for all the official packages part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections.
+
+#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki)
+
+A wiki maintained by the Odin community.
+
+#### [Odin Discord](https://discord.gg/sVBPHEv)
+
+Get live support and talk with other odiners on the Odin Discord.
+
+### Articles
+
+#### [The Odin Blog](https://odin-lang.org/news/)
+
+The official blog of the Odin programming language, featuring announcements, news, and in-depth articles by the Odin team and guests.
+
+## Warnings
+
+* The Odin compiler is still in development.

BIN
tests/core/assets/Shoco/README.md.shoco


+ 56 - 1
tests/core/compress/test_core_compress.odin

@@ -7,13 +7,14 @@ package test_core_compress
 	List of contributors:
 		Jeroen van Rijn: Initial implementation.
 
-	A test suite for ZLIB, GZIP.
+	A test suite for ZLIB, GZIP and Shoco.
 */
 
 import "core:testing"
 
 import "core:compress/zlib"
 import "core:compress/gzip"
+import "core:compress/shoco"
 
 import "core:bytes"
 import "core:fmt"
@@ -48,6 +49,7 @@ main :: proc() {
 	t := testing.T{w=w}
 	zlib_test(&t)
 	gzip_test(&t)
+	shoco_test(&t)
 
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
@@ -134,3 +136,56 @@ gzip_test :: proc(t: ^testing.T) {
 		expect(t, false, error)
 	}
 }
+
+@test
+shoco_test :: proc(t: ^testing.T) {
+
+	Shoco_Tests :: []struct{
+		compressed:     []u8,
+		raw:            []u8,
+		short_pack:     int,
+		short_sentinel: int,
+	}{
+		{ #load("../assets/Shoco/README.md.shoco"), #load("../assets/Shoco/README.md"), 10, 1006 },
+		{ #load("../assets/Shoco/LICENSE.shoco"),   #load("../assets/Shoco/LICENSE"),   25, 68   },
+	}
+
+	for v in Shoco_Tests {
+		expected_raw        := len(v.raw)
+		expected_compressed := len(v.compressed)
+
+		biggest_unpacked := shoco.decompress_bound(expected_compressed)
+		biggest_packed   := shoco.compress_bound(expected_raw)
+
+		buffer := make([]u8, max(biggest_packed, biggest_unpacked))
+		defer delete(buffer)
+
+		size, err := shoco.decompress(v.compressed, buffer[:])
+		msg := fmt.tprintf("Expected `decompress` to return `nil`, got %v", err)
+		expect(t, err == nil, msg)
+
+		msg  = fmt.tprintf("Decompressed %v bytes into %v. Expected to decompress into %v bytes.", len(v.compressed), size, expected_raw)
+		expect(t, size == expected_raw, msg)
+		expect(t, string(buffer[:size]) == string(v.raw), "Decompressed contents don't match.")
+
+		size, err = shoco.compress(string(v.raw), buffer[:])
+		expect(t, err == nil, "Expected `compress` to return `nil`.")
+
+		msg = fmt.tprintf("Compressed %v bytes into %v. Expected to compress into %v bytes.", expected_raw, size, expected_compressed)
+		expect(t, size == expected_compressed, msg)
+
+		size, err = shoco.decompress(v.compressed, buffer[:expected_raw - 10])
+		msg = fmt.tprintf("Decompressing into too small a buffer returned %v, expected `.Output_Too_Short`", err)
+		expect(t, err == .Output_Too_Short, msg)
+
+		size, err = shoco.compress(string(v.raw), buffer[:expected_compressed - 10])
+		msg = fmt.tprintf("Compressing into too small a buffer returned %v, expected `.Output_Too_Short`", err)
+		expect(t, err == .Output_Too_Short, msg)
+
+		size, err = shoco.decompress(v.compressed[:v.short_pack], buffer[:])
+		expect(t, err == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after selecting a pack.")
+
+		size, err = shoco.decompress(v.compressed[:v.short_sentinel], buffer[:])
+		expect(t, err == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after non-ASCII sentinel.")
+	}
+}