gzip.odin 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. package gzip
  2. import "core:compress/zlib"
  3. import "core:compress"
  4. import "core:os"
  5. import "core:io"
  6. import "core:bytes"
  7. import "core:hash"
  8. /*
  9. This package implements support for the GZIP file format v4.3,
  10. as specified in RFC 1952.
  11. It is implemented in such a way that it lends itself naturally
  12. to be the input to a complementary TAR implementation.
  13. */
  /*
  	Two-byte signature that opens every GZIP member.
  	On disk the bytes are 0x1f followed by 0x8b; read as a little-endian u16 that is 0x8b1f.
  */
  Magic :: enum u16le {
      GZIP = 0x8b1f,
  }
  /*
  	Fixed-size leading part of a GZIP member, laid out exactly as it appears in the stream.
  	Optional sections (extra, name, comment, header CRC) follow it depending on `flags`.
  */
  Header :: struct #packed {
      magic:              Magic,             // Must equal Magic.GZIP.
      compression_method: Compression,       // Only DEFLATE is defined.
      flags:              Header_Flags,      // Which optional sections follow the header.
      modification_time:  u32le,             // Unix timestamp in seconds.
      xfl:                Compression_Flags, // Compressor-specific hint.
      os:                 OS,                // Originating file system / OS.
  }
  // The packed layout must stay at 10 bytes so it can be read straight from the stream.
  #assert(size_of(Header) == 10);
  /*
  	Individual bits of the header flag byte.
  	Each member's ordinal is its bit position, so the declaration order must not change.
  */
  Header_Flag :: enum u8 {
      text,        // bit 0
      header_crc,  // bit 1
      extra,       // bit 2
      name,        // bit 3
      comment,     // bit 4
      reserved_1,  // bit 5
      reserved_2,  // bit 6
      reserved_3,  // bit 7
  }
  // The flag byte as a set, backed by a single u8.
  Header_Flags :: distinct bit_set[Header_Flag; u8];
  /*
  	Originating operating system / file system, as stored in the header's OS byte.
  	`_Unknown` marks the first value with no dedicated entry; `Unknown` is the catch-all.
  */
  OS :: enum u8 {
      FAT          = 0,
      Amiga        = 1,
      VMS          = 2,
      Unix         = 3,
      VM_CMS       = 4,
      Atari_TOS    = 5,
      HPFS         = 6,
      Macintosh    = 7,
      Z_System     = 8,
      CP_M         = 9,
      TOPS_20      = 10,
      NTFS         = 11,
      QDOS         = 12,
      Acorn_RISCOS = 13,
      _Unknown     = 14,
      Unknown      = 255,
  }
  /*
  	Human-readable label for each OS value.
  	Declared #partial because `_Unknown` has no label of its own.
  */
  OS_Name :: #partial [OS]string{
      .FAT          = "FAT",
      .Amiga        = "Amiga",
      .VMS          = "VMS/OpenVMS",
      .Unix         = "Unix",
      .VM_CMS       = "VM/CMS",
      .Atari_TOS    = "Atari TOS",
      .HPFS         = "HPFS",
      .Macintosh    = "Macintosh",
      .Z_System     = "Z-System",
      .CP_M         = "CP/M",
      .TOPS_20      = "TOPS-20",
      .NTFS         = "NTFS",
      .QDOS         = "QDOS",
      .Acorn_RISCOS = "Acorn RISCOS",
      .Unknown      = "Unknown",
  };
  // Compression method byte of the header. DEFLATE is the only method this package accepts.
  Compression :: enum u8 {
      DEFLATE = 8,
  }
  // Value of the header's `xfl` byte: a hint about the effort the compressor used.
  Compression_Flags :: enum u8 {
      Maximum_Compression = 2,
      Fastest_Compression = 4,
  }
  /*
  	Short local names for the error types shared with `core:compress`,
  	plus the helper used to test which error value an `Error` holds.
  */
  Error     :: compress.Error;
  E_General :: compress.General_Error;
  E_GZIP    :: compress.GZIP_Error;
  E_ZLIB    :: compress.ZLIB_Error;
  E_Deflate :: compress.Deflate_Error;

  is_kind   :: compress.is_kind;
  /*
  	Decompresses GZIP data held in memory.

  	Wraps `slice` in a bytes.Reader, exposes it as a stream and hands it to `load_from_stream`.
  	The decompressed payload is written to `buf`; the returned error is whatever
  	`load_from_stream` reports.
  */
  load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
      reader: bytes.Reader;
      bytes.reader_init(&reader, slice);
      return load_from_stream(bytes.reader_to_stream(&reader), buf, allocator);
  }
  /*
  	Reads the whole file `filename` into memory and decompresses it into `buf`.

  	The file contents are allocated with `allocator` and released again before returning.
  	Returns E_General.File_Not_Found when the file cannot be read, otherwise the result
  	of `load_from_slice`.
  */
  load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
      data, ok := os.read_entire_file(filename, allocator);
      // Release with the allocator that produced the data; a bare `delete(data)` would
      // use context.allocator, which is wrong whenever the caller passes a different one.
      defer delete(data, allocator);

      if !ok {
          return E_General.File_Not_Found;
      }
      return load_from_slice(data, buf, allocator);
  }
  /*
  	Decompresses one GZIP member read from `stream`, writing the inflated payload to `buf`.

  	Steps:
  	  1. Read and validate the fixed 10-byte header (signature, method, reserved flags).
  	  2. Skip the optional sections announced by the header flags: extra subfields,
  	     original file name, comment and header CRC16 (the CRC16 is read but not verified).
  	  3. Inflate the raw DEFLATE payload.
  	  4. Read the trailer and verify the payload's CRC32 and length.

  	Notes:
  	  - The CRC32 and length checks run over the entire contents of `buf`, so the caller
  	    is presumably expected to pass an empty buffer.
  	  - `allocator` is not used by this procedure; scratch space for extra subfields
  	    comes from context.temp_allocator.

  	Returns E_General.OK on success, or the first error encountered.
  */
  load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
      ctx := compress.Context{
          input = stream,
      };
      ctx.output = bytes.buffer_to_stream(buf);

      header, e := compress.read_data(&ctx, Header);
      if e != .None {
          return E_General.File_Too_Short;
      }

      if header.magic != .GZIP {
          return E_GZIP.Invalid_GZIP_Signature;
      }
      if header.compression_method != .DEFLATE {
          return E_General.Unknown_Compression_Method;
      }

      // Fold every OS value without a dedicated enum entry into the catch-all.
      if header.os >= ._Unknown {
          header.os = .Unknown;
      }

      if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
          return E_GZIP.Reserved_Flag_Set;
      }

      // printf("signature: %v\n", header.magic);
      // printf("compression: %v\n", header.compression_method);
      // printf("flags: %v\n", header.flags);
      // printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
      // printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
      // printf("os: %v\n", OS_Name[header.os]);

      if .extra in header.flags {
          xlen, e_extra := compress.read_data(&ctx, u16le);
          if e_extra != .None {
              return E_General.Stream_Too_Short;
          }
          // printf("Extra data present (%v bytes)\n", xlen);
          if xlen < 4 {
              // Minimum length is 2 for ID + 2 for a field length, if set to zero.
              return E_GZIP.Invalid_Extra_Data;
          }

          // Track the unread part of the extra area as a signed int so that the
          // bookkeeping below cannot wrap around the way u16 arithmetic would.
          remaining := int(xlen);

          field_id:     [2]u8;
          field_length: u16le;
          field_error:  io.Error;

          // The extra area is a sequence of subfields: 2-byte ID, 2-byte length, data.
          for remaining >= 4 {
              // println("Parsing Extra field(s).");
              field_id, field_error = compress.read_data(&ctx, [2]u8);
              if field_error != .None {
                  // printf("Parsing Extra returned: %v\n", field_error);
                  return E_General.Stream_Too_Short;
              }

              field_length, field_error = compress.read_data(&ctx, u16le);
              if field_error != .None {
                  // printf("Parsing Extra returned: %v\n", field_error);
                  return E_General.Stream_Too_Short;
              }
              remaining -= 4;

              data_len := int(field_length);
              if data_len > remaining {
                  // The subfield claims more data than the extra area holds.
                  // We're not going to try and recover by scanning for a ZLIB header.
                  // Who knows what else is wrong with this file.
                  return E_GZIP.Invalid_Extra_Data;
              }

              // printf(" Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
              if data_len > 0 {
                  field_data := make([]u8, data_len, context.temp_allocator);
                  _, field_error = ctx.input->impl_read(field_data);
                  if field_error != .None {
                      // printf("Parsing Extra returned: %v\n", field_error);
                      return E_General.Stream_Too_Short;
                  }
                  remaining -= data_len;
                  // printf("%v\n", string(field_data));
              }
          }

          // Anything left over is too short to be a subfield header.
          if remaining != 0 {
              return E_GZIP.Invalid_Extra_Data;
          }
      }

      if .name in header.flags {
          // Zero-terminated original file name. 1024 bytes should be enough.
          name: [1024]u8;
          b: [1]u8;
          i := 0;
          name_error: io.Error;

          for i < len(name) {
              _, name_error = ctx.input->impl_read(b[:]);
              if name_error != .None {
                  return E_General.Stream_Too_Short;
              }
              if b[0] == 0 {
                  break;
              }
              name[i] = b[0];
              i += 1;
              if i >= len(name) {
                  return E_GZIP.Original_Name_Too_Long;
              }
          }
          // printf("Original filename: %v\n", string(name[:i]));
      }

      if .comment in header.flags {
          // Zero-terminated comment. 1024 bytes should be enough.
          comment: [1024]u8;
          b: [1]u8;
          i := 0;
          comment_error: io.Error;

          for i < len(comment) {
              _, comment_error = ctx.input->impl_read(b[:]);
              if comment_error != .None {
                  return E_General.Stream_Too_Short;
              }
              if b[0] == 0 {
                  break;
              }
              comment[i] = b[0];
              i += 1;
              if i >= len(comment) {
                  return E_GZIP.Comment_Too_Long;
              }
          }
          // printf("Comment: %v\n", string(comment[:i]));
      }

      if .header_crc in header.flags {
          crc16: [2]u8;
          crc_error: io.Error;
          _, crc_error = ctx.input->impl_read(crc16[:]);
          if crc_error != .None {
              return E_General.Stream_Too_Short;
          }
          /*
          	We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
          	If we find a gzip file in the wild that sets this field, we can add proper support for it.
          */
      }

      /*
      	We should have arrived at the ZLIB payload.
      */
      zlib_error := zlib.inflate_raw(&ctx);

      // fmt.printf("ZLIB returned: %v\n", zlib_error);
      // Anything other than E_General.OK (including nil) is handed back unchanged.
      if !is_kind(zlib_error, E_General.OK) || zlib_error == nil {
          return zlib_error;
      }

      /*
      	Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
      */
      compress.discard_to_next_byte_lsb(&ctx);

      payload_crc_b: [4]u8;
      payload_len_b: [4]u8;
      for i in 0..3 {
          payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
      }
      payload_crc := transmute(u32le)payload_crc_b;

      for i in 0..3 {
          payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
      }
      payload_len := transmute(u32le)payload_len_b;

      payload := bytes.buffer_to_bytes(buf);
      crc32 := u32le(hash.crc32(payload));

      if crc32 != payload_crc {
          return E_GZIP.Payload_CRC_Invalid;
      }

      // The trailer stores the uncompressed size modulo 2^32, so compare the
      // low 32 bits only; a full-width compare rejects valid payloads of 4 GiB or more.
      if u32le(u32(len(payload))) != payload_len {
          return E_GZIP.Payload_Length_Invalid;
      }
      return E_General.OK;
  }
  // Overload set: `load` dispatches on the first argument (file name, byte slice or stream).
  load :: proc{
      load_from_file,
      load_from_slice,
      load_from_stream,
  };