/*
	Copyright 2021 Jeroen van Rijn <[email protected]>.
	Made available under Odin's BSD-3 license.

	List of contributors:
		Jeroen van Rijn: Initial implementation, optimization.
*/

// package compress is a collection of utilities to aid with other compression packages
package compress

import "core:io"
import "core:bytes"
import "core:runtime"
/*
	These settings bound how much compression algorithms will allocate for their output buffer.
	If streaming their output, these are unnecessary and will be ignored.
*/

/*
	When a decompression routine doesn't stream its output, but writes to a buffer,
	we pre-allocate an output buffer to speed up decompression. The default is 1 MiB.

	Overridable with `-define:COMPRESS_OUTPUT_ALLOCATE_MIN=size_in_bytes`.
*/
COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 20))

/*
	This bounds the maximum a buffer will resize to as needed, or the maximum we'll
	pre-allocate if you inform the decompression routine you know the payload size.

	For reference, the largest payload size of a GZIP file is 4 GiB.

	Overridable with `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`.
*/
when size_of(uintptr) == 8 {
	/*
		For 64-bit platforms, we set the default max buffer size to 4 GiB,
		which is GZIP and PKZIP's max payload size.
	*/
	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32))
} else {
	/*
		For 32-bit platforms, we set the default max buffer size to 512 MiB.
	*/
	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29))
}
/*
	Joint error type for all compression (sub)packages.

	`#shared_nil` means a member whose value is its zero/`.None` state makes the
	union itself compare equal to `nil`, so `err == nil` works uniformly.
*/
Error :: union #shared_nil {
	General_Error,
	Deflate_Error,
	ZLIB_Error,
	GZIP_Error,
	ZIP_Error,
	runtime.Allocator_Error,
}
// Errors common to all compression formats: file/stream access, sizing, and memory.
General_Error :: enum {
	None = 0,
	File_Not_Found,
	Cannot_Open_File,
	File_Too_Short,
	Stream_Too_Short,
	Output_Too_Short,
	Unknown_Compression_Method,
	Checksum_Failed,
	Incompatible_Options,
	Unimplemented,

	/*
		Memory errors
	*/
	Allocation_Failed,
	Resize_Failed,
}
// Errors specific to parsing and validating the GZIP container format.
GZIP_Error :: enum {
	None = 0,
	Invalid_GZIP_Signature,
	Reserved_Flag_Set,
	Invalid_Extra_Data,
	Original_Name_Too_Long,
	Comment_Too_Long,
	Payload_Length_Invalid,
	Payload_CRC_Invalid,

	/*
		GZIP's payload can be a maximum of max(u32le), or 4 GiB.
		If you tell it you expect it to contain more, that's obviously an error.
	*/
	Payload_Size_Exceeds_Max_Payload,

	/*
		For buffered instead of streamed output, the payload size can't exceed
		the max set by the `COMPRESS_OUTPUT_ALLOCATE_MAX` switch in compress/common.odin.

		You can tweak this setting using `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`
	*/
	Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX,
}
// Errors specific to parsing the PKZIP (.zip) container format.
ZIP_Error :: enum {
	None = 0,
	Invalid_ZIP_File_Signature,
	Unexpected_Signature,
	Insert_Next_Disk,
	Expected_End_of_Central_Directory_Record,
}
// Errors specific to the ZLIB wrapper (RFC 1950) around a DEFLATE stream.
ZLIB_Error :: enum {
	None = 0,
	Unsupported_Window_Size,
	FDICT_Unsupported,
	Unsupported_Compression_Level,
	Code_Buffer_Malformed,
}
// Errors specific to decoding a raw DEFLATE (RFC 1951) bit stream.
Deflate_Error :: enum {
	None = 0,
	Huffman_Bad_Sizes,
	Huffman_Bad_Code_Lengths,
	Inflate_Error,
	Bad_Distance,
	Bad_Huffman_Code,
	Len_Nlen_Mismatch,
	BType_3, // Block type 3 is reserved/invalid in DEFLATE.
}
// General I/O context for ZLIB, LZW, etc.
Context_Memory_Input :: struct #packed {
	input_data:    []u8,          // Remaining compressed input; consumed from the front.
	output:        ^bytes.Buffer, // Destination buffer when not streaming output.
	bytes_written: i64,           // Total bytes written to `output` so far.
	code_buffer:   u64,           // LSB-first bit buffer for the bit readers below.
	num_bits:      u64,           // Number of valid bits currently in `code_buffer`.

	/*
		If we know the data size, we can optimize the reads and writes.
	*/
	size_packed:   i64,
	size_unpacked: i64,
}

// `#packed` keeps the layout fixed; these asserts catch accidental field changes.
when size_of(rawptr) == 8 {
	#assert(size_of(Context_Memory_Input) == 64)
} else {
	// e.g. `-target:windows_i386`
	#assert(size_of(Context_Memory_Input) == 52)
}
// Like `Context_Memory_Input`, but input may be refilled from an `io.Stream`.
Context_Stream_Input :: struct #packed {
	input_data:    []u8,          // In-memory input; may be a window refilled from `input`.
	input:         io.Stream,     // Backing stream to read from when not fully in memory.
	output:        ^bytes.Buffer, // Destination buffer when not streaming output.
	bytes_written: i64,           // Total bytes written to `output` so far.
	code_buffer:   u64,           // LSB-first bit buffer for the bit readers below.
	num_bits:      u64,           // Number of valid bits currently in `code_buffer`.

	/*
		If we know the data size, we can optimize the reads and writes.
	*/
	size_packed:   i64,
	size_unpacked: i64,

	/*
		Flags:
			`input_fully_in_memory`
				true  = This tells us we read input from `input_data` exclusively. [] = EOF.
				false = Try to refill `input_data` from the `input` stream.
	*/
	input_fully_in_memory: b8,

	padding: [1]u8,
}
  148. /*
  149. TODO: The stream versions should really only check if a certain method is available once, perhaps even during setup.
  150. Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
  151. This simplifies end-of-stream handling where bits may be left in the bit buffer.
  152. */
  153. input_size_from_memory :: proc(z: ^Context_Memory_Input) -> (res: i64, err: Error) {
  154. return i64(len(z.input_data)), nil
  155. }
  156. input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Error) {
  157. return io.size(z.input), nil
  158. }
  159. input_size :: proc{input_size_from_memory, input_size_from_stream}
  160. @(optimization_mode="speed")
  161. read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
  162. #no_bounds_check {
  163. if len(z.input_data) >= size {
  164. res = z.input_data[:size]
  165. z.input_data = z.input_data[size:]
  166. return res, .None
  167. }
  168. }
  169. if len(z.input_data) == 0 {
  170. return []u8{}, .EOF
  171. } else {
  172. return []u8{}, .Short_Buffer
  173. }
  174. }
  175. @(optimization_mode="speed")
  176. read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
  177. // TODO: REMOVE ALL USE OF context.temp_allocator here
  178. // the is literally no need for it
  179. b := make([]u8, size, context.temp_allocator)
  180. _, e := z.input->impl_read(b[:])
  181. if e == .None {
  182. return b, .None
  183. }
  184. return []u8{}, e
  185. }
  186. read_slice :: proc{read_slice_from_memory, read_slice_from_stream}
  187. @(optimization_mode="speed")
  188. read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
  189. b, e := read_slice(z, size_of(T))
  190. if e == .None {
  191. return (^T)(&b[0])^, .None
  192. }
  193. return T{}, e
  194. }
  195. @(optimization_mode="speed")
  196. read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
  197. #no_bounds_check {
  198. if len(z.input_data) >= 1 {
  199. res = z.input_data[0]
  200. z.input_data = z.input_data[1:]
  201. return res, .None
  202. }
  203. }
  204. return 0, .EOF
  205. }
  206. @(optimization_mode="speed")
  207. read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
  208. b, e := read_slice_from_stream(z, 1)
  209. if e == .None {
  210. return b[0], .None
  211. }
  212. return 0, e
  213. }
  214. read_u8 :: proc{read_u8_from_memory, read_u8_from_stream}
/*
	You would typically only use this at the end of Inflate, to drain bits from the code buffer
	preferentially.
*/
@(optimization_mode="speed")
read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
	// If a full byte's worth of bits is already buffered, drain it from there first.
	if z.num_bits >= 8 {
		res = u8(read_bits_no_refill_lsb(z, 8))
	} else {
		// Bit buffer is nearly empty; fall back to the byte reader if input remains.
		size, _ := input_size(z)
		if size > 0 {
			res, err = read_u8(z)
		} else {
			err = .EOF
		}
	}
	return
}
  233. @(optimization_mode="speed")
  234. peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
  235. size :: size_of(T)
  236. #no_bounds_check {
  237. if len(z.input_data) >= size {
  238. buf := z.input_data[:size]
  239. return (^T)(&buf[0])^, .None
  240. }
  241. }
  242. if len(z.input_data) == 0 {
  243. return T{}, .EOF
  244. } else {
  245. return T{}, .Short_Buffer
  246. }
  247. }
  248. @(optimization_mode="speed")
  249. peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
  250. size :: size_of(T)
  251. #no_bounds_check {
  252. if len(z.input_data) >= size + offset {
  253. buf := z.input_data[offset:][:size]
  254. return (^T)(&buf[0])^, .None
  255. }
  256. }
  257. if len(z.input_data) == 0 {
  258. return T{}, .EOF
  259. } else {
  260. return T{}, .Short_Buffer
  261. }
  262. }
// Reads a `T` at the stream's current position without moving the read head,
// using a positional read (`io.read_at`).
@(optimization_mode="speed")
peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
	size :: size_of(T)

	// Get current position to read from; `read_at` itself won't move the head.
	curr, e1 := z.input->impl_seek(0, .Current)
	if e1 != .None {
		return T{}, e1
	}
	r, e2 := io.to_reader_at(z.input)
	if !e2 {
		// Stream doesn't support positional reads.
		return T{}, .Empty
	}
	// Small peeks use a stack array; larger ones fall back to the temp allocator.
	when size <= 128 {
		b: [size]u8
	} else {
		b := make([]u8, size, context.temp_allocator)
	}
	_, e3 := io.read_at(r, b[:], curr)
	if e3 != .None {
		// NOTE(review): the real error `e3` is folded into `.Empty` here, so
		// callers can't tell failure causes apart — consider propagating it.
		return T{}, .Empty
	}
	res = (^T)(&b[0])^
	return res, .None
}
  287. @(optimization_mode="speed")
  288. peek_data_at_offset_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
  289. size :: size_of(T)
  290. // Get current position to return to.
  291. cur_pos, e1 := z.input->impl_seek(0, .Current)
  292. if e1 != .None {
  293. return T{}, e1
  294. }
  295. // Seek to offset.
  296. pos, e2 := z.input->impl_seek(offset, .Start)
  297. if e2 != .None {
  298. return T{}, e2
  299. }
  300. r, e3 := io.to_reader_at(z.input)
  301. if !e3 {
  302. return T{}, .Empty
  303. }
  304. when size <= 128 {
  305. b: [size]u8
  306. } else {
  307. b := make([]u8, size, context.temp_allocator)
  308. }
  309. _, e4 := io.read_at(r, b[:], pos)
  310. if e4 != .None {
  311. return T{}, .Empty
  312. }
  313. // Return read head to original position.
  314. z.input->impl_seek(cur_pos, .Start)
  315. res = (^T)(&b[0])^
  316. return res, .None
  317. }
  318. peek_data :: proc{peek_data_from_memory, peek_data_from_stream, peek_data_at_offset_from_memory, peek_data_at_offset_from_stream}
// Sliding window read back
@(optimization_mode="speed")
peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
	// Look back into the sliding window (the already-written output).
	// NOTE(review): no bounds check and the error is always `.None` — callers
	// must guarantee `0 < offset <= z.bytes_written` or this will fault.
	return z.output.buf[z.bytes_written - offset], .None
}
// Generalized bit reader LSB
// Tops up `code_buffer` until more than `width` bits are available, pulling
// whole bytes off the front of `input_data`.
@(optimization_mode="speed")
refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) {
	refill := u64(width)
	b      := u64(0)

	// Already have more than the requested number of bits buffered.
	if z.num_bits > refill {
		return
	}
	for {
		if len(z.input_data) != 0 {
			b = u64(z.input_data[0])
			z.input_data = z.input_data[1:]
		} else {
			// Past end of input: feed zero bytes so reads past EOF are benign;
			// `num_bits` still advances, which end-of-stream handling relies on.
			b = 0
		}
		// New byte lands above the bits already buffered (LSB-first order).
		z.code_buffer |= b << u8(z.num_bits)
		z.num_bits += 8
		if z.num_bits > refill {
			break
		}
	}
}
// Generalized bit reader LSB
// Stream variant: tops up `code_buffer` from `read_u8` until more than `width`
// bits are available, with sanity checks on the buffer's invariants.
@(optimization_mode="speed")
refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
	refill := u64(width)

	for {
		if z.num_bits > refill {
			break
		}
		// An empty code buffer claiming >63 bits can safely be reset to zero.
		if z.code_buffer == 0 && z.num_bits > 63 {
			z.num_bits = 0
		}
		if z.code_buffer >= 1 << uint(z.num_bits) {
			// Code buffer is malformed: bits set above the tracked bit count.
			// `max(u64)` flags the condition for the caller to detect.
			z.num_bits = max(u64)
			return
		}
		b, err := read_u8(z)
		if err != .None {
			// This is fine at the end of the file.
			return
		}
		z.code_buffer |= (u64(b) << u8(z.num_bits))
		z.num_bits += 8
	}
}

refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream}
// Drops `width` bits off the bottom of the code buffer.
@(optimization_mode="speed")
consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
	z.code_buffer >>= width
	z.num_bits -= u64(width)
}

// Drops `width` bits off the bottom of the code buffer.
@(optimization_mode="speed")
consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
	z.code_buffer >>= width
	z.num_bits -= u64(width)
}

consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream}
// Returns the bottom `width` bits of the code buffer without consuming them,
// refilling from input first if too few bits are buffered.
@(optimization_mode="speed")
peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
	if z.num_bits < u64(width) {
		refill_lsb(z)
	}
	// `~(~u64(0) << width)` builds a mask of `width` low bits.
	return u32(z.code_buffer & ~(~u64(0) << width))
}

// Stream variant of `peek_bits_lsb_from_memory`; same contract.
@(optimization_mode="speed")
peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
	if z.num_bits < u64(width) {
		refill_lsb(z)
	}
	return u32(z.code_buffer & ~(~u64(0) << width))
}

peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream}
// Returns the bottom `width` bits without consuming them. The caller must have
// refilled already: asserts that enough bits are buffered.
@(optimization_mode="speed")
peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
	assert(z.num_bits >= u64(width))
	return u32(z.code_buffer & ~(~u64(0) << width))
}

// Stream variant of `peek_bits_no_refill_lsb_from_memory`; same contract.
@(optimization_mode="speed")
peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
	assert(z.num_bits >= u64(width))
	return u32(z.code_buffer & ~(~u64(0) << width))
}

peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream}
// Reads `width` bits LSB-first: peek (with refill as needed), then consume.
@(optimization_mode="speed")
read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
	k := #force_inline peek_bits_lsb(z, width)
	#force_inline consume_bits_lsb(z, width)
	return k
}

// Stream variant of `read_bits_lsb_from_memory`; same contract.
@(optimization_mode="speed")
read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
	k := peek_bits_lsb(z, width)
	consume_bits_lsb(z, width)
	return k
}

read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream}
// Reads `width` bits LSB-first WITHOUT refilling; the caller guarantees enough
// bits are already buffered (asserted in the peek).
@(optimization_mode="speed")
read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
	k := #force_inline peek_bits_no_refill_lsb(z, width)
	#force_inline consume_bits_lsb(z, width)
	return k
}

// Stream variant of `read_bits_no_refill_lsb_from_memory`; same contract.
@(optimization_mode="speed")
read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
	k := peek_bits_no_refill_lsb(z, width)
	consume_bits_lsb(z, width)
	return k
}

read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream}
// Discards buffered bits up to the next byte boundary (`num_bits & 7` bits),
// re-aligning the bit reader for byte-oriented reads.
@(optimization_mode="speed")
discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
	discard := u8(z.num_bits & 7)
	#force_inline consume_bits_lsb(z, discard)
}

// Stream variant of `discard_to_next_byte_lsb_from_memory`; same contract.
@(optimization_mode="speed")
discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
	discard := u8(z.num_bits & 7)
	consume_bits_lsb(z, discard)
}

discard_to_next_byte_lsb :: proc{discard_to_next_byte_lsb_from_memory, discard_to_next_byte_lsb_from_stream}