zstd_seekable.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. #ifndef SEEKABLE_H
  2. #define SEEKABLE_H
  3. #if defined (__cplusplus)
  4. extern "C" {
  5. #endif
  6. #include <stdio.h>
  7. #include "zstd.h" /* ZSTDLIB_API */
  8. #define ZSTD_seekTableFooterSize 9 /* NOTE(review): presumably the seek table footer size in bytes -- confirm against the format spec */
  9. #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 /* NOTE(review): presumably the magic number marking a seek table -- confirm against the format spec */
  10. #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U /* 2^27 (134,217,728); presumably the cap on frames per archive -- confirm */
  11. /* Limit the maximum size to avoid any potential issues storing the compressed size */
  12. #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U /* 2^31 bytes (2 GiB) */
  13. /*-****************************************************************************
  14. * Seekable Format
  15. *
  16. * The seekable format splits the compressed data into a series of "frames",
  17. * each compressed individually so that decompression of a section in the
  18. * middle of an archive only requires zstd to decompress at most a frame's
  19. * worth of extra data, instead of the entire archive.
  20. ******************************************************************************/
  21. typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; /* compression side: tracks a streaming seekable compression; struct body not visible in this header */
  22. typedef struct ZSTD_seekable_s ZSTD_seekable; /* decompression side: tracks the seek table; struct body not visible in this header */
  23. typedef struct ZSTD_seekTable_s ZSTD_seekTable; /* standalone seek table that can be extracted from a ZSTD_seekable; struct body not visible in this header */
  24. /*-****************************************************************************
  25. * Seekable compression - HowTo
  26. * A ZSTD_seekable_CStream object is required to track the streaming operation.
  27. * Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/
  28. * release resources.
  29. *
  30. * Streaming objects are reusable to avoid allocation and deallocation,
  31. * to start a new compression operation call ZSTD_seekable_initCStream() on the
  32. * compressor.
  33. *
  34. * Data streamed to the seekable compressor will automatically be split into
  35. * frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
  36. * or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
  37. * called or when the default maximum frame size (2GB) is reached.
  38. *
  39. * Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
  40. * for a new compression operation.
  41. * `maxFrameSize` indicates the size at which to automatically start a new
  42. * seekable frame. `maxFrameSize == 0` implies the default maximum size.
  43. * `checksumFlag` indicates whether or not the seek table should include frame
  44. * checksums on the uncompressed data for verification.
  45. * @return : a size hint for input to provide for compression, or an error code
  46. * checkable with ZSTD_isError()
  47. *
  48. * Use ZSTD_seekable_compressStream() repeatedly to consume the input stream.
  49. * The function will automatically update both `pos` fields.
  50. * Note that it may not consume the entire input, in which case `pos < size`,
  51. * and it's up to the caller to present again remaining data.
  52. * @return : a size hint, preferred nb of bytes to use as input for next
  53. * function call or an error code, which can be tested using
  54. * ZSTD_isError().
  55. * Note 1 : it's just a hint, to help latency a little, any other
  56. * value will work fine.
  57. *
  58. * At any time, call ZSTD_seekable_endFrame() to end the current frame and
  59. * start a new one.
  60. *
  61. * ZSTD_seekable_endStream() will end the current frame, and then write the seek
  62. * table so that decompressors can efficiently find compressed frames.
  63. * ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
  64. * all the necessary data to `output`. In this case, it should be called again
  65. * until all remaining data is flushed out and 0 is returned.
  66. ******************************************************************************/
  67. /*===== Seekable compressor management =====*/
  68. ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void); /* creates a compressor object; pair with ZSTD_seekable_freeCStream() */
  69. ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs); /* releases the resources of `zcs` */
  70. /*===== Seekable compression functions =====*/
  71. ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize); /* starts a new compression operation; maxFrameSize == 0 selects the default maximum; returns a size hint or an error code (ZSTD_isError()) */
  72. ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); /* updates both `pos` fields; may leave input unconsumed (pos < size), which the caller must present again; returns a size hint or an error code */
  73. ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); /* ends the current frame and starts a new one */
  74. ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); /* ends the current frame and writes the seek table; a return value > 0 means data is still unflushed: call again until 0 is returned */
  75. /*= Raw seek table API
  76. * These functions allow for the seek table to be constructed directly.
  77. * This table can then be appended to a file of concatenated frames.
  78. * This allows the frames to be compressed independently, even in parallel,
  79. * and compiled together afterward into a seekable archive.
  80. *
  81. * Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking
  82. * structure.
  83. *
  84. * Call ZSTD_seekable_logFrame() once for each frame in the archive.
  85. * checksum is optional, and will not be used if checksumFlag was 0 when the
  86. * frame log was created. If present, it should be the least significant 32
  87. * bits of the XXH64 hash of the uncompressed data.
  88. *
  89. * Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table.
  90. * If the entire table was written, the return value will be 0. Otherwise,
  91. * it will be equal to the number of bytes left to write. */
  92. typedef struct ZSTD_frameLog_s ZSTD_frameLog; /* tracking structure used to build a seek table directly; struct body not visible in this header */
  93. ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag); /* allocates and initializes a frame log; with checksumFlag == 0, logged checksums are not used */
  94. ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl); /* NOTE(review): presumably releases `fl`; not described in this header -- confirm */
  95. ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum); /* call once per frame in the archive; `checksum`, when used, is the least significant 32 bits of the XXH64 hash of the uncompressed data */
  96. ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output); /* serializes the seek table; returns 0 if the entire table was written, otherwise the number of bytes left to write */
  97. /*-****************************************************************************
  98. * Seekable decompression - HowTo
  99. * A ZSTD_seekable object is required to track the seekTable.
  100. *
  101. * Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the
  102. * seek table provided in the input.
  103. * There are three modes for ZSTD_seekable_init:
  104. * - ZSTD_seekable_initBuff() : An in-memory API. The data contained in
  105. * `src` should be the entire seekable file, including the seek table.
  106. * `src` should be kept alive and unmodified until the ZSTD_seekable object
  107. * is freed or reset.
  108. * - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and
  109. * fseek will be used to access the required data for building the seek
  110. * table and doing decompression operations. `src` should not be closed
  111. * or modified until the ZSTD_seekable object is freed or reset.
  112. * - ZSTD_seekable_initAdvanced() : A general API allowing the client to
  113. * provide its own read and seek callbacks.
  114. * + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
  115. * Premature EOF should be treated as an error.
  116. * + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
  117. * where origin is either SEEK_SET (beginning of
  118. * file), or SEEK_END (end of file).
  119. * Both functions should return a non-negative value in case of success, and a
  120. * negative value in case of failure. If implementing using this API and
  121. * stdio, be careful with files larger than 4GB and fseek. All of these
  122. * functions return an error code checkable with ZSTD_isError().
  123. *
  124. * Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
  125. * offset `offset`. ZSTD_seekable_decompress may have to decompress the entire
  126. * prefix of the frame before the desired data if it has not already processed
  127. * this section. If ZSTD_seekable_decompress is called multiple times for a
  128. * consecutive range of data, it will efficiently retain the decompressor object
  129. * and avoid redecompressing frame prefixes. The return value is the number of
  130. * bytes decompressed, or an error code checkable with ZSTD_isError().
  131. *
  132. * The seek table access functions can be used to obtain the data contained
  133. * in the seek table. If frameIndex is larger than the value returned by
  134. * ZSTD_seekable_getNumFrames(), they will return error codes checkable with
  135. * ZSTD_isError(). Note that since the offset access functions return
  136. * unsigned long long instead of size_t, in this case they will instead return
  137. * the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
  138. ******************************************************************************/
  139. /*===== Seekable decompressor management =====*/
  140. ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); /* NOTE(review): presumably allocates a decompressor object; not described in this header -- confirm */
  141. ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); /* NOTE(review): presumably releases `zs`; not described in this header -- confirm */
  142. /*===== Seekable decompression functions =====*/
  143. ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); /* in-memory mode: `src` is the entire seekable file including the seek table, and must stay alive and unmodified until `zs` is freed or reset */
  144. ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); /* stdio mode (fread and fseek): `src` must not be closed or modified until `zs` is freed or reset */
  145. ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset); /* decompresses `dstSize` bytes at decompressed offset `offset`; returns the number of bytes decompressed or an error code (ZSTD_isError()) */
  146. ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); /* NOTE(review): not described in the HowTo above; presumably decompresses frame `frameIndex` into `dst` -- confirm */
  147. #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) /* returned by the offset accessors (which return unsigned long long, not size_t) when frameIndex is too large */
  148. /*===== Seekable seek table access functions =====*/
  149. ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);
  150. ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); /* returns ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE when frameIndex is too large */
  151. ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); /* returns ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE when frameIndex is too large */
  152. ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); /* returns an error code checkable with ZSTD_isError() when frameIndex is too large */
  153. ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); /* returns an error code checkable with ZSTD_isError() when frameIndex is too large */
  154. ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset); /* NOTE(review): presumably maps a decompressed offset to the index of the frame containing it -- confirm */
  155. /*-****************************************************************************
  156. * Direct exploitation of the seekTable
  157. *
  158. * Memory constrained use cases that manage multiple archives
  159. * benefit from retaining multiple archive seek tables
  160. * without retaining a ZSTD_seekable instance for each.
  161. *
  162. * The API below allows the above-mentioned use cases
  163. * to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
  164. * then throw the ZSTD_seekable away to save memory.
  165. *
  166. * Standard ZSTD operations can then be used
  167. * to decompress frames based on seek table offsets.
  168. ******************************************************************************/
  169. /*===== Independent seek table management =====*/
  170. ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs); /* extracts the (smaller) seek table of `zs`, so `zs` can then be thrown away to save memory */
  171. ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); /* NOTE(review): presumably releases `st`; not described in this header -- confirm */
  172. /*===== Direct seek table access functions =====*/
  173. ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st); /* NOTE(review): this accessor and the five below presumably mirror their ZSTD_seekable_* counterparts, including error returns -- confirm */
  174. ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
  175. ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
  176. ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
  177. ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
  178. ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);
  179. /*===== Seekable advanced I/O API =====*/
  180. typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); /* read exactly `n` bytes into `buffer`; premature EOF is an error; return >= 0 on success, < 0 on failure */
  181. typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); /* seek the read head to `offset` from `origin` (SEEK_SET or SEEK_END); return >= 0 on success, < 0 on failure */
  182. typedef struct { /* client-provided I/O source for ZSTD_seekable_initAdvanced() */
  183. void* opaque; /* NOTE(review): presumably handed back as the first argument of `read` and `seek` -- confirm */
  184. ZSTD_seekable_read* read; /* read callback */
  185. ZSTD_seekable_seek* seek; /* seek callback */
  186. } ZSTD_seekable_customFile;
  187. ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src); /* general mode: `src` carries the client's own read and seek callbacks; returns an error code checkable with ZSTD_isError() */
  188. #if defined (__cplusplus)
  189. }
  190. #endif
  191. #endif