Lzma2Dec.c 13 KB


  1. /* Lzma2Dec.c -- LZMA2 Decoder
  2. 2024-03-01 : Igor Pavlov : Public domain */
  3. /* #define SHOW_DEBUG_INFO */
  4. #include "Precomp.h"
  5. #ifdef SHOW_DEBUG_INFO
  6. #include <stdio.h>
  7. #endif
  8. #include <string.h>
  9. #include "Lzma2Dec.h"
  10. /*
  11. 00000000 - End of data
  12. 00000001 U U - Uncompressed, reset dic, need reset state and set new prop
  13. 00000010 U U - Uncompressed, no reset
  14. 100uuuuu U U P P - LZMA, no reset
  15. 101uuuuu U U P P - LZMA, reset state
  16. 110uuuuu U U P P S - LZMA, reset state + set new prop
  17. 111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic
  18. u, U - Unpack Size
  19. P - Pack Size
  20. S - Props
  21. */
  /* Control byte of an uncompressed chunk that also resets the dictionary. */
  #define LZMA2_CONTROL_COPY_RESET_DIC 1

  /* Non-zero when bit 7 of the stored control byte is clear,
     i.e. the current chunk is not LZMA-coded. */
  #define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & 0x80) == 0)

  /* Largest value of (lc + lp) accepted in a chunk's property byte. */
  #define LZMA2_LCLP_MAX 4

  /* Dictionary size encoded by property byte p: (2 or 3) << (p / 2 + 11). */
  #define LZMA2_DIC_SIZE_FROM_PROP(p) ((UInt32)(2 | ((p) & 1)) << (11 + (p) / 2))

  /* PRF(x) keeps x only when SHOW_DEBUG_INFO is defined; otherwise it expands to nothing. */
  #ifndef SHOW_DEBUG_INFO
  #define PRF(x)
  #else
  #define PRF(x) x
  #endif
  /*
    States of the LZMA2 chunk parser.
    Every state before LZMA2_STATE_DATA consumes exactly one header byte
    in Lzma2Dec_UpdateState().
  */
  typedef enum
  {
    LZMA2_STATE_CONTROL = 0,  /* next byte is a chunk control byte */
    LZMA2_STATE_UNPACK0,      /* next byte holds bits 8..15 of (unpack size - 1) */
    LZMA2_STATE_UNPACK1,      /* next byte holds bits 0..7 of (unpack size - 1) */
    LZMA2_STATE_PACK0,        /* next byte holds bits 8..15 of (pack size - 1) */
    LZMA2_STATE_PACK1,        /* next byte holds bits 0..7 of (pack size - 1) */
    LZMA2_STATE_PROP,         /* next byte is the lc/lp/pb property byte */
    LZMA2_STATE_DATA,         /* chunk header complete, no payload handled yet */
    LZMA2_STATE_DATA_CONT,    /* inside the payload of the current chunk */
    LZMA2_STATE_FINISHED,     /* control byte 0 was read: end of data */
    LZMA2_STATE_ERROR         /* invalid input was detected */
  } ELzma2State;
  44. static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
  45. {
  46. UInt32 dicSize;
  47. if (prop > 40)
  48. return SZ_ERROR_UNSUPPORTED;
  49. dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop);
  50. props[0] = (Byte)LZMA2_LCLP_MAX;
  51. props[1] = (Byte)(dicSize);
  52. props[2] = (Byte)(dicSize >> 8);
  53. props[3] = (Byte)(dicSize >> 16);
  54. props[4] = (Byte)(dicSize >> 24);
  55. return SZ_OK;
  56. }
  57. SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
  58. {
  59. Byte props[LZMA_PROPS_SIZE];
  60. RINOK(Lzma2Dec_GetOldProps(prop, props))
  61. return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
  62. }
  63. SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
  64. {
  65. Byte props[LZMA_PROPS_SIZE];
  66. RINOK(Lzma2Dec_GetOldProps(prop, props))
  67. return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
  68. }
  69. void Lzma2Dec_Init(CLzma2Dec *p)
  70. {
  71. p->state = LZMA2_STATE_CONTROL;
  72. p->needInitLevel = 0xE0;
  73. p->isExtraMode = False;
  74. p->unpackSize = 0;
  75. // p->decoder.dicPos = 0; // we can use it instead of full init
  76. LzmaDec_Init(&p->decoder);
  77. }
  78. // ELzma2State
  79. static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
  80. {
  81. switch (p->state)
  82. {
  83. case LZMA2_STATE_CONTROL:
  84. p->isExtraMode = False;
  85. p->control = b;
  86. PRF(printf("\n %8X", (unsigned)p->decoder.dicPos));
  87. PRF(printf(" %02X", (unsigned)b));
  88. if (b == 0)
  89. return LZMA2_STATE_FINISHED;
  90. if (LZMA2_IS_UNCOMPRESSED_STATE(p))
  91. {
  92. if (b == LZMA2_CONTROL_COPY_RESET_DIC)
  93. p->needInitLevel = 0xC0;
  94. else if (b > 2 || p->needInitLevel == 0xE0)
  95. return LZMA2_STATE_ERROR;
  96. }
  97. else
  98. {
  99. if (b < p->needInitLevel)
  100. return LZMA2_STATE_ERROR;
  101. p->needInitLevel = 0;
  102. p->unpackSize = (UInt32)(b & 0x1F) << 16;
  103. }
  104. return LZMA2_STATE_UNPACK0;
  105. case LZMA2_STATE_UNPACK0:
  106. p->unpackSize |= (UInt32)b << 8;
  107. return LZMA2_STATE_UNPACK1;
  108. case LZMA2_STATE_UNPACK1:
  109. p->unpackSize |= (UInt32)b;
  110. p->unpackSize++;
  111. PRF(printf(" %7u", (unsigned)p->unpackSize));
  112. return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
  113. case LZMA2_STATE_PACK0:
  114. p->packSize = (UInt32)b << 8;
  115. return LZMA2_STATE_PACK1;
  116. case LZMA2_STATE_PACK1:
  117. p->packSize |= (UInt32)b;
  118. p->packSize++;
  119. // if (p->packSize < 5) return LZMA2_STATE_ERROR;
  120. PRF(printf(" %5u", (unsigned)p->packSize));
  121. return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA;
  122. case LZMA2_STATE_PROP:
  123. {
  124. unsigned lc, lp;
  125. if (b >= (9 * 5 * 5))
  126. return LZMA2_STATE_ERROR;
  127. lc = b % 9;
  128. b /= 9;
  129. p->decoder.prop.pb = (Byte)(b / 5);
  130. lp = b % 5;
  131. if (lc + lp > LZMA2_LCLP_MAX)
  132. return LZMA2_STATE_ERROR;
  133. p->decoder.prop.lc = (Byte)lc;
  134. p->decoder.prop.lp = (Byte)lp;
  135. return LZMA2_STATE_DATA;
  136. }
  137. default:
  138. return LZMA2_STATE_ERROR;
  139. }
  140. }
  141. static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
  142. {
  143. memcpy(p->dic + p->dicPos, src, size);
  144. p->dicPos += size;
  145. if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size)
  146. p->checkDicSize = p->prop.dicSize;
  147. p->processedPos += (UInt32)size;
  148. }
  149. void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
  150. SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
  151. const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
  152. {
  153. SizeT inSize = *srcLen;
  154. *srcLen = 0;
  155. *status = LZMA_STATUS_NOT_SPECIFIED;
  156. while (p->state != LZMA2_STATE_ERROR)
  157. {
  158. SizeT dicPos;
  159. if (p->state == LZMA2_STATE_FINISHED)
  160. {
  161. *status = LZMA_STATUS_FINISHED_WITH_MARK;
  162. return SZ_OK;
  163. }
  164. dicPos = p->decoder.dicPos;
  165. if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
  166. {
  167. *status = LZMA_STATUS_NOT_FINISHED;
  168. return SZ_OK;
  169. }
  170. if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
  171. {
  172. if (*srcLen == inSize)
  173. {
  174. *status = LZMA_STATUS_NEEDS_MORE_INPUT;
  175. return SZ_OK;
  176. }
  177. (*srcLen)++;
  178. p->state = Lzma2Dec_UpdateState(p, *src++);
  179. if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
  180. break;
  181. continue;
  182. }
  183. {
  184. SizeT inCur = inSize - *srcLen;
  185. SizeT outCur = dicLimit - dicPos;
  186. ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
  187. if (outCur >= p->unpackSize)
  188. {
  189. outCur = (SizeT)p->unpackSize;
  190. curFinishMode = LZMA_FINISH_END;
  191. }
  192. if (LZMA2_IS_UNCOMPRESSED_STATE(p))
  193. {
  194. if (inCur == 0)
  195. {
  196. *status = LZMA_STATUS_NEEDS_MORE_INPUT;
  197. return SZ_OK;
  198. }
  199. if (p->state == LZMA2_STATE_DATA)
  200. {
  201. BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
  202. LzmaDec_InitDicAndState(&p->decoder, initDic, False);
  203. }
  204. if (inCur > outCur)
  205. inCur = outCur;
  206. if (inCur == 0)
  207. break;
  208. LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur);
  209. src += inCur;
  210. *srcLen += inCur;
  211. p->unpackSize -= (UInt32)inCur;
  212. p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
  213. }
  214. else
  215. {
  216. SRes res;
  217. if (p->state == LZMA2_STATE_DATA)
  218. {
  219. BoolInt initDic = (p->control >= 0xE0);
  220. BoolInt initState = (p->control >= 0xA0);
  221. LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
  222. p->state = LZMA2_STATE_DATA_CONT;
  223. }
  224. if (inCur > p->packSize)
  225. inCur = (SizeT)p->packSize;
  226. res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status);
  227. src += inCur;
  228. *srcLen += inCur;
  229. p->packSize -= (UInt32)inCur;
  230. outCur = p->decoder.dicPos - dicPos;
  231. p->unpackSize -= (UInt32)outCur;
  232. if (res != 0)
  233. break;
  234. if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
  235. {
  236. if (p->packSize == 0)
  237. break;
  238. return SZ_OK;
  239. }
  240. if (inCur == 0 && outCur == 0)
  241. {
  242. if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
  243. || p->unpackSize != 0
  244. || p->packSize != 0)
  245. break;
  246. p->state = LZMA2_STATE_CONTROL;
  247. }
  248. *status = LZMA_STATUS_NOT_SPECIFIED;
  249. }
  250. }
  251. }
  252. *status = LZMA_STATUS_NOT_SPECIFIED;
  253. p->state = LZMA2_STATE_ERROR;
  254. return SZ_ERROR_DATA;
  255. }
  256. ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
  257. SizeT outSize,
  258. const Byte *src, SizeT *srcLen,
  259. int checkFinishBlock)
  260. {
  261. SizeT inSize = *srcLen;
  262. *srcLen = 0;
  263. while (p->state != LZMA2_STATE_ERROR)
  264. {
  265. if (p->state == LZMA2_STATE_FINISHED)
  266. return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
  267. if (outSize == 0 && !checkFinishBlock)
  268. return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
  269. if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
  270. {
  271. if (*srcLen == inSize)
  272. return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
  273. (*srcLen)++;
  274. p->state = Lzma2Dec_UpdateState(p, *src++);
  275. if (p->state == LZMA2_STATE_UNPACK0)
  276. {
  277. // if (p->decoder.dicPos != 0)
  278. if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0)
  279. return LZMA2_PARSE_STATUS_NEW_BLOCK;
  280. // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED;
  281. }
  282. // The following code can be commented.
  283. // It's not big problem, if we read additional input bytes.
  284. // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state.
  285. if (outSize == 0 && p->state != LZMA2_STATE_FINISHED)
  286. {
  287. // checkFinishBlock is true. So we expect that block must be finished,
  288. // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
  289. // break;
  290. return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
  291. }
  292. if (p->state == LZMA2_STATE_DATA)
  293. return LZMA2_PARSE_STATUS_NEW_CHUNK;
  294. continue;
  295. }
  296. if (outSize == 0)
  297. return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
  298. {
  299. SizeT inCur = inSize - *srcLen;
  300. if (LZMA2_IS_UNCOMPRESSED_STATE(p))
  301. {
  302. if (inCur == 0)
  303. return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
  304. if (inCur > p->unpackSize)
  305. inCur = p->unpackSize;
  306. if (inCur > outSize)
  307. inCur = outSize;
  308. p->decoder.dicPos += inCur;
  309. src += inCur;
  310. *srcLen += inCur;
  311. outSize -= inCur;
  312. p->unpackSize -= (UInt32)inCur;
  313. p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
  314. }
  315. else
  316. {
  317. p->isExtraMode = True;
  318. if (inCur == 0)
  319. {
  320. if (p->packSize != 0)
  321. return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
  322. }
  323. else if (p->state == LZMA2_STATE_DATA)
  324. {
  325. p->state = LZMA2_STATE_DATA_CONT;
  326. if (*src != 0)
  327. {
  328. // first byte of lzma chunk must be Zero
  329. *srcLen += 1;
  330. p->packSize--;
  331. break;
  332. }
  333. }
  334. if (inCur > p->packSize)
  335. inCur = (SizeT)p->packSize;
  336. src += inCur;
  337. *srcLen += inCur;
  338. p->packSize -= (UInt32)inCur;
  339. if (p->packSize == 0)
  340. {
  341. SizeT rem = outSize;
  342. if (rem > p->unpackSize)
  343. rem = p->unpackSize;
  344. p->decoder.dicPos += rem;
  345. p->unpackSize -= (UInt32)rem;
  346. outSize -= rem;
  347. if (p->unpackSize == 0)
  348. p->state = LZMA2_STATE_CONTROL;
  349. }
  350. }
  351. }
  352. }
  353. p->state = LZMA2_STATE_ERROR;
  354. return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
  355. }
  356. SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
  357. {
  358. SizeT outSize = *destLen, inSize = *srcLen;
  359. *srcLen = *destLen = 0;
  360. for (;;)
  361. {
  362. SizeT inCur = inSize, outCur, dicPos;
  363. ELzmaFinishMode curFinishMode;
  364. SRes res;
  365. if (p->decoder.dicPos == p->decoder.dicBufSize)
  366. p->decoder.dicPos = 0;
  367. dicPos = p->decoder.dicPos;
  368. curFinishMode = LZMA_FINISH_ANY;
  369. outCur = p->decoder.dicBufSize - dicPos;
  370. if (outCur >= outSize)
  371. {
  372. outCur = outSize;
  373. curFinishMode = finishMode;
  374. }
  375. res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status);
  376. src += inCur;
  377. inSize -= inCur;
  378. *srcLen += inCur;
  379. outCur = p->decoder.dicPos - dicPos;
  380. memcpy(dest, p->decoder.dic + dicPos, outCur);
  381. dest += outCur;
  382. outSize -= outCur;
  383. *destLen += outCur;
  384. if (res != 0)
  385. return res;
  386. if (outCur == 0 || outSize == 0)
  387. return SZ_OK;
  388. }
  389. }
  390. SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
  391. Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc)
  392. {
  393. CLzma2Dec p;
  394. SRes res;
  395. SizeT outSize = *destLen, inSize = *srcLen;
  396. *destLen = *srcLen = 0;
  397. *status = LZMA_STATUS_NOT_SPECIFIED;
  398. Lzma2Dec_CONSTRUCT(&p)
  399. RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc))
  400. p.decoder.dic = dest;
  401. p.decoder.dicBufSize = outSize;
  402. Lzma2Dec_Init(&p);
  403. *srcLen = inSize;
  404. res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
  405. *destLen = p.decoder.dicPos;
  406. if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
  407. res = SZ_ERROR_INPUT_EOF;
  408. Lzma2Dec_FreeProbs(&p, alloc);
  409. return res;
  410. }
  411. #undef PRF