bzip2stream.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. {$mode objfpc}
  2. {$h+}
  3. unit bzip2stream;
  4. {****************************************************************************
  5. BZIP2 decompression unit
  6. Copyright (C) 2002 by Daniel Mantione
  7. Class port (C) 2009 by Michael Van Canneyt
  8. This unit provides a decompression stream to decode .bz2 files. It is
  9. inpired by Julian R. Seward's libbzip2 library and therefore you should
  10. send credits to him and bug reports to me :)
  11. This code is licensed under the same terms as the original libbz2 library,
  12. which is decsribed in the file LICENSE. If you don't have this file, look
  13. at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
  14. be included. In case of problems, contact the author.
  15. E-mail addresses:
  16. Michael Van Canneyt <[email protected]>
  17. Daniel Mantione <[email protected]>
  18. Julian R. Seward <[email protected]>
  19. Please do not contact Julian about this Pascal library, he didn't wrote it.
  20. ****************************************************************************}
  21. interface
  22. {$goto on}
  23. uses Classes,SysUtils, bzip2comn;
  24. Type
  25. TDecompressBzip2Stream=Class(TOwnerStream)
  26. Private
  27. block_randomized:boolean;
  28. blocksize:byte;
  29. tt:Pcardinal_array;
  30. tt_count:cardinal;
  31. rle_run_left,rle_run_data:byte;
  32. nextrle:Pbyte;
  33. decode_available:cardinal;
  34. block_origin:cardinal;
  35. current_block:cardinal;
  36. read_data,bits_available:byte;
  37. inuse16:set of 0..15;
  38. inuse:set of 0..255;
  39. inuse_count:cardinal;
  40. seq_to_unseq:array[0..255] of byte;
  41. alphasize:cardinal;
  42. group_count,group_pos,gsel,gminlen:byte;
  43. group_no:cardinal;
  44. glimit,gperm,gbase:Phuffarray;
  45. selector_count:cardinal;
  46. selector,selector_mtf:array[0..max_selectors] of byte;
  47. len:array[0..max_groups,0..max_alpha_size] of byte;
  48. limit:array[0..max_groups,0..max_alpha_size] of cardinal;
  49. base:array[0..max_groups,0..max_alpha_size] of cardinal;
  50. perm:array[0..max_groups,0..max_alpha_size] of cardinal;
  51. minlens:array[0..max_groups] of byte;
  52. cftab:array[0..257] of cardinal;
  53. mtfbase:array[0..256 div mtfl_size-1] of cardinal;
  54. mtfa:array[0..mtfa_size-1] of byte;
  55. function get_bits(n:byte):byte;
  56. function get_boolean:boolean;
  57. function get_byte:byte;
  58. function get_cardinal24:cardinal;
  59. function get_cardinal:cardinal;
  60. procedure receive_mapping_table;
  61. procedure receive_selectors;
  62. procedure undo_mtf_values;
  63. procedure receive_coding_tables;
  64. procedure make_hufftab;
  65. procedure init_mtf;
  66. function get_mtf_value:cardinal;
  67. procedure move_mtf_block;
  68. procedure receive_mtf_values;
  69. procedure detransform;
  70. function decode_block : boolean;
  71. Function new_block : boolean;
  72. Function consume_rle : Boolean; inline;
  73. Function rle_read(bufptr:Pbyte;count:Longint) : longint;
  74. Procedure Error(Msg : String; ACode : Integer);
  75. Public
  76. Constructor Create(ASource : TStream);
  77. Destructor Destroy; override;
  78. function Read(var Buffer; Count: Longint): Longint; override;
  79. end;
  80. EBzip2 = Class(Exception)
  81. ErrCode : Integer;
  82. end;
  83. implementation
  84. {$ifdef cpui386}
  85. {$i bzip2si386.inc}
  86. {$endif}
  87. {*****************************************************************************
  88. TDecompressBzip2Stream
  89. *****************************************************************************}
  90. Resourcestring
  91. BZip2Initialize = 'Invalid BZip2 stream: invalid header';
  92. SDecodingError = 'Decoding error';
  93. SErrUnimplemented = 'Feature not implemented';
  94. Constructor TDecompressBzip2Stream.Create(ASource: TStream);
  95. var magic:array[1..3] of char;
  96. c:char;
  97. begin
  98. Inherited Create(ASource);
  99. {Read the magic.}
  100. Source.ReadBuffer(magic,sizeof(magic));
  101. if magic<>bzip2_stream_magic then
  102. Error(BZip2Initialize,bzip2_bad_header_magic);
  103. {Read the block size and allocate the working array.}
  104. Source.ReadBuffer(c,1);
  105. blocksize:=byte(c)-byte('0');
  106. GetMem(tt,blocksize*100000*sizeof(cardinal));
  107. decode_available:=high(decode_available);
  108. end;
  109. Procedure TDecompressBzip2Stream.Error(Msg : String; ACode : Integer);
  110. Var
  111. BE : EBzip2;
  112. begin
  113. BE:=EBzip2.Create(Msg);
  114. BE.ErrCode:=ACode;
  115. Raise BE;
  116. end;
  117. function TDecompressBzip2Stream.get_bits(n:byte):byte;
  118. var data:byte;
  119. begin
  120. if n>bits_available then
  121. begin
  122. Source.ReadBuffer(data,1);
  123. get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
  124. read_data:=data shl (n-bits_available);
  125. inc(bits_available,8);
  126. end
  127. else
  128. begin
  129. get_bits:=read_data shr (8-n);
  130. read_data:=read_data shl n;
  131. end;
  132. dec(bits_available,n);
  133. end;
  134. function TDecompressBzip2Stream.get_boolean:boolean;
  135. begin
  136. get_boolean:=boolean(get_bits(1));
  137. end;
  138. function TDecompressBzip2Stream.get_byte:byte;
  139. begin
  140. get_byte:=get_bits(8);
  141. end;
  142. function TDecompressBzip2Stream.get_cardinal24:cardinal;
  143. begin
  144. get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
  145. end;
  146. function TDecompressBzip2Stream.get_cardinal:cardinal;
  147. begin
  148. get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
  149. get_bits(8);
  150. end;
  151. procedure TDecompressBzip2Stream.receive_mapping_table;
  152. {Receive the mapping table. To save space, the inuse set is stored in pieces
  153. of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
  154. the pieces follow.}
  155. var i,j:byte;
  156. begin
  157. inuse16:=[];
  158. {Receive the first 16 bits which tell which pieces are stored.}
  159. for i:=0 to 15 do
  160. if get_boolean then
  161. include(inuse16,i);
  162. {Receive the used pieces.}
  163. inuse:=[];
  164. inuse_count:=0;
  165. for i:=0 to 15 do
  166. if i in inuse16 then
  167. for j:=0 to 15 do
  168. if get_boolean then
  169. begin
  170. include(inuse,16*i+j);
  171. seq_to_unseq[inuse_count]:=16*i+j;
  172. inc(inuse_count);
  173. end;
  174. { system.write('Mapping table: ');
  175. for i:=0 to 255 do
  176. if i in inuse then
  177. system.write(i,' ');
  178. writeln;}
  179. end;
  180. procedure TDecompressBzip2Stream.receive_selectors;
  181. {Receives the selectors.}
  182. var i:cardinal;
  183. j:byte;
  184. begin
  185. group_count:=get_bits(3);
  186. selector_count:=get_bits(8) shl 7 or get_bits(7);
  187. for i:=0 to selector_count-1 do
  188. begin
  189. j:=0;
  190. while get_boolean do
  191. begin
  192. inc(j);
  193. if j>5 then
  194. error(SDecodingError,bzip2_data_error);
  195. end;
  196. selector_mtf[i]:=j;
  197. end;
  198. { system.write('Selector_mtf: ');
  199. for i:=0 to selector_count-1 do
  200. system.write(selector_mtf[i],' ');
  201. writeln;}
  202. end;
  203. procedure TDecompressBzip2Stream.undo_mtf_values;
  204. {Undo the MTF values for the selectors.}
  205. var pos:array[0..max_groups] of byte;
  206. i:cardinal;
  207. v,tmp:byte;
  208. begin
  209. for v:=0 to group_count-1 do
  210. pos[v]:=v;
  211. for i:=0 to selector_count-1 do
  212. begin
  213. v:=selector_mtf[i];
  214. tmp:=pos[v];
  215. while v<>0 do
  216. begin
  217. pos[v]:=pos[v-1];
  218. dec(v);
  219. end;
  220. pos[0]:=tmp;
  221. selector[i]:=tmp;
  222. end;
  223. end;
  224. procedure TDecompressBzip2Stream.receive_coding_tables;
  225. var t,curr:byte;
  226. i:cardinal;
  227. begin
  228. for t:=0 to group_count-1 do
  229. begin
  230. curr:=get_bits(5);
  231. for i:=0 to alphasize-1 do
  232. begin
  233. repeat
  234. if not(curr in [1..20]) then
  235. error(SDecodingError,bzip2_data_error);
  236. if not get_boolean then
  237. break;
  238. if get_boolean then
  239. dec(curr)
  240. else
  241. inc(curr);
  242. until false;
  243. len[t,i]:=curr;
  244. end;
  245. end;
  246. end;
  247. procedure TDecompressBzip2Stream.make_hufftab;
  248. {Builds the Huffman tables.}
  249. var i:cardinal;
  250. t,minlen,maxlen:byte;
  251. begin
  252. for t:=0 to group_count-1 do
  253. begin
  254. minlen:=32;
  255. maxlen:=0;
  256. for i:=0 to alphasize-1 do
  257. begin
  258. if len[t,i]>maxlen then
  259. maxlen:=len[t,i];
  260. if len[t,i]<minlen then
  261. minlen:=len[t,i];
  262. end;
  263. hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
  264. minlen,maxlen,alphasize);
  265. minlens[t]:=minlen;
  266. end;
  267. end;
  268. procedure TDecompressBzip2Stream.init_mtf;
  269. var i,j:byte;
  270. k:cardinal;
  271. begin
  272. k:=mtfa_size-1;
  273. for i:=256 div mtfl_size-1 downto 0 do
  274. begin
  275. for j:=mtfl_size-1 downto 0 do
  276. begin
  277. mtfa[k]:=i*mtfl_size+j;
  278. dec(k);
  279. end;
  280. mtfbase[i]:=k+1;
  281. end;
  282. end;
  283. function TDecompressBzip2Stream.get_mtf_value:cardinal;
  284. var zn:byte;
  285. zvec:cardinal;
  286. begin
  287. if group_pos=0 then
  288. begin
  289. inc(group_no);
  290. group_pos:=group_size;
  291. gsel:=selector[group_no];
  292. gminlen:=minlens[gsel];
  293. glimit:=@limit[gsel];
  294. gperm:=@perm[gsel];
  295. gbase:=@base[gsel];
  296. end;
  297. dec(group_pos);
  298. zn:=gminlen;
  299. zvec:=get_bits(zn);
  300. while zvec>glimit^[zn] do
  301. begin
  302. inc(zn);
  303. zvec:=zvec shl 1 or byte(get_boolean);
  304. end;
  305. get_mtf_value:=gperm^[zvec-gbase^[zn]];
  306. end;
  307. procedure TDecompressBzip2Stream.move_mtf_block;
  308. var i:byte;
  309. j,k:cardinal;
  310. begin
  311. k:=MTFA_SIZE;
  312. for i:=256 div MTFL_SIZE-1 downto 0 do
  313. begin
  314. j:=mtfbase[i];
  315. Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
  316. Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
  317. Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
  318. dec(k,16);
  319. Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
  320. mtfbase[i]:=k;
  321. end;
  322. end;
  323. procedure TDecompressBzip2Stream.receive_mtf_values;
  324. const run_a=0;
  325. run_b=1;
  326. var t,next_sym:cardinal;
  327. es:cardinal;
  328. n:byte;
  329. nn,i:cardinal;
  330. p,q:Pbyte;
  331. u,v:Pcardinal;
  332. lno,off:cardinal;
  333. begin
  334. group_no:=high(group_no);
  335. group_pos:=0;
  336. t:=0;
  337. for i:=0 to 257 do
  338. cftab[i]:=0;
  339. init_mtf;
  340. next_sym:=get_mtf_value;
  341. while next_sym<>inuse_count+1 do
  342. begin
  343. { writeln(t,' ',next_sym);
  344. if t=22296 then
  345. t:=t; }
  346. if next_sym<=run_b then
  347. begin
  348. es:=0;
  349. n:=0;
  350. repeat
  351. inc(es,(next_sym+1) shl n);
  352. inc(n);
  353. next_sym:=get_mtf_value;
  354. until next_sym>run_b;
  355. n:=seq_to_unseq[mtfa[mtfbase[0]]];
  356. inc(cftab[n],es);
  357. if t+es>100000*blocksize then
  358. error(SDecodingError,bzip2_data_error);
  359. while es>0 do
  360. begin
  361. tt^[t]:=n;
  362. dec(es);
  363. inc(t);
  364. end;
  365. end
  366. else
  367. begin
  368. nn:=next_sym-1;
  369. if nn<mtfl_size then
  370. begin
  371. {Avoid the costs of the general case.}
  372. p:=@mtfa[mtfbase[0]];
  373. q:=p+nn;
  374. n:=q^;
  375. repeat
  376. q^:=(q-1)^;
  377. dec(q);
  378. until q=p;
  379. q^:=n;
  380. end
  381. else
  382. begin
  383. {General case.}
  384. lno:=nn div MTFL_SIZE;
  385. off:=nn and (MTFL_SIZE-1);
  386. p:=@mtfa[mtfbase[lno]];
  387. q:=p+off;
  388. n:=q^;
  389. while(q<>p) do
  390. begin
  391. q^:=(q-1)^;
  392. dec(q);
  393. end;
  394. u:=@mtfbase;
  395. v:=u+lno;
  396. repeat
  397. mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
  398. dec(v);
  399. dec(v^);
  400. until v=u;
  401. mtfa[v^]:=n;
  402. if v^=0 then
  403. move_mtf_block;
  404. end;
  405. inc(cftab[seq_to_unseq[n]]);
  406. tt^[t]:=cardinal(seq_to_unseq[n]);
  407. inc(t);
  408. if t>100000*blocksize then
  409. error(SDecodingError,bzip2_data_error);
  410. next_sym:=get_mtf_value;
  411. end;
  412. end;
  413. tt_count:=t;
  414. {Setup cftab to facilitate generation of T^(-1).}
  415. t:=0;
  416. for i:=0 to 256 do
  417. begin
  418. nn:=cftab[i];
  419. cftab[i]:=t;
  420. { writeln(i,' ',t);}
  421. inc(t,nn);
  422. end;
  423. end;
  424. {$ifndef HAVE_DETRANSFORM}
  425. procedure TDecompressBzip2Stream.detransform;
  426. var a:cardinal;
  427. p,q,r:Pcardinal;
  428. begin
  429. a:=0;
  430. p:=@tt^[0];
  431. q:=p+tt_count;
  432. while p<>q do
  433. begin
  434. r:=@tt^[cftab[p^ and $ff]];
  435. inc(cftab[p^ and $ff]);
  436. r^:=r^ or a;
  437. inc(a,256);
  438. inc(p);
  439. end;
  440. end;
  441. {$endif}
  442. function TDecompressBzip2Stream.decode_block:boolean;
  443. {Decode a new compressed block.}
  444. var magic:array[1..6] of char;
  445. stored_blockcrc:cardinal;
  446. i:byte;
  447. begin
  448. for i:=1 to 6 do
  449. magic[i]:=char(get_byte);
  450. if magic='1AY&SY' then
  451. begin
  452. inc(current_block);
  453. stored_blockcrc:=get_cardinal;
  454. block_randomized:=get_boolean;
  455. block_origin:=get_cardinal24;
  456. {Receive the mapping table.}
  457. receive_mapping_table;
  458. alphasize:=cardinal(inuse_count)+2;
  459. {Receive the selectors. Raises exception}
  460. receive_selectors;
  461. {Undo the MTF values for the selectors.}
  462. undo_mtf_values;
  463. {Receive the coding tables.}
  464. receive_coding_tables;
  465. {Build the Huffman tables.}
  466. make_hufftab;
  467. {Receive the MTF values.}
  468. receive_mtf_values;
  469. {Undo the Burrows Wheeler transformation.}
  470. detransform;
  471. decode_available:=tt_count;
  472. Result:=True;
  473. end
  474. else
  475. begin
  476. if magic<>#$17'rE8P'#$90 then
  477. error(SDecodingError,bzip2_bad_block_magic);
  478. Result:=false;
  479. end;
  480. end;
  481. Function TDecompressBzip2Stream.new_block : Boolean;
  482. begin
  483. Result:=decode_block;
  484. If result then
  485. nextrle:=@tt^[tt^[block_origin] shr 8]
  486. else
  487. nextrle:=nil;
  488. end;
  489. Function TDecompressBzip2Stream.consume_rle : Boolean;inline;
  490. {Make nextrle point to the next decoded byte. If nextrle did point to the last
  491. byte in the current block, decode the next block.}
  492. begin
  493. { Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
  494. nextrle:=@tt^[Pcardinal(nextrle)^ shr 8];
  495. dec(decode_available);
  496. if decode_available=0 then
  497. Result:=new_block
  498. else
  499. Result:=True;
  500. end;
  501. Function TDecompressBzip2Stream.rle_read(bufptr:Pbyte;Count:Longint) : LongInt;
  502. var rle_len:cardinal;
  503. data:byte;
  504. label rle_write;
  505. begin
  506. Result:=0;
  507. rle_len:=rle_run_left;
  508. data:=rle_run_data;
  509. if block_randomized then
  510. {Not yet implemented.}
  511. Error(SErrUnimplemented,-1)
  512. else
  513. begin
  514. if rle_len<>0 then
  515. {Speed is important. Instead of an if statement within the
  516. repeat loop use a goto outside the loop.}
  517. goto rle_write;
  518. repeat
  519. if decode_available=0 then
  520. break;
  521. rle_len:=1;
  522. data:=nextrle^;
  523. if consume_rle and (decode_available>0) and (data=nextrle^) then
  524. begin
  525. inc(rle_len);
  526. if consume_rle and (decode_available>0) and (data=nextrle^) then
  527. begin
  528. inc(rle_len);
  529. if consume_rle and (decode_available>0) and (data=nextrle^) then
  530. begin
  531. if consume_rle then
  532. inc(rle_len,nextrle^+1);
  533. consume_rle;
  534. end;
  535. end;
  536. end;
  537. rle_write:
  538. repeat
  539. bufptr^:=data;
  540. inc(bufptr);
  541. dec(count);
  542. dec(rle_len);
  543. inc(Result);
  544. until (rle_len=0) or (count=0);
  545. until count=0;
  546. end;
  547. rle_run_data:=data;
  548. rle_run_left:=rle_len;
  549. end;
  550. Function TDecompressBzip2Stream.Read(var Buffer; Count : Longint) : LongInt;
  551. var bufptr:Pbyte;
  552. begin
  553. bufptr:=@buffer;
  554. if decode_available=high(decode_available) then
  555. begin
  556. {Initialize the rle process:
  557. - Decode a block
  558. - Initialize pointer.}
  559. if not decode_block then
  560. begin
  561. nextrle:=nil;
  562. error(SDecodingError,bzip2_endoffile);
  563. end;
  564. nextrle:=@tt^[tt^[block_origin] shr 8];
  565. end;
  566. Result:=rle_read(bufptr,count);
  567. end;
  568. Destructor TDecompressBzip2Stream.Destroy;
  569. begin
  570. if tt<>nil then
  571. FreeMem(tt,blocksize*100000*sizeof(cardinal));
  572. Inherited;
  573. end;
  574. end.