bzip2stream.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. {$mode objfpc}
  2. {$h+}
  3. {$IFNDEF FPC_DOTTEDUNITS}
  4. unit bzip2stream;
  5. {$ENDIF FPC_DOTTEDUNITS}
  6. {****************************************************************************
  7. BZIP2 decompression unit
  8. Copyright (C) 2002 by Daniel Mantione
  9. Class port (C) 2009 by Michael Van Canneyt
  10. This unit provides a decompression stream to decode .bz2 files. It is
  11. inpired by Julian R. Seward's libbzip2 library and therefore you should
  12. send credits to him and bug reports to me :)
  13. This code is licensed under the same terms as the original libbz2 library,
  14. which is decsribed in the file LICENSE. If you don't have this file, look
  15. at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
  16. be included. In case of problems, contact the author.
  17. E-mail addresses:
  18. Michael Van Canneyt <[email protected]>
  19. Daniel Mantione <[email protected]>
  20. Julian R. Seward <[email protected]>
  21. Please do not contact Julian about this Pascal library, he didn't wrote it.
  22. ****************************************************************************}
  23. interface
  24. {$goto on}
  25. {$IFDEF FPC_DOTTEDUNITS}
  26. uses System.Classes,System.SysUtils, System.Bzip2comn;
  27. {$ELSE FPC_DOTTEDUNITS}
  28. uses Classes,SysUtils, bzip2comn;
  29. {$ENDIF FPC_DOTTEDUNITS}
  30. Type
  31. TDecompressBzip2Stream=Class(TOwnerStream)
  32. Private
  33. block_randomized:boolean;
  34. blocksize:byte;
  35. tt:Pcardinal_array;
  36. tt_count:cardinal;
  37. rle_run_left,rle_run_data:byte;
  38. nextrle:Pbyte;
  39. decode_available:cardinal;
  40. block_origin:cardinal;
  41. current_block:cardinal;
  42. read_data,bits_available:byte;
  43. inuse16:set of 0..15;
  44. inuse:set of 0..255;
  45. inuse_count:cardinal;
  46. seq_to_unseq:array[0..255] of byte;
  47. alphasize:cardinal;
  48. group_count,group_pos,gsel,gminlen:byte;
  49. group_no:cardinal;
  50. glimit,gperm,gbase:Phuffarray;
  51. selector_count:cardinal;
  52. selector,selector_mtf:array[0..max_selectors] of byte;
  53. len:array[0..max_groups,0..max_alpha_size] of byte;
  54. limit:array[0..max_groups,0..max_alpha_size] of cardinal;
  55. base:array[0..max_groups,0..max_alpha_size] of cardinal;
  56. perm:array[0..max_groups,0..max_alpha_size] of cardinal;
  57. minlens:array[0..max_groups] of byte;
  58. cftab:array[0..257] of cardinal;
  59. mtfbase:array[0..256 div mtfl_size-1] of cardinal;
  60. mtfa:array[0..mtfa_size-1] of byte;
  61. function get_bits(n:byte):byte;
  62. function get_boolean:boolean;
  63. function get_byte:byte;
  64. function get_cardinal24:cardinal;
  65. function get_cardinal:cardinal;
  66. procedure receive_mapping_table;
  67. procedure receive_selectors;
  68. procedure undo_mtf_values;
  69. procedure receive_coding_tables;
  70. procedure make_hufftab;
  71. procedure init_mtf;
  72. function get_mtf_value:cardinal;
  73. procedure move_mtf_block;
  74. procedure receive_mtf_values;
  75. procedure detransform;
  76. function decode_block : boolean;
  77. Function new_block : boolean;
  78. Function consume_rle : Boolean; inline;
  79. Function rle_read(bufptr:Pbyte;count:Longint) : longint;
  80. Procedure Error(Msg : String; ACode : Integer);
  81. Public
  82. Constructor Create(ASource : TStream);
  83. Destructor Destroy; override;
  84. function Read(var Buffer; Count: Longint): Longint; override;
  85. end;
  86. EBzip2 = Class(Exception)
  87. ErrCode : Integer;
  88. end;
  89. implementation
  90. {$ifdef cpui386}
  91. {$i bzip2si386.inc}
  92. {$endif}
  93. {*****************************************************************************
  94. TDecompressBzip2Stream
  95. *****************************************************************************}
  96. Resourcestring
  97. BZip2Initialize = 'Invalid BZip2 stream: invalid header';
  98. SDecodingError = 'Decoding error';
  99. SErrUnimplemented = 'Feature not implemented';
  100. Constructor TDecompressBzip2Stream.Create(ASource: TStream);
  101. var magic:array[1..3] of AnsiChar;
  102. c:AnsiChar;
  103. begin
  104. Inherited Create(ASource);
  105. {Read the magic.}
  106. Source.ReadBuffer(magic,sizeof(magic));
  107. if magic<>bzip2_stream_magic then
  108. Error(BZip2Initialize,bzip2_bad_header_magic);
  109. {Read the block size and allocate the working array.}
  110. Source.ReadBuffer(c,1);
  111. blocksize:=byte(c)-byte('0');
  112. GetMem(tt,blocksize*100000*sizeof(cardinal));
  113. decode_available:=high(decode_available);
  114. end;
  115. Procedure TDecompressBzip2Stream.Error(Msg : String; ACode : Integer);
  116. Var
  117. BE : EBzip2;
  118. begin
  119. BE:=EBzip2.Create(Msg);
  120. BE.ErrCode:=ACode;
  121. Raise BE;
  122. end;
  123. function TDecompressBzip2Stream.get_bits(n:byte):byte;
  124. var data:byte;
  125. begin
  126. if n>bits_available then
  127. begin
  128. Source.ReadBuffer(data,1);
  129. get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
  130. read_data:=byte(data shl (n-bits_available));
  131. inc(bits_available,8);
  132. end
  133. else
  134. begin
  135. get_bits:=read_data shr (8-n);
  136. read_data:=byte(read_data shl n);
  137. end;
  138. dec(bits_available,n);
  139. end;
  140. function TDecompressBzip2Stream.get_boolean:boolean;
  141. begin
  142. get_boolean:=boolean(get_bits(1));
  143. end;
  144. function TDecompressBzip2Stream.get_byte:byte;
  145. begin
  146. get_byte:=get_bits(8);
  147. end;
  148. function TDecompressBzip2Stream.get_cardinal24:cardinal;
  149. begin
  150. get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
  151. end;
  152. function TDecompressBzip2Stream.get_cardinal:cardinal;
  153. begin
  154. get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
  155. get_bits(8);
  156. end;
  157. procedure TDecompressBzip2Stream.receive_mapping_table;
  158. {Receive the mapping table. To save space, the inuse set is stored in pieces
  159. of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
  160. the pieces follow.}
  161. var i,j:byte;
  162. begin
  163. inuse16:=[];
  164. {Receive the first 16 bits which tell which pieces are stored.}
  165. for i:=0 to 15 do
  166. if get_boolean then
  167. include(inuse16,i);
  168. {Receive the used pieces.}
  169. inuse:=[];
  170. inuse_count:=0;
  171. for i:=0 to 15 do
  172. if i in inuse16 then
  173. for j:=0 to 15 do
  174. if get_boolean then
  175. begin
  176. include(inuse,16*i+j);
  177. seq_to_unseq[inuse_count]:=16*i+j;
  178. inc(inuse_count);
  179. end;
  180. { system.write('Mapping table: ');
  181. for i:=0 to 255 do
  182. if i in inuse then
  183. system.write(i,' ');
  184. writeln;}
  185. end;
  186. procedure TDecompressBzip2Stream.receive_selectors;
  187. {Receives the selectors.}
  188. var i:cardinal;
  189. j:byte;
  190. begin
  191. group_count:=get_bits(3);
  192. selector_count:=get_bits(8) shl 7 or get_bits(7);
  193. for i:=0 to selector_count-1 do
  194. begin
  195. j:=0;
  196. while get_boolean do
  197. begin
  198. inc(j);
  199. if j>5 then
  200. error(SDecodingError,bzip2_data_error);
  201. end;
  202. selector_mtf[i]:=j;
  203. end;
  204. { system.write('Selector_mtf: ');
  205. for i:=0 to selector_count-1 do
  206. system.write(selector_mtf[i],' ');
  207. writeln;}
  208. end;
  209. procedure TDecompressBzip2Stream.undo_mtf_values;
  210. {Undo the MTF values for the selectors.}
  211. var pos:array[0..max_groups] of byte;
  212. i:cardinal;
  213. v,tmp:byte;
  214. begin
  215. for v:=0 to group_count-1 do
  216. pos[v]:=v;
  217. for i:=0 to selector_count-1 do
  218. begin
  219. v:=selector_mtf[i];
  220. tmp:=pos[v];
  221. while v<>0 do
  222. begin
  223. pos[v]:=pos[v-1];
  224. dec(v);
  225. end;
  226. pos[0]:=tmp;
  227. selector[i]:=tmp;
  228. end;
  229. end;
  230. procedure TDecompressBzip2Stream.receive_coding_tables;
  231. var t,curr:byte;
  232. i:cardinal;
  233. begin
  234. for t:=0 to group_count-1 do
  235. begin
  236. curr:=get_bits(5);
  237. for i:=0 to alphasize-1 do
  238. begin
  239. repeat
  240. if not(curr in [1..20]) then
  241. error(SDecodingError,bzip2_data_error);
  242. if not get_boolean then
  243. break;
  244. if get_boolean then
  245. dec(curr)
  246. else
  247. inc(curr);
  248. until false;
  249. len[t,i]:=curr;
  250. end;
  251. end;
  252. end;
  253. procedure TDecompressBzip2Stream.make_hufftab;
  254. {Builds the Huffman tables.}
  255. var i:cardinal;
  256. t,minlen,maxlen:byte;
  257. begin
  258. for t:=0 to group_count-1 do
  259. begin
  260. minlen:=32;
  261. maxlen:=0;
  262. for i:=0 to alphasize-1 do
  263. begin
  264. if len[t,i]>maxlen then
  265. maxlen:=len[t,i];
  266. if len[t,i]<minlen then
  267. minlen:=len[t,i];
  268. end;
  269. hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
  270. minlen,maxlen,alphasize);
  271. minlens[t]:=minlen;
  272. end;
  273. end;
  274. procedure TDecompressBzip2Stream.init_mtf;
  275. var i,j:byte;
  276. k:cardinal;
  277. begin
  278. k:=mtfa_size-1;
  279. for i:=256 div mtfl_size-1 downto 0 do
  280. begin
  281. for j:=mtfl_size-1 downto 0 do
  282. begin
  283. mtfa[k]:=i*mtfl_size+j;
  284. dec(k);
  285. end;
  286. mtfbase[i]:=k+1;
  287. end;
  288. end;
  289. function TDecompressBzip2Stream.get_mtf_value:cardinal;
  290. var zn:byte;
  291. zvec:cardinal;
  292. begin
  293. if group_pos=0 then
  294. begin
  295. { Code changed to avoid range/overflow check errors
  296. where compiled with -Cr or -Co options. }
  297. if group_no=high(group_no) then
  298. group_no:=0
  299. else
  300. inc(group_no);
  301. group_pos:=group_size;
  302. gsel:=selector[group_no];
  303. gminlen:=minlens[gsel];
  304. glimit:=@limit[gsel];
  305. gperm:=@perm[gsel];
  306. gbase:=@base[gsel];
  307. end;
  308. dec(group_pos);
  309. zn:=gminlen;
  310. zvec:=get_bits(zn);
  311. while zvec>glimit^[zn] do
  312. begin
  313. inc(zn);
  314. zvec:=zvec shl 1 or byte(get_boolean);
  315. end;
  316. get_mtf_value:=gperm^[zvec-gbase^[zn]];
  317. end;
  318. procedure TDecompressBzip2Stream.move_mtf_block;
  319. var i:byte;
  320. j,k:cardinal;
  321. begin
  322. k:=MTFA_SIZE;
  323. for i:=256 div MTFL_SIZE-1 downto 0 do
  324. begin
  325. j:=mtfbase[i];
  326. Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
  327. Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
  328. Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
  329. dec(k,16);
  330. Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
  331. mtfbase[i]:=k;
  332. end;
  333. end;
  334. procedure TDecompressBzip2Stream.receive_mtf_values;
  335. const run_a=0;
  336. run_b=1;
  337. var t,next_sym:cardinal;
  338. es:cardinal;
  339. n:byte;
  340. nn,i:cardinal;
  341. p,q:Pbyte;
  342. u,v:Pcardinal;
  343. lno,off:cardinal;
  344. begin
  345. group_no:=high(group_no);
  346. group_pos:=0;
  347. t:=0;
  348. for i:=0 to 257 do
  349. cftab[i]:=0;
  350. init_mtf;
  351. next_sym:=get_mtf_value;
  352. while next_sym<>inuse_count+1 do
  353. begin
  354. { writeln(t,' ',next_sym);
  355. if t=22296 then
  356. t:=t; }
  357. if next_sym<=run_b then
  358. begin
  359. es:=0;
  360. n:=0;
  361. repeat
  362. inc(es,(next_sym+1) shl n);
  363. inc(n);
  364. next_sym:=get_mtf_value;
  365. until next_sym>run_b;
  366. n:=seq_to_unseq[mtfa[mtfbase[0]]];
  367. inc(cftab[n],es);
  368. if t+es>100000*blocksize then
  369. error(SDecodingError,bzip2_data_error);
  370. while es>0 do
  371. begin
  372. tt^[t]:=ntole(cardinal(n));
  373. dec(es);
  374. inc(t);
  375. end;
  376. end
  377. else
  378. begin
  379. nn:=next_sym-1;
  380. if nn<mtfl_size then
  381. begin
  382. {Avoid the costs of the general case.}
  383. p:=@mtfa[mtfbase[0]];
  384. q:=p+nn;
  385. n:=q^;
  386. repeat
  387. q^:=(q-1)^;
  388. dec(q);
  389. until q=p;
  390. q^:=n;
  391. end
  392. else
  393. begin
  394. {General case.}
  395. lno:=nn div MTFL_SIZE;
  396. off:=nn and (MTFL_SIZE-1);
  397. p:=@mtfa[mtfbase[lno]];
  398. q:=p+off;
  399. n:=q^;
  400. while(q<>p) do
  401. begin
  402. q^:=(q-1)^;
  403. dec(q);
  404. end;
  405. u:=@mtfbase;
  406. v:=u+lno;
  407. repeat
  408. mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
  409. dec(v);
  410. dec(v^);
  411. until v=u;
  412. mtfa[v^]:=n;
  413. if v^=0 then
  414. move_mtf_block;
  415. end;
  416. inc(cftab[seq_to_unseq[n]]);
  417. tt^[t]:=ntole(cardinal(seq_to_unseq[n]));
  418. inc(t);
  419. if t>100000*blocksize then
  420. error(SDecodingError,bzip2_data_error);
  421. next_sym:=get_mtf_value;
  422. end;
  423. end;
  424. tt_count:=t;
  425. {Setup cftab to facilitate generation of T^(-1).}
  426. t:=0;
  427. for i:=0 to 256 do
  428. begin
  429. nn:=cftab[i];
  430. cftab[i]:=t;
  431. { writeln(i,' ',t);}
  432. inc(t,nn);
  433. end;
  434. end;
  435. {$ifndef HAVE_DETRANSFORM}
  436. procedure TDecompressBzip2Stream.detransform;
  437. var a:cardinal;
  438. p,q,r:Pcardinal;
  439. begin
  440. a:=0;
  441. p:=@tt^[0];
  442. q:=p+tt_count;
  443. while p<>q do
  444. begin
  445. r:=@tt^[cftab[ntole(p^) and $ff]];
  446. inc(cftab[ntole(p^) and $ff]);
  447. r^:=r^ or ntole(a);
  448. inc(a,256);
  449. inc(p);
  450. end;
  451. end;
  452. {$endif}
  453. function TDecompressBzip2Stream.decode_block:boolean;
  454. {Decode a new compressed block.}
  455. var magic:array[1..6] of AnsiChar;
  456. stored_blockcrc:cardinal;
  457. i:byte;
  458. begin
  459. for i:=1 to 6 do
  460. magic[i]:=AnsiChar(get_byte);
  461. if magic='1AY&SY' then
  462. begin
  463. inc(current_block);
  464. stored_blockcrc:=get_cardinal;
  465. block_randomized:=get_boolean;
  466. block_origin:=get_cardinal24;
  467. {Receive the mapping table.}
  468. receive_mapping_table;
  469. alphasize:=cardinal(inuse_count)+2;
  470. {Receive the selectors. Raises exception}
  471. receive_selectors;
  472. {Undo the MTF values for the selectors.}
  473. undo_mtf_values;
  474. {Receive the coding tables.}
  475. receive_coding_tables;
  476. {Build the Huffman tables.}
  477. make_hufftab;
  478. {Receive the MTF values.}
  479. receive_mtf_values;
  480. {Undo the Burrows Wheeler transformation.}
  481. detransform;
  482. decode_available:=tt_count;
  483. Result:=True;
  484. end
  485. else
  486. begin
  487. if magic<>#$17'rE8P'#$90 then
  488. error(SDecodingError,bzip2_bad_block_magic);
  489. Result:=false;
  490. end;
  491. end;
  492. Function TDecompressBzip2Stream.new_block : Boolean;
  493. begin
  494. Result:=decode_block;
  495. If result then
  496. nextrle:=@tt^[ntole(tt^[block_origin]) shr 8]
  497. else
  498. nextrle:=nil;
  499. end;
  500. Function TDecompressBzip2Stream.consume_rle : Boolean;inline;
  501. {Make nextrle point to the next decoded byte. If nextrle did point to the last
  502. byte in the current block, decode the next block.}
  503. begin
  504. { Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
  505. nextrle:=@tt^[ntole(Pcardinal(nextrle)^) shr 8];
  506. dec(decode_available);
  507. if decode_available=0 then
  508. Result:=new_block
  509. else
  510. Result:=True;
  511. end;
  512. Function TDecompressBzip2Stream.rle_read(bufptr:Pbyte;Count:Longint) : LongInt;
  513. var rle_len:cardinal;
  514. data:byte;
  515. label rle_write;
  516. begin
  517. Result:=0;
  518. rle_len:=rle_run_left;
  519. data:=rle_run_data;
  520. if block_randomized then
  521. {Not yet implemented.}
  522. Error(SErrUnimplemented,-1)
  523. else
  524. begin
  525. if rle_len<>0 then
  526. {Speed is important. Instead of an if statement within the
  527. repeat loop use a goto outside the loop.}
  528. goto rle_write;
  529. repeat
  530. if decode_available=0 then
  531. break;
  532. rle_len:=1;
  533. data:=nextrle^;
  534. if consume_rle and (decode_available>0) and (data=nextrle^) then
  535. begin
  536. inc(rle_len);
  537. if consume_rle and (decode_available>0) and (data=nextrle^) then
  538. begin
  539. inc(rle_len);
  540. if consume_rle and (decode_available>0) and (data=nextrle^) then
  541. begin
  542. if consume_rle then
  543. inc(rle_len,nextrle^+1);
  544. consume_rle;
  545. end;
  546. end;
  547. end;
  548. rle_write:
  549. repeat
  550. bufptr^:=data;
  551. inc(bufptr);
  552. dec(count);
  553. dec(rle_len);
  554. inc(Result);
  555. until (rle_len=0) or (count=0);
  556. until count=0;
  557. end;
  558. rle_run_data:=data;
  559. rle_run_left:=rle_len;
  560. end;
  561. Function TDecompressBzip2Stream.Read(var Buffer; Count : Longint) : LongInt;
  562. var bufptr:Pbyte;
  563. begin
  564. bufptr:=@buffer;
  565. if decode_available=high(decode_available) then
  566. begin
  567. {Initialize the rle process:
  568. - Decode a block
  569. - Initialize pointer.}
  570. if not decode_block then
  571. begin
  572. nextrle:=nil;
  573. error(SDecodingError,bzip2_endoffile);
  574. end;
  575. nextrle:=@tt^[ntole(tt^[block_origin]) shr 8];
  576. end;
  577. Result:=rle_read(bufptr,count);
  578. end;
  579. Destructor TDecompressBzip2Stream.Destroy;
  580. begin
  581. if tt<>nil then
  582. FreeMem(tt,blocksize*100000*sizeof(cardinal));
  583. Inherited;
  584. end;
  585. end.