bzip2.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. {$IFNDEF FPC_DOTTEDUNITS}
  2. unit bzip2;
  3. {$ENDIF FPC_DOTTEDUNITS}
  4. {****************************************************************************
  5. BZIP2 decompression unit
  6. Copyright (C) 2002 by Daniel Mantione
  7. This unit provides a decompression stream to decode .bz2 files. It is
  8. inpired by Julian R. Seward's libbzip2 library and therefore you should
  9. send credits to him and bug reports to me :)
  10. This code is licensed under the same terms as the original libbz2 library,
  11. which is decsribed in the file LICENSE. If you don't have this file, look
  12. at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
  13. be included. In case of problems, contact the author.
  14. E-mail addresses:
  15. Daniel Mantione <[email protected]>
  16. Julian R. Seward <[email protected]>
  17. Please do not contact Julian about this Pascal library, he didn't wrote it.
  18. ****************************************************************************}
  19. interface
  20. {$goto on}
  21. {$IFDEF FPC_DOTTEDUNITS}
  22. uses System.Objects, System.Bzip2comn;
  23. {$ELSE FPC_DOTTEDUNITS}
  24. uses objects, bzip2comn;
  25. {$ENDIF FPC_DOTTEDUNITS}
  26. Type
  27. Tbzip2_decode_stream=object(Tstream)
  28. short:cardinal;
  29. readstream:Pstream;
  30. block_randomized:boolean;
  31. blocksize:byte;
  32. tt:Pcardinal_array;
  33. tt_count:cardinal;
  34. rle_run_left,rle_run_data:byte;
  35. nextrle:Pbyte;
  36. decode_available:cardinal;
  37. block_origin:cardinal;
  38. current_block:cardinal;
  39. read_data,bits_available:byte;
  40. inuse16:set of 0..15;
  41. inuse:set of 0..255;
  42. inuse_count:cardinal;
  43. seq_to_unseq:array[0..255] of byte;
  44. alphasize:cardinal;
  45. group_count,group_pos,gsel,gminlen:byte;
  46. group_no:cardinal;
  47. glimit,gperm,gbase:Phuffarray;
  48. selector_count:cardinal;
  49. selector,selector_mtf:array[0..max_selectors] of byte;
  50. len:array[0..max_groups,0..max_alpha_size] of byte;
  51. limit:array[0..max_groups,0..max_alpha_size] of cardinal;
  52. base:array[0..max_groups,0..max_alpha_size] of cardinal;
  53. perm:array[0..max_groups,0..max_alpha_size] of cardinal;
  54. minlens:array[0..max_groups] of byte;
  55. cftab:array[0..257] of cardinal;
  56. mtfbase:array[0..256 div mtfl_size-1] of cardinal;
  57. mtfa:array[0..mtfa_size-1] of byte;
  58. constructor init(Areadstream:Pstream);
  59. function get_bits(n:byte):byte;
  60. function get_boolean:boolean;
  61. function get_byte:byte;
  62. function get_cardinal24:cardinal;
  63. function get_cardinal:cardinal;
  64. procedure receive_mapping_table;
  65. procedure receive_selectors;
  66. procedure undo_mtf_values;
  67. procedure receive_coding_tables;
  68. procedure make_hufftab;
  69. procedure init_mtf;
  70. function get_mtf_value:cardinal;
  71. procedure move_mtf_block;
  72. procedure receive_mtf_values;
  73. procedure detransform;
  74. function decode_block:boolean;
  75. procedure read(var buf;count:Longint);virtual;
  76. procedure new_block;
  77. procedure consume_rle;inline;
  78. procedure rle_read(bufptr:Pbyte;var count:Longint);
  79. destructor done;virtual;
  80. end;
  81. implementation
  82. {$ifdef cpui386}
  83. {$i bzip2i386.inc}
  84. {$endif}
  85. {*****************************************************************************
  86. Tbzip2_decode_stream
  87. *****************************************************************************}
  88. constructor Tbzip2_decode_stream.init(Areadstream:Pstream);
  89. var magic:array[1..3] of AnsiChar;
  90. c:AnsiChar;
  91. begin
  92. readstream:=Areadstream;
  93. {Read the magic.}
  94. readstream^.read(magic,sizeof(magic));
  95. if magic<>bzip2_stream_magic then
  96. begin
  97. error(stiniterror,bzip2_bad_header_magic);
  98. exit;
  99. end;
  100. {Read the block size and allocate the working array.}
  101. readstream^.read(c,1);
  102. blocksize:=byte(c)-byte('0');
  103. getmem(tt,blocksize*100000*sizeof(cardinal));
  104. decode_available:=high(decode_available);
  105. end;
  106. function Tbzip2_decode_stream.get_bits(n:byte):byte;
  107. var data:byte;
  108. begin
  109. if n>bits_available then
  110. begin
  111. readstream^.read(data,1);
  112. get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
  113. read_data:=data shl (n-bits_available);
  114. inc(bits_available,8);
  115. end
  116. else
  117. begin
  118. get_bits:=read_data shr (8-n);
  119. read_data:=read_data shl n;
  120. end;
  121. dec(bits_available,n);
  122. end;
  123. function Tbzip2_decode_stream.get_boolean:boolean;
  124. begin
  125. get_boolean:=boolean(get_bits(1));
  126. end;
  127. function Tbzip2_decode_stream.get_byte:byte;
  128. begin
  129. get_byte:=get_bits(8);
  130. end;
  131. function Tbzip2_decode_stream.get_cardinal24:cardinal;
  132. begin
  133. get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
  134. end;
  135. function Tbzip2_decode_stream.get_cardinal:cardinal;
  136. begin
  137. get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
  138. get_bits(8);
  139. end;
  140. procedure Tbzip2_decode_stream.receive_mapping_table;
  141. {Receive the mapping table. To save space, the inuse set is stored in pieces
  142. of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
  143. the pieces follow.}
  144. var i,j:byte;
  145. begin
  146. inuse16:=[];
  147. {Receive the first 16 bits which tell which pieces are stored.}
  148. for i:=0 to 15 do
  149. if get_boolean then
  150. include(inuse16,i);
  151. {Receive the used pieces.}
  152. inuse:=[];
  153. inuse_count:=0;
  154. for i:=0 to 15 do
  155. if i in inuse16 then
  156. for j:=0 to 15 do
  157. if get_boolean then
  158. begin
  159. include(inuse,16*i+j);
  160. seq_to_unseq[inuse_count]:=16*i+j;
  161. inc(inuse_count);
  162. end;
  163. { system.write('Mapping table: ');
  164. for i:=0 to 255 do
  165. if i in inuse then
  166. system.write(i,' ');
  167. writeln;}
  168. end;
  169. procedure Tbzip2_decode_stream.receive_selectors;
  170. {Receives the selectors.}
  171. var i:cardinal;
  172. j:byte;
  173. begin
  174. group_count:=get_bits(3);
  175. selector_count:=get_bits(8) shl 7 or get_bits(7);
  176. for i:=0 to selector_count-1 do
  177. begin
  178. j:=0;
  179. while get_boolean do
  180. begin
  181. inc(j);
  182. if j>5 then
  183. error(streaderror,bzip2_data_error);
  184. end;
  185. selector_mtf[i]:=j;
  186. end;
  187. { system.write('Selector_mtf: ');
  188. for i:=0 to selector_count-1 do
  189. system.write(selector_mtf[i],' ');
  190. writeln;}
  191. end;
  192. procedure Tbzip2_decode_stream.undo_mtf_values;
  193. {Undo the MTF values for the selectors.}
  194. var pos:array[0..max_groups] of byte;
  195. i:cardinal;
  196. v,tmp:byte;
  197. begin
  198. for v:=0 to group_count-1 do
  199. pos[v]:=v;
  200. for i:=0 to selector_count-1 do
  201. begin
  202. v:=selector_mtf[i];
  203. tmp:=pos[v];
  204. while v<>0 do
  205. begin
  206. pos[v]:=pos[v-1];
  207. dec(v);
  208. end;
  209. pos[0]:=tmp;
  210. selector[i]:=tmp;
  211. end;
  212. end;
  213. procedure Tbzip2_decode_stream.receive_coding_tables;
  214. var t,curr:byte;
  215. i:cardinal;
  216. begin
  217. for t:=0 to group_count-1 do
  218. begin
  219. curr:=get_bits(5);
  220. for i:=0 to alphasize-1 do
  221. begin
  222. repeat
  223. if not(curr in [1..20]) then
  224. begin
  225. error(streaderror,bzip2_data_error);
  226. exit;
  227. end;
  228. if not get_boolean then
  229. break;
  230. if get_boolean then
  231. dec(curr)
  232. else
  233. inc(curr);
  234. until false;
  235. len[t,i]:=curr;
  236. end;
  237. end;
  238. { writeln('Coding tables:');
  239. for t:=0 to group_count-1 do
  240. begin
  241. for i:=0 to alphasize-1 do
  242. system.write(len[t,i],' ');
  243. writeln;
  244. end;}
  245. end;
  246. procedure Tbzip2_decode_stream.make_hufftab;
  247. {Builds the Huffman tables.}
  248. var i:cardinal;
  249. t,minlen,maxlen:byte;
  250. begin
  251. for t:=0 to group_count-1 do
  252. begin
  253. minlen:=32;
  254. maxlen:=0;
  255. for i:=0 to alphasize-1 do
  256. begin
  257. if len[t,i]>maxlen then
  258. maxlen:=len[t,i];
  259. if len[t,i]<minlen then
  260. minlen:=len[t,i];
  261. end;
  262. hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
  263. minlen,maxlen,alphasize);
  264. minlens[t]:=minlen;
  265. end;
  266. end;
  267. procedure Tbzip2_decode_stream.init_mtf;
  268. var i,j:byte;
  269. k:cardinal;
  270. begin
  271. k:=mtfa_size-1;
  272. for i:=256 div mtfl_size-1 downto 0 do
  273. begin
  274. for j:=mtfl_size-1 downto 0 do
  275. begin
  276. mtfa[k]:=i*mtfl_size+j;
  277. dec(k);
  278. end;
  279. mtfbase[i]:=k+1;
  280. end;
  281. end;
  282. function Tbzip2_decode_stream.get_mtf_value:cardinal;
  283. var zn:byte;
  284. zvec:cardinal;
  285. begin
  286. if group_pos=0 then
  287. begin
  288. inc(group_no);
  289. group_pos:=group_size;
  290. gsel:=selector[group_no];
  291. gminlen:=minlens[gsel];
  292. glimit:=@limit[gsel];
  293. gperm:=@perm[gsel];
  294. gbase:=@base[gsel];
  295. end;
  296. dec(group_pos);
  297. zn:=gminlen;
  298. zvec:=get_bits(zn);
  299. while zvec>glimit^[zn] do
  300. begin
  301. inc(zn);
  302. zvec:=zvec shl 1 or byte(get_boolean);
  303. end;
  304. get_mtf_value:=gperm^[zvec-gbase^[zn]];
  305. end;
  306. procedure Tbzip2_decode_stream.move_mtf_block;
  307. var i:byte;
  308. j,k:cardinal;
  309. begin
  310. k:=MTFA_SIZE;
  311. for i:=256 div MTFL_SIZE-1 downto 0 do
  312. begin
  313. j:=mtfbase[i];
  314. Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
  315. Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
  316. Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
  317. dec(k,16);
  318. Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
  319. mtfbase[i]:=k;
  320. end;
  321. end;
  322. procedure Tbzip2_decode_stream.receive_mtf_values;
  323. const run_a=0;
  324. run_b=1;
  325. var t,next_sym:cardinal;
  326. es:cardinal;
  327. n:byte;
  328. nn,i:cardinal;
  329. p,q:Pbyte;
  330. u,v:Pcardinal;
  331. lno,off:cardinal;
  332. begin
  333. group_no:=high(group_no);
  334. group_pos:=0;
  335. t:=0;
  336. for i:=0 to 257 do
  337. cftab[i]:=0;
  338. init_mtf;
  339. next_sym:=get_mtf_value;
  340. while next_sym<>inuse_count+1 do
  341. begin
  342. { writeln(t,' ',next_sym);
  343. if t=22296 then
  344. t:=t; }
  345. if next_sym<=run_b then
  346. begin
  347. es:=0;
  348. n:=0;
  349. repeat
  350. inc(es,(next_sym+1) shl n);
  351. inc(n);
  352. next_sym:=get_mtf_value;
  353. until next_sym>run_b;
  354. n:=seq_to_unseq[mtfa[mtfbase[0]]];
  355. inc(cftab[n],es);
  356. if t+es>100000*blocksize then
  357. begin
  358. error(streaderror,bzip2_data_error);
  359. exit;
  360. end;
  361. while es>0 do
  362. begin
  363. tt^[t]:=ntole(cardinal(n));
  364. dec(es);
  365. inc(t);
  366. end;
  367. end
  368. else
  369. begin
  370. nn:=next_sym-1;
  371. if nn<mtfl_size then
  372. begin
  373. {Avoid the costs of the general case.}
  374. p:=@mtfa[mtfbase[0]];
  375. q:=p+nn;
  376. n:=q^;
  377. repeat
  378. q^:=(q-1)^;
  379. dec(q);
  380. until q=p;
  381. q^:=n;
  382. end
  383. else
  384. begin
  385. {General case.}
  386. lno:=nn div MTFL_SIZE;
  387. off:=nn and (MTFL_SIZE-1);
  388. p:=@mtfa[mtfbase[lno]];
  389. q:=p+off;
  390. n:=q^;
  391. while(q<>p) do
  392. begin
  393. q^:=(q-1)^;
  394. dec(q);
  395. end;
  396. u:=@mtfbase;
  397. v:=u+lno;
  398. repeat
  399. mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
  400. dec(v);
  401. dec(v^);
  402. until v=u;
  403. mtfa[v^]:=n;
  404. if v^=0 then
  405. move_mtf_block;
  406. end;
  407. inc(cftab[seq_to_unseq[n]]);
  408. tt^[t]:=ntole(cardinal(seq_to_unseq[n]));
  409. inc(t);
  410. if t>100000*blocksize then
  411. begin
  412. error(streaderror,bzip2_data_error);
  413. exit;
  414. end;
  415. next_sym:=get_mtf_value;
  416. end;
  417. end;
  418. tt_count:=t;
  419. {Setup cftab to facilitate generation of T^(-1).}
  420. t:=0;
  421. for i:=0 to 256 do
  422. begin
  423. nn:=cftab[i];
  424. cftab[i]:=t;
  425. { writeln(i,' ',t);}
  426. inc(t,nn);
  427. end;
  428. end;
  429. {$ifndef HAVE_DETRANSFORM}
  430. procedure Tbzip2_decode_stream.detransform;
  431. var a:cardinal;
  432. p,q,r:Pcardinal;
  433. begin
  434. a:=0;
  435. p:=@tt^[0];
  436. q:=p+tt_count;
  437. while p<>q do
  438. begin
  439. r:=@tt^[cftab[ntole(p^) and $ff]];
  440. inc(cftab[ntole(p^) and $ff]);
  441. r^:=r^ or ntole(a);
  442. inc(a,256);
  443. inc(p);
  444. end;
  445. end;
  446. {$endif}
  447. function Tbzip2_decode_stream.decode_block:boolean;
  448. {Decode a new compressed block.}
  449. var magic:array[1..6] of AnsiChar;
  450. stored_blockcrc:cardinal;
  451. i:byte;
  452. begin
  453. for i:=1 to 6 do
  454. magic[i]:=AnsiChar(get_byte);
  455. if magic='1AY&SY' then
  456. begin
  457. inc(current_block);
  458. { writeln('Block ',current_block,': Header ok');}
  459. stored_blockcrc:=get_cardinal;
  460. block_randomized:=get_boolean;
  461. block_origin:=get_cardinal24;
  462. {Receive the mapping table.}
  463. receive_mapping_table;
  464. alphasize:=cardinal(inuse_count)+2;
  465. { writeln('Mapping table ok.');}
  466. {Receive the selectors.}
  467. receive_selectors;
  468. if status<>0 then
  469. exit;
  470. { writeln('Selectors ok.');}
  471. {Undo the MTF values for the selectors.}
  472. undo_mtf_values;
  473. { writeln('Undo mtf ok.');}
  474. {Receive the coding tables.}
  475. receive_coding_tables;
  476. if status<>0 then
  477. exit;
  478. { writeln('Coding tables ok');}
  479. {Build the Huffman tables.}
  480. make_hufftab;
  481. { writeln('Huffman ok.');}
  482. {Receive the MTF values.}
  483. receive_mtf_values;
  484. { writeln('MTF OK');}
  485. {Undo the Burrows Wheeler transformation.}
  486. detransform;
  487. { writeln('Detransform OK');}
  488. decode_available:=tt_count;
  489. end
  490. else
  491. begin
  492. if magic<>#$17'rE8P'#$90 then
  493. error(streaderror,bzip2_bad_block_magic);
  494. decode_block:=false;
  495. end;
  496. end;
  497. procedure Tbzip2_decode_stream.new_block;
  498. begin
  499. if decode_block then
  500. nextrle:=@tt^[ntole(tt^[block_origin]) shr 8]
  501. else
  502. begin
  503. error(streaderror,bzip2_endoffile);
  504. nextrle:=nil;
  505. end;
  506. end;
  507. procedure Tbzip2_decode_stream.consume_rle;inline;
  508. {Make nextrle point to the next decoded byte. If nextrle did point to the last
  509. byte in the current block, decode the next block.}
  510. begin
  511. { Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
  512. nextrle:=@tt^[ntole(Pcardinal(nextrle)^) shr 8];
  513. dec(decode_available);
  514. if decode_available=0 then
  515. new_block;
  516. end;
  517. procedure Tbzip2_decode_stream.rle_read(bufptr:Pbyte;var count:Longint);
  518. var rle_len:cardinal;
  519. data:byte;
  520. label rle_write;
  521. begin
  522. rle_len:=rle_run_left;
  523. data:=rle_run_data;
  524. if block_randomized then
  525. {Not yet implemented.}
  526. runerror(212)
  527. else
  528. begin
  529. if rle_len<>0 then
  530. {Speed is important. Instead of an if statement within the
  531. repeat loop use a goto outside the loop.}
  532. goto rle_write;
  533. repeat
  534. if decode_available=0 then
  535. break;
  536. rle_len:=1;
  537. data:=nextrle^;
  538. consume_rle;
  539. if (decode_available>0) and (data=nextrle^) then
  540. begin
  541. inc(rle_len);
  542. consume_rle;
  543. if (decode_available>0) and (data=nextrle^) then
  544. begin
  545. inc(rle_len);
  546. consume_rle;
  547. if (decode_available>0) and (data=nextrle^) then
  548. begin
  549. consume_rle;
  550. inc(rle_len,nextrle^+1);
  551. consume_rle;
  552. end;
  553. end;
  554. end;
  555. rle_write:
  556. repeat
  557. bufptr^:=data;
  558. inc(bufptr);
  559. dec(count);
  560. dec(rle_len);
  561. until (rle_len=0) or (count=0);
  562. until count=0;
  563. short:=count;
  564. end;
  565. rle_run_data:=data;
  566. rle_run_left:=rle_len;
  567. end;
  568. procedure Tbzip2_decode_stream.read(var buf;count:Longint);
  569. var bufptr:Pbyte;
  570. begin
  571. short:=0;
  572. bufptr:=@buf;
  573. if decode_available=high(decode_available) then
  574. begin
  575. {Initialize the rle process:
  576. - Decode a block
  577. - Initialize pointer.}
  578. if not decode_block then
  579. begin
  580. error(streaderror,bzip2_endoffile);
  581. nextrle:=nil;
  582. end;
  583. nextrle:=@tt^[ntole(tt^[block_origin]) shr 8];
  584. end;
  585. rle_read(bufptr,count);
  586. end;
  587. destructor Tbzip2_decode_stream.done;
  588. begin
  589. if tt<>nil then
  590. freemem(tt,blocksize*100000*sizeof(cardinal));
  591. inherited done;
  592. end;
  593. end.