bzip2.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. unit bzip2;
  2. {****************************************************************************
  3. BZIP2 decompression unit
  4. Copyright (C) 2002 by Daniel Mantione
  5. This unit provides a decompression stream to decode .bz2 files. It is
  6. inpired by Julian R. Seward's libbzip2 library and therefore you should
  7. send credits to him and bug reports to me :)
  8. This code is licensed under the same terms as the original libbz2 library,
  9. which is decsribed in the file LICENSE. If you don't have this file, look
  10. at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
  11. be included. In case of problems, contact the author.
  12. E-mail addresses:
  13. Daniel Mantione <[email protected]>
  14. Julian R. Seward <[email protected]>
  15. Please do not contact Julian about this Pascal library, he didn't wrote it.
  16. ****************************************************************************}
  17. interface
  18. uses objects;
  19. const max_groups=6;
  20. max_alpha_size=258;
  21. max_code_len=23;
  22. group_size=50;
  23. iter_count=4;
  24. max_selectors=2+(900000 div group_size);
  25. const mtfa_size=4096;
  26. mtfl_size=16;
  27. type Tcardinal_array=array [0..899999] of cardinal;
  28. Pcardinal_array=^Tcardinal_array;
  29. Pcardinal=^cardinal;
  30. Thuffarray=array[0..max_alpha_size] of cardinal;
  31. Phuffarray=^Thuffarray;
  32. Tbzip2_decode_stream=object(Tstream)
  33. short:cardinal;
  34. readstream:Pstream;
  35. block_randomized:boolean;
  36. blocksize:byte;
  37. tt:Pcardinal_array;
  38. tt_count:cardinal;
  39. rle_run_left,rle_run_data:byte;
  40. nextrle:Pbyte;
  41. decode_available:cardinal;
  42. block_origin:cardinal;
  43. current_block:cardinal;
  44. read_data,bits_available:byte;
  45. inuse16:set of 0..15;
  46. inuse:set of 0..255;
  47. inuse_count:cardinal;
  48. seq_to_unseq:array[0..255] of byte;
  49. alphasize:cardinal;
  50. group_count,group_pos,gsel,gminlen:byte;
  51. group_no:cardinal;
  52. glimit,gperm,gbase:Phuffarray;
  53. selector_count:cardinal;
  54. selector,selector_mtf:array[0..max_selectors] of byte;
  55. len:array[0..max_groups,0..max_alpha_size] of byte;
  56. limit:array[0..max_groups,0..max_alpha_size] of cardinal;
  57. base:array[0..max_groups,0..max_alpha_size] of cardinal;
  58. perm:array[0..max_groups,0..max_alpha_size] of cardinal;
  59. minlens:array[0..max_groups] of byte;
  60. cftab:array[0..257] of cardinal;
  61. mtfbase:array[0..256 div mtfl_size-1] of cardinal;
  62. mtfa:array[0..mtfa_size-1] of byte;
  63. constructor init(Areadstream:Pstream);
  64. function get_bits(n:byte):byte;
  65. function get_boolean:boolean;
  66. function get_byte:byte;
  67. function get_cardinal24:cardinal;
  68. function get_cardinal:cardinal;
  69. procedure receive_mapping_table;
  70. procedure receive_selectors;
  71. procedure undo_mtf_values;
  72. procedure receive_coding_tables;
  73. procedure make_hufftab;
  74. procedure init_mtf;
  75. function get_mtf_value:cardinal;
  76. procedure move_mtf_block;
  77. procedure receive_mtf_values;
  78. procedure detransform;
  79. function decode_block:boolean;
  80. procedure read(var buf;count:sw_word);virtual;
  81. procedure new_block;
  82. procedure consume_rle;inline;
  83. procedure rle_read(bufptr:Pbyte;var count:sw_word);
  84. destructor done;virtual;
  85. end;
  86. {A bzip2 stream starts with this:}
  87. const bzip2_stream_magic='BZh';
  88. {Error codes for stream errorinfo.}
  89. const bzip2_bad_header_magic =1;
  90. bzip2_bad_block_magic =2;
  91. bzip2_endoffile =3;
  92. bzip2_data_error =4;
  93. implementation
  94. {$ifdef i386}
  95. {$i bzip2i386.inc}
  96. {$endif}
  97. procedure hb_create_decode_tables(var limit,base,perm:array of cardinal;
  98. var length:array of byte;
  99. minlen,maxlen:byte;alphasize:cardinal);
  100. var pp,i,j,vec:cardinal;
  101. begin
  102. pp:=0;
  103. for i:=minlen to maxlen do
  104. for j:=0 to alphasize-1 do
  105. if length[j]=i then
  106. begin
  107. perm[pp]:=j;
  108. inc(pp);
  109. end;
  110. for i:=0 to max_code_len-1 do
  111. begin
  112. base[i]:=0;
  113. limit[i]:=0;
  114. end;
  115. for i:=0 to alphasize-1 do
  116. inc(base[length[i]+1]);
  117. for i:=1 to max_code_len-1 do
  118. inc(base[i],base[i-1]);
  119. vec:=0;
  120. for i:=minlen to maxlen do
  121. begin
  122. inc(vec,base[i+1]-base[i]);
  123. limit[i]:=vec-1;
  124. vec:=vec shl 1;
  125. end;
  126. for i:=minlen+1 to maxlen do
  127. base[i]:=((limit[i-1]+1) shl 1)-base[i];
  128. end;
  129. {*****************************************************************************
  130. Tbzip2_decode_stream
  131. *****************************************************************************}
  132. constructor Tbzip2_decode_stream.init(Areadstream:Pstream);
  133. var magic:array[1..3] of char;
  134. c:char;
  135. begin
  136. readstream:=Areadstream;
  137. {Read the magic.}
  138. readstream^.read(magic,sizeof(magic));
  139. if magic<>bzip2_stream_magic then
  140. begin
  141. error(stiniterror,bzip2_bad_header_magic);
  142. exit;
  143. end;
  144. {Read the block size and allocate the working array.}
  145. readstream^.read(c,1);
  146. blocksize:=byte(c)-byte('0');
  147. getmem(tt,blocksize*100000*sizeof(cardinal));
  148. decode_available:=high(decode_available);
  149. end;
  150. function Tbzip2_decode_stream.get_bits(n:byte):byte;
  151. var data:byte;
  152. begin
  153. if n>bits_available then
  154. begin
  155. readstream^.read(data,1);
  156. get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
  157. read_data:=data shl (n-bits_available);
  158. inc(bits_available,8);
  159. end
  160. else
  161. begin
  162. get_bits:=read_data shr (8-n);
  163. read_data:=read_data shl n;
  164. end;
  165. dec(bits_available,n);
  166. end;
  167. function Tbzip2_decode_stream.get_boolean:boolean;
  168. begin
  169. get_boolean:=boolean(get_bits(1));
  170. end;
  171. function Tbzip2_decode_stream.get_byte:byte;
  172. begin
  173. get_byte:=get_bits(8);
  174. end;
  175. function Tbzip2_decode_stream.get_cardinal24:cardinal;
  176. begin
  177. get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
  178. end;
  179. function Tbzip2_decode_stream.get_cardinal:cardinal;
  180. begin
  181. get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
  182. get_bits(8);
  183. end;
  184. procedure Tbzip2_decode_stream.receive_mapping_table;
  185. {Receive the mapping table. To save space, the inuse set is stored in pieces
  186. of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
  187. the pieces follow.}
  188. var i,j:byte;
  189. begin
  190. inuse16:=[];
  191. {Receive the first 16 bits which tell which pieces are stored.}
  192. for i:=0 to 15 do
  193. if get_boolean then
  194. include(inuse16,i);
  195. {Receive the used pieces.}
  196. inuse:=[];
  197. inuse_count:=0;
  198. for i:=0 to 15 do
  199. if i in inuse16 then
  200. for j:=0 to 15 do
  201. if get_boolean then
  202. begin
  203. include(inuse,16*i+j);
  204. seq_to_unseq[inuse_count]:=16*i+j;
  205. inc(inuse_count);
  206. end;
  207. { system.write('Mapping table: ');
  208. for i:=0 to 255 do
  209. if i in inuse then
  210. system.write(i,' ');
  211. writeln;}
  212. end;
  213. procedure Tbzip2_decode_stream.receive_selectors;
  214. {Receives the selectors.}
  215. var i:cardinal;
  216. j:byte;
  217. begin
  218. group_count:=get_bits(3);
  219. selector_count:=get_bits(8) shl 7 or get_bits(7);
  220. for i:=0 to selector_count-1 do
  221. begin
  222. j:=0;
  223. while get_boolean do
  224. begin
  225. inc(j);
  226. if j>5 then
  227. error(streaderror,bzip2_data_error);
  228. end;
  229. selector_mtf[i]:=j;
  230. end;
  231. { system.write('Selector_mtf: ');
  232. for i:=0 to selector_count-1 do
  233. system.write(selector_mtf[i],' ');
  234. writeln;}
  235. end;
  236. procedure Tbzip2_decode_stream.undo_mtf_values;
  237. {Undo the MTF values for the selectors.}
  238. var pos:array[0..max_groups] of byte;
  239. i:cardinal;
  240. v,tmp:byte;
  241. begin
  242. for v:=0 to group_count-1 do
  243. pos[v]:=v;
  244. for i:=0 to selector_count-1 do
  245. begin
  246. v:=selector_mtf[i];
  247. tmp:=pos[v];
  248. while v<>0 do
  249. begin
  250. pos[v]:=pos[v-1];
  251. dec(v);
  252. end;
  253. pos[0]:=tmp;
  254. selector[i]:=tmp;
  255. end;
  256. end;
  257. procedure Tbzip2_decode_stream.receive_coding_tables;
  258. var t,curr:byte;
  259. i:cardinal;
  260. begin
  261. for t:=0 to group_count-1 do
  262. begin
  263. curr:=get_bits(5);
  264. for i:=0 to alphasize-1 do
  265. begin
  266. repeat
  267. if not(curr in [1..20]) then
  268. begin
  269. error(streaderror,bzip2_data_error);
  270. exit;
  271. end;
  272. if not get_boolean then
  273. break;
  274. if get_boolean then
  275. dec(curr)
  276. else
  277. inc(curr);
  278. until false;
  279. len[t,i]:=curr;
  280. end;
  281. end;
  282. { writeln('Coding tables:');
  283. for t:=0 to group_count-1 do
  284. begin
  285. for i:=0 to alphasize-1 do
  286. system.write(len[t,i],' ');
  287. writeln;
  288. end;}
  289. end;
  290. procedure Tbzip2_decode_stream.make_hufftab;
  291. {Builds the Huffman tables.}
  292. var i:cardinal;
  293. t,minlen,maxlen:byte;
  294. begin
  295. for t:=0 to group_count-1 do
  296. begin
  297. minlen:=32;
  298. maxlen:=0;
  299. for i:=0 to alphasize-1 do
  300. begin
  301. if len[t,i]>maxlen then
  302. maxlen:=len[t,i];
  303. if len[t,i]<minlen then
  304. minlen:=len[t,i];
  305. end;
  306. hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
  307. minlen,maxlen,alphasize);
  308. minlens[t]:=minlen;
  309. end;
  310. end;
  311. procedure Tbzip2_decode_stream.init_mtf;
  312. var i,j:byte;
  313. k:cardinal;
  314. begin
  315. k:=mtfa_size-1;
  316. for i:=256 div mtfl_size-1 downto 0 do
  317. begin
  318. for j:=mtfl_size-1 downto 0 do
  319. begin
  320. mtfa[k]:=i*mtfl_size+j;
  321. dec(k);
  322. end;
  323. mtfbase[i]:=k+1;
  324. end;
  325. end;
  326. function Tbzip2_decode_stream.get_mtf_value:cardinal;
  327. var zn:byte;
  328. zvec:cardinal;
  329. begin
  330. if group_pos=0 then
  331. begin
  332. inc(group_no);
  333. group_pos:=group_size;
  334. gsel:=selector[group_no];
  335. gminlen:=minlens[gsel];
  336. glimit:=@limit[gsel];
  337. gperm:=@perm[gsel];
  338. gbase:=@base[gsel];
  339. end;
  340. dec(group_pos);
  341. zn:=gminlen;
  342. zvec:=get_bits(zn);
  343. while zvec>glimit^[zn] do
  344. begin
  345. inc(zn);
  346. zvec:=zvec shl 1 or byte(get_boolean);
  347. end;
  348. get_mtf_value:=gperm^[zvec-gbase^[zn]];
  349. end;
  350. procedure Tbzip2_decode_stream.move_mtf_block;
  351. var i:byte;
  352. j,k:cardinal;
  353. begin
  354. k:=MTFA_SIZE;
  355. for i:=256 div MTFL_SIZE-1 downto 0 do
  356. begin
  357. j:=mtfbase[i];
  358. Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
  359. Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
  360. Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
  361. dec(k,16);
  362. Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
  363. mtfbase[i]:=k;
  364. end;
  365. end;
  366. procedure Tbzip2_decode_stream.receive_mtf_values;
  367. const run_a=0;
  368. run_b=1;
  369. var t,next_sym:cardinal;
  370. es:cardinal;
  371. n:byte;
  372. nn,i:cardinal;
  373. p,q:Pbyte;
  374. u,v:Pcardinal;
  375. lno,off:cardinal;
  376. begin
  377. group_no:=high(group_no);
  378. group_pos:=0;
  379. t:=0;
  380. for i:=0 to 257 do
  381. cftab[i]:=0;
  382. init_mtf;
  383. next_sym:=get_mtf_value;
  384. while next_sym<>inuse_count+1 do
  385. begin
  386. { writeln(t,' ',next_sym);
  387. if t=22296 then
  388. t:=t; }
  389. if next_sym<=run_b then
  390. begin
  391. es:=0;
  392. n:=0;
  393. repeat
  394. inc(es,(next_sym+1) shl n);
  395. inc(n);
  396. next_sym:=get_mtf_value;
  397. until next_sym>run_b;
  398. n:=seq_to_unseq[mtfa[mtfbase[0]]];
  399. inc(cftab[n],es);
  400. if t+es>100000*blocksize then
  401. begin
  402. error(streaderror,bzip2_data_error);
  403. exit;
  404. end;
  405. while es>0 do
  406. begin
  407. tt^[t]:=n;
  408. dec(es);
  409. inc(t);
  410. end;
  411. end
  412. else
  413. begin
  414. nn:=next_sym-1;
  415. if nn<mtfl_size then
  416. begin
  417. {Avoid the costs of the general case.}
  418. p:=@mtfa[mtfbase[0]];
  419. q:=p+nn;
  420. n:=q^;
  421. repeat
  422. q^:=(q-1)^;
  423. dec(q);
  424. until q=p;
  425. q^:=n;
  426. end
  427. else
  428. begin
  429. {General case.}
  430. lno:=nn div MTFL_SIZE;
  431. off:=nn and (MTFL_SIZE-1);
  432. p:=@mtfa[mtfbase[lno]];
  433. q:=p+off;
  434. n:=q^;
  435. while(q<>p) do
  436. begin
  437. q^:=(q-1)^;
  438. dec(q);
  439. end;
  440. u:=@mtfbase;
  441. v:=u+lno;
  442. repeat
  443. mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
  444. dec(v);
  445. dec(v^);
  446. until v=u;
  447. mtfa[v^]:=n;
  448. if v^=0 then
  449. move_mtf_block;
  450. end;
  451. inc(cftab[seq_to_unseq[n]]);
  452. tt^[t]:=cardinal(seq_to_unseq[n]);
  453. inc(t);
  454. if t>100000*blocksize then
  455. begin
  456. error(streaderror,bzip2_data_error);
  457. exit;
  458. end;
  459. next_sym:=get_mtf_value;
  460. end;
  461. end;
  462. tt_count:=t;
  463. {Setup cftab to facilitate generation of T^(-1).}
  464. t:=0;
  465. for i:=0 to 256 do
  466. begin
  467. nn:=cftab[i];
  468. cftab[i]:=t;
  469. { writeln(i,' ',t);}
  470. inc(t,nn);
  471. end;
  472. end;
  473. {$ifndef HAVE_DETRANSFORM}
  474. procedure Tbzip2_decode_stream.detransform;
  475. var a:cardinal;
  476. p,q,r:Pcardinal;
  477. begin
  478. a:=0;
  479. p:=@tt^[0];
  480. q:=p+tt_count;
  481. while p<>q do
  482. begin
  483. r:=@tt^[cftab[p^ and $ff]];
  484. inc(cftab[p^ and $ff]);
  485. r^:=r^ or a;
  486. inc(a,256);
  487. inc(p);
  488. end;
  489. end;
  490. {$endif}
  491. function Tbzip2_decode_stream.decode_block:boolean;
  492. {Decode a new compressed block.}
  493. var magic:array[1..6] of char;
  494. stored_blockcrc:cardinal;
  495. i:byte;
  496. begin
  497. for i:=1 to 6 do
  498. magic[i]:=char(get_byte);
  499. if magic='1AY&SY' then
  500. begin
  501. inc(current_block);
  502. { writeln('Block ',current_block,': Header ok');}
  503. stored_blockcrc:=get_cardinal;
  504. block_randomized:=get_boolean;
  505. block_origin:=get_cardinal24;
  506. {Receive the mapping table.}
  507. receive_mapping_table;
  508. alphasize:=cardinal(inuse_count)+2;
  509. { writeln('Mapping table ok.');}
  510. {Receive the selectors.}
  511. receive_selectors;
  512. if status<>0 then
  513. exit;
  514. { writeln('Selectors ok.');}
  515. {Undo the MTF values for the selectors.}
  516. undo_mtf_values;
  517. { writeln('Undo mtf ok.');}
  518. {Receive the coding tables.}
  519. receive_coding_tables;
  520. if status<>0 then
  521. exit;
  522. { writeln('Coding tables ok');}
  523. {Build the Huffman tables.}
  524. make_hufftab;
  525. { writeln('Huffman ok.');}
  526. {Receive the MTF values.}
  527. receive_mtf_values;
  528. { writeln('MTF OK');}
  529. {Undo the Burrows Wheeler transformation.}
  530. detransform;
  531. { writeln('Detransform OK');}
  532. decode_available:=tt_count;
  533. end
  534. else
  535. begin
  536. if magic<>#$17'rE8P'#$90 then
  537. error(streaderror,bzip2_bad_block_magic);
  538. decode_block:=false;
  539. end;
  540. end;
  541. procedure Tbzip2_decode_stream.new_block;
  542. begin
  543. if decode_block then
  544. nextrle:=@tt^[tt^[block_origin] shr 8]
  545. else
  546. begin
  547. error(streaderror,bzip2_endoffile);
  548. nextrle:=nil;
  549. end;
  550. end;
  551. procedure Tbzip2_decode_stream.consume_rle;inline;
  552. {Make nextrle point to the next decoded byte. If nextrle did point to the last
  553. byte in the current block, decode the next block.}
  554. begin
  555. { Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
  556. nextrle:=@tt^[Pcardinal(nextrle)^ shr 8];
  557. dec(decode_available);
  558. if decode_available=0 then
  559. new_block;
  560. end;
  561. procedure Tbzip2_decode_stream.rle_read(bufptr:Pbyte;var count:sw_word);
  562. var rle_len:cardinal;
  563. data:byte;
  564. label rle_write;
  565. begin
  566. rle_len:=rle_run_left;
  567. data:=rle_run_data;
  568. if block_randomized then
  569. {Not yet implemented.}
  570. runerror(212)
  571. else
  572. begin
  573. if rle_len<>0 then
  574. {Speed is important. Instead of an if statement within the
  575. repeat loop use a goto outside the loop.}
  576. goto rle_write;
  577. repeat
  578. if decode_available=0 then
  579. break;
  580. rle_len:=1;
  581. data:=nextrle^;
  582. consume_rle;
  583. if (decode_available>0) and (data=nextrle^) then
  584. begin
  585. inc(rle_len);
  586. consume_rle;
  587. if (decode_available>0) and (data=nextrle^) then
  588. begin
  589. inc(rle_len);
  590. consume_rle;
  591. if (decode_available>0) and (data=nextrle^) then
  592. begin
  593. consume_rle;
  594. inc(rle_len,nextrle^+1);
  595. consume_rle;
  596. end;
  597. end;
  598. end;
  599. rle_write:
  600. repeat
  601. bufptr^:=data;
  602. inc(bufptr);
  603. dec(count);
  604. dec(rle_len);
  605. until (rle_len=0) or (count=0);
  606. until count=0;
  607. short:=count;
  608. end;
  609. rle_run_data:=data;
  610. rle_run_left:=rle_len;
  611. end;
  612. procedure Tbzip2_decode_stream.read(var buf;count:sw_word);
  613. var bufptr:Pbyte;
  614. begin
  615. short:=0;
  616. bufptr:=@buf;
  617. if decode_available=high(decode_available) then
  618. begin
  619. {Initialize the rle process:
  620. - Decode a block
  621. - Initialize pointer.}
  622. if not decode_block then
  623. begin
  624. error(streaderror,bzip2_endoffile);
  625. nextrle:=nil;
  626. end;
  627. nextrle:=@tt^[tt^[block_origin] shr 8];
  628. end;
  629. rle_read(bufptr,count);
  630. end;
  631. destructor Tbzip2_decode_stream.done;
  632. begin
  633. if tt<>nil then
  634. freemem(tt,blocksize*100000*sizeof(cardinal));
  635. inherited done;
  636. end;
  637. end.