BZip2OutputStream.cs 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776
  1. // BZip2OutputStream.cs
  2. // Copyright (C) 2001 Mike Krueger
  3. //
  4. // This program is free software; you can redistribute it and/or
  5. // modify it under the terms of the GNU General Public License
  6. // as published by the Free Software Foundation; either version 2
  7. // of the License, or (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with this program; if not, write to the Free Software
  16. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. //
  18. // Linking this library statically or dynamically with other modules is
  19. // making a combined work based on this library. Thus, the terms and
  20. // conditions of the GNU General Public License cover the whole
  21. // combination.
  22. //
  23. // As a special exception, the copyright holders of this library give you
  24. // permission to link this library with independent modules to produce an
  25. // executable, regardless of the license terms of these independent
  26. // modules, and to copy and distribute the resulting executable under
  27. // terms of your choice, provided that you also meet, for each linked
  28. // independent module, the terms and conditions of the license of that
  29. // module. An independent module is a module which is not derived from
  30. // or based on this library. If you modify this library, you may extend
  31. // this exception to your version of the library, but you are not
  32. // obligated to do so. If you do not wish to do so, delete this
  33. // exception statement from your version.
  34. using System;
  35. using System.IO;
  36. using ICSharpCode.SharpZipLib.Checksums;
  37. namespace ICSharpCode.SharpZipLib.BZip2
  38. {
  39. /// <summary>
  40. /// An output stream that compresses into the BZip2 format (without the file
  41. /// header chars) into another stream.
  42. /// TODO: Update to BZip2 1.0.1, 1.0.2
  43. /// </summary>
  44. public class BZip2OutputStream : Stream
  45. {
  46. /// <summary>
  47. /// I needed to implement the abstract member.
  48. /// </summary>
  49. public override bool CanRead {
  50. get {
  51. return baseStream.CanRead;
  52. }
  53. }
  54. /// <summary>
  55. /// I needed to implement the abstract member.
  56. /// </summary>
  57. public override bool CanSeek {
  58. get {
  59. return baseStream.CanSeek;
  60. }
  61. }
  62. /// <summary>
  63. /// I needed to implement the abstract member.
  64. /// </summary>
  65. public override bool CanWrite {
  66. get {
  67. return baseStream.CanWrite;
  68. }
  69. }
  70. /// <summary>
  71. /// I needed to implement the abstract member.
  72. /// </summary>
  73. public override long Length {
  74. get {
  75. return baseStream.Length;
  76. }
  77. }
  78. /// <summary>
  79. /// I needed to implement the abstract member.
  80. /// </summary>
  81. public override long Position {
  82. get {
  83. return baseStream.Position;
  84. }
  85. set {
  86. baseStream.Position = value;
  87. }
  88. }
  89. /// <summary>
  90. /// I needed to implement the abstract member.
  91. /// </summary>
  92. public override long Seek(long offset, SeekOrigin origin)
  93. {
  94. return baseStream.Seek(offset, origin);
  95. }
  96. /// <summary>
  97. /// I needed to implement the abstract member.
  98. /// </summary>
  99. public override void SetLength(long val)
  100. {
  101. baseStream.SetLength(val);
  102. }
  103. /// <summary>
  104. /// I needed to implement the abstract member.
  105. /// </summary>
  106. public override int ReadByte()
  107. {
  108. return baseStream.ReadByte();
  109. }
  110. /// <summary>
  111. /// I needed to implement the abstract member.
  112. /// </summary>
  113. public override int Read(byte[] b, int off, int len)
  114. {
  115. return baseStream.Read(b, off, len);
  116. }
  117. public override void Write(byte[] buf, int off, int len)
  118. {
  119. for (int i = 0; i < len; ++i) {
  120. WriteByte(buf[off + i]);
  121. }
  122. }
  123. readonly static int SETMASK = (1 << 21);
  124. readonly static int CLEARMASK = (~SETMASK);
  125. readonly static int GREATER_ICOST = 15;
  126. readonly static int LESSER_ICOST = 0;
  127. readonly static int SMALL_THRESH = 20;
  128. readonly static int DEPTH_THRESH = 10;
  129. /*--
  130. If you are ever unlucky/improbable enough
  131. to get a stack overflow whilst sorting,
  132. increase the following constant and try
  133. again. In practice I have never seen the
  134. stack go above 27 elems, so the following
  135. limit seems very generous.
  136. --*/
  137. readonly static int QSORT_STACK_SIZE = 1000;
  138. static void Panic()
  139. {
  140. //Console.WriteLine("panic");
  141. }
  142. void MakeMaps()
  143. {
  144. int i;
  145. nInUse = 0;
  146. for (i = 0; i < 256; i++) {
  147. if (inUse[i]) {
  148. seqToUnseq[nInUse] = (char)i;
  149. unseqToSeq[i] = (char)nInUse;
  150. nInUse++;
  151. }
  152. }
  153. }
  154. static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
  155. {
  156. /*--
  157. Nodes and heap entries run from 1. Entry 0
  158. for both the heap and nodes is a sentinel.
  159. --*/
  160. int nNodes, nHeap, n1, n2, j, k;
  161. bool tooLong;
  162. int[] heap = new int[BZip2Constants.MAX_ALPHA_SIZE + 2];
  163. int[] weight = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
  164. int[] parent = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
  165. for (int i = 0; i < alphaSize; ++i) {
  166. weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
  167. }
  168. while (true) {
  169. nNodes = alphaSize;
  170. nHeap = 0;
  171. heap[0] = 0;
  172. weight[0] = 0;
  173. parent[0] = -2;
  174. for (int i = 1; i <= alphaSize; ++i) {
  175. parent[i] = -1;
  176. nHeap++;
  177. heap[nHeap] = i;
  178. int zz = nHeap;
  179. int tmp = heap[zz];
  180. while (weight[tmp] < weight[heap[zz >> 1]]) {
  181. heap[zz] = heap[zz >> 1];
  182. zz >>= 1;
  183. }
  184. heap[zz] = tmp;
  185. }
  186. if (!(nHeap < (BZip2Constants.MAX_ALPHA_SIZE+2))) {
  187. Panic();
  188. }
  189. while (nHeap > 1) {
  190. n1 = heap[1];
  191. heap[1] = heap[nHeap];
  192. nHeap--;
  193. int zz = 1;
  194. int yy = 0;
  195. int tmp = heap[zz];
  196. while (true) {
  197. yy = zz << 1;
  198. if (yy > nHeap) {
  199. break;
  200. }
  201. if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]]) {
  202. yy++;
  203. }
  204. if (weight[tmp] < weight[heap[yy]]) {
  205. break;
  206. }
  207. heap[zz] = heap[yy];
  208. zz = yy;
  209. }
  210. heap[zz] = tmp;
  211. n2 = heap[1];
  212. heap[1] = heap[nHeap];
  213. nHeap--;
  214. zz = 1;
  215. yy = 0;
  216. tmp = heap[zz];
  217. while (true) {
  218. yy = zz << 1;
  219. if (yy > nHeap) {
  220. break;
  221. }
  222. if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]]) {
  223. yy++;
  224. }
  225. if (weight[tmp] < weight[heap[yy]]) {
  226. break;
  227. }
  228. heap[zz] = heap[yy];
  229. zz = yy;
  230. }
  231. heap[zz] = tmp;
  232. nNodes++;
  233. parent[n1] = parent[n2] = nNodes;
  234. weight[nNodes] = (int)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) |
  235. (int)(1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff)) ? (weight[n1] & 0x000000ff) : (weight[n2] & 0x000000ff)));
  236. parent[nNodes] = -1;
  237. nHeap++;
  238. heap[nHeap] = nNodes;
  239. zz = nHeap;
  240. tmp = heap[zz];
  241. while (weight[tmp] < weight[heap[zz >> 1]]) {
  242. heap[zz] = heap[zz >> 1];
  243. zz >>= 1;
  244. }
  245. heap[zz] = tmp;
  246. }
  247. if (!(nNodes < (BZip2Constants.MAX_ALPHA_SIZE * 2))) {
  248. Panic();
  249. }
  250. tooLong = false;
  251. for (int i = 1; i <= alphaSize; ++i) {
  252. j = 0;
  253. k = i;
  254. while (parent[k] >= 0) {
  255. k = parent[k];
  256. j++;
  257. }
  258. len[i - 1] = (char)j;
  259. if (j > maxLen) {
  260. tooLong = true;
  261. }
  262. }
  263. if (!tooLong) {
  264. break;
  265. }
  266. for (int i = 1; i < alphaSize; ++i) {
  267. j = weight[i] >> 8;
  268. j = 1 + (j / 2);
  269. weight[i] = j << 8;
  270. }
  271. }
  272. }
  273. /*--
  274. index of the last char in the block, so
  275. the block size == last + 1.
  276. --*/
  277. int last;
  278. /*--
  279. index in zptr[] of original string after sorting.
  280. --*/
  281. int origPtr;
  282. /*--
  283. always: in the range 0 .. 9.
  284. The current block size is 100000 * this number.
  285. --*/
  286. int blockSize100k;
  287. bool blockRandomised;
  288. // int bytesIn;
  289. int bytesOut;
  290. int bsBuff;
  291. int bsLive;
  292. IChecksum mCrc = new StrangeCRC();
  293. bool[] inUse = new bool[256];
  294. int nInUse;
  295. char[] seqToUnseq = new char[256];
  296. char[] unseqToSeq = new char[256];
  297. char[] selector = new char[BZip2Constants.MAX_SELECTORS];
  298. char[] selectorMtf = new char[BZip2Constants.MAX_SELECTORS];
  299. byte[] block;
  300. int[] quadrant;
  301. int[] zptr;
  302. short[] szptr;
  303. int[] ftab;
  304. int nMTF;
  305. int[] mtfFreq = new int[BZip2Constants.MAX_ALPHA_SIZE];
  306. /*
  307. * Used when sorting. If too many long comparisons
  308. * happen, we stop sorting, randomise the block
  309. * slightly, and try again.
  310. */
  311. int workFactor;
  312. int workDone;
  313. int workLimit;
  314. bool firstAttempt;
  315. int nBlocksRandomised;
  316. int currentChar = -1;
  317. int runLength = 0;
  318. public BZip2OutputStream(Stream inStream) : this(inStream, 9)
  319. {
  320. }
  321. public BZip2OutputStream(Stream inStream, int inBlockSize)
  322. {
  323. block = null;
  324. quadrant = null;
  325. zptr = null;
  326. ftab = null;
  327. BsSetStream(inStream);
  328. workFactor = 50;
  329. if(inBlockSize > 9) {
  330. inBlockSize = 9;
  331. }
  332. if(inBlockSize < 1) {
  333. inBlockSize = 1;
  334. }
  335. blockSize100k = inBlockSize;
  336. AllocateCompressStructures();
  337. Initialize();
  338. InitBlock();
  339. }
  340. public override void WriteByte(byte bv)
  341. {
  342. int b = (256 + bv) % 256;
  343. if (currentChar != -1) {
  344. if (currentChar == b) {
  345. runLength++;
  346. if (runLength > 254) {
  347. WriteRun();
  348. currentChar = -1;
  349. runLength = 0;
  350. }
  351. } else {
  352. WriteRun();
  353. runLength = 1;
  354. currentChar = b;
  355. }
  356. } else {
  357. currentChar = b;
  358. runLength++;
  359. }
  360. }
  361. void WriteRun()
  362. {
  363. if (last < allowableBlockSize) {
  364. inUse[currentChar] = true;
  365. for (int i = 0; i < runLength; i++) {
  366. mCrc.Update(currentChar);
  367. }
  368. switch (runLength) {
  369. case 1:
  370. last++;
  371. block[last + 1] = (byte)currentChar;
  372. break;
  373. case 2:
  374. last++;
  375. block[last + 1] = (byte)currentChar;
  376. last++;
  377. block[last + 1] = (byte)currentChar;
  378. break;
  379. case 3:
  380. last++;
  381. block[last + 1] = (byte)currentChar;
  382. last++;
  383. block[last + 1] = (byte)currentChar;
  384. last++;
  385. block[last + 1] = (byte)currentChar;
  386. break;
  387. default:
  388. inUse[runLength - 4] = true;
  389. last++;
  390. block[last + 1] = (byte)currentChar;
  391. last++;
  392. block[last + 1] = (byte)currentChar;
  393. last++;
  394. block[last + 1] = (byte)currentChar;
  395. last++;
  396. block[last + 1] = (byte)currentChar;
  397. last++;
  398. block[last + 1] = (byte)(runLength - 4);
  399. break;
  400. }
  401. } else {
  402. EndBlock();
  403. InitBlock();
  404. WriteRun();
  405. }
  406. }
  407. bool closed = false;
  408. public void Finalize()
  409. {
  410. Close();
  411. }
  412. public override void Close()
  413. {
  414. if (closed) {
  415. return;
  416. }
  417. if (runLength > 0) {
  418. WriteRun();
  419. }
  420. currentChar = -1;
  421. EndBlock();
  422. EndCompression();
  423. closed = true;
  424. // super.close();
  425. Flush();
  426. baseStream.Close();
  427. }
  428. public override void Flush()
  429. {
  430. // super.flush();
  431. baseStream.Flush();
  432. }
  433. uint blockCRC, combinedCRC;
  434. void Initialize()
  435. {
  436. // bytesIn = 0;
  437. bytesOut = 0;
  438. nBlocksRandomised = 0;
  439. /*--- Write `magic' bytes h indicating file-format == huffmanised,
  440. followed by a digit indicating blockSize100k.
  441. ---*/
  442. BsPutUChar('B'); // -jr- 18-Nov-2003 added to match BZ2 1.02 header already in BZip class but that sux a lot
  443. BsPutUChar('Z');
  444. BsPutUChar('h');
  445. BsPutUChar('0' + blockSize100k);
  446. combinedCRC = 0;
  447. }
  448. int allowableBlockSize;
  449. void InitBlock()
  450. {
  451. // blockNo++;
  452. mCrc.Reset();
  453. last = -1;
  454. // ch = 0;
  455. for (int i = 0; i < 256; i++) {
  456. inUse[i] = false;
  457. }
  458. /*--- 20 is just a paranoia constant ---*/
  459. allowableBlockSize = BZip2Constants.baseBlockSize * blockSize100k - 20;
  460. }
  461. void EndBlock()
  462. {
  463. if (last < 0) { //-jr- dont do anything for empty files, (makes empty files compatible with original Bzip)
  464. return;
  465. }
  466. blockCRC = (uint)mCrc.Value;
  467. combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
  468. combinedCRC ^= blockCRC;
  469. /*-- sort the block and establish posn of original string --*/
  470. DoReversibleTransformation();
  471. /*--
  472. A 6-byte block header, the value chosen arbitrarily
  473. as 0x314159265359 :-). A 32 bit value does not really
  474. give a strong enough guarantee that the value will not
  475. appear by chance in the compressed datastream. Worst-case
  476. probability of this event, for a 900k block, is about
  477. 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
  478. For a compressed file of size 100Gb -- about 100000 blocks --
  479. only a 48-bit marker will do. NB: normal compression/
  480. decompression do *not* rely on these statistical properties.
  481. They are only important when trying to recover blocks from
  482. damaged files.
  483. --*/
  484. BsPutUChar(0x31);
  485. BsPutUChar(0x41);
  486. BsPutUChar(0x59);
  487. BsPutUChar(0x26);
  488. BsPutUChar(0x53);
  489. BsPutUChar(0x59);
  490. /*-- Now the block's CRC, so it is in a known place. --*/
  491. BsPutint((int)blockCRC);
  492. /*-- Now a single bit indicating randomisation. --*/
  493. if (blockRandomised) {
  494. BsW(1,1);
  495. nBlocksRandomised++;
  496. } else {
  497. BsW(1,0);
  498. }
  499. /*-- Finally, block's contents proper. --*/
  500. MoveToFrontCodeAndSend();
  501. }
  502. void EndCompression()
  503. {
  504. /*--
  505. Now another magic 48-bit number, 0x177245385090, to
  506. indicate the end of the last block. (sqrt(pi), if
  507. you want to know. I did want to use e, but it contains
  508. too much repetition -- 27 18 28 18 28 46 -- for me
  509. to feel statistically comfortable. Call me paranoid.)
  510. --*/
  511. BsPutUChar(0x17);
  512. BsPutUChar(0x72);
  513. BsPutUChar(0x45);
  514. BsPutUChar(0x38);
  515. BsPutUChar(0x50);
  516. BsPutUChar(0x90);
  517. BsPutint((int)combinedCRC);
  518. BsFinishedWithStream();
  519. }
  520. void HbAssignCodes (int[] code, char[] length, int minLen, int maxLen, int alphaSize)
  521. {
  522. int vec = 0;
  523. for (int n = minLen; n <= maxLen; ++n) {
  524. for (int i = 0; i < alphaSize; ++i) {
  525. if (length[i] == n) {
  526. code[i] = vec;
  527. ++vec;
  528. }
  529. }
  530. vec <<= 1;
  531. }
  532. }
  533. void BsSetStream(Stream f)
  534. {
  535. baseStream = f;
  536. bsLive = 0;
  537. bsBuff = 0;
  538. bytesOut = 0;
  539. }
  540. void BsFinishedWithStream()
  541. {
  542. while (bsLive > 0)
  543. {
  544. int ch = (bsBuff >> 24);
  545. baseStream.WriteByte((byte)ch); // write 8-bit
  546. bsBuff <<= 8;
  547. bsLive -= 8;
  548. bytesOut++;
  549. }
  550. }
  551. void BsW(int n, int v)
  552. {
  553. while (bsLive >= 8) {
  554. int ch = (bsBuff >> 24);
  555. baseStream.WriteByte((byte)ch); // write 8-bit
  556. bsBuff <<= 8;
  557. bsLive -= 8;
  558. ++bytesOut;
  559. }
  560. bsBuff |= (v << (32 - bsLive - n));
  561. bsLive += n;
  562. }
  563. void BsPutUChar(int c)
  564. {
  565. BsW(8, c);
  566. }
  567. void BsPutint(int u)
  568. {
  569. BsW(8, (u >> 24) & 0xFF);
  570. BsW(8, (u >> 16) & 0xFF);
  571. BsW(8, (u >> 8) & 0xFF);
  572. BsW(8, u & 0xFF);
  573. }
  574. void BsPutIntVS(int numBits, int c)
  575. {
  576. BsW(numBits, c);
  577. }
  578. void SendMTFValues()
  579. {
  580. char[][] len = new char[BZip2Constants.N_GROUPS][];
  581. for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
  582. len[i] = new char[BZip2Constants.MAX_ALPHA_SIZE];
  583. }
  584. int gs, ge, totc, bt, bc, iter;
  585. int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
  586. int nGroups, nBytes;
  587. alphaSize = nInUse + 2;
  588. for (int t = 0; t < BZip2Constants.N_GROUPS; t++) {
  589. for (int v = 0; v < alphaSize; v++) {
  590. len[t][v] = (char)GREATER_ICOST;
  591. }
  592. }
  593. /*--- Decide how many coding tables to use ---*/
  594. if (nMTF <= 0) {
  595. Panic();
  596. }
  597. if (nMTF < 200) {
  598. nGroups = 2;
  599. } else if (nMTF < 600) {
  600. nGroups = 3;
  601. } else if (nMTF < 1200) {
  602. nGroups = 4;
  603. } else if (nMTF < 2400) {
  604. nGroups = 5;
  605. } else {
  606. nGroups = 6;
  607. }
  608. /*--- Generate an initial set of coding tables ---*/
  609. int nPart = nGroups;
  610. int remF = nMTF;
  611. gs = 0;
  612. while (nPart > 0) {
  613. int tFreq = remF / nPart;
  614. int aFreq = 0;
  615. ge = gs - 1;
  616. while (aFreq < tFreq && ge < alphaSize - 1) {
  617. ge++;
  618. aFreq += mtfFreq[ge];
  619. }
  620. if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) {
  621. aFreq -= mtfFreq[ge];
  622. ge--;
  623. }
  624. for (int v = 0; v < alphaSize; v++) {
  625. if (v >= gs && v <= ge) {
  626. len[nPart - 1][v] = (char)LESSER_ICOST;
  627. } else {
  628. len[nPart - 1][v] = (char)GREATER_ICOST;
  629. }
  630. }
  631. nPart--;
  632. gs = ge + 1;
  633. remF -= aFreq;
  634. }
  635. int[][] rfreq = new int[BZip2Constants.N_GROUPS][];
  636. for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
  637. rfreq[i] = new int[BZip2Constants.MAX_ALPHA_SIZE];
  638. }
  639. int[] fave = new int[BZip2Constants.N_GROUPS];
  640. short[] cost = new short[BZip2Constants.N_GROUPS];
  641. /*---
  642. Iterate up to N_ITERS times to improve the tables.
  643. ---*/
  644. for (iter = 0; iter < BZip2Constants.N_ITERS; ++iter) {
  645. for (int t = 0; t < nGroups; ++t) {
  646. fave[t] = 0;
  647. }
  648. for (int t = 0; t < nGroups; ++t) {
  649. for (int v = 0; v < alphaSize; ++v) {
  650. rfreq[t][v] = 0;
  651. }
  652. }
  653. nSelectors = 0;
  654. totc = 0;
  655. gs = 0;
  656. while (true) {
  657. /*--- Set group start & end marks. --*/
  658. if (gs >= nMTF) {
  659. break;
  660. }
  661. ge = gs + BZip2Constants.G_SIZE - 1;
  662. if (ge >= nMTF) {
  663. ge = nMTF - 1;
  664. }
  665. /*--
  666. Calculate the cost of this group as coded
  667. by each of the coding tables.
  668. --*/
  669. for (int t = 0; t < nGroups; t++) {
  670. cost[t] = 0;
  671. }
  672. if (nGroups == 6) {
  673. short cost0, cost1, cost2, cost3, cost4, cost5;
  674. cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
  675. for (int i = gs; i <= ge; ++i) {
  676. short icv = szptr[i];
  677. cost0 += (short)len[0][icv];
  678. cost1 += (short)len[1][icv];
  679. cost2 += (short)len[2][icv];
  680. cost3 += (short)len[3][icv];
  681. cost4 += (short)len[4][icv];
  682. cost5 += (short)len[5][icv];
  683. }
  684. cost[0] = cost0;
  685. cost[1] = cost1;
  686. cost[2] = cost2;
  687. cost[3] = cost3;
  688. cost[4] = cost4;
  689. cost[5] = cost5;
  690. } else {
  691. for (int i = gs; i <= ge; ++i) {
  692. short icv = szptr[i];
  693. for (int t = 0; t < nGroups; t++) {
  694. cost[t] += (short)len[t][icv];
  695. }
  696. }
  697. }
  698. /*--
  699. Find the coding table which is best for this group,
  700. and record its identity in the selector table.
  701. --*/
  702. bc = 999999999;
  703. bt = -1;
  704. for (int t = 0; t < nGroups; ++t) {
  705. if (cost[t] < bc) {
  706. bc = cost[t];
  707. bt = t;
  708. }
  709. }
  710. totc += bc;
  711. fave[bt]++;
  712. selector[nSelectors] = (char)bt;
  713. nSelectors++;
  714. /*--
  715. Increment the symbol frequencies for the selected table.
  716. --*/
  717. for (int i = gs; i <= ge; ++i) {
  718. ++rfreq[bt][szptr[i]];
  719. }
  720. gs = ge+1;
  721. }
  722. /*--
  723. Recompute the tables based on the accumulated frequencies.
  724. --*/
  725. for (int t = 0; t < nGroups; ++t) {
  726. HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
  727. }
  728. }
  729. rfreq = null;
  730. fave = null;
  731. cost = null;
  732. if (!(nGroups < 8)) {
  733. Panic();
  734. }
  735. if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZip2Constants.G_SIZE)))) {
  736. Panic();
  737. }
  738. /*--- Compute MTF values for the selectors. ---*/
  739. char[] pos = new char[BZip2Constants.N_GROUPS];
  740. char ll_i, tmp2, tmp;
  741. for (int i = 0; i < nGroups; i++) {
  742. pos[i] = (char)i;
  743. }
  744. for (int i = 0; i < nSelectors; i++) {
  745. ll_i = selector[i];
  746. int j = 0;
  747. tmp = pos[j];
  748. while (ll_i != tmp) {
  749. j++;
  750. tmp2 = tmp;
  751. tmp = pos[j];
  752. pos[j] = tmp2;
  753. }
  754. pos[0] = tmp;
  755. selectorMtf[i] = (char)j;
  756. }
  757. int[][] code = new int[BZip2Constants.N_GROUPS][];
  758. for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
  759. code[i] = new int[BZip2Constants.MAX_ALPHA_SIZE];
  760. }
  761. /*--- Assign actual codes for the tables. --*/
  762. for (int t = 0; t < nGroups; t++) {
  763. minLen = 32;
  764. maxLen = 0;
  765. for (int i = 0; i < alphaSize; i++) {
  766. if (len[t][i] > maxLen) {
  767. maxLen = len[t][i];
  768. }
  769. if (len[t][i] < minLen) {
  770. minLen = len[t][i];
  771. }
  772. }
  773. if (maxLen > 20) {
  774. Panic();
  775. }
  776. if (minLen < 1) {
  777. Panic();
  778. }
  779. HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
  780. }
  781. /*--- Transmit the mapping table. ---*/
  782. bool[] inUse16 = new bool[16];
  783. for (int i = 0; i < 16; ++i) {
  784. inUse16[i] = false;
  785. for (int j = 0; j < 16; ++j) {
  786. if (inUse[i * 16 + j]) {
  787. inUse16[i] = true;
  788. } // TODO : insert break;
  789. }
  790. }
  791. nBytes = bytesOut;
  792. for (int i = 0; i < 16; ++i) {
  793. if (inUse16[i]) {
  794. BsW(1,1);
  795. } else {
  796. BsW(1,0);
  797. }
  798. }
  799. for (int i = 0; i < 16; ++i) {
  800. if (inUse16[i]) {
  801. for (int j = 0; j < 16; ++j) {
  802. if (inUse[i * 16 + j]) {
  803. BsW(1,1);
  804. } else {
  805. BsW(1,0);
  806. }
  807. }
  808. }
  809. }
  810. /*--- Now the selectors. ---*/
  811. nBytes = bytesOut;
  812. BsW(3, nGroups);
  813. BsW(15, nSelectors);
  814. for (int i = 0; i < nSelectors; ++i) {
  815. for (int j = 0; j < selectorMtf[i]; ++j) {
  816. BsW(1,1);
  817. }
  818. BsW(1,0);
  819. }
  820. /*--- Now the coding tables. ---*/
  821. nBytes = bytesOut;
  822. for (int t = 0; t < nGroups; ++t) {
  823. int curr = len[t][0];
  824. BsW(5, curr);
  825. for (int i = 0; i < alphaSize; ++i) {
  826. while (curr < len[t][i]) {
  827. BsW(2, 2);
  828. curr++; /* 10 */
  829. }
  830. while (curr > len[t][i]) {
  831. BsW(2, 3);
  832. curr--; /* 11 */
  833. }
  834. BsW (1, 0);
  835. }
  836. }
  837. /*--- And finally, the block data proper ---*/
  838. nBytes = bytesOut;
  839. selCtr = 0;
  840. gs = 0;
  841. while (true) {
  842. if (gs >= nMTF) {
  843. break;
  844. }
  845. ge = gs + BZip2Constants.G_SIZE - 1;
  846. if (ge >= nMTF) {
  847. ge = nMTF - 1;
  848. }
  849. for (int i = gs; i <= ge; i++) {
  850. BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]);
  851. }
  852. gs = ge + 1;
  853. ++selCtr;
  854. }
  855. if (!(selCtr == nSelectors)) {
  856. Panic();
  857. }
  858. }
  859. void MoveToFrontCodeAndSend ()
  860. {
  861. BsPutIntVS(24, origPtr);
  862. GenerateMTFValues();
  863. SendMTFValues();
  864. }
  865. Stream baseStream;
  866. void SimpleSort(int lo, int hi, int d)
  867. {
  868. int i, j, h, bigN, hp;
  869. int v;
  870. bigN = hi - lo + 1;
  871. if (bigN < 2) {
  872. return;
  873. }
  874. hp = 0;
  875. while (incs[hp] < bigN) {
  876. hp++;
  877. }
  878. hp--;
  879. for (; hp >= 0; hp--) {
  880. h = incs[hp];
  881. i = lo + h;
  882. while (true) {
  883. /*-- copy 1 --*/
  884. if (i > hi)
  885. break;
  886. v = zptr[i];
  887. j = i;
  888. while (FullGtU(zptr[j-h]+d, v+d)) {
  889. zptr[j] = zptr[j-h];
  890. j = j - h;
  891. if (j <= (lo + h - 1))
  892. break;
  893. }
  894. zptr[j] = v;
  895. i++;
  896. /*-- copy 2 --*/
  897. if (i > hi) {
  898. break;
  899. }
  900. v = zptr[i];
  901. j = i;
  902. while (FullGtU ( zptr[j-h]+d, v+d )) {
  903. zptr[j] = zptr[j-h];
  904. j = j - h;
  905. if (j <= (lo + h - 1)) {
  906. break;
  907. }
  908. }
  909. zptr[j] = v;
  910. i++;
  911. /*-- copy 3 --*/
  912. if (i > hi) {
  913. break;
  914. }
  915. v = zptr[i];
  916. j = i;
  917. while (FullGtU ( zptr[j-h]+d, v+d)) {
  918. zptr[j] = zptr[j-h];
  919. j = j - h;
  920. if (j <= (lo + h - 1)) {
  921. break;
  922. }
  923. }
  924. zptr[j] = v;
  925. i++;
  926. if (workDone > workLimit && firstAttempt) {
  927. return;
  928. }
  929. }
  930. }
  931. }
  932. void Vswap(int p1, int p2, int n )
  933. {
  934. int temp = 0;
  935. while (n > 0) {
  936. temp = zptr[p1];
  937. zptr[p1] = zptr[p2];
  938. zptr[p2] = temp;
  939. p1++;
  940. p2++;
  941. n--;
  942. }
  943. }
  944. byte Med3(byte a, byte b, byte c )
  945. {
  946. byte t;
  947. if (a > b) {
  948. t = a;
  949. a = b;
  950. b = t;
  951. }
  952. if (b > c) {
  953. t = b;
  954. b = c;
  955. c = t;
  956. }
  957. if (a > b) {
  958. b = a;
  959. }
  960. return b;
  961. }
  962. class StackElem
  963. {
  964. public int ll;
  965. public int hh;
  966. public int dd;
  967. }
  968. void QSort3(int loSt, int hiSt, int dSt)
  969. {
  970. int unLo, unHi, ltLo, gtHi, med, n, m;
  971. int sp, lo, hi, d;
  972. StackElem[] stack = new StackElem[QSORT_STACK_SIZE];
  973. for (int count = 0; count < QSORT_STACK_SIZE; count++) {
  974. stack[count] = new StackElem();
  975. }
  976. sp = 0;
  977. stack[sp].ll = loSt;
  978. stack[sp].hh = hiSt;
  979. stack[sp].dd = dSt;
  980. sp++;
  981. while (sp > 0) {
  982. if (sp >= QSORT_STACK_SIZE) {
  983. Panic();
  984. }
  985. sp--;
  986. lo = stack[sp].ll;
  987. hi = stack[sp].hh;
  988. d = stack[sp].dd;
  989. if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
  990. SimpleSort(lo, hi, d);
  991. if (workDone > workLimit && firstAttempt) {
  992. return;
  993. }
  994. continue;
  995. }
  996. med = Med3(block[zptr[lo] + d + 1],
  997. block[zptr[hi ] + d + 1],
  998. block[zptr[(lo + hi) >> 1] + d + 1]);
  999. unLo = ltLo = lo;
  1000. unHi = gtHi = hi;
  1001. while (true) {
  1002. while (true) {
  1003. if (unLo > unHi) {
  1004. break;
  1005. }
  1006. n = ((int)block[zptr[unLo]+d + 1]) - med;
  1007. if (n == 0) {
  1008. int temp = 0;
  1009. temp = zptr[unLo];
  1010. zptr[unLo] = zptr[ltLo];
  1011. zptr[ltLo] = temp;
  1012. ltLo++;
  1013. unLo++;
  1014. continue;
  1015. }
  1016. if (n > 0) {
  1017. break;
  1018. }
  1019. unLo++;
  1020. }
  1021. while (true) {
  1022. if (unLo > unHi) {
  1023. break;
  1024. }
  1025. n = ((int)block[zptr[unHi]+d + 1]) - med;
  1026. if (n == 0) {
  1027. int temp = 0;
  1028. temp = zptr[unHi];
  1029. zptr[unHi] = zptr[gtHi];
  1030. zptr[gtHi] = temp;
  1031. gtHi--;
  1032. unHi--;
  1033. continue;
  1034. }
  1035. if (n < 0) {
  1036. break;
  1037. }
  1038. unHi--;
  1039. }
  1040. if (unLo > unHi) {
  1041. break;
  1042. }
  1043. {
  1044. int temp = zptr[unLo];
  1045. zptr[unLo] = zptr[unHi];
  1046. zptr[unHi] = temp;
  1047. unLo++;
  1048. unHi--;
  1049. }
  1050. }
  1051. if (gtHi < ltLo) {
  1052. stack[sp].ll = lo;
  1053. stack[sp].hh = hi;
  1054. stack[sp].dd = d+1;
  1055. sp++;
  1056. continue;
  1057. }
  1058. n = ((ltLo-lo) < (unLo-ltLo)) ? (ltLo-lo) : (unLo-ltLo);
  1059. Vswap(lo, unLo-n, n);
  1060. m = ((hi-gtHi) < (gtHi-unHi)) ? (hi-gtHi) : (gtHi-unHi);
  1061. Vswap(unLo, hi-m+1, m);
  1062. n = lo + unLo - ltLo - 1;
  1063. m = hi - (gtHi - unHi) + 1;
  1064. stack[sp].ll = lo;
  1065. stack[sp].hh = n;
  1066. stack[sp].dd = d;
  1067. sp++;
  1068. stack[sp].ll = n + 1;
  1069. stack[sp].hh = m - 1;
  1070. stack[sp].dd = d+1;
  1071. sp++;
  1072. stack[sp].ll = m;
  1073. stack[sp].hh = hi;
  1074. stack[sp].dd = d;
  1075. sp++;
  1076. }
  1077. }
  1078. void MainSort()
  1079. {
  1080. int i, j, ss, sb;
  1081. int[] runningOrder = new int[256];
  1082. int[] copy = new int[256];
  1083. bool[] bigDone = new bool[256];
  1084. int c1, c2;
  1085. int numQSorted;
  1086. /*--
  1087. In the various block-sized structures, live data runs
  1088. from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
  1089. set up the overshoot area for block.
  1090. --*/
  1091. // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
  1092. for (i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
  1093. block[last + i + 2] = block[(i % (last + 1)) + 1];
  1094. }
  1095. for (i = 0; i <= last + BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
  1096. quadrant[i] = 0;
  1097. }
  1098. block[0] = (byte)(block[last + 1]);
  1099. if (last < 4000) {
  1100. /*--
  1101. Use simpleSort(), since the full sorting mechanism
  1102. has quite a large constant overhead.
  1103. --*/
  1104. for (i = 0; i <= last; i++) {
  1105. zptr[i] = i;
  1106. }
  1107. firstAttempt = false;
  1108. workDone = workLimit = 0;
  1109. SimpleSort(0, last, 0);
  1110. } else {
  1111. numQSorted = 0;
  1112. for (i = 0; i <= 255; i++) {
  1113. bigDone[i] = false;
  1114. }
  1115. for (i = 0; i <= 65536; i++) {
  1116. ftab[i] = 0;
  1117. }
  1118. c1 = block[0];
  1119. for (i = 0; i <= last; i++) {
  1120. c2 = block[i + 1];
  1121. ftab[(c1 << 8) + c2]++;
  1122. c1 = c2;
  1123. }
  1124. for (i = 1; i <= 65536; i++) {
  1125. ftab[i] += ftab[i - 1];
  1126. }
  1127. c1 = block[1];
  1128. for (i = 0; i < last; i++) {
  1129. c2 = block[i + 2];
  1130. j = (c1 << 8) + c2;
  1131. c1 = c2;
  1132. ftab[j]--;
  1133. zptr[ftab[j]] = i;
  1134. }
  1135. j = ((block[last + 1]) << 8) + (block[1]);
  1136. ftab[j]--;
  1137. zptr[ftab[j]] = last;
  1138. /*--
  1139. Now ftab contains the first loc of every small bucket.
  1140. Calculate the running order, from smallest to largest
  1141. big bucket.
  1142. --*/
  1143. for (i = 0; i <= 255; i++) {
  1144. runningOrder[i] = i;
  1145. }
  1146. int vv;
  1147. int h = 1;
  1148. do {
  1149. h = 3 * h + 1;
  1150. } while (h <= 256);
  1151. do {
  1152. h = h / 3;
  1153. for (i = h; i <= 255; i++) {
  1154. vv = runningOrder[i];
  1155. j = i;
  1156. while ((ftab[((runningOrder[j-h])+1) << 8] - ftab[(runningOrder[j-h]) << 8]) > (ftab[((vv)+1) << 8] - ftab[(vv) << 8])) {
  1157. runningOrder[j] = runningOrder[j-h];
  1158. j = j - h;
  1159. if (j <= (h - 1)) {
  1160. break;
  1161. }
  1162. }
  1163. runningOrder[j] = vv;
  1164. }
  1165. } while (h != 1);
  1166. /*--
  1167. The main sorting loop.
  1168. --*/
  1169. for (i = 0; i <= 255; i++) {
  1170. /*--
  1171. Process big buckets, starting with the least full.
  1172. --*/
  1173. ss = runningOrder[i];
  1174. /*--
  1175. Complete the big bucket [ss] by quicksorting
  1176. any unsorted small buckets [ss, j]. Hopefully
  1177. previous pointer-scanning phases have already
  1178. completed many of the small buckets [ss, j], so
  1179. we don't have to sort them at all.
  1180. --*/
  1181. for (j = 0; j <= 255; j++) {
  1182. sb = (ss << 8) + j;
  1183. if(!((ftab[sb] & SETMASK) == SETMASK)) {
  1184. int lo = ftab[sb] & CLEARMASK;
  1185. int hi = (ftab[sb+1] & CLEARMASK) - 1;
  1186. if (hi > lo) {
  1187. QSort3(lo, hi, 2);
  1188. numQSorted += (hi - lo + 1);
  1189. if (workDone > workLimit && firstAttempt) {
  1190. return;
  1191. }
  1192. }
  1193. ftab[sb] |= SETMASK;
  1194. }
  1195. }
  1196. /*--
  1197. The ss big bucket is now done. Record this fact,
  1198. and update the quadrant descriptors. Remember to
  1199. update quadrants in the overshoot area too, if
  1200. necessary. The "if (i < 255)" test merely skips
  1201. this updating for the last bucket processed, since
  1202. updating for the last bucket is pointless.
  1203. --*/
  1204. bigDone[ss] = true;
  1205. if (i < 255) {
  1206. int bbStart = ftab[ss << 8] & CLEARMASK;
  1207. int bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
  1208. int shifts = 0;
  1209. while ((bbSize >> shifts) > 65534) {
  1210. shifts++;
  1211. }
  1212. for (j = 0; j < bbSize; j++) {
  1213. int a2update = zptr[bbStart + j];
  1214. int qVal = (j >> shifts);
  1215. quadrant[a2update] = qVal;
  1216. if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
  1217. quadrant[a2update + last + 1] = qVal;
  1218. }
  1219. }
  1220. if (!(((bbSize-1) >> shifts) <= 65535)) {
  1221. Panic();
  1222. }
  1223. }
  1224. /*--
  1225. Now scan this big bucket so as to synthesise the
  1226. sorted order for small buckets [t, ss] for all t != ss.
  1227. --*/
  1228. for (j = 0; j <= 255; j++) {
  1229. copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
  1230. }
  1231. for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss+1) << 8] & CLEARMASK); j++) {
  1232. c1 = block[zptr[j]];
  1233. if (!bigDone[c1]) {
  1234. zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
  1235. copy[c1] ++;
  1236. }
  1237. }
  1238. for (j = 0; j <= 255; j++) {
  1239. ftab[(j << 8) + ss] |= SETMASK;
  1240. }
  1241. }
  1242. }
  1243. }
  1244. void RandomiseBlock()
  1245. {
  1246. int i;
  1247. int rNToGo = 0;
  1248. int rTPos = 0;
  1249. for (i = 0; i < 256; i++) {
  1250. inUse[i] = false;
  1251. }
  1252. for (i = 0; i <= last; i++) {
  1253. if (rNToGo == 0) {
  1254. rNToGo = (int)BZip2Constants.rNums[rTPos];
  1255. rTPos++;
  1256. if (rTPos == 512) {
  1257. rTPos = 0;
  1258. }
  1259. }
  1260. rNToGo--;
  1261. block[i + 1] ^= (byte)((rNToGo == 1) ? 1 : 0);
  1262. // handle 16 bit signed numbers
  1263. block[i + 1] &= 0xFF;
  1264. inUse[block[i + 1]] = true;
  1265. }
  1266. }
  1267. void DoReversibleTransformation()
  1268. {
  1269. workLimit = workFactor * last;
  1270. workDone = 0;
  1271. blockRandomised = false;
  1272. firstAttempt = true;
  1273. MainSort();
  1274. if (workDone > workLimit && firstAttempt) {
  1275. RandomiseBlock();
  1276. workLimit = workDone = 0;
  1277. blockRandomised = true;
  1278. firstAttempt = false;
  1279. MainSort();
  1280. }
  1281. origPtr = -1;
  1282. for (int i = 0; i <= last; i++) {
  1283. if (zptr[i] == 0) {
  1284. origPtr = i;
  1285. break;
  1286. }
  1287. }
  1288. if (origPtr == -1) {
  1289. Panic();
  1290. }
  1291. }
  1292. bool FullGtU(int i1, int i2)
  1293. {
  1294. int k;
  1295. byte c1, c2;
  1296. int s1, s2;
  1297. c1 = block[i1 + 1];
  1298. c2 = block[i2 + 1];
  1299. if (c1 != c2) {
  1300. return c1 > c2;
  1301. }
  1302. i1++;
  1303. i2++;
  1304. c1 = block[i1 + 1];
  1305. c2 = block[i2 + 1];
  1306. if (c1 != c2) {
  1307. return c1 > c2;
  1308. }
  1309. i1++;
  1310. i2++;
  1311. c1 = block[i1 + 1];
  1312. c2 = block[i2 + 1];
  1313. if (c1 != c2) {
  1314. return c1 > c2;
  1315. }
  1316. i1++;
  1317. i2++;
  1318. c1 = block[i1 + 1];
  1319. c2 = block[i2 + 1];
  1320. if (c1 != c2) {
  1321. return c1 > c2;
  1322. }
  1323. i1++;
  1324. i2++;
  1325. c1 = block[i1 + 1];
  1326. c2 = block[i2 + 1];
  1327. if (c1 != c2) {
  1328. return c1 > c2;
  1329. }
  1330. i1++;
  1331. i2++;
  1332. c1 = block[i1 + 1];
  1333. c2 = block[i2 + 1];
  1334. if (c1 != c2) {
  1335. return c1 > c2;
  1336. }
  1337. i1++;
  1338. i2++;
  1339. k = last + 1;
  1340. do {
  1341. c1 = block[i1 + 1];
  1342. c2 = block[i2 + 1];
  1343. if (c1 != c2) {
  1344. return c1 > c2;
  1345. }
  1346. s1 = quadrant[i1];
  1347. s2 = quadrant[i2];
  1348. if (s1 != s2) {
  1349. return s1 > s2;
  1350. }
  1351. i1++;
  1352. i2++;
  1353. c1 = block[i1 + 1];
  1354. c2 = block[i2 + 1];
  1355. if (c1 != c2) {
  1356. return c1 > c2;
  1357. }
  1358. s1 = quadrant[i1];
  1359. s2 = quadrant[i2];
  1360. if (s1 != s2) {
  1361. return s1 > s2;
  1362. }
  1363. i1++;
  1364. i2++;
  1365. c1 = block[i1 + 1];
  1366. c2 = block[i2 + 1];
  1367. if (c1 != c2) {
  1368. return c1 > c2;
  1369. }
  1370. s1 = quadrant[i1];
  1371. s2 = quadrant[i2];
  1372. if (s1 != s2) {
  1373. return s1 > s2;
  1374. }
  1375. i1++;
  1376. i2++;
  1377. c1 = block[i1 + 1];
  1378. c2 = block[i2 + 1];
  1379. if (c1 != c2) {
  1380. return c1 > c2;
  1381. }
  1382. s1 = quadrant[i1];
  1383. s2 = quadrant[i2];
  1384. if (s1 != s2) {
  1385. return s1 > s2;
  1386. }
  1387. i1++;
  1388. i2++;
  1389. if (i1 > last) {
  1390. i1 -= last;
  1391. i1--;
  1392. }
  1393. if (i2 > last) {
  1394. i2 -= last;
  1395. i2--;
  1396. }
  1397. k -= 4;
  1398. ++workDone;
  1399. } while (k >= 0);
  1400. return false;
  1401. }
  1402. /*--
  1403. Knuth's increments seem to work better
  1404. than Incerpi-Sedgewick here. Possibly
  1405. because the number of elems to sort is
  1406. usually small, typically <= 20.
  1407. --*/
  1408. readonly int[] incs = new int[] {
  1409. 1, 4, 13, 40, 121, 364, 1093, 3280,
  1410. 9841, 29524, 88573, 265720,
  1411. 797161, 2391484
  1412. };
  1413. void AllocateCompressStructures()
  1414. {
  1415. int n = BZip2Constants.baseBlockSize * blockSize100k;
  1416. block = new byte[(n + 1 + BZip2Constants.NUM_OVERSHOOT_BYTES)];
  1417. quadrant = new int[(n + BZip2Constants.NUM_OVERSHOOT_BYTES)];
  1418. zptr = new int[n];
  1419. ftab = new int[65537];
  1420. if (block == null || quadrant == null || zptr == null || ftab == null) {
  1421. // int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
  1422. // compressOutOfMemory ( totalDraw, n );
  1423. }
  1424. /*
  1425. The back end needs a place to store the MTF values
  1426. whilst it calculates the coding tables. We could
  1427. put them in the zptr array. However, these values
  1428. will fit in a short, so we overlay szptr at the
  1429. start of zptr, in the hope of reducing the number
  1430. of cache misses induced by the multiple traversals
  1431. of the MTF values when calculating coding tables.
  1432. Seems to improve compression speed by about 1%.
  1433. */
  1434. // szptr = zptr;
  1435. szptr = new short[2 * n];
  1436. }
  1437. void GenerateMTFValues()
  1438. {
  1439. char[] yy = new char[256];
  1440. int i, j;
  1441. char tmp;
  1442. char tmp2;
  1443. int zPend;
  1444. int wr;
  1445. int EOB;
  1446. MakeMaps();
  1447. EOB = nInUse+1;
  1448. for (i = 0; i <= EOB; i++) {
  1449. mtfFreq[i] = 0;
  1450. }
  1451. wr = 0;
  1452. zPend = 0;
  1453. for (i = 0; i < nInUse; i++) {
  1454. yy[i] = (char) i;
  1455. }
  1456. for (i = 0; i <= last; i++) {
  1457. char ll_i;
  1458. ll_i = unseqToSeq[block[zptr[i]]];
  1459. j = 0;
  1460. tmp = yy[j];
  1461. while (ll_i != tmp) {
  1462. j++;
  1463. tmp2 = tmp;
  1464. tmp = yy[j];
  1465. yy[j] = tmp2;
  1466. }
  1467. yy[0] = tmp;
  1468. if (j == 0) {
  1469. zPend++;
  1470. } else {
  1471. if (zPend > 0) {
  1472. zPend--;
  1473. while (true) {
  1474. switch (zPend % 2) {
  1475. case 0:
  1476. szptr[wr] = (short)BZip2Constants.RUNA;
  1477. wr++;
  1478. mtfFreq[BZip2Constants.RUNA]++;
  1479. break;
  1480. case 1:
  1481. szptr[wr] = (short)BZip2Constants.RUNB;
  1482. wr++;
  1483. mtfFreq[BZip2Constants.RUNB]++;
  1484. break;
  1485. }
  1486. if (zPend < 2) {
  1487. break;
  1488. }
  1489. zPend = (zPend - 2) / 2;
  1490. }
  1491. zPend = 0;
  1492. }
  1493. szptr[wr] = (short)(j + 1);
  1494. wr++;
  1495. mtfFreq[j + 1]++;
  1496. }
  1497. }
  1498. if (zPend > 0) {
  1499. zPend--;
  1500. while (true) {
  1501. switch (zPend % 2) {
  1502. case 0:
  1503. szptr[wr] = (short)BZip2Constants.RUNA;
  1504. wr++;
  1505. mtfFreq[BZip2Constants.RUNA]++;
  1506. break;
  1507. case 1:
  1508. szptr[wr] = (short)BZip2Constants.RUNB;
  1509. wr++;
  1510. mtfFreq[BZip2Constants.RUNB]++;
  1511. break;
  1512. }
  1513. if (zPend < 2) {
  1514. break;
  1515. }
  1516. zPend = (zPend - 2) / 2;
  1517. }
  1518. }
  1519. szptr[wr] = (short)EOB;
  1520. wr++;
  1521. mtfFreq[EOB]++;
  1522. nMTF = wr;
  1523. }
  1524. }
  1525. }
  1526. /* This file was derived from a file containing under this license:
  1527. *
  1528. * This file is a part of bzip2 and/or libbzip2, a program and
  1529. * library for lossless, block-sorting data compression.
  1530. *
  1531. * Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
  1532. *
  1533. * Redistribution and use in source and binary forms, with or without
  1534. * modification, are permitted provided that the following conditions
  1535. * are met:
  1536. *
  1537. * 1. Redistributions of source code must retain the above copyright
  1538. * notice, this list of conditions and the following disclaimer.
  1539. *
  1540. * 2. The origin of this software must not be misrepresented; you must
  1541. * not claim that you wrote the original software. If you use this
  1542. * software in a product, an acknowledgment in the product
  1543. * documentation would be appreciated but is not required.
  1544. *
  1545. * 3. Altered source versions must be plainly marked as such, and must
  1546. * not be misrepresented as being the original software.
  1547. *
  1548. * 4. The name of the author may not be used to endorse or promote
  1549. * products derived from this software without specific prior written
  1550. * permission.
  1551. *
  1552. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  1553. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  1554. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  1555. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  1556. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  1557. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  1558. * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  1559. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  1560. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  1561. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  1562. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  1563. *
  1564. * Java version ported by Keiron Liddle, Aftex Software <[email protected]> 1999-2001
  1565. */