LZW.CPP 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. //
  2. // Copyright 2020 Electronic Arts Inc.
  3. //
  4. // TiberianDawn.DLL and RedAlert.dll and corresponding source code is free
  5. // software: you can redistribute it and/or modify it under the terms of
  6. // the GNU General Public License as published by the Free Software Foundation,
  7. // either version 3 of the License, or (at your option) any later version.
  8. // TiberianDawn.DLL and RedAlert.dll and corresponding source code is distributed
  9. // in the hope that it will be useful, but with permitted additional restrictions
  10. // under Section 7 of the GPL. See the GNU General Public License in LICENSE.TXT
  11. // distributed with this program. You should have received a copy of the
  12. // GNU General Public License along with permitted additional restrictions
  13. // with this program. If not, see https://github.com/electronicarts/CnC_Remastered_Collection
  14. /* $Header: /CounterStrike/LZW.CPP 1 3/03/97 10:25a Joe_bostic $ */
  15. /***********************************************************************************************
  16. *** C O N F I D E N T I A L --- W E S T W O O D S T U D I O S ***
  17. ***********************************************************************************************
  18. * *
  19. * Project Name : Command & Conquer *
  20. * *
  21. * File Name : LZW.CPP *
  22. * *
  23. * Programmer : Joe L. Bostic *
  24. * *
  25. * Start Date : 08/28/96 *
  26. * *
  27. * Last Update : August 28, 1996 [JLB] *
  28. * *
  29. *---------------------------------------------------------------------------------------------*
  30. * Functions: *
  31. * Find_Child_Node -- Find a matching dictionary entry. *
  32. * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  33. #include <stdio.h>
  34. #include <stdlib.h>
  35. #include <string.h>
  36. #include "xstraw.h"
  37. #include "xpipe.h"
  38. #include "buff.h"
  39. #include "lzw.h"
  40. LZWEngine::LZWEngine(void)
  41. {
  42. Reset();
  43. }
  44. void LZWEngine::Reset(void)
  45. {
  46. for (int i = 0; i < TABLE_SIZE; i++) {
  47. dict[i].Make_Unused();
  48. }
  49. }
  50. int LZWEngine::Compress(Buffer const & input, Buffer const & output)
  51. {
  52. BufferStraw instraw(input);
  53. BufferPipe outpipe(output);
  54. int outcount = 0;
  55. CodeType string_code = END_OF_STREAM;
  56. CodeType next_code = FIRST_CODE;
  57. string_code = 0;
  58. if (instraw.Get(&string_code, sizeof(char)) == 0) {
  59. string_code = END_OF_STREAM;
  60. }
  61. for (;;) {
  62. /*
  63. ** Fetch a character from the source data stream. If exhausted,
  64. ** then break out of the process loop so that the final code
  65. ** can be written out.
  66. */
  67. unsigned char character;
  68. if (instraw.Get(&character, sizeof(character)) == 0) break;
  69. /*
  70. ** See if there is a match for the current code and current
  71. ** character. A match indicates that there is already a
  72. ** dictionary entry that fully represents the character
  73. ** sequence.
  74. */
  75. int index = Find_Child_Node(string_code, character);
  76. /*
  77. ** If a code match was found, then set the current code
  78. ** value to this code value that represents the concatenation
  79. ** of the previous code value and the current character.
  80. */
  81. if (index != -1 && dict[index].CodeValue != -1) {
  82. string_code = dict[index].CodeValue;
  83. } else {
  84. /*
  85. ** Since no exact match was found, then create a new code
  86. ** entry that represents the current code and character
  87. ** value concatenated. This presumes there is room in the
  88. ** code table.
  89. */
  90. if (index != -1 && next_code <= MAX_CODE) {
  91. dict[index] = CodeClass(next_code, string_code, character);
  92. next_code++;
  93. }
  94. /*
  95. ** Output the code to the compression stream and reset the
  96. ** current code value to match the current character. This
  97. ** has the effect of clearing out the current character
  98. ** sequence scan in preparation for building a new one. It
  99. ** also ensures that the character will be written out.
  100. */
  101. outcount += outpipe.Put(&string_code, sizeof(string_code));
  102. string_code = character;
  103. }
  104. }
  105. outcount += outpipe.Put(&string_code, sizeof(string_code));
  106. if (string_code != END_OF_STREAM) {
  107. string_code = END_OF_STREAM;
  108. outcount += outpipe.Put(&string_code, sizeof(string_code));
  109. }
  110. return(outcount);
  111. }
  112. int LZWEngine::Uncompress(Buffer const & input, Buffer const & output)
  113. {
  114. int outcount = 0;
  115. BufferStraw instraw(input);
  116. BufferPipe outpipe(output);
  117. CodeType old_code;
  118. if (instraw.Get(&old_code, sizeof(old_code)) == 0) {
  119. return(outcount);
  120. }
  121. unsigned char character = (unsigned char)old_code;
  122. outcount += outpipe.Put(&character, sizeof(character));
  123. unsigned int count;
  124. CodeType new_code;
  125. CodeType next_code = FIRST_CODE;
  126. for (;;) {
  127. if (instraw.Get(&new_code, sizeof(new_code)) == 0) break;
  128. if (new_code == END_OF_STREAM) break;
  129. /*
  130. ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
  131. ** case which generates an undefined code. It handles it by decoding
  132. ** the last code, and adding a single character to the end of the decode string.
  133. */
  134. if (new_code >= next_code) {
  135. decode_stack[0] = character;
  136. count = 1;
  137. count += Decode_String(&decode_stack[1], old_code);
  138. } else {
  139. count = Decode_String(decode_stack, new_code);
  140. }
  141. character = decode_stack[count-1];
  142. while (count > 0) {
  143. --count;
  144. outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0]));
  145. }
  146. /*
  147. ** Add the new code sequence to the dictionary (presuming there is still
  148. ** room).
  149. */
  150. if (next_code <= MAX_CODE) {
  151. dict[next_code] = CodeClass(next_code, old_code, character);
  152. next_code++;
  153. }
  154. old_code = new_code;
  155. }
  156. return(outcount);
  157. }
  158. int LZWEngine::Make_LZW_Hash(CodeType code, char character)
  159. {
  160. return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
  161. }
  162. int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character)
  163. {
  164. /*
  165. ** Fetch the first try index for the code and character.
  166. */
  167. int hash_index = Make_LZW_Hash(parent_code, child_character);
  168. /*
  169. ** Base the hash-miss-try-again-displacement value on the current
  170. ** index. [Shouldn't the value be some large prime number???].
  171. */
  172. int offset = 1;
  173. if (hash_index != 0) {
  174. offset = TABLE_SIZE - hash_index;
  175. }
  176. /*
  177. ** Keep offsetting through the dictionary until an exact match is
  178. ** found for the code and character specified.
  179. */
  180. int initial = hash_index;
  181. while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
  182. /*
  183. ** Stop searching if an unused index is found since this means that
  184. ** a match doesn't exist in the table at all.
  185. */
  186. if (dict[hash_index].Is_Unused()) break;
  187. /*
  188. ** Bump the hash index to another value such that sequential bumps
  189. ** will not result in the same index value until all of the table
  190. ** has been scanned.
  191. */
  192. hash_index -= offset;
  193. if (hash_index < 0) {
  194. hash_index += TABLE_SIZE;
  195. }
  196. /*
  197. ** If the entire table has been scanned and no match or unused
  198. ** entry was found, then return a special value indicating this
  199. ** condition.
  200. */
  201. if (initial == hash_index) {
  202. hash_index = -1;
  203. break;
  204. }
  205. }
  206. return(hash_index);
  207. }
  208. int LZWEngine::Decode_String(char * ptr, CodeType code)
  209. {
  210. int count = 0;
  211. while (code > 255) {
  212. *ptr++ = dict[code].CharValue;
  213. count++;
  214. code = dict[code].ParentCode;
  215. }
  216. *ptr = (char)code;
  217. count++;
  218. return(count);
  219. }
  220. int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff)
  221. {
  222. LZWEngine lzw;
  223. return(lzw.Uncompress(inbuff, outbuff));
  224. }
  225. int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff)
  226. {
  227. LZWEngine lzw;
  228. return(lzw.Compress(inbuff, outbuff));
  229. }
  230. #ifdef NEVER
  231. /*
  232. * Constants used throughout the program. BITS defines how many bits
  233. * will be in a code. TABLE_SIZE defines the size of the dictionary
  234. * table.
  235. */
  236. #define BITS 12
  237. #define MAX_CODE ( ( 1 << BITS ) - 1 )
  238. #define TABLE_SIZE 5021
  239. #define END_OF_STREAM 256
  240. #define FIRST_CODE 257
  241. #define UNUSED -1
  242. typedef unsigned short CodeType;
  243. /*
  244. * This data structure defines the dictionary. Each entry in the dictionary
  245. * has a code value. This is the code emitted by the compressor. Each
  246. * code is actually made up of two pieces: a parent_code, and a
  247. * character. Code values of less than 256 are actually plain
  248. * text codes.
  249. */
  250. struct CodeClass
  251. {
  252. CodeType CodeValue;
  253. CodeType ParentCode;
  254. char CharValue;
  255. CodeClass(void) {}
  256. CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {}
  257. void Make_Unused(void) {CodeValue = UNUSED;}
  258. bool Is_Unused(void) const {return(CodeValue == UNUSED);}
  259. bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);}
  260. };
  261. CodeClass dict[TABLE_SIZE];
  262. char decode_stack[TABLE_SIZE];
  263. inline int Make_LZW_Hash(CodeType code, char character)
  264. {
  265. return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
  266. }
  267. /***********************************************************************************************
  268. * Find_Child_Node -- Find a matching dictionary entry. *
  269. * *
  270. * This hashing routine is responsible for finding the table location *
  271. * for a string/character combination. The table index is created *
  272. * by using an exclusive OR combination of the prefix and character. *
  273. * This code also has to check for collisions, and handles them by *
  274. * jumping around in the table. *
  275. * *
  276. * INPUT: parent_code -- The code of the parent string sequence. *
  277. * *
  278. * character -- The current character. *
  279. * *
  280. * OUTPUT: Returns with the index for the matching dictionary entry. If no matching index *
  281. * could be found, then it returns with the index to an unused dictionary entry. If *
  282. * there are also no unused entries in the dictionary, then -1 is returned. *
  283. * *
  284. * WARNINGS: none *
  285. * *
  286. * HISTORY: *
  287. * 08/28/1996 JLB : Created. *
  288. *=============================================================================================*/
  289. static int Find_Child_Node(CodeType parent_code, char child_character)
  290. {
  291. /*
  292. ** Fetch the first try index for the code and character.
  293. */
  294. int hash_index = Make_LZW_Hash(parent_code, child_character);
  295. /*
  296. ** Base the hash-miss-try-again-displacement value on the current
  297. ** index. [Shouldn't the value be some large prime number???].
  298. */
  299. int offset = 1;
  300. if (hash_index != 0) {
  301. offset = TABLE_SIZE - hash_index;
  302. }
  303. /*
  304. ** Keep offsetting through the dictionary until an exact match is
  305. ** found for the code and character specified.
  306. */
  307. int initial = hash_index;
  308. while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
  309. /*
  310. ** Stop searching if an unused index is found since this means that
  311. ** a match doesn't exist in the table at all.
  312. */
  313. if (dict[hash_index].Is_Unused()) break;
  314. /*
  315. ** Bump the hash index to another value such that sequential bumps
  316. ** will not result in the same index value until all of the table
  317. ** has been scanned.
  318. */
  319. hash_index -= offset;
  320. if (hash_index < 0) {
  321. hash_index += TABLE_SIZE;
  322. }
  323. /*
  324. ** If the entire table has been scanned and no match or unused
  325. ** entry was found, then return a special value indicating this
  326. ** condition.
  327. */
  328. if (initial == hash_index) {
  329. hash_index = -1;
  330. break;
  331. }
  332. }
  333. return(hash_index);
  334. }
  335. /*
  336. * This routine decodes a string from the dictionary, and stores it
  337. * in the decode_stack data structure. It returns a count to the
  338. * calling program of how many characters were placed in the stack.
  339. */
  340. static int Decode_String(char * ptr, CodeType code)
  341. {
  342. int count = 0;
  343. while (code > 255) {
  344. *ptr++ = dict[code].CharValue;
  345. count++;
  346. code = dict[code].ParentCode;
  347. }
  348. *ptr = (char)code;
  349. count++;
  350. return(count);
  351. }
  352. /*
  353. * The compressor is short and simple. It reads in new symbols one
  354. * at a time from the input file. It then checks to see if the
  355. * combination of the current symbol and the current code are already
  356. * defined in the dictionary. If they are not, they are added to the
  357. * dictionary, and we start over with a new one symbol code. If they
  358. * are, the code for the combination of the code and character becomes
  359. * our new code.
  360. */
  361. int LZW_Compress(Buffer & inbuff, Buffer & outbuff)
  362. {
  363. BufferStraw input(inbuff);
  364. BufferPipe output(outbuff);
  365. for (int i = 0; i < TABLE_SIZE; i++) {
  366. dict[i].Make_Unused();
  367. // dict[i].code_value = UNUSED;
  368. }
  369. int outcount = 0;
  370. CodeType string_code = END_OF_STREAM;
  371. CodeType next_code = FIRST_CODE;
  372. for (;;) {
  373. char character;
  374. if (input.Get(&character, sizeof(character)) == 0) break;
  375. int index = Find_Child_Node(string_code, character);
  376. if (index == -1) {
  377. string_code = character;
  378. outcount += output.Put(&string_code, sizeof(string_code));
  379. } else {
  380. if (dict[index].CodeValue != -1) {
  381. string_code = dict[ index ].CodeValue;
  382. } else {
  383. if (next_code <= MAX_CODE) {
  384. dict[index] = CodeClass(next_code++, string_code, character);
  385. }
  386. outcount += output.Put(&string_code, sizeof(string_code));
  387. string_code = character;
  388. }
  389. }
  390. }
  391. outcount += output.Put(&string_code, sizeof(string_code));
  392. string_code = END_OF_STREAM;
  393. outcount += output.Put(&string_code, sizeof(string_code));
  394. return(outcount);
  395. }
  396. /*
  397. * The file expander operates much like the encoder. It has to
  398. * read in codes, the convert the codes to a string of characters.
  399. * The only catch in the whole operation occurs when the encoder
  400. * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this
  401. * occurs, the encoder outputs a code that is not presently defined
  402. * in the table. This is handled as an exception.
  403. */
  404. int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff)
  405. {
  406. int outcount = 0;
  407. BufferStraw input(inbuff);
  408. BufferPipe output(outbuff);
  409. CodeType old_code;
  410. if (input.Get(&old_code, sizeof(old_code)) == 0) {
  411. return(outcount);
  412. }
  413. char character = (char)old_code;
  414. outcount += output.Put(&character, sizeof(character));
  415. unsigned int count;
  416. CodeType new_code;
  417. CodeType next_code = FIRST_CODE;
  418. for (;;) {
  419. if (input.Get(&new_code, sizeof(new_code)) == 0) break;
  420. /*
  421. ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
  422. ** case which generates an undefined code. It handles it by decoding
  423. ** the last code, and adding a single character to the end of the decode string.
  424. */
  425. if (new_code >= next_code) {
  426. decode_stack[0] = character;
  427. count = 1;
  428. count += Decode_String(&decode_stack[1], old_code);
  429. } else {
  430. count = Decode_String(decode_stack, new_code);
  431. }
  432. character = decode_stack[count-1];
  433. while (count > 0) {
  434. --count;
  435. outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0]));
  436. }
  437. /*
  438. ** Add the new code sequence to the dictionary (presuming there is still
  439. ** room).
  440. */
  441. if (next_code <= MAX_CODE) {
  442. dict[next_code] = CodeClass(next_code, old_code, character);
  443. next_code++;
  444. }
  445. old_code = new_code;
  446. }
  447. return(outcount);
  448. }
  449. #endif