LZW.CPP 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. /*
  2. ** Command & Conquer Red Alert(tm)
  3. ** Copyright 2025 Electronic Arts Inc.
  4. **
  5. ** This program is free software: you can redistribute it and/or modify
  6. ** it under the terms of the GNU General Public License as published by
  7. ** the Free Software Foundation, either version 3 of the License, or
  8. ** (at your option) any later version.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU General Public License
  16. ** along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /* $Header: /CounterStrike/LZW.CPP 1 3/03/97 10:25a Joe_bostic $ */
  19. /***********************************************************************************************
  20. *** C O N F I D E N T I A L --- W E S T W O O D S T U D I O S ***
  21. ***********************************************************************************************
  22. * *
  23. * Project Name : Command & Conquer *
  24. * *
  25. * File Name : LZW.CPP *
  26. * *
  27. * Programmer : Joe L. Bostic *
  28. * *
  29. * Start Date : 08/28/96 *
  30. * *
  31. * Last Update : August 28, 1996 [JLB] *
  32. * *
  33. *---------------------------------------------------------------------------------------------*
  34. * Functions: *
  35. * Find_Child_Node -- Find a matching dictionary entry. *
  36. * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  37. #include <stdio.h>
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include "xstraw.h"
  41. #include "xpipe.h"
  42. #include "buff.h"
  43. #include "lzw.h"
  44. LZWEngine::LZWEngine(void)
  45. {
  46. Reset();
  47. }
  48. void LZWEngine::Reset(void)
  49. {
  50. for (int i = 0; i < TABLE_SIZE; i++) {
  51. dict[i].Make_Unused();
  52. }
  53. }
  54. int LZWEngine::Compress(Buffer const & input, Buffer const & output)
  55. {
  56. BufferStraw instraw(input);
  57. BufferPipe outpipe(output);
  58. int outcount = 0;
  59. CodeType string_code = END_OF_STREAM;
  60. CodeType next_code = FIRST_CODE;
  61. string_code = 0;
  62. if (instraw.Get(&string_code, sizeof(char)) == 0) {
  63. string_code = END_OF_STREAM;
  64. }
  65. for (;;) {
  66. /*
  67. ** Fetch a character from the source data stream. If exhausted,
  68. ** then break out of the process loop so that the final code
  69. ** can be written out.
  70. */
  71. unsigned char character;
  72. if (instraw.Get(&character, sizeof(character)) == 0) break;
  73. /*
  74. ** See if there is a match for the current code and current
  75. ** character. A match indicates that there is already a
  76. ** dictionary entry that fully represents the character
  77. ** sequence.
  78. */
  79. int index = Find_Child_Node(string_code, character);
  80. /*
  81. ** If a code match was found, then set the current code
  82. ** value to this code value that represents the concatenation
  83. ** of the previous code value and the current character.
  84. */
  85. if (index != -1 && dict[index].CodeValue != -1) {
  86. string_code = dict[index].CodeValue;
  87. } else {
  88. /*
  89. ** Since no exact match was found, then create a new code
  90. ** entry that represents the current code and character
  91. ** value concatenated. This presumes there is room in the
  92. ** code table.
  93. */
  94. if (index != -1 && next_code <= MAX_CODE) {
  95. dict[index] = CodeClass(next_code, string_code, character);
  96. next_code++;
  97. }
  98. /*
  99. ** Output the code to the compression stream and reset the
  100. ** current code value to match the current character. This
  101. ** has the effect of clearing out the current character
  102. ** sequence scan in preparation for building a new one. It
  103. ** also ensures that the character will be written out.
  104. */
  105. outcount += outpipe.Put(&string_code, sizeof(string_code));
  106. string_code = character;
  107. }
  108. }
  109. outcount += outpipe.Put(&string_code, sizeof(string_code));
  110. if (string_code != END_OF_STREAM) {
  111. string_code = END_OF_STREAM;
  112. outcount += outpipe.Put(&string_code, sizeof(string_code));
  113. }
  114. return(outcount);
  115. }
  116. int LZWEngine::Uncompress(Buffer const & input, Buffer const & output)
  117. {
  118. int outcount = 0;
  119. BufferStraw instraw(input);
  120. BufferPipe outpipe(output);
  121. CodeType old_code;
  122. if (instraw.Get(&old_code, sizeof(old_code)) == 0) {
  123. return(outcount);
  124. }
  125. unsigned char character = (unsigned char)old_code;
  126. outcount += outpipe.Put(&character, sizeof(character));
  127. unsigned int count;
  128. CodeType new_code;
  129. CodeType next_code = FIRST_CODE;
  130. for (;;) {
  131. if (instraw.Get(&new_code, sizeof(new_code)) == 0) break;
  132. if (new_code == END_OF_STREAM) break;
  133. /*
  134. ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
  135. ** case which generates an undefined code. It handles it by decoding
  136. ** the last code, and adding a single character to the end of the decode string.
  137. */
  138. if (new_code >= next_code) {
  139. decode_stack[0] = character;
  140. count = 1;
  141. count += Decode_String(&decode_stack[1], old_code);
  142. } else {
  143. count = Decode_String(decode_stack, new_code);
  144. }
  145. character = decode_stack[count-1];
  146. while (count > 0) {
  147. --count;
  148. outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0]));
  149. }
  150. /*
  151. ** Add the new code sequence to the dictionary (presuming there is still
  152. ** room).
  153. */
  154. if (next_code <= MAX_CODE) {
  155. dict[next_code] = CodeClass(next_code, old_code, character);
  156. next_code++;
  157. }
  158. old_code = new_code;
  159. }
  160. return(outcount);
  161. }
  162. int LZWEngine::Make_LZW_Hash(CodeType code, char character)
  163. {
  164. return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
  165. }
  166. int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character)
  167. {
  168. /*
  169. ** Fetch the first try index for the code and character.
  170. */
  171. int hash_index = Make_LZW_Hash(parent_code, child_character);
  172. /*
  173. ** Base the hash-miss-try-again-displacement value on the current
  174. ** index. [Shouldn't the value be some large prime number???].
  175. */
  176. int offset = 1;
  177. if (hash_index != 0) {
  178. offset = TABLE_SIZE - hash_index;
  179. }
  180. /*
  181. ** Keep offsetting through the dictionary until an exact match is
  182. ** found for the code and character specified.
  183. */
  184. int initial = hash_index;
  185. while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
  186. /*
  187. ** Stop searching if an unused index is found since this means that
  188. ** a match doesn't exist in the table at all.
  189. */
  190. if (dict[hash_index].Is_Unused()) break;
  191. /*
  192. ** Bump the hash index to another value such that sequential bumps
  193. ** will not result in the same index value until all of the table
  194. ** has been scanned.
  195. */
  196. hash_index -= offset;
  197. if (hash_index < 0) {
  198. hash_index += TABLE_SIZE;
  199. }
  200. /*
  201. ** If the entire table has been scanned and no match or unused
  202. ** entry was found, then return a special value indicating this
  203. ** condition.
  204. */
  205. if (initial == hash_index) {
  206. hash_index = -1;
  207. break;
  208. }
  209. }
  210. return(hash_index);
  211. }
  212. int LZWEngine::Decode_String(char * ptr, CodeType code)
  213. {
  214. int count = 0;
  215. while (code > 255) {
  216. *ptr++ = dict[code].CharValue;
  217. count++;
  218. code = dict[code].ParentCode;
  219. }
  220. *ptr = (char)code;
  221. count++;
  222. return(count);
  223. }
  224. int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff)
  225. {
  226. LZWEngine lzw;
  227. return(lzw.Uncompress(inbuff, outbuff));
  228. }
  229. int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff)
  230. {
  231. LZWEngine lzw;
  232. return(lzw.Compress(inbuff, outbuff));
  233. }
  234. #ifdef NEVER
  235. /*
  236. * Constants used throughout the program. BITS defines how many bits
  237. * will be in a code. TABLE_SIZE defines the size of the dictionary
  238. * table.
  239. */
  240. #define BITS 12
  241. #define MAX_CODE ( ( 1 << BITS ) - 1 )
  242. #define TABLE_SIZE 5021
  243. #define END_OF_STREAM 256
  244. #define FIRST_CODE 257
  245. #define UNUSED -1
  246. typedef unsigned short CodeType;
  247. /*
  248. * This data structure defines the dictionary. Each entry in the dictionary
  249. * has a code value. This is the code emitted by the compressor. Each
  250. * code is actually made up of two pieces: a parent_code, and a
  251. * character. Code values of less than 256 are actually plain
  252. * text codes.
  253. */
  254. struct CodeClass
  255. {
  256. CodeType CodeValue;
  257. CodeType ParentCode;
  258. char CharValue;
  259. CodeClass(void) {}
  260. CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {}
  261. void Make_Unused(void) {CodeValue = UNUSED;}
  262. bool Is_Unused(void) const {return(CodeValue == UNUSED);}
  263. bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);}
  264. };
  265. CodeClass dict[TABLE_SIZE];
  266. char decode_stack[TABLE_SIZE];
  267. inline int Make_LZW_Hash(CodeType code, char character)
  268. {
  269. return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
  270. }
  271. /***********************************************************************************************
  272. * Find_Child_Node -- Find a matching dictionary entry. *
  273. * *
  274. * This hashing routine is responsible for finding the table location *
  275. * for a string/character combination. The table index is created *
  276. * by using an exclusive OR combination of the prefix and character. *
  277. * This code also has to check for collisions, and handles them by *
  278. * jumping around in the table. *
  279. * *
  280. * INPUT: parent_code -- The code of the parent string sequence. *
  281. * *
  282. * character -- The current character. *
  283. * *
  284. * OUTPUT: Returns with the index for the matching dictionary entry. If no matching index *
  285. * could be found, then it returns with the index to an unused dictionary entry. If *
  286. * there are also no unused entries in the dictionary, then -1 is returned. *
  287. * *
  288. * WARNINGS: none *
  289. * *
  290. * HISTORY: *
  291. * 08/28/1996 JLB : Created. *
  292. *=============================================================================================*/
  293. static int Find_Child_Node(CodeType parent_code, char child_character)
  294. {
  295. /*
  296. ** Fetch the first try index for the code and character.
  297. */
  298. int hash_index = Make_LZW_Hash(parent_code, child_character);
  299. /*
  300. ** Base the hash-miss-try-again-displacement value on the current
  301. ** index. [Shouldn't the value be some large prime number???].
  302. */
  303. int offset = 1;
  304. if (hash_index != 0) {
  305. offset = TABLE_SIZE - hash_index;
  306. }
  307. /*
  308. ** Keep offsetting through the dictionary until an exact match is
  309. ** found for the code and character specified.
  310. */
  311. int initial = hash_index;
  312. while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
  313. /*
  314. ** Stop searching if an unused index is found since this means that
  315. ** a match doesn't exist in the table at all.
  316. */
  317. if (dict[hash_index].Is_Unused()) break;
  318. /*
  319. ** Bump the hash index to another value such that sequential bumps
  320. ** will not result in the same index value until all of the table
  321. ** has been scanned.
  322. */
  323. hash_index -= offset;
  324. if (hash_index < 0) {
  325. hash_index += TABLE_SIZE;
  326. }
  327. /*
  328. ** If the entire table has been scanned and no match or unused
  329. ** entry was found, then return a special value indicating this
  330. ** condition.
  331. */
  332. if (initial == hash_index) {
  333. hash_index = -1;
  334. break;
  335. }
  336. }
  337. return(hash_index);
  338. }
  339. /*
  340. * This routine decodes a string from the dictionary, and stores it
  341. * in the decode_stack data structure. It returns a count to the
  342. * calling program of how many characters were placed in the stack.
  343. */
  344. static int Decode_String(char * ptr, CodeType code)
  345. {
  346. int count = 0;
  347. while (code > 255) {
  348. *ptr++ = dict[code].CharValue;
  349. count++;
  350. code = dict[code].ParentCode;
  351. }
  352. *ptr = (char)code;
  353. count++;
  354. return(count);
  355. }
  356. /*
  357. * The compressor is short and simple. It reads in new symbols one
  358. * at a time from the input file. It then checks to see if the
  359. * combination of the current symbol and the current code are already
  360. * defined in the dictionary. If they are not, they are added to the
  361. * dictionary, and we start over with a new one symbol code. If they
  362. * are, the code for the combination of the code and character becomes
  363. * our new code.
  364. */
  365. int LZW_Compress(Buffer & inbuff, Buffer & outbuff)
  366. {
  367. BufferStraw input(inbuff);
  368. BufferPipe output(outbuff);
  369. for (int i = 0; i < TABLE_SIZE; i++) {
  370. dict[i].Make_Unused();
  371. // dict[i].code_value = UNUSED;
  372. }
  373. int outcount = 0;
  374. CodeType string_code = END_OF_STREAM;
  375. CodeType next_code = FIRST_CODE;
  376. for (;;) {
  377. char character;
  378. if (input.Get(&character, sizeof(character)) == 0) break;
  379. int index = Find_Child_Node(string_code, character);
  380. if (index == -1) {
  381. string_code = character;
  382. outcount += output.Put(&string_code, sizeof(string_code));
  383. } else {
  384. if (dict[index].CodeValue != -1) {
  385. string_code = dict[ index ].CodeValue;
  386. } else {
  387. if (next_code <= MAX_CODE) {
  388. dict[index] = CodeClass(next_code++, string_code, character);
  389. }
  390. outcount += output.Put(&string_code, sizeof(string_code));
  391. string_code = character;
  392. }
  393. }
  394. }
  395. outcount += output.Put(&string_code, sizeof(string_code));
  396. string_code = END_OF_STREAM;
  397. outcount += output.Put(&string_code, sizeof(string_code));
  398. return(outcount);
  399. }
  400. /*
  401. * The file expander operates much like the encoder. It has to
  402. * read in codes, the convert the codes to a string of characters.
  403. * The only catch in the whole operation occurs when the encoder
  404. * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this
  405. * occurs, the encoder outputs a code that is not presently defined
  406. * in the table. This is handled as an exception.
  407. */
  408. int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff)
  409. {
  410. int outcount = 0;
  411. BufferStraw input(inbuff);
  412. BufferPipe output(outbuff);
  413. CodeType old_code;
  414. if (input.Get(&old_code, sizeof(old_code)) == 0) {
  415. return(outcount);
  416. }
  417. char character = (char)old_code;
  418. outcount += output.Put(&character, sizeof(character));
  419. unsigned int count;
  420. CodeType new_code;
  421. CodeType next_code = FIRST_CODE;
  422. for (;;) {
  423. if (input.Get(&new_code, sizeof(new_code)) == 0) break;
  424. /*
  425. ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
  426. ** case which generates an undefined code. It handles it by decoding
  427. ** the last code, and adding a single character to the end of the decode string.
  428. */
  429. if (new_code >= next_code) {
  430. decode_stack[0] = character;
  431. count = 1;
  432. count += Decode_String(&decode_stack[1], old_code);
  433. } else {
  434. count = Decode_String(decode_stack, new_code);
  435. }
  436. character = decode_stack[count-1];
  437. while (count > 0) {
  438. --count;
  439. outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0]));
  440. }
  441. /*
  442. ** Add the new code sequence to the dictionary (presuming there is still
  443. ** room).
  444. */
  445. if (next_code <= MAX_CODE) {
  446. dict[next_code] = CodeClass(next_code, old_code, character);
  447. next_code++;
  448. }
  449. old_code = new_code;
  450. }
  451. return(outcount);
  452. }
  453. #endif