patchfile.cxx 47 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564
  1. /**
  2. * PANDA 3D SOFTWARE
  3. * Copyright (c) Carnegie Mellon University. All rights reserved.
  4. *
  5. * All use of this software is subject to the terms of the revised BSD
  6. * license. You should have received a copy of this license along
  7. * with this source code in a file named "LICENSE."
  8. *
  9. * @file patchfile.cxx
  10. * @author darren, mike
  11. * @date 1997-01-09
  12. */
  13. #include "pandabase.h"
  14. #ifdef HAVE_OPENSSL
  15. #include "config_express.h"
  16. #include "error_utils.h"
  17. #include "patchfile.h"
  18. #include "streamReader.h"
  19. #include "streamWriter.h"
  20. #include "multifile.h"
  21. #include "hashVal.h"
  22. #include "virtualFileSystem.h"
  23. #include <string.h> // for strstr
  24. #ifdef HAVE_TAR
  25. #include <libtar.h>
  26. #include <fcntl.h> // for O_RDONLY
  27. #endif // HAVE_TAR
  28. #ifdef HAVE_TAR
  29. std::istream *Patchfile::_tar_istream = nullptr;
  30. #endif // HAVE_TAR
  31. using std::endl;
  32. using std::ios;
  33. using std::istream;
  34. using std::min;
  35. using std::ostream;
  36. using std::streampos;
  37. using std::string;
  38. // this actually slows things down... #define
  39. // USE_MD5_FOR_HASHTABLE_INDEX_VALUES
  40. /*
  41. * Patch File Format IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE
  42. * INSTALLER [ HEADER ] 4 bytes 0xfeebfaac ("magic number") (older patch
  43. * files have a magic number 0xfeebfaab, indicating they are version number
  44. * 0.) 2 bytes version number (if magic number == 0xfeebfaac) 4 bytes length
  45. * of starting file (if version >= 1) 16 bytes MD5 of starting file (if
  46. * version >= 1) 4 bytes length of resulting patched file 16 bytes MD5 of
  47. * resultant patched file Note that MD5 hashes are written in the order
  48. * observed by HashVal::read_stream() and HashVal::write_stream(), which is
  49. * not the normal linear order. (Each group of four bytes is reversed.)
  50. */
  51. const int _v0_header_length = 4 + 4 + 16;
  52. const int _v1_header_length = 4 + 2 + 4 + 16 + 4 + 16;
  53. /*
  54. * [ ADDCOPY pairs; repeated N times ] 2 bytes AL = ADD length AL bytes
  55. * bytes to add 2 bytes CL = COPY length 4 bytes offset of data to copy from
  56. * original file, if CL != 0. If version >= 2, offset is relative to end of
  57. * previous copy block; if version < 2, offset is relative to beginning of
  58. * file. [ TERMINATOR ] 2 bytes zero-length ADD 2 bytes zero-length COPY
  59. */
  60. // Defines
  61. const uint32_t Patchfile::_v0_magic_number = 0xfeebfaab;
  62. const uint32_t Patchfile::_magic_number = 0xfeebfaac;
  63. // Created version 1 on 11202 to store length and MD5 of original file. To
  64. // version 2 on 11202 to store copy offsets as relative.
  65. const uint16_t Patchfile::_current_version = 2;
  66. const uint32_t Patchfile::_HASH_BITS = 24;
  67. const uint32_t Patchfile::_HASHTABLESIZE = uint32_t(1) << Patchfile::_HASH_BITS;
  68. const uint32_t Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
  69. const uint32_t Patchfile::_NULL_VALUE = uint32_t(0) - 1;
  70. const uint32_t Patchfile::_MAX_RUN_LENGTH = (uint32_t(1) << 16) - 1;
  71. const uint32_t Patchfile::_HASH_MASK = (uint32_t(1) << Patchfile::_HASH_BITS) - 1;
  72. /**
  73. * Create a patch file and initializes internal data
  74. */
  75. Patchfile::
  76. Patchfile() {
  77. PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
  78. init(buffer);
  79. }
  80. /**
  81. * Create patch file with buffer to patch
  82. */
  83. Patchfile::
  84. Patchfile(PT(Buffer) buffer) {
  85. init(buffer);
  86. }
  87. /**
  88. *
  89. */
  90. void Patchfile::
  91. init(PT(Buffer) buffer) {
  92. _rename_output_to_orig = false;
  93. _delete_patchfile = false;
  94. _hash_table = nullptr;
  95. _initiated = false;
  96. nassertv(!buffer.is_null());
  97. _buffer = buffer;
  98. _version_number = 0;
  99. _allow_multifile = true;
  100. _patch_stream = nullptr;
  101. _origfile_stream = nullptr;
  102. reset_footprint_length();
  103. }
  104. /**
  105. *
  106. */
  107. Patchfile::
  108. ~Patchfile() {
  109. if (_hash_table != nullptr) {
  110. PANDA_FREE_ARRAY(_hash_table);
  111. }
  112. if (_initiated) {
  113. cleanup();
  114. }
  115. nassertv(_patch_stream == nullptr);
  116. nassertv(_origfile_stream == nullptr);
  117. }
  118. /**
  119. * Closes and clean up internal data structures
  120. */
  121. void Patchfile::
  122. cleanup() {
  123. if (!_initiated) {
  124. express_cat.error()
  125. << "Patchfile::cleanup() - Patching has not been initiated"
  126. << endl;
  127. return;
  128. }
  129. // close files
  130. VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
  131. if (_origfile_stream != nullptr) {
  132. vfs->close_read_file(_origfile_stream);
  133. _origfile_stream = nullptr;
  134. }
  135. if (_patch_stream != nullptr) {
  136. vfs->close_read_file(_patch_stream);
  137. _patch_stream = nullptr;
  138. }
  139. _write_stream.close();
  140. _initiated = false;
  141. }
  142. // PATCH FILE APPLY MEMBER FUNCTIONS
  143. // NOTE: this patch-application functionality unfortunately has to be
  144. // duplicated in the Installer. It is contained in the file
  145. // installerApplyPatch.cxx PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY
  146. // OF THIS LOGIC CHANGES! (i.e. if the patch file format changes)
  147. /**
  148. * Set up to apply the patch to the file (original file and patch are
  149. * destroyed in the process).
  150. */
  151. int Patchfile::
  152. initiate(const Filename &patch_file, const Filename &file) {
  153. int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
  154. _rename_output_to_orig = true;
  155. _delete_patchfile = !keep_temporary_files;
  156. return result;
  157. }
  158. /**
  159. * Set up to apply the patch to the file. In this form, neither the original
  160. * file nor the patch file are destroyed.
  161. */
  162. int Patchfile::
  163. initiate(const Filename &patch_file, const Filename &orig_file,
  164. const Filename &target_file) {
  165. if (_initiated) {
  166. express_cat.error()
  167. << "Patchfile::initiate() - Patching has already been initiated"
  168. << endl;
  169. return EU_error_abort;
  170. }
  171. nassertr(orig_file != target_file, EU_error_abort);
  172. VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
  173. // Open the original file for read
  174. nassertr(_origfile_stream == nullptr, EU_error_abort);
  175. _orig_file = orig_file;
  176. _orig_file.set_binary();
  177. _origfile_stream = vfs->open_read_file(_orig_file, false);
  178. if (_origfile_stream == nullptr) {
  179. express_cat.error()
  180. << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
  181. return get_write_error();
  182. }
  183. // Open the temp file for write
  184. _output_file = target_file;
  185. _output_file.set_binary();
  186. if (!_output_file.open_write(_write_stream)) {
  187. express_cat.error()
  188. << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
  189. return get_write_error();
  190. }
  191. if (express_cat.is_debug()) {
  192. express_cat.debug()
  193. << "Patchfile using output file " << _output_file << "\n";
  194. }
  195. int result = internal_read_header(patch_file);
  196. _total_bytes_processed = 0;
  197. _initiated = true;
  198. return result;
  199. }
  200. /**
  201. * Opens the patch file for reading, and gets the header information from the
  202. * file but does not begin to do any real work. This can be used to query the
  203. * data stored in the patch.
  204. */
  205. int Patchfile::
  206. read_header(const Filename &patch_file) {
  207. if (_initiated) {
  208. express_cat.error()
  209. << "Patchfile::initiate() - Patching has already been initiated"
  210. << endl;
  211. return EU_error_abort;
  212. }
  213. int result = internal_read_header(patch_file);
  214. if (_patch_stream != nullptr) {
  215. VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
  216. vfs->close_read_file(_patch_stream);
  217. _patch_stream = nullptr;
  218. }
  219. return result;
  220. }
  221. /**
  222. * Perform one buffer's worth of patching Returns EU_ok while patching Returns
  223. * EU_success when done If error happens will return one of: EU_error_abort :
  224. * Patching has not been initiated EU_error_file_invalid : file is corrupted
  225. * EU_error_invalid_checksum : incompatible patch file
  226. * EU_error_write_file_rename : could not rename file
  227. */
  228. int Patchfile::
  229. run() {
  230. // Now patch the file using the given buffer
  231. int buflen;
  232. int bytes_read;
  233. uint16_t ADD_length;
  234. uint16_t COPY_length;
  235. int32_t COPY_offset;
  236. if (_initiated == false) {
  237. express_cat.error()
  238. << "Patchfile::run() - Patching has not been initiated"
  239. << endl;
  240. return EU_error_abort;
  241. }
  242. nassertr(_patch_stream != nullptr, EU_error_abort);
  243. nassertr(_origfile_stream != nullptr, EU_error_abort);
  244. StreamReader patch_reader(*_patch_stream);
  245. buflen = _buffer->get_length();
  246. bytes_read = 0;
  247. while (bytes_read < buflen) {
  248. // read # of ADD bytes
  249. nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
  250. ADD_length = patch_reader.get_uint16();
  251. if (_patch_stream->fail()) {
  252. express_cat.error()
  253. << "Truncated patch file.\n";
  254. return EU_error_file_invalid;
  255. }
  256. bytes_read += (int)ADD_length;
  257. _total_bytes_processed += (int)ADD_length;
  258. if (_total_bytes_processed > _total_bytes_to_process) {
  259. express_cat.error()
  260. << "Runaway patch file.\n";
  261. return EU_error_file_invalid;
  262. }
  263. // if there are bytes to add, read them from patch file and write them to
  264. // output
  265. if (express_cat.is_spam() && ADD_length != 0) {
  266. express_cat.spam()
  267. << "ADD: " << ADD_length << " (to "
  268. << _write_stream.tellp() << ")" << endl;
  269. }
  270. uint32_t bytes_left = (uint32_t)ADD_length;
  271. while (bytes_left > 0) {
  272. uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
  273. _patch_stream->read(_buffer->_buffer, bytes_this_time);
  274. if (_patch_stream->fail()) {
  275. express_cat.error()
  276. << "Truncated patch file.\n";
  277. return EU_error_file_invalid;
  278. }
  279. _write_stream.write(_buffer->_buffer, bytes_this_time);
  280. bytes_left -= bytes_this_time;
  281. }
  282. // read # of COPY bytes
  283. nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
  284. COPY_length = patch_reader.get_uint16();
  285. if (_patch_stream->fail()) {
  286. express_cat.error()
  287. << "Truncated patch file.\n";
  288. return EU_error_file_invalid;
  289. }
  290. bytes_read += (int)COPY_length;
  291. _total_bytes_processed += (int)COPY_length;
  292. if (_total_bytes_processed > _total_bytes_to_process) {
  293. express_cat.error()
  294. << "Runaway patch file.\n";
  295. return EU_error_file_invalid;
  296. }
  297. // if there are bytes to copy, read them from original file and write them
  298. // to output
  299. if (0 != COPY_length) {
  300. // read copy offset
  301. nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
  302. COPY_offset = patch_reader.get_int32();
  303. if (_patch_stream->fail()) {
  304. express_cat.error()
  305. << "Truncated patch file.\n";
  306. return EU_error_file_invalid;
  307. }
  308. // seek to the copy source pos
  309. if (_version_number < 2) {
  310. _origfile_stream->seekg(COPY_offset, ios::beg);
  311. } else {
  312. _origfile_stream->seekg(COPY_offset, ios::cur);
  313. }
  314. if (_origfile_stream->fail()) {
  315. express_cat.error()
  316. << "Invalid copy offset in patch file.\n";
  317. return EU_error_file_invalid;
  318. }
  319. if (express_cat.is_spam()) {
  320. express_cat.spam()
  321. << "COPY: " << COPY_length << " bytes from offset "
  322. << COPY_offset << " (from " << _origfile_stream->tellg()
  323. << " to " << _write_stream.tellp() << ")"
  324. << endl;
  325. }
  326. // read the copy bytes from original file and write them to output
  327. uint32_t bytes_left = (uint32_t)COPY_length;
  328. while (bytes_left > 0) {
  329. uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
  330. _origfile_stream->read(_buffer->_buffer, bytes_this_time);
  331. if (_origfile_stream->fail()) {
  332. express_cat.error()
  333. << "Invalid copy length in patch file.\n";
  334. return EU_error_file_invalid;
  335. }
  336. _write_stream.write(_buffer->_buffer, bytes_this_time);
  337. bytes_left -= bytes_this_time;
  338. }
  339. }
  340. // if we got a pair of zero-length ADD and COPY blocks, we're done
  341. if ((0 == ADD_length) && (0 == COPY_length)) {
  342. cleanup();
  343. if (express_cat.is_debug()) {
  344. express_cat.debug()
  345. // << "result file = " << _result_file_length
  346. << " total bytes = " << _total_bytes_processed << endl;
  347. }
  348. // check the MD5 from the patch file against the newly patched file
  349. {
  350. HashVal MD5_actual;
  351. MD5_actual.hash_file(_output_file);
  352. if (_MD5_ofResult != MD5_actual) {
  353. // Whoops, patching screwed up somehow.
  354. if (_origfile_stream != nullptr) {
  355. VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
  356. vfs->close_read_file(_origfile_stream);
  357. _origfile_stream = nullptr;
  358. }
  359. _write_stream.close();
  360. express_cat.info()
  361. << "Patching produced incorrect checksum. Got:\n"
  362. << " " << MD5_actual
  363. << "\nExpected:\n"
  364. << " " << _MD5_ofResult
  365. << "\n";
  366. // This is a fine time to double-check the starting checksum.
  367. if (!has_source_hash()) {
  368. express_cat.info()
  369. << "No source hash in patch file to verify.\n";
  370. } else {
  371. HashVal MD5_orig;
  372. MD5_orig.hash_file(_orig_file);
  373. if (MD5_orig != get_source_hash()) {
  374. express_cat.info()
  375. << "Started from incorrect source file. Got:\n"
  376. << " " << MD5_orig
  377. << "\nExpected:\n"
  378. << " " << get_source_hash()
  379. << "\n";
  380. } else {
  381. express_cat.info()
  382. << "Started from correct source file:\n"
  383. << " " << MD5_orig
  384. << "\n";
  385. }
  386. }
  387. // delete the temp file and the patch file
  388. if (_rename_output_to_orig) {
  389. _output_file.unlink();
  390. }
  391. if (_delete_patchfile) {
  392. _patch_file.unlink();
  393. }
  394. // return "invalid checksum"
  395. return EU_error_invalid_checksum;
  396. }
  397. }
  398. // delete the patch file
  399. if (_delete_patchfile) {
  400. _patch_file.unlink();
  401. }
  402. // rename the temp file to the original file name
  403. if (_rename_output_to_orig) {
  404. _orig_file.unlink();
  405. if (!_output_file.rename_to(_orig_file)) {
  406. express_cat.error()
  407. << "Patchfile::run() failed to rename temp file to: " << _orig_file
  408. << endl;
  409. return EU_error_write_file_rename;
  410. }
  411. }
  412. return EU_success;
  413. }
  414. }
  415. return EU_ok;
  416. }
  417. /**
  418. * Patches the entire file in one call returns true on success and false on
  419. * error
  420. *
  421. * This version will delete the patch file and overwrite the original file.
  422. */
  423. bool Patchfile::
  424. apply(Filename &patch_file, Filename &file) {
  425. int ret = initiate(patch_file, file);
  426. if (ret < 0)
  427. return false;
  428. for (;;) {
  429. ret = run();
  430. if (ret == EU_success)
  431. return true;
  432. if (ret < 0)
  433. return false;
  434. }
  435. return false;
  436. }
  437. /**
  438. * Patches the entire file in one call returns true on success and false on
  439. * error
  440. *
  441. * This version will not delete any files.
  442. */
  443. bool Patchfile::
  444. apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
  445. int ret = initiate(patch_file, orig_file, target_file);
  446. if (ret < 0)
  447. return false;
  448. for (;;) {
  449. ret = run();
  450. if (ret == EU_success)
  451. return true;
  452. if (ret < 0)
  453. return false;
  454. }
  455. return false;
  456. }
  457. /**
  458. * Reads the header and leaves the patch file open.
  459. */
  460. int Patchfile::
  461. internal_read_header(const Filename &patch_file) {
  462. // Open the patch file for read
  463. VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
  464. nassertr(_patch_stream == nullptr, EU_error_abort);
  465. _patch_file = patch_file;
  466. _patch_file.set_binary();
  467. _patch_stream = vfs->open_read_file(_patch_file, true);
  468. if (_patch_stream == nullptr) {
  469. express_cat.error()
  470. << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
  471. return get_write_error();
  472. }
  473. // read header, make sure the patch file is valid
  474. StreamReader patch_reader(*_patch_stream);
  475. // check the magic number
  476. nassertr(_buffer->get_length() >= _v0_header_length, false);
  477. uint32_t magic_number = patch_reader.get_uint32();
  478. if (magic_number != _magic_number && magic_number != _v0_magic_number) {
  479. express_cat.error()
  480. << "Invalid patch file: " << _patch_file << endl;
  481. return EU_error_file_invalid;
  482. }
  483. _version_number = 0;
  484. if (magic_number != _v0_magic_number) {
  485. _version_number = patch_reader.get_uint16();
  486. }
  487. if (_version_number > _current_version) {
  488. express_cat.error()
  489. << "Can't read version " << _version_number << " patch files: "
  490. << _patch_file << endl;
  491. return EU_error_file_invalid;
  492. }
  493. if (_version_number >= 1) {
  494. // Get the length of the source file.
  495. /*uint32_t source_file_length =*/ patch_reader.get_uint32();
  496. // get the MD5 of the source file.
  497. _MD5_ofSource.read_stream(patch_reader);
  498. }
  499. // get the length of the patched result file
  500. _total_bytes_to_process = patch_reader.get_uint32();
  501. // get the MD5 of the resultant patched file
  502. _MD5_ofResult.read_stream(patch_reader);
  503. express_cat.debug()
  504. << "Patchfile::initiate() - valid patchfile" << endl;
  505. return EU_success;
  506. }
  507. // PATCH FILE BUILDING MEMBER FUNCTIONS
  508. /**
  509. *
  510. */
  511. uint32_t Patchfile::
  512. calc_hash(const char *buffer) {
  513. #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
  514. HashVal hash;
  515. hash.hash_buffer(buffer, _footprint_length);
  516. // cout << uint16_t(hash.get_value(0)) << " ";
  517. return uint16_t(hash.get_value(0));
  518. #else
  519. uint32_t hash_value = 0;
  520. for(int i = 0; i < (int)_footprint_length; i++) {
  521. // this is probably not such a good hash. to be replaced --> TRIED MD5,
  522. // was not worth it for the execution-time hit on 800Mhz PC
  523. hash_value ^= uint32_t(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
  524. buffer++;
  525. }
  526. // use the bits that overflowed past the end of the hash bit range (this is
  527. // intended for _HASH_BITS == 24)
  528. hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
  529. // cout << hash_value << " ";
  530. return hash_value & _HASH_MASK;
  531. #endif
  532. }
  533. /**
  534. *
  535. * The hash and link tables allow for a quick, linear search of all locations
  536. * in the file that begin with a particular sequence of bytes, or "footprint."
  537. *
  538. * The hash table is a table of offsets into the file, with one entry for
  539. * every possible footprint hash value. For a hash of a footprint, the entry
  540. * at the offset of the hash value provides an initial location in the file
  541. * that has a matching footprint.
  542. *
  543. * The link table is a large linked list of file offsets, with one entry for
  544. * every byte in the file. Each offset in the link table will point to
  545. * another offset that has the same footprint at the corresponding offset in
  546. * the actual file. Starting with an offset taken from the hash table, one
  547. * can rapidly produce a list of offsets that all have the same footprint.
  548. */
  549. void Patchfile::
  550. build_hash_link_tables(const char *buffer_orig, uint32_t length_orig,
  551. uint32_t *hash_table, uint32_t *link_table) {
  552. uint32_t i;
  553. // clear hash table
  554. for(i = 0; i < _HASHTABLESIZE; i++) {
  555. hash_table[i] = _NULL_VALUE;
  556. }
  557. // clear link table
  558. for(i = 0; i < length_orig; i++) {
  559. link_table[i] = _NULL_VALUE;
  560. }
  561. if(length_orig < _footprint_length) return;
  562. // run through original file and hash each footprint
  563. for(i = 0; i < (length_orig - _footprint_length); i++) {
  564. uint32_t hash_value = calc_hash(&buffer_orig[i]);
  565. // we must now store this file index in the hash table at the offset of
  566. // the hash value
  567. // to account for multiple file offsets with identical hash values, there
  568. // is a link table with an entry for every footprint in the file. We
  569. // create linked lists of offsets in the link table.
  570. // first, set the value in the link table for the current offset to
  571. // whatever the current list head is (the value in the hash table) (note
  572. // that this only works because the hash and link tables both use
  573. // _NULL_VALUE to indicate a null index)
  574. link_table[i] = hash_table[hash_value];
  575. // set the new list head; store the current offset in the hash table at
  576. // the offset of the footprint's hash value
  577. hash_table[hash_value] = i;
  578. /*
  579. if (_NULL_VALUE == hash_table[hash_value]) {
  580. // hash entry is empty, store this offset
  581. hash_table[hash_value] = i;
  582. } else {
  583. // hash entry is taken, go to the link table
  584. uint32_t link_offset = hash_table[hash_value];
  585. while (_NULL_VALUE != link_table[link_offset]) {
  586. link_offset = link_table[link_offset];
  587. }
  588. link_table[link_offset] = i;
  589. }
  590. */
  591. }
  592. }
  593. /**
  594. *
  595. * This function calculates the length of a match between two strings of bytes
  596. */
  597. uint32_t Patchfile::
  598. calc_match_length(const char* buf1, const char* buf2, uint32_t max_length,
  599. uint32_t min_length) {
  600. // early out: look ahead and sample the end of the minimum range
  601. if (min_length > 2) {
  602. if (min_length >= max_length)
  603. return 0;
  604. if (buf1[min_length] != buf2[min_length] ||
  605. buf1[min_length-1] != buf2[min_length-1] ||
  606. buf1[min_length-2] != buf2[min_length-2]) {
  607. return 0;
  608. }
  609. }
  610. uint32_t length = 0;
  611. while ((length < max_length) && (*buf1 == *buf2)) {
  612. buf1++, buf2++, length++;
  613. }
  614. return length;
  615. }
  616. /**
  617. *
  618. * This function will find the longest string in the original file that
  619. * matches a string in the new file.
  620. */
  621. void Patchfile::
  622. find_longest_match(uint32_t new_pos, uint32_t &copy_pos, uint16_t &copy_length,
  623. uint32_t *hash_table, uint32_t *link_table, const char* buffer_orig,
  624. uint32_t length_orig, const char* buffer_new, uint32_t length_new) {
  625. // set length to a safe value
  626. copy_length = 0;
  627. // get offset of matching string (in orig file) from hash table
  628. uint32_t hash_value = calc_hash(&buffer_new[new_pos]);
  629. // if no match, bail
  630. if (_NULL_VALUE == hash_table[hash_value])
  631. return;
  632. copy_pos = hash_table[hash_value];
  633. // calc match length
  634. copy_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
  635. &buffer_orig[copy_pos],
  636. min(min((length_new - new_pos),
  637. (length_orig - copy_pos)),
  638. _MAX_RUN_LENGTH),
  639. 0);
  640. // run through link table, see if we find any longer matches
  641. uint32_t match_offset;
  642. uint16_t match_length;
  643. match_offset = link_table[copy_pos];
  644. while (match_offset != _NULL_VALUE) {
  645. match_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
  646. &buffer_orig[match_offset],
  647. min(min((length_new - new_pos),
  648. (length_orig - match_offset)),
  649. _MAX_RUN_LENGTH),
  650. copy_length);
  651. // have we found a longer match?
  652. if (match_length > copy_length) {
  653. copy_pos = match_offset;
  654. copy_length = match_length;
  655. }
  656. // traverse the link table
  657. match_offset = link_table[match_offset];
  658. }
  659. }
  660. /**
  661. *
  662. */
  663. void Patchfile::
  664. emit_ADD(ostream &write_stream, uint32_t length, const char* buffer) {
  665. nassertv(length == (uint16_t)length); //we only write a uint16
  666. if (express_cat.is_spam()) {
  667. express_cat.spam()
  668. << "ADD: " << length << " (to " << _add_pos << ")" << endl;
  669. }
  670. // write ADD length
  671. StreamWriter patch_writer(write_stream);
  672. patch_writer.add_uint16((uint16_t)length);
  673. // if there are bytes to add, add them
  674. if (length > 0) {
  675. patch_writer.append_data(buffer, (uint16_t)length);
  676. }
  677. _add_pos += length;
  678. }
  679. /**
  680. *
  681. */
  682. void Patchfile::
  683. emit_COPY(ostream &write_stream, uint32_t length, uint32_t copy_pos) {
  684. nassertv(length == (uint16_t)length); //we only write a uint16
  685. int32_t offset = (int)copy_pos - (int)_last_copy_pos;
  686. if (express_cat.is_spam()) {
  687. express_cat.spam()
  688. << "COPY: " << length << " bytes from offset " << offset
  689. << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
  690. }
  691. // write COPY length
  692. StreamWriter patch_writer(write_stream);
  693. patch_writer.add_uint16((uint16_t)length);
  694. if ((uint16_t)length != 0) {
  695. // write COPY offset
  696. patch_writer.add_int32(offset);
  697. _last_copy_pos = copy_pos + length;
  698. }
  699. _add_pos += length;
  700. }
  701. /**
  702. * Emits an add/copy pair. If necessary, repeats the pair as needed to work
  703. * around the 16-bit chunk size limit.
  704. */
  705. void Patchfile::
  706. emit_add_and_copy(ostream &write_stream,
  707. uint32_t add_length, const char *add_buffer,
  708. uint32_t copy_length, uint32_t copy_pos) {
  709. if (add_length == 0 && copy_length == 0) {
  710. // Don't accidentally emit a termination code.
  711. return;
  712. }
  713. static const uint16_t max_write = 65535;
  714. while (add_length > max_write) {
  715. // Overflow. This chunk is too large to fit into a single ADD block, so
  716. // we have to write it as multiple ADDs.
  717. emit_ADD(write_stream, max_write, add_buffer);
  718. add_buffer += max_write;
  719. add_length -= max_write;
  720. emit_COPY(write_stream, 0, 0);
  721. }
  722. emit_ADD(write_stream, add_length, add_buffer);
  723. while (copy_length > max_write) {
  724. // Overflow.
  725. emit_COPY(write_stream, max_write, copy_pos);
  726. copy_pos += max_write;
  727. copy_length -= max_write;
  728. emit_ADD(write_stream, 0, nullptr);
  729. }
  730. emit_COPY(write_stream, copy_length, copy_pos);
  731. }
  732. /**
  733. * Potentially emits one or more add/copy pairs. The current state is saved,
  734. * so as to minimize wasted emits from consecutive adds or copies.
  735. */
  736. void Patchfile::
  737. cache_add_and_copy(ostream &write_stream,
  738. uint32_t add_length, const char *add_buffer,
  739. uint32_t copy_length, uint32_t copy_pos) {
  740. if (add_length != 0) {
  741. if (_cache_copy_length != 0) {
  742. // Have to flush.
  743. cache_flush(write_stream);
  744. }
  745. // Add the string to the current cache.
  746. _cache_add_data += string(add_buffer, add_length);
  747. }
  748. if (copy_length != 0) {
  749. if (_cache_copy_length == 0) {
  750. // Start a new copy phase.
  751. _cache_copy_start = copy_pos;
  752. _cache_copy_length = copy_length;
  753. } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
  754. // We can just tack on the copy to what we've already got.
  755. _cache_copy_length += copy_length;
  756. } else {
  757. // It's a discontinuous copy. We have to flush.
  758. cache_flush(write_stream);
  759. _cache_copy_start = copy_pos;
  760. _cache_copy_length = copy_length;
  761. }
  762. }
  763. }
  764. /**
  765. * Closes any copy or add phases that are still open after a previous call to
  766. * cache_add_and_copy().
  767. */
  768. void Patchfile::
  769. cache_flush(ostream &write_stream) {
  770. emit_add_and_copy(write_stream,
  771. _cache_add_data.size(), _cache_add_data.data(),
  772. _cache_copy_length, _cache_copy_start);
  773. _cache_add_data = string();
  774. _cache_copy_length = 0;
  775. }
  776. /**
  777. *
  778. * Writes the patchfile header.
  779. */
  780. void Patchfile::
  781. write_header(ostream &write_stream,
  782. istream &stream_orig, istream &stream_new) {
  783. // prepare to write the patch file header
  784. // write the patch file header
  785. StreamWriter patch_writer(write_stream);
  786. patch_writer.add_uint32(_magic_number);
  787. patch_writer.add_uint16(_current_version);
  788. stream_orig.seekg(0, ios::end);
  789. streampos source_file_length = stream_orig.tellg();
  790. patch_writer.add_uint32((uint32_t)source_file_length);
  791. // calc MD5 of original file
  792. _MD5_ofSource.hash_stream(stream_orig);
  793. // add it to the header
  794. _MD5_ofSource.write_stream(patch_writer);
  795. if (express_cat.is_debug()) {
  796. express_cat.debug()
  797. << "Orig: " << _MD5_ofSource << "\n";
  798. }
  799. stream_new.seekg(0, ios::end);
  800. streampos result_file_length = stream_new.tellg();
  801. patch_writer.add_uint32((uint32_t)result_file_length);
  802. // calc MD5 of resultant patched file
  803. _MD5_ofResult.hash_stream(stream_new);
  804. // add it to the header
  805. _MD5_ofResult.write_stream(patch_writer);
  806. if (express_cat.is_debug()) {
  807. express_cat.debug()
  808. << " New: " << _MD5_ofResult << "\n";
  809. }
  810. }
  811. /**
  812. * Writes the patchfile terminator.
  813. */
  814. void Patchfile::
  815. write_terminator(ostream &write_stream) {
  816. cache_flush(write_stream);
  817. // write terminator (null ADD, null COPY)
  818. emit_ADD(write_stream, 0, nullptr);
  819. emit_COPY(write_stream, 0, 0);
  820. }
  821. /**
  822. * Computes the patches for the entire file (if it is not a multifile) or for
  823. * a single subfile (if it is)
  824. *
  825. * Returns true if successful, false on error.
  826. */
  827. bool Patchfile::
  828. compute_file_patches(ostream &write_stream,
  829. uint32_t offset_orig, uint32_t offset_new,
  830. istream &stream_orig, istream &stream_new) {
  831. // read in original file
  832. stream_orig.seekg(0, ios::end);
  833. nassertr(stream_orig, false);
  834. uint32_t source_file_length = stream_orig.tellg();
  835. if (express_cat.is_debug()) {
  836. express_cat.debug()
  837. << "Allocating " << source_file_length << " bytes to read orig\n";
  838. }
  839. char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
  840. stream_orig.seekg(0, ios::beg);
  841. stream_orig.read(buffer_orig, source_file_length);
  842. // read in new file
  843. stream_new.seekg(0, ios::end);
  844. uint32_t result_file_length = stream_new.tellg();
  845. nassertr(stream_new, false);
  846. if (express_cat.is_debug()) {
  847. express_cat.debug()
  848. << "Allocating " << result_file_length << " bytes to read new\n";
  849. }
  850. char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
  851. stream_new.seekg(0, ios::beg);
  852. stream_new.read(buffer_new, result_file_length);
  853. // allocate hashlink tables
  854. if (_hash_table == nullptr) {
  855. if (express_cat.is_debug()) {
  856. express_cat.debug()
  857. << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
  858. }
  859. _hash_table = (uint32_t *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(uint32_t));
  860. }
  861. if (express_cat.is_debug()) {
  862. express_cat.debug()
  863. << "Allocating linktable of size " << source_file_length << " * 4\n";
  864. }
  865. uint32_t *link_table = (uint32_t *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(uint32_t));
  866. // build hash and link tables for original file
  867. build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
  868. // run through new file
  869. uint32_t new_pos = 0;
  870. uint32_t start_pos = new_pos; // this is the position for the start of ADD operations
  871. if(((uint32_t) result_file_length) >= _footprint_length)
  872. {
  873. while (new_pos < (result_file_length - _footprint_length)) {
  874. // find best match for current position
  875. uint32_t COPY_pos;
  876. uint16_t COPY_length;
  877. find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
  878. buffer_orig, source_file_length, buffer_new, result_file_length);
  879. // if no match or match not longer than footprint length, skip to next
  880. // byte
  881. if (COPY_length < _footprint_length) {
  882. // go to next byte
  883. new_pos++;
  884. } else {
  885. // emit ADD for all skipped bytes
  886. int num_skipped = (int)new_pos - (int)start_pos;
  887. if (express_cat.is_spam()) {
  888. express_cat.spam()
  889. << "build: num_skipped = " << num_skipped
  890. << endl;
  891. }
  892. cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
  893. COPY_length, COPY_pos + offset_orig);
  894. new_pos += (uint32_t)COPY_length;
  895. start_pos = new_pos;
  896. }
  897. }
  898. }
  899. if (express_cat.is_spam()) {
  900. express_cat.spam()
  901. << "build: result_file_length = " << result_file_length
  902. << " start_pos = " << start_pos
  903. << endl;
  904. }
  905. // are there still more bytes left in the new file?
  906. if (start_pos != result_file_length) {
  907. // emit ADD for all remaining bytes
  908. uint32_t remaining_bytes = result_file_length - start_pos;
  909. cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
  910. 0, 0);
  911. start_pos += remaining_bytes;
  912. }
  913. PANDA_FREE_ARRAY(link_table);
  914. PANDA_FREE_ARRAY(buffer_orig);
  915. PANDA_FREE_ARRAY(buffer_new);
  916. return true;
  917. }
  918. /**
  919. * Computes patches for the files, knowing that they are both Panda
  920. * Multifiles. This will build patches one subfile at a time, which can
  921. * potentially be much, much faster for large Multifiles that contain many
  922. * small subfiles.
  923. */
  924. bool Patchfile::
  925. compute_mf_patches(ostream &write_stream,
  926. uint32_t offset_orig, uint32_t offset_new,
  927. istream &stream_orig, istream &stream_new) {
  928. Multifile mf_orig, mf_new;
  929. IStreamWrapper stream_origw(stream_orig);
  930. IStreamWrapper stream_neww(stream_new);
  931. if (!mf_orig.open_read(&stream_origw) ||
  932. !mf_new.open_read(&stream_neww)) {
  933. express_cat.error()
  934. << "Input multifiles appear to be corrupt.\n";
  935. return false;
  936. }
  937. if (mf_new.needs_repack()) {
  938. express_cat.error()
  939. << "Input multifiles need to be repacked.\n";
  940. return false;
  941. }
  942. // First, compute the patch for the header index.
  943. {
  944. ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
  945. ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
  946. if (!do_compute_patches("", "",
  947. write_stream, offset_orig, offset_new,
  948. index_orig, index_new)) {
  949. return false;
  950. }
  951. nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_index_end(), false);
  952. }
  953. // Now walk through each subfile in the new multifile. If a particular
  954. // subfile exists in both source files, we compute the patches for the
  955. // subfile; for a new subfile, we trivially add it. If a subfile has been
  956. // removed, we simply don't add it (we'll never even notice this case).
  957. int new_num_subfiles = mf_new.get_num_subfiles();
  958. for (int ni = 0; ni < new_num_subfiles; ++ni) {
  959. nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_subfile_internal_start(ni), false);
  960. string name = mf_new.get_subfile_name(ni);
  961. int oi = mf_orig.find_subfile(name);
  962. if (oi < 0) {
  963. // This is a newly-added subfile. Add it the hard way.
  964. express_cat.info()
  965. << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
  966. streampos new_start = mf_new.get_subfile_internal_start(ni);
  967. size_t new_size = mf_new.get_subfile_internal_length(ni);
  968. char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
  969. stream_new.seekg(new_start, ios::beg);
  970. stream_new.read(buffer_new, new_size);
  971. cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
  972. PANDA_FREE_ARRAY(buffer_new);
  973. } else {
  974. // This subfile exists in both the original and the new files. Patch
  975. // it.
  976. streampos orig_start = mf_orig.get_subfile_internal_start(oi);
  977. size_t orig_size = mf_orig.get_subfile_internal_length(oi);
  978. streampos new_start = mf_new.get_subfile_internal_start(ni);
  979. size_t new_size = mf_new.get_subfile_internal_length(ni);
  980. if (!patch_subfile(write_stream, offset_orig, offset_new,
  981. mf_new.get_subfile_name(ni),
  982. stream_origw, orig_start, orig_start + (streampos)orig_size,
  983. stream_neww, new_start, new_start + (streampos)new_size)) {
  984. return false;
  985. }
  986. }
  987. }
  988. return true;
  989. }
  990. #ifdef HAVE_TAR
  991. /**
  992. * Uses libtar to extract the location within the tar file of each of the
  993. * subfiles. Returns true if the tar file is read successfully, false if
  994. * there is an error (e.g. it is not a tar file).
  995. */
  996. bool Patchfile::
  997. read_tar(TarDef &tar, istream &stream) {
  998. TAR *tfile;
  999. tartype_t tt;
  1000. tt.openfunc = tar_openfunc;
  1001. tt.closefunc = tar_closefunc;
  1002. tt.readfunc = tar_readfunc;
  1003. tt.writefunc = tar_writefunc;
  1004. stream.seekg(0, ios::beg);
  1005. nassertr(_tar_istream == nullptr, false);
  1006. _tar_istream = &stream;
  1007. if (tar_open(&tfile, (char *)"dummy", &tt, O_RDONLY, 0, 0) != 0) {
  1008. _tar_istream = nullptr;
  1009. return false;
  1010. }
  1011. // Walk through the tar file, noting the current file position as we reach
  1012. // each subfile. Use this information to infer the start and end of each
  1013. // subfile within the stream.
  1014. streampos last_pos = 0;
  1015. int flag = th_read(tfile);
  1016. while (flag == 0) {
  1017. TarSubfile subfile;
  1018. subfile._name = th_get_pathname(tfile);
  1019. subfile._header_start = last_pos;
  1020. subfile._data_start = stream.tellg();
  1021. subfile._data_end = subfile._data_start + (streampos)th_get_size(tfile);
  1022. tar_skip_regfile(tfile);
  1023. subfile._end = stream.tellg();
  1024. tar.push_back(subfile);
  1025. last_pos = subfile._end;
  1026. flag = th_read(tfile);
  1027. }
  1028. // Create one more "subfile" for the bytes at the tail of the file. This
  1029. // subfile has no name.
  1030. TarSubfile subfile;
  1031. subfile._header_start = last_pos;
  1032. stream.clear();
  1033. stream.seekg(0, ios::end);
  1034. subfile._data_start = stream.tellg();
  1035. subfile._data_end = subfile._data_start;
  1036. subfile._end = subfile._data_start;
  1037. tar.push_back(subfile);
  1038. tar_close(tfile);
  1039. _tar_istream = nullptr;
  1040. return (flag == 1);
  1041. }
  1042. #endif // HAVE_TAR
  1043. #ifdef HAVE_TAR
  1044. /**
  1045. * Computes patches for the files, knowing that they are both tar files. This
  1046. * is similar to compute_mf_patches().
  1047. *
  1048. * The tar indexes should have been built up by a previous call to read_tar().
  1049. */
  1050. bool Patchfile::
  1051. compute_tar_patches(ostream &write_stream,
  1052. uint32_t offset_orig, uint32_t offset_new,
  1053. istream &stream_orig, istream &stream_new,
  1054. TarDef &tar_orig, TarDef &tar_new) {
  1055. // Sort the orig list by filename, so we can quickly look up files from the
  1056. // new list.
  1057. tar_orig.sort();
  1058. // However, it is important to keep the new list in its original, on-disk
  1059. // order.
  1060. // Walk through each subfile in the new tar file. If a particular subfile
  1061. // exists in both source files, we compute the patches for the subfile; for
  1062. // a new subfile, we trivially add it. If a subfile has been removed, we
  1063. // simply don't add it (we'll never even notice this case).
  1064. IStreamWrapper stream_origw(stream_orig);
  1065. IStreamWrapper stream_neww(stream_new);
  1066. TarDef::const_iterator ni;
  1067. streampos last_pos = 0;
  1068. for (ni = tar_new.begin(); ni != tar_new.end(); ++ni) {
  1069. const TarSubfile &sf_new =(*ni);
  1070. nassertr(sf_new._header_start == last_pos, false);
  1071. TarDef::const_iterator oi = tar_orig.find(sf_new);
  1072. if (oi == tar_orig.end()) {
  1073. // This is a newly-added subfile. Add it the hard way.
  1074. express_cat.info()
  1075. << "Adding subfile " << sf_new._name << "\n";
  1076. streampos new_start = sf_new._header_start;
  1077. size_t new_size = sf_new._end - sf_new._header_start;
  1078. char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
  1079. stream_new.seekg(new_start, ios::beg);
  1080. stream_new.read(buffer_new, new_size);
  1081. cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
  1082. PANDA_FREE_ARRAY(buffer_new);
  1083. } else {
  1084. // This subfile exists in both the original and the new files. Patch
  1085. // it.
  1086. const TarSubfile &sf_orig =(*oi);
  1087. // We patch the header and data of the file separately, so we can
  1088. // accurately detect nested multifiles. The extra data at the end of
  1089. // the file (possibly introduced by a tar file's blocking) is the
  1090. // footer, which is also patched separately.
  1091. if (!patch_subfile(write_stream, offset_orig, offset_new, "",
  1092. stream_origw, sf_orig._header_start, sf_orig._data_start,
  1093. stream_neww, sf_new._header_start, sf_new._data_start)) {
  1094. return false;
  1095. }
  1096. if (!patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
  1097. stream_origw, sf_orig._data_start, sf_orig._data_end,
  1098. stream_neww, sf_new._data_start, sf_new._data_end)) {
  1099. return false;
  1100. }
  1101. if (!patch_subfile(write_stream, offset_orig, offset_new, "",
  1102. stream_origw, sf_orig._data_end, sf_orig._end,
  1103. stream_neww, sf_new._data_end, sf_new._end)) {
  1104. return false;
  1105. }
  1106. }
  1107. last_pos = sf_new._end;
  1108. }
  1109. return true;
  1110. }
  1111. #endif // HAVE_TAR
  1112. #ifdef HAVE_TAR
  1113. /**
  1114. * A callback function to redirect libtar to read from our istream instead of
  1115. * using low-level Unix I/O.
  1116. */
  1117. int Patchfile::
  1118. tar_openfunc(const char *, int, ...) {
  1119. // Since we don't actually open a file--the stream is already open--we do
  1120. // nothing here.
  1121. return 0;
  1122. }
  1123. #endif // HAVE_TAR
  1124. #ifdef HAVE_TAR
  1125. /**
  1126. * A callback function to redirect libtar to read from our istream instead of
  1127. * using low-level Unix I/O.
  1128. */
  1129. int Patchfile::
  1130. tar_closefunc(int) {
  1131. // Since we don't actually open a file, no need to close it either.
  1132. return 0;
  1133. }
  1134. #endif // HAVE_TAR
  1135. #ifdef HAVE_TAR
  1136. /**
  1137. * A callback function to redirect libtar to read from our istream instead of
  1138. * using low-level Unix I/O.
  1139. */
  1140. ssize_t Patchfile::
  1141. tar_readfunc(int, void *buffer, size_t nbytes) {
  1142. nassertr(_tar_istream != nullptr, 0);
  1143. _tar_istream->read((char *)buffer, nbytes);
  1144. return (ssize_t)_tar_istream->gcount();
  1145. }
  1146. #endif // HAVE_TAR
  1147. #ifdef HAVE_TAR
  1148. /**
  1149. * A callback function to redirect libtar to read from our istream instead of
  1150. * using low-level Unix I/O.
  1151. */
  1152. ssize_t Patchfile::
  1153. tar_writefunc(int, const void *, size_t) {
  1154. // Since we use libtar only for reading, it is an error if this method gets
  1155. // called.
  1156. nassertr(false, -1);
  1157. return -1;
  1158. }
  1159. #endif // HAVE_TAR
  1160. /**
  1161. *
  1162. * This implementation uses the "greedy differencing algorithm" described in
  1163. * the masters thesis "Differential Compression: A Generalized Solution for
  1164. * Binary Files" by Randal C. Burns (p.13). For an original file of size M and
  1165. * a new file of size N, this algorithm is O(M) in space and O(M*N) (worst-
  1166. * case) in time. return false on error
  1167. */
  1168. bool Patchfile::
  1169. build(Filename file_orig, Filename file_new, Filename patch_name) {
  1170. patch_name.set_binary();
  1171. // Open the original file for read
  1172. pifstream stream_orig;
  1173. file_orig.set_binary();
  1174. if (!file_orig.open_read(stream_orig)) {
  1175. express_cat.error()
  1176. << "Patchfile::build() - Failed to open file: " << file_orig << endl;
  1177. return false;
  1178. }
  1179. // Open the new file for read
  1180. pifstream stream_new;
  1181. file_new.set_binary();
  1182. if (!file_new.open_read(stream_new)) {
  1183. express_cat.error()
  1184. << "Patchfile::build() - Failed to open file: " << file_new << endl;
  1185. return false;
  1186. }
  1187. // Open patch file for write
  1188. pofstream write_stream;
  1189. if (!patch_name.open_write(write_stream)) {
  1190. express_cat.error()
  1191. << "Patchfile::build() - Failed to open file: " << patch_name << endl;
  1192. return false;
  1193. }
  1194. _last_copy_pos = 0;
  1195. _add_pos = 0;
  1196. _cache_add_data = string();
  1197. _cache_copy_start = 0;
  1198. _cache_copy_length = 0;
  1199. write_header(write_stream, stream_orig, stream_new);
  1200. if (!do_compute_patches(file_orig, file_new,
  1201. write_stream, 0, 0,
  1202. stream_orig, stream_new)) {
  1203. return false;
  1204. }
  1205. write_terminator(write_stream);
  1206. if (express_cat.is_debug()) {
  1207. express_cat.debug()
  1208. << "Patch file will generate " << _add_pos << "-byte file.\n";
  1209. }
  1210. #ifndef NDEBUG
  1211. {
  1212. // Make sure the resulting file would be the right size.
  1213. stream_new.seekg(0, ios::end);
  1214. streampos result_file_length = stream_new.tellg();
  1215. nassertr(_add_pos == result_file_length, false);
  1216. }
  1217. #endif // NDEBUG
  1218. return (_last_copy_pos != 0);
  1219. }
  1220. /**
  1221. * Computes the patches for the indicated A to B files, or subfiles. Checks
  1222. * for multifiles or tar files before falling back to whole-file patching.
  1223. */
  1224. bool Patchfile::
  1225. do_compute_patches(const Filename &file_orig, const Filename &file_new,
  1226. ostream &write_stream,
  1227. uint32_t offset_orig, uint32_t offset_new,
  1228. istream &stream_orig, istream &stream_new) {
  1229. nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
  1230. // Check whether our input files are Panda multifiles or tar files.
  1231. bool is_multifile = false;
  1232. #ifdef HAVE_TAR
  1233. bool is_tarfile = false;
  1234. TarDef tar_orig, tar_new;
  1235. #endif // HAVE_TAR
  1236. if (_allow_multifile) {
  1237. if (strstr(file_orig.get_basename().c_str(), ".mf") != nullptr ||
  1238. strstr(file_new.get_basename().c_str(), ".mf") != nullptr) {
  1239. // Read the first n bytes of both files for the Multifile magic number.
  1240. string magic_number = Multifile::get_magic_number();
  1241. char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
  1242. stream_orig.seekg(0, ios::beg);
  1243. stream_orig.read(buffer, magic_number.size());
  1244. if (stream_orig.gcount() == (int)magic_number.size() &&
  1245. memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
  1246. stream_new.seekg(0, ios::beg);
  1247. stream_new.read(buffer, magic_number.size());
  1248. if (stream_new.gcount() == (int)magic_number.size() &&
  1249. memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
  1250. is_multifile = true;
  1251. }
  1252. }
  1253. PANDA_FREE_ARRAY(buffer);
  1254. }
  1255. #ifdef HAVE_TAR
  1256. if (strstr(file_orig.get_basename().c_str(), ".tar") != nullptr ||
  1257. strstr(file_new.get_basename().c_str(), ".tar") != nullptr) {
  1258. if (read_tar(tar_orig, stream_orig) &&
  1259. read_tar(tar_new, stream_new)) {
  1260. is_tarfile = true;
  1261. }
  1262. }
  1263. #endif // HAVE_TAR
  1264. }
  1265. if (is_multifile) {
  1266. if (express_cat.is_debug()) {
  1267. express_cat.debug()
  1268. << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
  1269. }
  1270. if (!compute_mf_patches(write_stream, offset_orig, offset_new,
  1271. stream_orig, stream_new)) {
  1272. return false;
  1273. }
  1274. #ifdef HAVE_TAR
  1275. } else if (is_tarfile) {
  1276. if (express_cat.is_debug()) {
  1277. express_cat.debug()
  1278. << file_orig.get_basename() << " appears to be a tar file.\n";
  1279. }
  1280. if (!compute_tar_patches(write_stream, offset_orig, offset_new,
  1281. stream_orig, stream_new, tar_orig, tar_new)) {
  1282. return false;
  1283. }
  1284. #endif // HAVE_TAR
  1285. } else {
  1286. if (express_cat.is_debug()) {
  1287. express_cat.debug()
  1288. << file_orig.get_basename() << " is not a multifile.\n";
  1289. }
  1290. if (!compute_file_patches(write_stream, offset_orig, offset_new,
  1291. stream_orig, stream_new)) {
  1292. return false;
  1293. }
  1294. }
  1295. return true;
  1296. }
  1297. /**
  1298. * Generates patches for a nested subfile of a Panda Multifile or a tar file.
  1299. */
  1300. bool Patchfile::
  1301. patch_subfile(ostream &write_stream,
  1302. uint32_t offset_orig, uint32_t offset_new,
  1303. const Filename &filename,
  1304. IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
  1305. IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
  1306. nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)new_start, false);
  1307. size_t new_size = new_end - new_start;
  1308. size_t orig_size = orig_end - orig_start;
  1309. ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
  1310. ISubStream subfile_new(&stream_new, new_start, new_end);
  1311. bool is_unchanged = false;
  1312. if (orig_size == new_size) {
  1313. HashVal hash_orig, hash_new;
  1314. hash_orig.hash_stream(subfile_orig);
  1315. hash_new.hash_stream(subfile_new);
  1316. if (hash_orig == hash_new) {
  1317. // Actually, the subfile is unchanged; just emit it.
  1318. is_unchanged = true;
  1319. }
  1320. }
  1321. if (is_unchanged) {
  1322. if (express_cat.is_debug() && !filename.empty()) {
  1323. express_cat.debug()
  1324. << "Keeping subfile " << filename << "\n";
  1325. }
  1326. cache_add_and_copy(write_stream, 0, nullptr,
  1327. orig_size, offset_orig + orig_start);
  1328. } else {
  1329. if (!filename.empty()) {
  1330. express_cat.info()
  1331. << "Patching subfile " << filename << "\n";
  1332. }
  1333. if (!do_compute_patches(filename, filename, write_stream,
  1334. offset_orig + orig_start, offset_new + new_start,
  1335. subfile_orig, subfile_new)) {
  1336. return false;
  1337. }
  1338. }
  1339. return true;
  1340. }
  1341. #endif // HAVE_OPENSSL