瀏覽代碼

patch multifiles a lot smarter

David Rose 19 年之前
父節點
當前提交
c5c375aadf

+ 52 - 1
panda/src/downloadertools/build_patch.cxx

@@ -25,10 +25,59 @@
 #include "patchfile.h"
 #include "patchfile.h"
 #include "filename.h"
 #include "filename.h"
 
 
+void 
+usage() {
+  cerr << "Usage: build_patch [-f] <old_file> <new_file>" << endl;
+}
+
+void
+help() {
+  usage();
+  cerr << "\n"
+    "This program generates a patch file that describes the differences\n"
+    "between any two source files.  The patch file can later be used to\n"
+    "construct <new_file>, given <old_file>.  Arbitrary file types, including\n"
+    "binary files, are supported.\n\n"
+
+    "The patching algorithm can get very slow for very large files.  As an\n"
+    "optimization, if the input files are both Panda Multifiles, the patcher\n"
+    "will by default patch them on a per-subfile basis, which has the potential\n"
+    "to be much faster.  The -f option will forbid this and force the patcher\n"
+    "to work on the full file.\n\n";
+}
+
 int
 int
 main(int argc, char *argv[]) {
 main(int argc, char *argv[]) {
+  bool full_file = false;
+
+  //  extern char *optarg;
+  extern int optind;
+  static const char *optflags = "fh";
+  int flag = getopt(argc, argv, optflags);
+  Filename rel_path;
+  while (flag != EOF) {
+    switch (flag) {
+    case 'f':
+      full_file = true;
+      break;
+
+    case 'h':
+      help();
+      return 1;
+    case '?':
+      usage();
+      return 1;
+    default:
+      cerr << "Unhandled switch: " << flag << endl;
+      break;
+    }
+    flag = getopt(argc, argv, optflags);
+  }
+  argc -= (optind - 1);
+  argv += (optind - 1);
+
   if (argc < 3) {
   if (argc < 3) {
-    cerr << "Usage: build_patch <old_file> <new_file>" << endl;
+    usage();
     return 1;
     return 1;
   }
   }
 
 
@@ -41,6 +90,8 @@ main(int argc, char *argv[]) {
   Filename patch_file = dest_file.get_fullpath() + ".pch";
   Filename patch_file = dest_file.get_fullpath() + ".pch";
   Patchfile pfile;
   Patchfile pfile;
 
 
+  pfile.set_allow_multifile(!full_file);
+
   cerr << "Building patch file to convert " << src_file << " to "
   cerr << "Building patch file to convert " << src_file << " to "
        << dest_file << endl;
        << dest_file << endl;
   if (pfile.build(src_file, dest_file, patch_file) == false) {
   if (pfile.build(src_file, dest_file, patch_file) == false) {

+ 11 - 0
panda/src/express/multifile.I

@@ -203,6 +203,17 @@ read_subfile(int index) {
   return result;
   return result;
 }
 }
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: Multifile::get_magic_number
+//       Access: Published, Static
+//  Description: Returns a string with the first n bytes written to a
+//               Multifile, to identify it as a Multifile.
+////////////////////////////////////////////////////////////////////
+INLINE string Multifile::
+get_magic_number() {
+  return string(_header, _header_size);
+}
+
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: Multifile::word_to_streampos
 //     Function: Multifile::word_to_streampos
 //       Access: Private
 //       Access: Private

+ 55 - 7
panda/src/express/multifile.cxx

@@ -77,7 +77,7 @@ const size_t Multifile::_encrypt_header_size = 6;
 // sequential order at the beginning of the file (although they will
 // sequential order at the beginning of the file (although they will
 // after the file has been "packed").
 // after the file has been "packed").
 //
 //
-//   uint32     The address of the next entry.
+//   uint32     The address of the next entry.  0 to mark the end.
 //   uint32     The address of this subfile's data record.
 //   uint32     The address of this subfile's data record.
 //   uint32     The length in bytes of this subfile's data record.
 //   uint32     The length in bytes of this subfile's data record.
 //   uint16     The Subfile::_flags member.
 //   uint16     The Subfile::_flags member.
@@ -463,6 +463,7 @@ flush() {
   if (!_new_subfiles.empty()) {
   if (!_new_subfiles.empty()) {
     // Add a few more files to the end.  We always add subfiles at the
     // Add a few more files to the end.  We always add subfiles at the
     // end of the multifile, so go there first.
     // end of the multifile, so go there first.
+    sort(_new_subfiles.begin(), _new_subfiles.end(), IndirectLess<Subfile>());
     if (_last_index != (streampos)0) {
     if (_last_index != (streampos)0) {
       _write->seekp(0, ios::end);
       _write->seekp(0, ios::end);
       if (_write->fail()) {
       if (_write->fail()) {
@@ -576,6 +577,7 @@ repack() {
   if (_next_index == (streampos)0) {
   if (_next_index == (streampos)0) {
     // If the Multifile hasn't yet been written, this is really just a
     // If the Multifile hasn't yet been written, this is really just a
     // flush operation.
     // flush operation.
+    _needs_repack = false;
     return flush();
     return flush();
   }
   }
 
 
@@ -854,6 +856,41 @@ is_subfile_encrypted(int index) const {
   return (_subfiles[index]->_flags & SF_encrypted) != 0;
   return (_subfiles[index]->_flags & SF_encrypted) != 0;
 }
 }
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: Multifile::get_index_end
+//       Access: Published
+//  Description: Returns the position of the first byte that is
+//               guaranteed to follow any index byte already written
+//               to disk in the Multifile.
+//
+//               This number is largely meaningless in many cases, but
+//               if needs_repack() is false, and the file is flushed,
+//               this will indicate the number of bytes in the header
+//               + index.  Everything at this byte position and later
+//               will be actual data.
+////////////////////////////////////////////////////////////////////
+streampos Multifile::
+get_index_end() const {
+  return normalize_streampos(_next_index + (streampos)4);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: Multifile::get_subfile_internal_start
+//       Access: Published
+//  Description: Returns the starting byte position within the
+//               Multifile at which the indicated subfile begins.
+//               This may be used, with get_subfile_internal_length(),
+//               for low-level access to the subfile, but usually it
+//               is better to use open_read_subfile() instead (which
+//               automatically decrypts and/or uncompresses the
+//               subfile data).
+////////////////////////////////////////////////////////////////////
+streampos Multifile::
+get_subfile_internal_start(int index) const {
+  nassertr(index >= 0 && index < (int)_subfiles.size(), 0);
+  return _subfiles[index]->_data_start;
+}
+
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: Multifile::get_subfile_internal_length
 //     Function: Multifile::get_subfile_internal_length
 //       Access: Published
 //       Access: Published
@@ -1453,13 +1490,24 @@ read_index() {
     return false;
     return false;
   }
   }
 
 
-  size_t before_size = _subfiles.size();
-  _subfiles.sort();
-  size_t after_size = _subfiles.size();
+  // Check if the list is already sorted.  If it is not, we need a
+  // repack.
+  for (size_t si = 1; si < _subfiles.size() && !_needs_repack; ++si) {
+    if (*_subfiles[si] < *_subfiles[si - 1]) {
+      _needs_repack = true;
+    }
+  }
 
 
-  // If these don't match, the same filename appeared twice in the
-  // index, which shouldn't be possible.
-  nassertr(before_size == after_size, true);
+  if (_needs_repack) {
+    // At least sort them now.
+    size_t before_size = _subfiles.size();
+    _subfiles.sort();
+    size_t after_size = _subfiles.size();
+    
+    // If these don't match, the same filename appeared twice in the
+    // index, which shouldn't be possible.
+    nassertr(before_size == after_size, true);
+  }
 
 
   delete subfile;
   delete subfile;
   return true;
   return true;

+ 5 - 0
panda/src/express/multifile.h

@@ -85,6 +85,9 @@ PUBLISHED:
   time_t get_subfile_timestamp(int index) const;
   time_t get_subfile_timestamp(int index) const;
   bool is_subfile_compressed(int index) const;
   bool is_subfile_compressed(int index) const;
   bool is_subfile_encrypted(int index) const;
   bool is_subfile_encrypted(int index) const;
+
+  streampos get_index_end() const;
+  streampos get_subfile_internal_start(int index) const;
   size_t get_subfile_internal_length(int index) const;
   size_t get_subfile_internal_length(int index) const;
 
 
   INLINE string read_subfile(int index);
   INLINE string read_subfile(int index);
@@ -95,6 +98,8 @@ PUBLISHED:
   void output(ostream &out) const;
   void output(ostream &out) const;
   void ls(ostream &out = cout) const;
   void ls(ostream &out = cout) const;
 
 
+  static INLINE string get_magic_number();
+
 public:
 public:
   bool read_subfile(int index, string &result);
   bool read_subfile(int index, string &result);
 
 

+ 28 - 3
panda/src/express/patchfile.I

@@ -30,8 +30,33 @@ get_progress() const {
       << "Patchfile::get_progress() - Patch has not been initiated" << endl;
       << "Patchfile::get_progress() - Patch has not been initiated" << endl;
     return 0.0f;
     return 0.0f;
   }
   }
-  nassertr(_result_file_length > 0, 0.0f);
-  return ((float)_total_bytes_processed / (float)_result_file_length);
+  nassertr(_total_bytes_to_process > 0, 0.0f);
+  return ((float)_total_bytes_processed / (float)_total_bytes_to_process);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::set_allow_multifile
+//       Access: Published
+//  Description: If this flag is set true, the Patchfile will make a
+//               special case for patching Panda Multifiles, if
+//               detected, and attempt to patch them on a
+//               subfile-by-subfile basis.  If this flag is false, the
+//               Patchfile will always patch the file on a full-file
+//               basis.
+////////////////////////////////////////////////////////////////////
+INLINE void Patchfile::
+set_allow_multifile(bool allow_multifile) {
+  _allow_multifile = allow_multifile;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::get_allow_multifile
+//       Access: Published
+//  Description: Returns the current value of the allow_multifile
+//               flag.  See set_allow_multifile().
+////////////////////////////////////////////////////////////////////
+INLINE bool Patchfile::
+get_allow_multifile() {
+  return _allow_multifile;
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -41,7 +66,7 @@ get_progress() const {
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 INLINE void Patchfile::
 INLINE void Patchfile::
 set_footprint_length(int length) {
 set_footprint_length(int length) {
-  nassertv(_footprint_length > 0);
+  nassertv(length > 0);
   _footprint_length = length;
   _footprint_length = length;
 }
 }
 
 

+ 340 - 149
panda/src/express/patchfile.cxx

@@ -25,6 +25,7 @@
 #include "patchfile.h"
 #include "patchfile.h"
 #include "streamReader.h"
 #include "streamReader.h"
 #include "streamWriter.h"
 #include "streamWriter.h"
+#include "multifile.h"
 
 
 #include <stdio.h> // for tempnam
 #include <stdio.h> // for tempnam
 
 
@@ -134,7 +135,7 @@ init(PT(Buffer) buffer) {
   _buffer = buffer;
   _buffer = buffer;
   
   
   _version_number = 0;
   _version_number = 0;
-
+  _allow_multifile = true;
   reset_footprint_length();
   reset_footprint_length();
 }
 }
 
 
@@ -358,7 +359,7 @@ run() {
 
 
       if (express_cat.is_debug()) {
       if (express_cat.is_debug()) {
         express_cat.debug()
         express_cat.debug()
-          << "result file = " << _result_file_length 
+          //<< "result file = " << _result_file_length 
           << " total bytes = " << _total_bytes_processed << endl;
           << " total bytes = " << _total_bytes_processed << endl;
       }
       }
 
 
@@ -502,14 +503,14 @@ internal_read_header(const Filename &patch_file) {
 
 
   if (_version_number >= 1) {
   if (_version_number >= 1) {
     // Get the length of the source file.
     // Get the length of the source file.
-    _source_file_length = patch_reader.get_uint32();
+    /*PN_uint32 source_file_length =*/ patch_reader.get_uint32();
     
     
     // get the MD5 of the source file.
     // get the MD5 of the source file.
     _MD5_ofSource.read_stream(patch_reader);
     _MD5_ofSource.read_stream(patch_reader);
   }
   }
 
 
   // get the length of the patched result file
   // get the length of the patched result file
-  _result_file_length = patch_reader.get_uint32();
+  _total_bytes_to_process = patch_reader.get_uint32();
 
 
   // get the MD5 of the resultant patched file
   // get the MD5 of the resultant patched file
   _MD5_ofResult.read_stream(patch_reader);
   _MD5_ofResult.read_stream(patch_reader);
@@ -761,7 +762,7 @@ find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_lengt
 //  Description:
 //  Description:
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 void Patchfile::
 void Patchfile::
-emit_ADD(ofstream &write_stream, PN_uint32 length, const char* buffer,
+emit_ADD(ostream &write_stream, PN_uint32 length, const char* buffer,
          PN_uint32 ADD_pos) {
          PN_uint32 ADD_pos) {
 
 
   nassertv(length == (PN_uint16)length); //we only write a uint16
   nassertv(length == (PN_uint16)length); //we only write a uint16
@@ -786,11 +787,11 @@ emit_ADD(ofstream &write_stream, PN_uint32 length, const char* buffer,
 //       Access: Private
 //       Access: Private
 //  Description:
 //  Description:
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
-void Patchfile::
-emit_COPY(ofstream &write_stream, PN_uint32 length, PN_uint32 COPY_pos,
+PN_uint32 Patchfile::
+emit_COPY(ostream &write_stream, PN_uint32 length, PN_uint32 COPY_pos,
           PN_uint32 last_copy_pos, PN_uint32 ADD_pos) {
           PN_uint32 last_copy_pos, PN_uint32 ADD_pos) {
 
 
-  nassertv(length == (PN_uint16)length); //we only write a uint16
+  nassertr(length == (PN_uint16)length, last_copy_pos); //we only write a uint16
 
 
   PN_int32 offset = (int)COPY_pos - (int)last_copy_pos;
   PN_int32 offset = (int)COPY_pos - (int)last_copy_pos;
   if (express_cat.is_spam()) {
   if (express_cat.is_spam()) {
@@ -807,86 +808,120 @@ emit_COPY(ofstream &write_stream, PN_uint32 length, PN_uint32 COPY_pos,
     // write COPY offset
     // write COPY offset
     patch_writer.add_int32(offset);
     patch_writer.add_int32(offset);
   }
   }
+
+  return COPY_pos + length;
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
-//     Function: Patchfile::build
-//       Access: Public
-//  Description:
-//               This implementation uses the "greedy differencing
-//               algorithm" described in the masters thesis
-//               "Differential Compression: A Generalized Solution
-//               for Binary Files" by Randal C. Burns (p.13).
-//               For an original file of size M and a new file of
-//               size N, this algorithm is O(M) in space and
-//               O(M*N) (worst-case) in time.
+//     Function: Patchfile::emit_add_and_copy
+//       Access: Private
+//  Description: Emits an add/copy pair.  If either the add length or
+//               the copy length exceeds 65535, emits multiple pairs
+//               to work around the 16-bit chunk size limit.
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
-bool Patchfile::
-build(Filename file_orig, Filename file_new, Filename patch_name) {
-  patch_name.set_binary();
+PN_uint32 Patchfile::
+emit_add_and_copy(ostream &write_stream, 
+                  PN_uint32 add_length, const char *add_buffer,
+                  PN_uint32 copy_length, PN_uint32 copy_pos, PN_uint32 last_copy_pos,
+                  PN_uint32 add_pos) {
+  while (add_length > 65535) {
+    // Overflow.  This chunk is too large to fit into a single
+    // ADD block, so we have to write it as multiple ADDs.
+    static const PN_uint16 max_write = 65535;
+    emit_ADD(write_stream, max_write, add_buffer, add_pos);
+    add_pos += max_write;
+    add_buffer += max_write;
+    add_length -= max_write;
+    emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, add_pos);
+  }
 
 
-  START_PROFILE(overall);
+  emit_ADD(write_stream, add_length, add_buffer, add_pos);
+
+  while (copy_length > 65535) {
+    // Overflow.
+    static const PN_uint16 max_write = 65535;
+    last_copy_pos =
+      emit_COPY(write_stream, max_write, copy_pos, last_copy_pos, add_pos);
+    copy_pos += max_write;
+    add_pos += max_write;
+    copy_length -= max_write;
+    emit_ADD(write_stream, 0, NULL, add_pos);
+  }
 
 
-  START_PROFILE(readFiles);
+  last_copy_pos =
+    emit_COPY(write_stream, copy_length, copy_pos, last_copy_pos, add_pos);
 
 
-  // Open the original file for read
-  ifstream stream_orig;
-  file_orig.set_binary();
-  if (!file_orig.open_read(stream_orig)) {
-    express_cat.error()
-      << "Patchfile::build() - Failed to open file: " << file_orig << endl;
-    return false;
-  }
+  return last_copy_pos;
+}
 
 
-  // Open the new file for read
-  ifstream stream_new;
-  file_new.set_binary();
-  if (!file_new.open_read(stream_new)) {
-    express_cat.error()
-      << "Patchfile::build() - Failed to open file: " << file_new << endl;
-    return false;
-  }
 
 
-  // Open patch file for write
-  ofstream write_stream;
-  if (!patch_name.open_write(write_stream)) {
-    express_cat.error()
-      << "Patchfile::build() - Failed to open file: " << patch_name << endl;
-    return false;
-  }
 
 
-  // read in original file
-  stream_orig.seekg(0, ios::end);
-  _source_file_length = stream_orig.tellg();
-  if (express_cat.is_debug()) {
-    express_cat.debug()
-      << "Allocating " << _source_file_length << " bytes to read " 
-      << file_orig << "\n";
-  }
 
 
-  char *buffer_orig = new char[_source_file_length];
-  stream_orig.seekg(0, ios::beg);
-  stream_orig.read(buffer_orig, _source_file_length);
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::write_header
+//       Access: Private
+//  Description:
+//               Writes the patchfile header.
+////////////////////////////////////////////////////////////////////
+void Patchfile::
+write_header(ostream &write_stream, 
+             char *buffer_orig, PN_uint32 source_file_length,
+             char *buffer_new, PN_uint32 result_file_length) {
+  // prepare to write the patch file header
 
 
-  // read in new file
-  stream_new.seekg(0, ios::end);
-  _result_file_length = stream_new.tellg();
-  if (express_cat.is_debug()) {
-    express_cat.debug()
-      << "Allocating " << _result_file_length << " bytes to write " 
-      << file_new << "\n";
-  }
+  START_PROFILE(writeHeader);
 
 
-  char *buffer_new = new char[_result_file_length];
-  stream_new.seekg(0, ios::beg);
-  stream_new.read(buffer_new, _result_file_length);
+  // write the patch file header
+  StreamWriter patch_writer(write_stream);
+  patch_writer.add_uint32(_magic_number);
+  patch_writer.add_uint16(_current_version);
+  patch_writer.add_uint32(source_file_length);
+  {
+    // calc MD5 of original file
+    _MD5_ofSource.hash_buffer(buffer_orig, source_file_length);
+    // add it to the header
+    _MD5_ofSource.write_stream(patch_writer);
+  }
+  patch_writer.add_uint32(result_file_length);
+  {
+    // calc MD5 of resultant patched file
+    _MD5_ofResult.hash_buffer(buffer_new, result_file_length);
+    // add it to the header
+    _MD5_ofResult.write_stream(patch_writer);
+  }
 
 
-  // close the original and new files (we have em in memory)
-  stream_orig.close();
-  stream_new.close();
+  END_PROFILE(writeHeader, "writing patch file header");
+}
 
 
-  END_PROFILE(readFiles, "reading files");
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::write_terminator
+//       Access: Private
+//  Description:
+//               Writes the patchfile terminator.
+////////////////////////////////////////////////////////////////////
+void Patchfile::
+write_terminator(ostream &write_stream, PN_uint32 result_file_length,
+                 PN_uint32 last_copy_pos) {
+  // write terminator (null ADD, null COPY)
+  emit_ADD(write_stream, 0, NULL, result_file_length);
+  emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, result_file_length);
+}
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::compute_patches
+//       Access: Private
+//  Description:
+//               Computes the patches for the entire file (if it is
+//               not a multifile) or for a single subfile (if it is).
+//
+//               Returns last_copy_pos, the last byte position from
+//               which we copied from the original file.
+////////////////////////////////////////////////////////////////////
+PN_uint32 Patchfile::
+compute_patches(ostream &write_stream, PN_uint32 last_copy_pos,
+                PN_uint32 copy_offset,
+                char *buffer_orig, PN_uint32 source_file_length,
+                char *buffer_new, PN_uint32 result_file_length) {
   START_PROFILE(allocTables);
   START_PROFILE(allocTables);
 
 
   // allocate hash/link tables
   // allocate hash/link tables
@@ -899,63 +934,36 @@ build(Filename file_orig, Filename file_new, Filename patch_name) {
 
 
   if (express_cat.is_debug()) {
   if (express_cat.is_debug()) {
     express_cat.debug()
     express_cat.debug()
-      << "Allocating linktable of size " << _source_file_length << " * 4\n";
+      << "Allocating linktable of size " << source_file_length << " * 4\n";
   }
   }
 
 
-  PN_uint32* link_table = new PN_uint32[_source_file_length];
+  PN_uint32* link_table = new PN_uint32[source_file_length];
 
 
   END_PROFILE(allocTables, "allocating hash and link tables");
   END_PROFILE(allocTables, "allocating hash and link tables");
 
 
   START_PROFILE(buildTables);
   START_PROFILE(buildTables);
 
 
   // build hash and link tables for original file
   // build hash and link tables for original file
-  build_hash_link_tables(buffer_orig, _source_file_length, hash_table, link_table);
+  build_hash_link_tables(buffer_orig, source_file_length, hash_table, link_table);
 
 
   END_PROFILE(buildTables, "building hash and link tables");
   END_PROFILE(buildTables, "building hash and link tables");
 
 
-  // prepare to write the patch file header
-
-  START_PROFILE(writeHeader);
-
-  // write the patch file header
-  StreamWriter patch_writer(write_stream);
-  patch_writer.add_uint32(_magic_number);
-  patch_writer.add_uint16(_current_version);
-  patch_writer.add_uint32(_source_file_length);
-  {
-    // calc MD5 of original file
-    _MD5_ofSource.hash_buffer(buffer_orig, _source_file_length);
-    // add it to the header
-    _MD5_ofSource.write_stream(patch_writer);
-  }
-  patch_writer.add_uint32(_result_file_length);
-  {
-    // calc MD5 of resultant patched file
-    _MD5_ofResult.hash_buffer(buffer_new, _result_file_length);
-    // add it to the header
-    _MD5_ofResult.write_stream(patch_writer);
-  }
-
-  END_PROFILE(writeHeader, "writing patch file header");
-
   // run through new file
   // run through new file
   START_PROFILE(buildPatchfile);
   START_PROFILE(buildPatchfile);
 
 
   PN_uint32 new_pos = 0;
   PN_uint32 new_pos = 0;
   PN_uint32 ADD_pos = new_pos; // this is the position for the start of ADD operations
   PN_uint32 ADD_pos = new_pos; // this is the position for the start of ADD operations
 
 
-  PN_uint32 last_copy_pos = 0;
-
-  if(((PN_uint32) _result_file_length) >= _footprint_length)
+  if(((PN_uint32) result_file_length) >= _footprint_length)
   {
   {
-    while (new_pos < (_result_file_length - _footprint_length)) {
+    while (new_pos < (result_file_length - _footprint_length)) {
 
 
       // find best match for current position
       // find best match for current position
       PN_uint32 COPY_pos;
       PN_uint32 COPY_pos;
       PN_uint16 COPY_length;
       PN_uint16 COPY_length;
 
 
       find_longest_match(new_pos, COPY_pos, COPY_length, hash_table, link_table,
       find_longest_match(new_pos, COPY_pos, COPY_length, hash_table, link_table,
-        buffer_orig, _source_file_length, buffer_new, _result_file_length);
+        buffer_orig, source_file_length, buffer_new, result_file_length);
 
 
       // if no match or match not longer than footprint length, skip to next byte
       // if no match or match not longer than footprint length, skip to next byte
       if (COPY_length < _footprint_length) {
       if (COPY_length < _footprint_length) {
@@ -969,23 +977,12 @@ build(Filename file_orig, Filename file_new, Filename patch_name) {
             << "build: num_skipped = " << num_skipped 
             << "build: num_skipped = " << num_skipped 
             << endl;
             << endl;
         }
         }
-        while (num_skipped != (PN_uint16)num_skipped) {
-          // Overflow.  This chunk is too large to fit into a single
-          // ADD block, so we have to write it as multiple ADDs.
-          static const PN_uint16 max_write = 65535;
-          emit_ADD(write_stream, max_write, &buffer_new[ADD_pos], ADD_pos);
-          ADD_pos += max_write;
-          num_skipped -= max_write;
-          emit_COPY(write_stream, 0, COPY_pos, last_copy_pos, ADD_pos);
-        }
-        
-        emit_ADD(write_stream, num_skipped, &buffer_new[ADD_pos], ADD_pos);
+        last_copy_pos =
+          emit_add_and_copy(write_stream, num_skipped, &buffer_new[ADD_pos],
+                            COPY_length, COPY_pos + copy_offset,
+                            last_copy_pos, ADD_pos);
         ADD_pos += num_skipped;
         ADD_pos += num_skipped;
-        nassertr(ADD_pos == new_pos, false);
-
-        // emit COPY for matching string
-        emit_COPY(write_stream, COPY_length, COPY_pos, last_copy_pos, ADD_pos);
-        last_copy_pos = COPY_pos + COPY_length;
+        nassertr(ADD_pos == new_pos, last_copy_pos);
 
 
         // skip past match in new_file
         // skip past match in new_file
         new_pos += (PN_uint32)COPY_length;
         new_pos += (PN_uint32)COPY_length;
@@ -996,49 +993,243 @@ build(Filename file_orig, Filename file_new, Filename patch_name) {
 
 
   if (express_cat.is_spam()) {
   if (express_cat.is_spam()) {
     express_cat.spam()
     express_cat.spam()
-      << "build: _result_file_length = " << _result_file_length
+      << "build: result_file_length = " << result_file_length
       << " ADD_pos = " << ADD_pos
       << " ADD_pos = " << ADD_pos
       << endl;
       << endl;
   }
   }
 
 
   // are there still more bytes left in the new file?
   // are there still more bytes left in the new file?
-  if (ADD_pos != _result_file_length) {
+  if (ADD_pos != result_file_length) {
     // emit ADD for all remaining bytes
     // emit ADD for all remaining bytes
-    /* This code overflows the emit_ADD uint16, look for rewrite in the following block
-    emit_ADD(write_stream, _result_file_length - ADD_pos, &buffer_new[ADD_pos],
-             ADD_pos);
-
-    // write null COPY
-    emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, _result_file_length);
-    */
 
 
-    // Make sure to handle _result_file_length larger than PN_uint16
-    PN_uint32 remaining_bytes = _result_file_length - ADD_pos;
-    while (remaining_bytes != (PN_uint16)remaining_bytes) {
-      static const PN_uint16 max_write = 65535;
-      emit_ADD(write_stream, max_write, &buffer_new[ADD_pos], ADD_pos);
-      ADD_pos += max_write;
-      remaining_bytes -= max_write;
-      emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, ADD_pos);
-    }
-    // emit ADD the last block (if any) that fits in PN_uint16
-    emit_ADD(write_stream, remaining_bytes, &buffer_new[ADD_pos], ADD_pos);
+    PN_uint32 remaining_bytes = result_file_length - ADD_pos;
+    last_copy_pos =
+      emit_add_and_copy(write_stream, remaining_bytes, &buffer_new[ADD_pos], 
+                        0, last_copy_pos, last_copy_pos, ADD_pos);
     ADD_pos += remaining_bytes;
     ADD_pos += remaining_bytes;
-    nassertr(ADD_pos == _result_file_length, false);
-    
-    // emit COPY for matching string
-    emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, ADD_pos);
+    nassertr(ADD_pos == result_file_length, last_copy_pos);
   }
   }
 
 
   END_PROFILE(buildPatchfile, "building patch file");
   END_PROFILE(buildPatchfile, "building patch file");
 
 
-  // write terminator (null ADD, null COPY)
-  emit_ADD(write_stream, 0, NULL, _result_file_length);
-  emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, _result_file_length);
+  delete[] hash_table;
+  delete[] link_table;
+
+  return last_copy_pos;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::compute_mf_patches
+//       Access: Private
+//  Description:
+//               Computes patches for the files, knowing that they are
+//               both Panda Multifiles.  This will build patches one
+//               subfile at a time, which can potentially be much,
+//               much faster for large Multifiles that contain many
+//               small subfiles.
+//
+//               buffer_orig and buffer_new hold the complete
+//               contents of the original and new Multifiles, of
+//               source_file_length and result_file_length bytes
+//               respectively.  The ADD/COPY records are written to
+//               write_stream.
+//
+//               Returns the final last_copy_pos, or 0 if either
+//               input cannot be opened as a Multifile or the new
+//               Multifile needs to be repacked.
+////////////////////////////////////////////////////////////////////
+PN_uint32 Patchfile::
+compute_mf_patches(ostream &write_stream, 
+                   char *buffer_orig, PN_uint32 source_file_length,
+                   char *buffer_new, PN_uint32 result_file_length) {
+  // Wrap the in-memory buffers in streams so the Multifile class can
+  // parse their indexes.
+  string string_orig(buffer_orig, source_file_length);
+  string string_new(buffer_new, result_file_length);
+  istringstream strm_orig(string_orig);
+  istringstream strm_new(string_new);
+
+  Multifile mf_orig, mf_new;
+  if (!mf_orig.open_read(&strm_orig) ||
+      !mf_new.open_read(&strm_new)) {
+    express_cat.error()
+      << "Input multifiles appear to be corrupt.\n";
+    return 0;
+  }
+
+  if (mf_new.needs_repack()) {
+    express_cat.error()
+      << "Input multifiles need to be repacked.\n";
+    return 0;
+  }
+
+  // First, compute the patch for the header / index.
+
+  PN_uint32 last_copy_pos =
+    compute_patches(write_stream, 0, 0,
+                    buffer_orig, (PN_uint32)mf_orig.get_index_end(),
+                    buffer_new, (PN_uint32)mf_new.get_index_end());
+  PN_uint32 add_pos = (PN_uint32)mf_new.get_index_end();
+
+  // Now walk through each subfile in the new multifile.  If a
+  // particular subfile exists in both source files, we compute the
+  // patches for the subfile; for a new subfile, we trivially add it.
+  // If a subfile has been removed, we simply don't add it (we'll
+  // never even notice this case).
+  int new_num_subfiles = mf_new.get_num_subfiles();
+  for (int ni = 0; ni < new_num_subfiles; ++ni) {
+    // The patch must describe the new file contiguously, so each
+    // subfile has to begin exactly where the previous output ended.
+    nassertr(add_pos == mf_new.get_subfile_internal_start(ni), last_copy_pos);
+    string name = mf_new.get_subfile_name(ni);
+    PN_uint32 new_start = (PN_uint32)mf_new.get_subfile_internal_start(ni);
+    PN_uint32 new_size = (PN_uint32)mf_new.get_subfile_internal_length(ni);
+
+    int oi = mf_orig.find_subfile(name);
+    if (oi < 0) {
+      // This subfile exists in the new file, but not in the original
+      // file.  Trivially add it.
+      express_cat.info()
+        << "Adding subfile " << name << "\n";
+      last_copy_pos =
+        emit_add_and_copy(write_stream, new_size, &buffer_new[new_start],
+                          0, last_copy_pos, last_copy_pos, add_pos);
+      // ni is advanced by the for loop itself; incrementing it here
+      // as well would skip the subfile that follows this one.
+
+    } else {
+      // This subfile exists in both the original and the new files.
+      // Patch it.
+      PN_uint32 orig_start = (PN_uint32)mf_orig.get_subfile_internal_start(oi);
+      PN_uint32 orig_size = (PN_uint32)mf_orig.get_subfile_internal_length(oi);
+      if (orig_size == new_size &&
+          memcmp(&buffer_orig[orig_start], &buffer_new[new_start], new_size) == 0) {
+        // Actually, the subfile is unchanged; just emit it as a
+        // single COPY of the original bytes, with no ADD data.
+        if (express_cat.is_debug()) {
+          express_cat.debug()
+            << "Keeping subfile " << name << "\n";
+        }
+        last_copy_pos =
+          emit_add_and_copy(write_stream, 0, NULL, 
+                            new_size, orig_start, last_copy_pos,
+                            add_pos);
+                  
+      } else {
+        express_cat.info()
+          << "Patching subfile " << name << "\n";
+        // orig_start is passed as the copy offset because the
+        // sub-buffer handed to compute_patches() begins at that
+        // position within the original file.
+        last_copy_pos = 
+          compute_patches(write_stream, last_copy_pos, orig_start,
+                          &buffer_orig[orig_start], orig_size,
+                          &buffer_new[new_start], new_size);
+      }
+    }
+    add_pos += new_size;
+  }
+
+  nassertr(add_pos == result_file_length, last_copy_pos);
+  return last_copy_pos;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: Patchfile::build
+//       Access: Public
+//  Description:
+//               Reads file_orig and file_new entirely into memory
+//               and writes to patch_name a patch file that describes
+//               how to construct file_new from file_orig.
+//
+//               This implementation uses the "greedy differencing
+//               algorithm" described in the master's thesis
+//               "Differential Compression: A Generalized Solution
+//               for Binary Files" by Randal C. Burns (p.13).
+//               For an original file of size M and a new file of
+//               size N, this algorithm is O(M) in space and
+//               O(M*N) (worst-case) in time.
+//
+//               Returns false if any of the files cannot be opened,
+//               or if the patch computation reports a final copy
+//               position of zero; true otherwise.
+////////////////////////////////////////////////////////////////////
+bool Patchfile::
+build(Filename file_orig, Filename file_new, Filename patch_name) {
+  patch_name.set_binary();
+
+  START_PROFILE(overall);
+
+  START_PROFILE(readFiles);
+
+  // Open the original file for read
+  ifstream stream_orig;
+  file_orig.set_binary();
+  if (!file_orig.open_read(stream_orig)) {
+    express_cat.error()
+      << "Patchfile::build() - Failed to open file: " << file_orig << endl;
+    return false;
+  }
+
+  // Open the new file for read
+  ifstream stream_new;
+  file_new.set_binary();
+  if (!file_new.open_read(stream_new)) {
+    express_cat.error()
+      << "Patchfile::build() - Failed to open file: " << file_new << endl;
+    return false;
+  }
+
+  // Open patch file for write
+  ofstream write_stream;
+  if (!patch_name.open_write(write_stream)) {
+    express_cat.error()
+      << "Patchfile::build() - Failed to open file: " << patch_name << endl;
+    return false;
+  }
+
+  // read in original file
+  stream_orig.seekg(0, ios::end);
+  PN_uint32 source_file_length = stream_orig.tellg();
+  if (express_cat.is_debug()) {
+    express_cat.debug()
+      << "Allocating " << source_file_length << " bytes to read " 
+      << file_orig << "\n";
+  }
+
+  char *buffer_orig = new char[source_file_length];
+  stream_orig.seekg(0, ios::beg);
+  stream_orig.read(buffer_orig, source_file_length);
+
+  // read in new file
+  stream_new.seekg(0, ios::end);
+  PN_uint32 result_file_length = stream_new.tellg();
+  if (express_cat.is_debug()) {
+    express_cat.debug()
+      << "Allocating " << result_file_length << " bytes to write " 
+      << file_new << "\n";
+  }
+
+  char *buffer_new = new char[result_file_length];
+  stream_new.seekg(0, ios::beg);
+  stream_new.read(buffer_new, result_file_length);
+
+  // close the original and new files (we have em in memory)
+  stream_orig.close();
+  stream_new.close();
+
+  END_PROFILE(readFiles, "reading files");
+
+  write_header(write_stream, buffer_orig, source_file_length,
+               buffer_new, result_file_length);
+
+  PN_uint32 last_copy_pos;
+
+  // Check whether our input files are Panda multifiles.  Both files
+  // must begin with the Multifile magic number, and at least one of
+  // them must carry the "mf" extension.
+  bool is_multifile = false;
+  if (_allow_multifile) {
+    if (file_orig.get_extension() == "mf" || file_new.get_extension() == "mf") {
+      string magic_number = Multifile::get_magic_number();
+      if (source_file_length > magic_number.size() &&
+          result_file_length > magic_number.size() &&
+          memcmp(buffer_orig, magic_number.data(), magic_number.size()) == 0 &&
+          memcmp(buffer_new, magic_number.data(), magic_number.size()) == 0) {
+        is_multifile = true;
+      }
+    }
+  }
+
+  if (is_multifile) {
+    // Patch subfile-by-subfile.
+    last_copy_pos =
+      compute_mf_patches(write_stream, buffer_orig, source_file_length,
+                         buffer_new, result_file_length);
+  } else {
+    // Patch the two files as opaque byte streams.
+    last_copy_pos =
+      compute_patches(write_stream, 0, 0,
+                      buffer_orig, source_file_length,
+                      buffer_new, result_file_length);
+  }
+
+  // The in-memory copies of the input files are no longer needed
+  // once the patch records have been computed; release them so they
+  // are not leaked on every call.
+  delete[] buffer_orig;
+  delete[] buffer_new;
+
+  write_terminator(write_stream, result_file_length, last_copy_pos);
 
 
   END_PROFILE(overall, "total patch building operation");
   END_PROFILE(overall, "total patch building operation");
 
 
-  return true;
+  // NOTE(review): a final copy position of zero is treated as
+  // failure here; confirm that a successful patch can never
+  // legitimately end with last_copy_pos == 0.
+  return (last_copy_pos != 0);
 }
 }
 
 
 #endif // HAVE_OPENSSL
 #endif // HAVE_OPENSSL

+ 30 - 7
panda/src/express/patchfile.h

@@ -67,6 +67,9 @@ PUBLISHED:
 
 
   INLINE float get_progress() const;
   INLINE float get_progress() const;
 
 
+  INLINE void set_allow_multifile(bool allow_multifile);
+  INLINE bool get_allow_multifile();
+
   INLINE void set_footprint_length(int length);
   INLINE void set_footprint_length(int length);
   INLINE int get_footprint_length();
   INLINE int get_footprint_length();
   INLINE void reset_footprint_length();
   INLINE void reset_footprint_length();
@@ -91,11 +94,30 @@ private:
   PN_uint32 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
   PN_uint32 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
     PN_uint32 min_length);
     PN_uint32 min_length);
 
 
-  void emit_ADD(ofstream &write_stream, PN_uint32 length, const char* buffer,
+  void emit_ADD(ostream &write_stream, PN_uint32 length, const char* buffer,
                 PN_uint32 ADD_pos);
                 PN_uint32 ADD_pos);
-  void emit_COPY(ofstream &write_stream, PN_uint32 length, 
-                 PN_uint32 COPY_pos, PN_uint32 last_copy_pos,
-                 PN_uint32 ADD_pos);
+  PN_uint32 emit_COPY(ostream &write_stream, PN_uint32 length, 
+                      PN_uint32 COPY_pos, PN_uint32 last_copy_pos,
+                      PN_uint32 ADD_pos);
+  PN_uint32 emit_add_and_copy(ostream &write_stream, 
+                              PN_uint32 add_length, const char *add_buffer,
+                              PN_uint32 copy_length, PN_uint32 copy_pos, PN_uint32 last_copy_pos,
+                              PN_uint32 add_pos);
+
+
+  void write_header(ostream &write_stream, 
+                    char *buffer_orig, PN_uint32 source_file_length,
+                    char *buffer_new, PN_uint32 result_file_length);
+  void write_terminator(ostream &write_stream, PN_uint32 result_file_length,
+                        PN_uint32 last_copy_pos);
+
+  PN_uint32 compute_patches(ostream &write_stream, PN_uint32 last_copy_pos,
+                            PN_uint32 copy_offset,
+                            char *buffer_orig, PN_uint32 source_file_length,
+                            char *buffer_new, PN_uint32 result_file_length);
+  PN_uint32 compute_mf_patches(ostream &write_stream, 
+                               char *buffer_orig, PN_uint32 source_file_length,
+                               char *buffer_new, PN_uint32 result_file_length);
 
 
   static const PN_uint32 _HASH_BITS;
   static const PN_uint32 _HASH_BITS;
   static const PN_uint32 _HASHTABLESIZE;
   static const PN_uint32 _HASHTABLESIZE;
@@ -104,6 +126,7 @@ private:
   static const PN_uint32 _MAX_RUN_LENGTH;
   static const PN_uint32 _MAX_RUN_LENGTH;
   static const PN_uint32 _HASH_MASK;
   static const PN_uint32 _HASH_MASK;
 
 
+  bool _allow_multifile;
   PN_uint32 _footprint_length;
   PN_uint32 _footprint_length;
 
 
 protected:
 protected:
@@ -115,11 +138,11 @@ protected:
   PN_uint16 _version_number;
   PN_uint16 _version_number;
 
 
   HashVal _MD5_ofSource;  
   HashVal _MD5_ofSource;  
-  PN_uint32 _source_file_length;
 
 
   HashVal _MD5_ofResult;  
   HashVal _MD5_ofResult;  
-  PN_uint32 _result_file_length;
-  int _total_bytes_processed;
+
+  PN_uint32 _total_bytes_to_process;
+  PN_uint32 _total_bytes_processed;
 
 
   ifstream _patch_stream;
   ifstream _patch_stream;
   ofstream _write_stream;
   ofstream _write_stream;