2
0
David Rose 23 жил өмнө
parent
commit
f6f16e60d3

+ 33 - 4
panda/src/downloader/chunkedStreamBuf.cxx

@@ -17,6 +17,7 @@
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 
 
 #include "chunkedStreamBuf.h"
 #include "chunkedStreamBuf.h"
+#include <ctype.h>
 
 
 // This module is not compiled if OpenSSL is not available.
 // This module is not compiled if OpenSSL is not available.
 #ifdef HAVE_SSL
 #ifdef HAVE_SSL
@@ -149,13 +150,34 @@ read_chars(char *start, size_t length) {
     (*_source)->read(start, length);
     (*_source)->read(start, length);
     size_t read_count = (*_source)->gcount();
     size_t read_count = (*_source)->gcount();
     _chunk_remaining -= read_count;
     _chunk_remaining -= read_count;
+
+    if (read_count == 0 && (*_source)->is_closed()) {
+      // Whoops, the socket closed while we were downloading.
+      if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+        _doc->_state = HTTPChannel::S_failure;
+      }
+    }
+
     return read_count;
     return read_count;
   }
   }
 
 
   // Read the next chunk.
   // Read the next chunk.
   string line;
   string line;
-  if (!http_getline(line)) {
+  bool got_line = http_getline(line);
+  while (got_line && line.empty()) {
+    // Skip blank lines.  There really should be exactly one blank
+    // line, but who's counting?  It's tricky to count and maintain
+    // reentry for nonblocking I/O.
+    got_line = http_getline(line);
+  }
+  if (!got_line) {
     // EOF (or data unavailable) while trying to read the chunk size.
     // EOF (or data unavailable) while trying to read the chunk size.
+    if ((*_source)->is_closed()) {
+      // Whoops, the socket closed while we were downloading.
+      if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+        _doc->_state = HTTPChannel::S_failure;
+      }
+    }
     return 0;
     return 0;
   }
   }
   size_t chunk_size = (size_t)strtol(line.c_str(), NULL, 16);
   size_t chunk_size = (size_t)strtol(line.c_str(), NULL, 16);
@@ -192,11 +214,18 @@ http_getline(string &str) {
     switch (ch) {
     switch (ch) {
     case '\n':
     case '\n':
       // end-of-line character, we're done.
       // end-of-line character, we're done.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam() << "recv: " << _working_getline << "\n";
-      }
       str = _working_getline;
       str = _working_getline;
       _working_getline = string();
       _working_getline = string();
+      {
+        // Trim trailing whitespace.  We're not required to do this per the
+        // HTTP spec, but let's be generous.
+        size_t p = str.length();
+        while (p > 0 && isspace(str[p - 1])) {
+          --p;
+        }
+        str = str.substr(0, p);
+      }
+
       return true;
       return true;
 
 
     case '\r':
     case '\r':

+ 15 - 0
panda/src/downloader/httpChannel.I

@@ -276,6 +276,21 @@ request_header(const URLSpec &url) {
   begin_request("HEAD", url, string(), true);
   begin_request("HEAD", url, string(), true);
 }
 }
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::is_download_complete
+//       Access: Published
+//  Description: Returns true when a download_to_file() or
+//               download_to_ram() has executed and the document has
+//               been fully downloaded.  If this still returns false
+//               after processing has completed, there was an error
+//               in transmission.
+////////////////////////////////////////////////////////////////////
+INLINE bool HTTPChannel::
+is_download_complete() const {
+  return (_download_dest != DD_none &&  // a download destination was requested
+          (_state == S_read_body || _state == S_read_trailer));
+}
+
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::check_socket
 //     Function: HTTPChannel::check_socket
 //       Access: Private
 //       Access: Private

+ 248 - 13
panda/src/downloader/httpChannel.cxx

@@ -21,6 +21,7 @@
 #include "bioStream.h"
 #include "bioStream.h"
 #include "chunkedStream.h"
 #include "chunkedStream.h"
 #include "identityStream.h"
 #include "identityStream.h"
+#include "buffer.h"  // for Ramfile
 
 
 #ifdef HAVE_SSL
 #ifdef HAVE_SSL
 
 
@@ -57,10 +58,12 @@ HTTPChannel(HTTPClient *client) :
   _http_version_string = _client->get_http_version_string();
   _http_version_string = _client->get_http_version_string();
   _state = S_new;
   _state = S_new;
   _done_state = S_new;
   _done_state = S_new;
+  _started_download = false;
   _sent_so_far = 0;
   _sent_so_far = 0;
   _proxy_tunnel = false;
   _proxy_tunnel = false;
   _body_stream = NULL;
   _body_stream = NULL;
   _sbio = NULL;
   _sbio = NULL;
+  _download_to_ramfile = NULL;
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -71,6 +74,7 @@ HTTPChannel(HTTPClient *client) :
 HTTPChannel::
 HTTPChannel::
 ~HTTPChannel() {
 ~HTTPChannel() {
   free_bio();
   free_bio();
+  reset_download_to();
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -190,13 +194,33 @@ write_headers(ostream &out) const {
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 bool HTTPChannel::
 bool HTTPChannel::
 run() {
 run() {
+  if (_state == _done_state || _state == S_failure) {
+    if (!reached_done_state()) {
+      return false;
+    }
+  }
+
+  if (_started_download) {
+    switch (_download_dest) {
+    case DD_none:
+      return false;  // We're done.
+
+    case DD_file:
+      return run_download_to_file();
+
+    case DD_ram:
+      return run_download_to_ram();
+    }
+  }
+
   if (downloader_cat.is_spam()) {
   if (downloader_cat.is_spam()) {
     downloader_cat.spam()
     downloader_cat.spam()
       << "begin run(), _state = " << (int)_state << ", _done_state = "
       << "begin run(), _state = " << (int)_state << ", _done_state = "
       << (int)_done_state << "\n";
       << (int)_done_state << "\n";
   }
   }
-  if (_state == _done_state || _state == S_failure) {
-    return false;
+
+  if (_state == _done_state) {
+    return reached_done_state();
   }
   }
 
 
   bool repeat_later;
   bool repeat_later;
@@ -288,18 +312,13 @@ run() {
 
 
     if (_state == _done_state || _state == S_failure) {
     if (_state == _done_state || _state == S_failure) {
       // We've reached our terminal state.
       // We've reached our terminal state.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam()
-          << "terminating run(), _state = " << (int)_state
-          << ", _done_state = " << (int)_done_state << "\n";
-      }
-      return false;
+      return reached_done_state();
     }
     }
   } while (!repeat_later || _bio.is_null());
   } while (!repeat_later || _bio.is_null());
 
 
   if (downloader_cat.is_spam()) {
   if (downloader_cat.is_spam()) {
     downloader_cat.spam()
     downloader_cat.spam()
-      << "continue run() later, _state = " << (int)_state
+      << "later run(), _state = " << (int)_state
       << ", _done_state = " << (int)_done_state << "\n";
       << ", _done_state = " << (int)_done_state << "\n";
   }
   }
   return true;
   return true;
@@ -309,7 +328,13 @@ run() {
 //     Function: HTTPChannel::read_body
 //     Function: HTTPChannel::read_body
 //       Access: Published
 //       Access: Published
 //  Description: Returns a newly-allocated istream suitable for
 //  Description: Returns a newly-allocated istream suitable for
-//               reading the body of the document.
+//               reading the body of the document.  This may only be
+//               called immediately after a call to get_document() or
+//               post_form(), or after a call to run() has returned
+//               false.
+//
+//               The user is responsible for deleting the returned
+//               istream later.
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 ISocketStream *HTTPChannel::
 ISocketStream *HTTPChannel::
 read_body() {
 read_body() {
@@ -343,6 +368,131 @@ read_body() {
 
 
   return result;
   return result;
 }
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::download_to_file
+//       Access: Published
+//  Description: Specifies the name of a file to download the
+//               resulting document to.  This should be called
+//               immediately after get_document() or
+//               request_document() or related functions.
+//
+//               In the case of the blocking I/O methods like
+//               get_document(), this function will download the
+//               entire document to the file and return true if it was
+//               successfully downloaded, false otherwise.
+//
+//               In the case of non-blocking I/O methods like
+//               request_document(), this function simply indicates an
+//               intention to download to the indicated file.  It
+//               returns true if the file can be opened for writing,
+//               false otherwise, but the contents will not be
+//               completely downloaded until run() has returned false.
+//               At that time, it is possible that a communications
+//               error will have left a partial file, so
+//               is_download_complete() may be called to test this.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+download_to_file(const Filename &filename) {
+  reset_download_to();  // discard any previously requested destination
+  _download_to_filename = filename;
+  _download_to_filename.set_binary();
+  if (!_download_to_filename.open_write(_download_to_file)) {
+    downloader_cat.info()
+      << "Could not open " << filename << " for writing.\n";
+    return false;  // _download_dest is still DD_none from the reset above
+  }
+
+  _download_dest = DD_file;
+
+  if (_nonblocking) {
+    // In nonblocking mode, we can't start the download yet; that will
+    // be done later as run() is called.
+    return true;
+  }
+
+  // In normal, blocking mode, go ahead and do the download now and
+  // report whether it ran to completion.
+  run();
+  return is_download_complete();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::download_to_ram
+//       Access: Published
+//  Description: Specifies a Ramfile object to download the
+//               resulting document to.  This should be called
+//               immediately after get_document() or
+//               request_document() or related functions.
+//
+//               In the case of the blocking I/O methods like
+//               get_document(), this function will download the
+//               entire document to the Ramfile and return true if it
+//               was successfully downloaded, false otherwise.
+//
+//               In the case of non-blocking I/O methods like
+//               request_document(), this function simply indicates an
+//               intention to download to the indicated Ramfile.  It
+//               returns true immediately in that case (there is no
+//               file to open), but the contents will not be
+//               completely downloaded until run() has returned false.
+//               At that time, it is possible that a communications
+//               error will have left partial data in the Ramfile, so
+//               is_download_complete() may be called to test this.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+download_to_ram(Ramfile *ramfile) {
+  nassertr(ramfile != (Ramfile *)NULL, false);  // presumably returns false on NULL -- confirm nassertr semantics
+  reset_download_to();  // discard any previously requested destination
+  ramfile->_pos = 0;  // start the Ramfile over: position 0, no data
+  ramfile->_data = string();
+  _download_to_ramfile = ramfile;
+  _download_dest = DD_ram;
+
+  if (_nonblocking) {
+    // In nonblocking mode, we can't start the download yet; that will
+    // be done later as run() is called.
+    return true;
+  }
+
+  // In normal, blocking mode, go ahead and do the download now and
+  // report whether it ran to completion.
+  run();
+  return is_download_complete();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::reached_done_state
+//       Access: Private
+//  Description: Called by run() once _state is the done state or
+//               S_failure.  Begins the body download if one was
+//               requested; returns true only if a download was started.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+reached_done_state() {
+  if (downloader_cat.is_spam()) {
+    downloader_cat.spam()
+      << "terminating run(), _state = " << (int)_state
+      << ", _done_state = " << (int)_done_state << "\n";
+  }
+
+  if (_state == S_failure || _download_dest == DD_none) {
+    // All done: the request failed, or no download destination was set.
+    return false;
+    
+  } else {
+    // A destination was requested, so fetch the body stream now.
+    _body_stream = read_body();
+    if (_body_stream == (ISocketStream *)NULL) {
+      if (downloader_cat.is_debug()) {
+        downloader_cat.debug()
+          << "Unable to download body.\n";
+      }
+      return false;
+    } else {
+      _started_download = true;  // makes later run() calls dispatch to run_download_to_*()
+      return true;
+    }
+  }
+}
   
   
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::run_connecting
 //     Function: HTTPChannel::run_connecting
@@ -942,6 +1092,66 @@ run_read_trailer() {
   return false;
   return false;
 }
 }
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::run_download_to_file
+//       Access: Private
+//  Description: Streams the body to the file opened by download_to_file().
+//               Returns true if more data may follow, false when finished or failed.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+run_download_to_file() {
+  nassertr(_body_stream != (ISocketStream *)NULL, false);
+
+  int ch = _body_stream->get();  // copy one byte at a time until the stream reports eof or fail
+  while (!_body_stream->eof() && !_body_stream->fail()) {
+    _download_to_file.put(ch);
+    ch = _body_stream->get();
+  }
+
+  if (_download_to_file.fail()) {  // a write failed: mark the channel failed and stop
+    downloader_cat.warning()
+      << "Error writing to " << _download_to_filename << "\n";
+    _state = S_failure;
+    _download_to_file.close();
+    return false;
+  }
+
+  if (_body_stream->is_closed()) {
+    // Done: the body stream reports closed, so close the output file.
+    _download_to_file.close();
+    return false;
+  } else {
+    // Stream still open: more data may arrive on a later call.
+    return true;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::run_download_to_ram
+//       Access: Private
+//  Description: Appends the body to the Ramfile given to download_to_ram().
+//               Returns true if more data may follow, false once the stream is closed.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+run_download_to_ram() {
+  nassertr(_body_stream != (ISocketStream *)NULL, false);
+  nassertr(_download_to_ramfile != (Ramfile *)NULL, false);
+
+  int ch = _body_stream->get();  // copy one byte at a time until the stream reports eof or fail
+  while (!_body_stream->eof() && !_body_stream->fail()) {
+    _download_to_ramfile->_data += (char)ch;
+    ch = _body_stream->get();
+  }
+
+  if (_body_stream->is_closed()) {
+    // Done: the body stream reports closed.
+    return false;
+  } else {
+    // Stream still open: more data may arrive on a later call.
+    return true;
+  }
+}
+
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::begin_request
 //     Function: HTTPChannel::begin_request
@@ -953,6 +1163,7 @@ run_read_trailer() {
 void HTTPChannel::
 void HTTPChannel::
 begin_request(const string &method, const URLSpec &url, const string &body,
 begin_request(const string &method, const URLSpec &url, const string &body,
               bool nonblocking) {
               bool nonblocking) {
+  reset_download_to();
   _status_code = 0;
   _status_code = 0;
   _status_string = string();
   _status_string = string();
   _redirect_trail.clear();
   _redirect_trail.clear();
@@ -1026,11 +1237,20 @@ http_getline(string &str) {
     switch (ch) {
     switch (ch) {
     case '\n':
     case '\n':
       // end-of-line character, we're done.
       // end-of-line character, we're done.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam() << "recv: " << _working_getline << "\n";
-      }
       str = _working_getline;
       str = _working_getline;
       _working_getline = string();
       _working_getline = string();
+      {
+        // Trim trailing whitespace.  We're not required to do this per the
+        // HTTP spec, but let's be generous.
+        size_t p = str.length();
+        while (p > 0 && isspace(str[p - 1])) {
+          --p;
+        }
+        str = str.substr(0, p);
+      }
+      if (downloader_cat.is_spam()) {
+        downloader_cat.spam() << "recv: " << str << "\n";
+      }
       return true;
       return true;
 
 
     case '\r':
     case '\r':
@@ -1876,6 +2096,21 @@ free_bio() {
   _state = S_new;
   _state = S_new;
 }
 }
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::reset_download_to
+//       Access: Private
+//  Description: Resets the indication of how the document will be
+//               downloaded.  This must be re-specified after each
+//               get_document() (or related) call.
+////////////////////////////////////////////////////////////////////
+void HTTPChannel::
+reset_download_to() {
+  _started_download = false;  // run() goes back to normal state processing
+  _download_to_file.close();  // close any output file opened by download_to_file()
+  _download_to_ramfile = (Ramfile *)NULL;  // the pointer is dropped, not deleted
+  _download_dest = DD_none;  // no destination until download_to_*() is called again
+}
+
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::get_basic_authorization
 //     Function: HTTPChannel::get_basic_authorization
 //       Access: Private
 //       Access: Private

+ 22 - 0
panda/src/downloader/httpChannel.h

@@ -36,8 +36,10 @@
 #include "pmap.h"
 #include "pmap.h"
 #include "pointerTo.h"
 #include "pointerTo.h"
 #include "config_downloader.h"
 #include "config_downloader.h"
+#include "filename.h"
 #include <openssl/ssl.h>
 #include <openssl/ssl.h>
 
 
+class Ramfile;
 class HTTPClient;
 class HTTPClient;
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -98,8 +100,13 @@ PUBLISHED:
   bool run();
   bool run();
 
 
   ISocketStream *read_body();
   ISocketStream *read_body();
+  bool download_to_file(const Filename &filename);
+  bool download_to_ram(Ramfile *ramfile);
+
+  INLINE bool is_download_complete() const;
 
 
 private:
 private:
+  bool reached_done_state();
   bool run_connecting();
   bool run_connecting();
   bool run_proxy_ready();
   bool run_proxy_ready();
   bool run_proxy_request_sent();
   bool run_proxy_request_sent();
@@ -115,6 +122,9 @@ private:
   bool run_read_body();
   bool run_read_body();
   bool run_read_trailer();
   bool run_read_trailer();
 
 
+  bool run_download_to_file();
+  bool run_download_to_ram();
+
   void begin_request(const string &method, const URLSpec &url, 
   void begin_request(const string &method, const URLSpec &url, 
                      const string &body, bool nonblocking);
                      const string &body, bool nonblocking);
 
 
@@ -148,6 +158,7 @@ private:
 #endif
 #endif
 
 
   void free_bio();
   void free_bio();
+  void reset_download_to();
 
 
   HTTPClient *_client;
   HTTPClient *_client;
   URLSpec _proxy;
   URLSpec _proxy;
@@ -161,6 +172,16 @@ private:
   string _header;
   string _header;
   string _body;
   string _body;
 
 
+  enum DownloadDest {  // where the body goes once the done state is reached
+    DD_none,  // no download destination requested
+    DD_file,  // body is written to _download_to_file
+    DD_ram,  // body is appended to _download_to_ramfile->_data
+  };
+  DownloadDest _download_dest;
+  Filename _download_to_filename;
+  ofstream _download_to_file;
+  Ramfile *_download_to_ramfile;
+
   int _read_index;
   int _read_index;
 
 
   HTTPClient::HTTPVersion _http_version;
   HTTPClient::HTTPVersion _http_version;
@@ -201,6 +222,7 @@ private:
   };
   };
   State _state;
   State _state;
   State _done_state;
   State _done_state;
+  bool _started_download;
   string _proxy_header;
   string _proxy_header;
   string _proxy_request_text;
   string _proxy_request_text;
   bool _proxy_tunnel;
   bool _proxy_tunnel;

+ 22 - 12
panda/src/downloader/identityStreamBuf.cxx

@@ -141,6 +141,18 @@ read_chars(char *start, size_t length) {
     // file.
     // file.
     (*_source)->read(start, length);
     (*_source)->read(start, length);
     read_count = (*_source)->gcount();
     read_count = (*_source)->gcount();
+  
+    if (read_count == 0) {
+      if ((*_source)->is_closed()) {
+        // socket closed; we're done.
+        if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+          // An IdentityStreamBuf doesn't have a trailer, so we've already
+          // "read" it.
+          _doc->_state = HTTPChannel::S_read_trailer;
+        }
+      }
+      return 0;
+    }
 
 
   } else {
   } else {
     // Extract some of the bytes remaining in the chunk.
     // Extract some of the bytes remaining in the chunk.
@@ -150,6 +162,16 @@ read_chars(char *start, size_t length) {
       (*_source)->read(start, length);
       (*_source)->read(start, length);
       read_count = (*_source)->gcount();
       read_count = (*_source)->gcount();
       _bytes_remaining -= read_count;
       _bytes_remaining -= read_count;
+  
+      if (read_count == 0) {
+        if ((*_source)->is_closed()) {
+          // socket closed unexpectedly; problem.
+          if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+            _doc->_state = HTTPChannel::S_failure;
+          }
+        }
+        return 0;
+      }
     }
     }
       
       
     if (_bytes_remaining == 0) {
     if (_bytes_remaining == 0) {
@@ -161,18 +183,6 @@ read_chars(char *start, size_t length) {
       }
       }
     }
     }
   }
   }
-  
-  if (read_count == 0) {
-    if ((*_source)->is_closed()) {
-      // socket closed; we're done.
-      if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
-        // An IdentityStreamBuf doesn't have a trailer, so we've already
-        // "read" it.
-        _doc->_state = HTTPChannel::S_read_trailer;
-      }
-    }
-    return 0;
-  }
 
 
   return read_count;
   return read_count;
 }
 }