Переглянути джерело

refine interface for determining file size of download

David Rose 22 роки тому
батько
коміт
ed6187271c

+ 11 - 2
panda/src/downloader/chunkedStreamBuf.cxx

@@ -17,6 +17,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "chunkedStreamBuf.h"
+#include "config_downloader.h"
 #include <ctype.h>
 
 // This module is not compiled if OpenSSL is not available.
@@ -77,7 +78,8 @@ open_read(BioStreamPtr *source, HTTPChannel *doc) {
 
   if (_doc != (HTTPChannel *)NULL) {
     _read_index = doc->_read_index;
-    _doc->_file_size = 0;
+    _doc->_transfer_file_size = 0;
+    _doc->_got_transfer_file_size = true;
 
     // Read a little bit from the file to get the first chunk (and
     // therefore the file size, or at least the size of the first
@@ -180,9 +182,16 @@ read_chars(char *start, size_t length) {
     return 0;
   }
   size_t chunk_size = (size_t)strtol(line.c_str(), NULL, 16);
+  if (downloader_cat.is_spam()) {
+    downloader_cat.spam()
+      << "Got chunk of size " << chunk_size << " bytes.\n";
+  }
+
   if (chunk_size == 0) {
     // Last chunk; we're done.
     _done = true;
+    _doc->_file_size = _doc->_transfer_file_size;
+    _doc->_got_file_size = true;
     if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
       _doc->finished_body(true);
     }
@@ -190,7 +199,7 @@ read_chars(char *start, size_t length) {
   }
 
   if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
-    _doc->_file_size += chunk_size;
+    _doc->_transfer_file_size += chunk_size;
   }
 
   _chunk_remaining = chunk_size;

+ 4 - 0
panda/src/downloader/config_downloader.cxx

@@ -88,8 +88,12 @@ const string http_proxy =
 config_downloader.GetString("http-proxy", "");
 const string http_direct_hosts =
 config_downloader.GetString("http-direct-hosts", "");
+const bool http_try_all_direct =
+config_downloader.GetBool("http-try-all-direct", true);
 const string http_proxy_username =
 config_downloader.GetString("http-proxy-username", "");
+const bool http_proxy_tunnel =
+config_downloader.GetBool("http-proxy-tunnel", false);
 
 // This is the default amount of time to wait for a TCP/IP connection
 // to be established, in seconds.  It is presently only used for

+ 2 - 0
panda/src/downloader/config_downloader.h

@@ -45,7 +45,9 @@ extern const bool verify_ssl;
 extern const string ssl_cipher_list;
 extern const string http_proxy;
 extern const string http_direct_hosts;
+extern const bool http_try_all_direct;
 extern const string http_proxy_username;
+extern const bool http_proxy_tunnel;
 extern const double connect_timeout;
 extern const double http_timeout;
 extern const int http_max_connect_count;

+ 66 - 14
panda/src/downloader/httpChannel.I

@@ -241,6 +241,44 @@ get_allow_proxy() const {
   return _allow_proxy;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::set_proxy_tunnel
+//       Access: Published
+//  Description: Controls whether ordinary requests are tunneled
+//               through the proxy instead of being served by it.
+//
+//               With the flag off (the usual arrangement), the
+//               proxy is asked for the URL itself; it may answer
+//               from its cache or fetch a fresh copy from the
+//               server, and it may rewrite the headers and the
+//               transfer encoding along the way.
+//
+//               With the flag on, the proxy is instead asked to
+//               open a connection to the server (for instance, on
+//               port 80), and the document is then requested from
+//               the server directly over that connection.  Should
+//               the proxy refuse the connect request, the retrieve
+//               operation fails.
+//
+//               The flag makes no difference for SSL connections
+//               (e.g. https) or for connections through a Socks
+//               proxy; those are always tunneled.
+////////////////////////////////////////////////////////////////////
+INLINE void HTTPChannel::
+set_proxy_tunnel(bool proxy_tunnel) {
+  // Only the caller's preference is stored here; whether a particular
+  // request actually tunnels is worked out in reconsider_proxy().
+  _proxy_tunnel = proxy_tunnel;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::get_proxy_tunnel
+//       Access: Published
+//  Description: Reports the flag last stored by set_proxy_tunnel():
+//               true if every connection is to be tunneled through
+//               the proxy, or false (the normal case) if the proxy
+//               is allowed to serve up documents itself.
+////////////////////////////////////////////////////////////////////
+INLINE bool HTTPChannel::
+get_proxy_tunnel() const {
+  return _proxy_tunnel;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::set_connect_timeout
 //       Access: Published
@@ -432,22 +470,36 @@ get_max_updates_per_second() const {
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: HTTPChannel::get_file_size
+//     Function: HTTPChannel::set_expected_file_size
 //       Access: Published
-//  Description: Returns the size of the file, if it is known.
-//               Returns 0 if the file size is not known.  
-//
-//               If the file is dynamically generated, the size may
-//               not be available until a read has started
-//               (e.g. open_read_file() has been called); and even
-//               then it may increase as more of the file is read due
-//               to the nature of HTTP/1.1 requests which can change
-//               their minds midstream about how much data they're
-//               sending you.
+//  Description: This may be called immediately after a call to
+//               get_document() or some related function to specify
+//               the expected size of the document we are retrieving,
+//               if we happen to know.  This is used as the return
+//               value to get_file_size() only in the case that the
+//               server does not tell us the actual file size.
 ////////////////////////////////////////////////////////////////////
-INLINE size_t HTTPChannel::
-get_file_size() const {
-  return _file_size;
+INLINE void HTTPChannel::
+set_expected_file_size(size_t file_size) {
+  // Flag the hint as present and remember its value; get_file_size()
+  // consults it only when no better figure is available.
+  _got_expected_file_size = true;
+  _expected_file_size = file_size;
 }
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::is_file_size_known
+//       Access: Published
+//  Description: Returns true if the server has told us the size of
+//               the file we are currently retrieving, so that the
+//               value reported by get_file_size() is reliable.
+//               Returns false if that value is only an educated
+//               guess (for instance, the figure supplied to
+//               set_expected_file_size(), or a running total
+//               inferred from a chunked transfer encoding that is
+//               still in progress).
+////////////////////////////////////////////////////////////////////
+INLINE bool HTTPChannel::
+is_file_size_known() const {
+  return _got_file_size;
 }
 
 ////////////////////////////////////////////////////////////////////

+ 60 - 19
panda/src/downloader/httpChannel.cxx

@@ -44,6 +44,7 @@ HTTPChannel(HTTPClient *client) :
   _proxy_next_index = 0;
   _persistent_connection = false;
   _allow_proxy = true;
+  _proxy_tunnel = http_proxy_tunnel;
   _connect_timeout = connect_timeout;
   _http_timeout = http_timeout;
   _blocking_connect = false;
@@ -55,13 +56,18 @@ HTTPChannel(HTTPClient *client) :
   _nonblocking = false;
   _want_ssl = false;
   _proxy_serves_document = false;
-  _proxy_tunnel = false;
+  _proxy_tunnel_now = false;
   _first_byte_requested = 0;
   _last_byte_requested = 0;
   _first_byte_delivered = 0;
   _last_byte_delivered = 0;
   _read_index = 0;
+  _expected_file_size = 0;
   _file_size = 0;
+  _transfer_file_size = 0;
+  _got_expected_file_size = false;
+  _got_file_size = false;
+  _got_transfer_file_size = false;
   _bytes_downloaded = 0;
   _bytes_requested = 0;
   _status_code = 0;
@@ -179,6 +185,35 @@ get_header_value(const string &key) const {
   return string();
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::get_file_size
+//       Access: Published
+//  Description: Returns the best available figure for the size of
+//               the file being retrieved.  In order of preference,
+//               this is the definite size if it is known, then the
+//               running total accumulated from a chunked transfer,
+//               then the value given to set_expected_file_size(),
+//               and finally 0 if none of these is available.
+//
+//               For a dynamically generated file the size may not
+//               be available until a read has started
+//               (e.g. open_read_file() has been called), and even
+//               then it may grow as more of the file is read, since
+//               an HTTP/1.1 response can change its mind midstream
+//               about how much data it is sending.
+////////////////////////////////////////////////////////////////////
+size_t HTTPChannel::
+get_file_size() const {
+  // The definite size takes precedence over everything else.
+  if (_got_file_size) {
+    return _file_size;
+  }
+
+  // Next best: the sum of the chunk sizes seen so far.
+  if (_got_transfer_file_size) {
+    return _transfer_file_size;
+  }
+
+  // Failing that, fall back on the caller-supplied hint.
+  if (_got_expected_file_size) {
+    return _expected_file_size;
+  }
+
+  // Nothing at all is known.
+  return 0;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::write_headers
 //       Access: Published
@@ -421,17 +456,15 @@ read_body() {
   }
 
   string transfer_coding = downcase(get_header_value("Transfer-Encoding"));
-  string content_length = get_header_value("Content-Length");
 
   ISocketStream *result;
   if (transfer_coding == "chunked") {
     // "chunked" transfer encoding.  This means we will have to decode
     // the length of the file as we read it in chunks.  The
     // IChunkedStream does this.
-    _file_size = 0;
     _state = S_reading_body;
     _read_index++;
-    result = new IChunkedStream(_source, (HTTPChannel *)this);
+    result = new IChunkedStream(_source, this);
 
   } else {
     // If the transfer encoding is anything else, assume "identity".
@@ -440,8 +473,7 @@ read_body() {
     // specified), or till end of file otherwise.
     _state = S_reading_body;
     _read_index++;
-    result = new IIdentityStream(_source, (HTTPChannel *)this, 
-                                 !content_length.empty(), _file_size);
+    result = new IIdentityStream(_source, this, _got_file_size, _file_size);
   }
 
   return result;
@@ -709,7 +741,7 @@ run_connecting() {
       << _bio->get_port() << "\n";
   }
 
-  if (_proxy_tunnel) {
+  if (_proxy_tunnel_now) {
     if (_proxy.get_scheme() == "socks") {
       _state = S_socks_proxy_greet;
     } else {
@@ -833,7 +865,8 @@ run_http_proxy_request_sent() {
   _current_field_name = string();
   _current_field_value = string();
   _headers.clear();
-  _file_size = 0;
+  _got_file_size = false;
+  _got_transfer_file_size = false;
   return false;
 }
 
@@ -1355,7 +1388,8 @@ run_request_sent() {
   _current_field_name = string();
   _current_field_value = string();
   _headers.clear();
-  _file_size = 0;
+  _got_file_size = false;
+  _got_transfer_file_size = false;
   return false;
 }
 
@@ -1451,15 +1485,20 @@ run_reading_header() {
     return false;
   }
 
-  _file_size = 0;
+  _got_expected_file_size = false;
+  _got_file_size = false;
+  _got_transfer_file_size = false;
+  
   string content_length = get_header_value("Content-Length");
   if (!content_length.empty()) {
     _file_size = atoi(content_length.c_str());
+    _got_file_size = true;
 
   } else if (get_status_code() == 206) {
     // Well, we didn't get a content-length from the server, but we
     // can infer the number of bytes based on the range we're given.
     _file_size = _last_byte_delivered - _first_byte_delivered + 1;
+    _got_file_size = true;
   }
   _redirect = get_header_value("Location");
 
@@ -1608,15 +1647,15 @@ run_begin_body() {
     // We have already "read" the nonexistent body.
     _state = S_read_trailer;
 
-  } else if (_file_size > 8192) {
+  } else if (get_file_size() > 8192) {
     // If we know the size of the body we are about to skip and it's
     // too large (and here we arbitrarily say 8KB is too large), then
     // don't bother skipping it--just drop the connection and get a
     // new one.
     if (downloader_cat.is_debug()) {
       downloader_cat.debug()
-        << "Dropping connection rather than skipping past " << _file_size
-        << " bytes.\n";
+        << "Dropping connection rather than skipping past " 
+        << get_file_size() << " bytes.\n";
     }
     reset_to_new();
 
@@ -1920,26 +1959,28 @@ begin_request(HTTPEnum::Method method, const DocumentSpec &url,
 ////////////////////////////////////////////////////////////////////
 void HTTPChannel::
 reconsider_proxy() {
-  _proxy_tunnel = false;
+  _proxy_tunnel_now = false;
   _proxy_serves_document = false;
 
   if (!_proxy.empty()) {
-    // If we're opening an SSL connection, or the user has explicitly
+    // If the user insists we always tunnel through a proxy, or if
+    // we're opening an SSL connection, or the user has explicitly
     // asked for a direct connection of some kind, or if we have a
     // SOCKS-style proxy; each of these demands a tunnel through the
     // proxy to speak directly to the http server.
-    _proxy_tunnel =
-      (_want_ssl || _method == HTTPEnum::M_connect || _proxy.get_scheme() == "socks");
+    _proxy_tunnel_now =
+      (get_proxy_tunnel() || _want_ssl ||
+       _method == HTTPEnum::M_connect || _proxy.get_scheme() == "socks");
 
     // Otherwise (but we still have a proxy), then we ask the proxy to
     // hand us the document.
-    _proxy_serves_document = !_proxy_tunnel;
+    _proxy_serves_document = !_proxy_tunnel_now;
   }
 
   make_header();
   make_request_text();
 
-  if (_proxy_tunnel) {
+  if (_proxy_tunnel_now) {
     // Maybe we need to tunnel through the proxy to connect to the
     // server directly.
     ostringstream request;

+ 11 - 1
panda/src/downloader/httpChannel.h

@@ -93,6 +93,8 @@ PUBLISHED:
 
   INLINE void set_allow_proxy(bool allow_proxy);
   INLINE bool get_allow_proxy() const;
+  INLINE void set_proxy_tunnel(bool proxy_tunnel);
+  INLINE bool get_proxy_tunnel() const;
 
   INLINE void set_connect_timeout(double timeout_seconds);
   INLINE double get_connect_timeout() const;
@@ -111,7 +113,9 @@ PUBLISHED:
   INLINE void set_max_updates_per_second(double max_updates_per_second);
   INLINE double get_max_updates_per_second() const;
 
+  INLINE void set_expected_file_size(size_t file_size);
   INLINE size_t get_file_size() const;
+  INLINE bool is_file_size_known() const;
 
   void write_headers(ostream &out) const;
 
@@ -255,6 +259,7 @@ private:
   PT(BioStreamPtr) _source;
   bool _persistent_connection;
   bool _allow_proxy;
+  bool _proxy_tunnel;
   double _connect_timeout;
   double _http_timeout;
   bool _blocking_connect;
@@ -274,7 +279,7 @@ private:
   string _body;
   bool _want_ssl;
   bool _proxy_serves_document;
-  bool _proxy_tunnel;
+  bool _proxy_tunnel_now;
   bool _server_response_has_no_body;
   size_t _first_byte_requested;
   size_t _last_byte_requested;
@@ -323,9 +328,14 @@ private:
   typedef pmap<string, string> Headers;
   Headers _headers;
 
+  size_t _expected_file_size;
   size_t _file_size;
+  size_t _transfer_file_size;
   size_t _bytes_downloaded;
   size_t _bytes_requested;
+  bool _got_expected_file_size;
+  bool _got_file_size;
+  bool _got_transfer_file_size;
 
   // These members are used to maintain the current state while
   // communicating with the server.  We need to store everything in

+ 27 - 0
panda/src/downloader/httpClient.I

@@ -17,6 +17,33 @@
 ////////////////////////////////////////////////////////////////////
 
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPClient::set_try_all_direct
+//       Access: Published
+//  Description: Chooses what happens after a connection attempt
+//               through a proxy fails.  When true, a direct
+//               connection is always tried next, whether or not the
+//               host appears on the direct_host_spec list.  When
+//               false, no direct attempt is made while a proxy is
+//               in effect, even if that proxy fails.
+////////////////////////////////////////////////////////////////////
+INLINE void HTTPClient::
+set_try_all_direct(bool try_all_direct) {
+  _try_all_direct = try_all_direct;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPClient::get_try_all_direct
+//       Access: Published
+//  Description: Returns true if a failed connection through a proxy
+//               will be followed up by a direct connection attempt,
+//               false otherwise.  See set_try_all_direct().
+////////////////////////////////////////////////////////////////////
+INLINE bool HTTPClient::
+get_try_all_direct() const {
+  return _try_all_direct;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPClient::set_http_version
 //       Access: Published

+ 6 - 29
panda/src/downloader/httpClient.cxx

@@ -102,6 +102,7 @@ HTTPClient() {
 
   set_proxy_spec(http_proxy);
   set_direct_host_spec(http_direct_hosts);
+  _try_all_direct = http_try_all_direct;
 
   if (!http_proxy_username.empty()) {
     set_username("*proxy", "", http_proxy_username);
@@ -155,6 +156,7 @@ void HTTPClient::
 operator = (const HTTPClient &copy) {
   _proxies_by_scheme = copy._proxies_by_scheme;
   _direct_hosts = copy._direct_hosts;
+  _try_all_direct = copy._try_all_direct;
   _http_version = copy._http_version;
   _verify_ssl = copy._verify_ssl;
   _usernames = copy._usernames;
@@ -190,33 +192,6 @@ HTTPClient::
   clear_expected_servers();
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: HTTPClient::set_proxy
-//       Access: Published
-//  Description: Specifies the proxy URL to handle all http and
-//               https requests.  Deprecated.
-////////////////////////////////////////////////////////////////////
-void HTTPClient::
-set_proxy(const URLSpec &proxy) {
-  set_proxy_spec(proxy.get_url());
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: HTTPClient::get_proxy
-//       Access: Published
-//  Description: Returns the proxy URL to handle all http and
-//               https requests.  Deprecated.
-////////////////////////////////////////////////////////////////////
-URLSpec HTTPClient::
-get_proxy() const {
-  pvector<URLSpec> proxies;
-  get_proxies_for_url(URLSpec("http://"), proxies);
-  if (!proxies.empty()) {
-    return proxies[0];
-  }
-  return URLSpec();
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPClient::set_proxy_spec
 //       Access: Published
@@ -524,8 +499,10 @@ get_proxies_for_url(const URLSpec &url, pvector<URLSpec> &proxies) const {
     }
   }
 
-  // We always try a direct connection if all else fails.
-  temp_list.push_back(URLSpec());
+  if (_try_all_direct) {
+    // We may try a direct connection if all else fails.
+    temp_list.push_back(URLSpec());
+  }
 
   // Finally, as a very last resort, fall back to the HTTP proxy.
   if (!got_any) {

+ 4 - 3
panda/src/downloader/httpClient.h

@@ -62,15 +62,15 @@ PUBLISHED:
   void operator = (const HTTPClient &copy);
   ~HTTPClient();
 
-  void set_proxy(const URLSpec &proxy);
-  URLSpec get_proxy() const;
-
   void set_proxy_spec(const string &proxy_spec);
   string get_proxy_spec() const;
 
   void set_direct_host_spec(const string &direct_host_spec);
   string get_direct_host_spec() const;
 
+  INLINE void set_try_all_direct(bool try_all_direct);
+  INLINE bool get_try_all_direct() const;
+
   void clear_proxy();
   void add_proxy(const string &scheme, const URLSpec &proxy);
   void clear_direct_host();
@@ -141,6 +141,7 @@ private:
   ProxiesByScheme _proxies_by_scheme;
   typedef pvector<GlobPattern> DirectHosts;
   DirectHosts _direct_hosts;
+  bool _try_all_direct;
 
   HTTPEnum::HTTPVersion _http_version;
   VerifySSL _verify_ssl;