Ver Fonte

downloader: Support gzip and deflate Content-Encoding values

rdb há 4 anos atrás
pai
commit
8f1e2f1945

+ 56 - 11
panda/src/downloader/httpChannel.cxx

@@ -22,6 +22,11 @@
 #include "virtualFileMountHTTP.h"
 #include "virtualFileMountHTTP.h"
 #include "ramfile.h"
 #include "ramfile.h"
 #include "globPattern.h"
 #include "globPattern.h"
+#include "string_utils.h"
+
+#ifdef HAVE_ZLIB
+#include "zStream.h"
+#endif
 
 
 #include <stdio.h>
 #include <stdio.h>
 
 
@@ -111,6 +116,7 @@ HTTPChannel(HTTPClient *client) :
   _done_state = S_new;
   _done_state = S_new;
   _started_download = false;
   _started_download = false;
   _sent_so_far = 0;
   _sent_so_far = 0;
+  _body_socket_stream = nullptr;
   _body_stream = nullptr;
   _body_stream = nullptr;
   _owns_body_stream = false;
   _owns_body_stream = false;
   _sbio = nullptr;
   _sbio = nullptr;
@@ -550,7 +556,7 @@ run() {
  * The user is responsible for passing the returned istream to
  * The user is responsible for passing the returned istream to
  * close_read_body() later.
  * close_read_body() later.
  */
  */
-ISocketStream *HTTPChannel::
+std::istream *HTTPChannel::
 open_read_body() {
 open_read_body() {
   reset_body_stream();
   reset_body_stream();
 
 
@@ -560,14 +566,14 @@ open_read_body() {
 
 
   string transfer_coding = downcase(get_header_value("Transfer-Encoding"));
   string transfer_coding = downcase(get_header_value("Transfer-Encoding"));
 
 
-  ISocketStream *result;
+  std::istream *result;
   if (transfer_coding == "chunked") {
   if (transfer_coding == "chunked") {
     // "chunked" transfer encoding.  This means we will have to decode the
     // "chunked" transfer encoding.  This means we will have to decode the
     // length of the file as we read it in chunks.  The IChunkedStream does
     // length of the file as we read it in chunks.  The IChunkedStream does
     // this.
     // this.
     _state = S_reading_body;
     _state = S_reading_body;
     _read_index++;
     _read_index++;
-    result = new IChunkedStream(_source, this);
+    _body_socket_stream = new IChunkedStream(_source, this);
 
 
   } else {
   } else {
     // If the transfer encoding is anything else, assume "identity". This is
     // If the transfer encoding is anything else, assume "identity". This is
@@ -576,11 +582,39 @@ open_read_body() {
     // file otherwise.
     // file otherwise.
     _state = S_reading_body;
     _state = S_reading_body;
     _read_index++;
     _read_index++;
-    result = new IIdentityStream(_source, this, _got_file_size, _file_size);
+    _body_socket_stream = new IIdentityStream(_source, this, _got_file_size, _file_size);
+  }
+  result = _body_socket_stream;
+
+  string content_encoding = trim(get_header_value("Content-Encoding"));
+  if (!content_encoding.empty()) {
+    vector_string content_encodings;
+    tokenize(downcase(content_encoding), content_encodings, ",");
+    for (const string &encoding : content_encodings) {
+      string trimmed = trim(encoding);
+      if (trimmed == "identity") {
+        continue;
+      }
+#ifdef HAVE_ZLIB
+      else if (trimmed == "gzip" || trimmed == "deflate" || trimmed == "x-gzip") {
+        // "deflate" actually includes zlib header, which is accepted as well
+        result = new IDecompressStream(result, true, -1, true);
+      }
+#endif
+      else {
+        downloader_cat.error()
+          << "Content-Encoding not supported: " << trimmed << "\n";
+        delete result;
+        _body_socket_stream = nullptr;
+        _body_stream = nullptr;
+        _owns_body_stream = false;
+        return nullptr;
+      }
+    }
   }
   }
 
 
-  result->_channel = this;
   _body_stream = result;
   _body_stream = result;
+  _body_socket_stream->_channel = this;
   _owns_body_stream = false;
   _owns_body_stream = false;
 
 
   return result;
   return result;
@@ -804,9 +838,9 @@ downcase(const string &s) {
  */
  */
 void HTTPChannel::
 void HTTPChannel::
 body_stream_destructs(ISocketStream *stream) {
 body_stream_destructs(ISocketStream *stream) {
-  if (stream == _body_stream) {
+  if (stream == _body_socket_stream) {
     if (_state == S_reading_body) {
     if (_state == S_reading_body) {
-      switch (_body_stream->get_read_state()) {
+      switch (_body_socket_stream->get_read_state()) {
       case ISocketStream::RS_complete:
       case ISocketStream::RS_complete:
         finished_body(false);
         finished_body(false);
         break;
         break;
@@ -820,6 +854,8 @@ body_stream_destructs(ISocketStream *stream) {
         break;
         break;
       }
       }
     }
     }
+
+    _body_socket_stream = nullptr;
     _body_stream = nullptr;
     _body_stream = nullptr;
     _owns_body_stream = false;
     _owns_body_stream = false;
   }
   }
@@ -2160,7 +2196,7 @@ run_reading_body() {
     std::getline(*_body_stream, line);
     std::getline(*_body_stream, line);
   }
   }
 
 
-  if (!_body_stream->is_closed()) {
+  if (!_body_socket_stream->is_closed()) {
     // There's more to come later.
     // There's more to come later.
     return true;
     return true;
   }
   }
@@ -2281,7 +2317,7 @@ run_download_to_file() {
 
 
   _download_to_stream->flush();
   _download_to_stream->flush();
 
 
-  if (_body_stream->is_closed()) {
+  if (_body_socket_stream->is_closed()) {
     // Done.
     // Done.
     reset_body_stream();
     reset_body_stream();
     close_download_stream();
     close_download_stream();
@@ -2331,7 +2367,7 @@ run_download_to_ram() {
     count = _body_stream->gcount();
     count = _body_stream->gcount();
   }
   }
 
 
-  if (_body_stream->is_closed()) {
+  if (_body_socket_stream->is_closed()) {
     // Done.
     // Done.
     reset_body_stream();
     reset_body_stream();
     close_download_stream();
     close_download_stream();
@@ -2392,7 +2428,7 @@ run_download_to_stream() {
 
 
   _download_to_stream->flush();
   _download_to_stream->flush();
 
 
-  if (_body_stream->is_closed()) {
+  if (_body_socket_stream->is_closed()) {
     // Done.
     // Done.
     reset_body_stream();
     reset_body_stream();
     close_download_stream();
     close_download_stream();
@@ -3635,6 +3671,14 @@ make_header() {
       << "Content-Length: " << _body.length() << "\r\n";
       << "Content-Length: " << _body.length() << "\r\n";
   }
   }
 
 
+#ifdef HAVE_ZLIB
+  stream
+    << "Accept-Encoding: gzip, deflate, identity\r\n";
+#else
+  stream
+    << "Accept-Encoding: identity\r\n";
+#endif
+
   _header = stream.str();
   _header = stream.str();
 }
 }
 
 
@@ -3810,6 +3854,7 @@ reset_body_stream() {
       nassertv(_body_stream == nullptr && !_owns_body_stream);
       nassertv(_body_stream == nullptr && !_owns_body_stream);
     }
     }
   } else {
   } else {
+    _body_socket_stream = nullptr;
     _body_stream = nullptr;
     _body_stream = nullptr;
   }
   }
 }
 }

+ 3 - 2
panda/src/downloader/httpChannel.h

@@ -182,7 +182,7 @@ PUBLISHED:
   bool run();
   bool run();
   INLINE void begin_connect_to(const DocumentSpec &url);
   INLINE void begin_connect_to(const DocumentSpec &url);
 
 
-  ISocketStream *open_read_body();
+  std::istream *open_read_body();
   void close_read_body(std::istream *stream) const;
   void close_read_body(std::istream *stream) const;
 
 
   BLOCKING bool download_to_file(const Filename &filename, bool subdocument_resumes = true);
   BLOCKING bool download_to_file(const Filename &filename, bool subdocument_resumes = true);
@@ -416,7 +416,8 @@ private:
   size_t _sent_so_far;
   size_t _sent_so_far;
   std::string _current_field_name;
   std::string _current_field_name;
   std::string _current_field_value;
   std::string _current_field_value;
-  ISocketStream *_body_stream;
+  ISocketStream *_body_socket_stream;
+  std::istream *_body_stream;
   bool _owns_body_stream;
   bool _owns_body_stream;
   BIO *_sbio;
   BIO *_sbio;
   std::string _cipher_list;
   std::string _cipher_list;

+ 1 - 24
panda/src/downloader/httpClient.cxx

@@ -21,6 +21,7 @@
 #include "httpBasicAuthorization.h"
 #include "httpBasicAuthorization.h"
 #include "httpDigestAuthorization.h"
 #include "httpDigestAuthorization.h"
 #include "globPattern.h"
 #include "globPattern.h"
+#include "string_utils.h"
 
 
 #ifdef HAVE_OPENSSL
 #ifdef HAVE_OPENSSL
 
 
@@ -48,30 +49,6 @@ trim_blanks(const string &str) {
   return str.substr(start, end - start);
   return str.substr(start, end - start);
 }
 }
 
 
-/**
- * Chops the source string up into pieces delimited by any of the characters
- * specified in delimiters.  Each delimiter character ends exactly one token,
- * so repeated delimiter characters represent zero-length tokens.
- *
- * A source string that is empty, or that ends with a delimiter character,
- * produces a final zero-length token; at least one token is therefore always
- * appended.
- *
- * It is the user's responsibility to ensure the output vector is cleared
- * before calling this function; the results will simply be appended to the
- * end of the vector.
- */
-static void
-tokenize(const string &str, vector_string &words, const string &delimiters) {
-  size_t p = 0;
-  while (p < str.length()) {
-    size_t q = str.find_first_of(delimiters, p);
-    if (q == string::npos) {  // no further delimiter: the rest is the last token
-      words.push_back(str.substr(p));
-      return;
-    }
-    words.push_back(str.substr(p, q - p));
-    p = q + 1;  // resume scanning just past the delimiter
-  }
-  words.push_back(string());  // input was empty or ended on a delimiter
-}
-
 #ifndef NDEBUG
 #ifndef NDEBUG
 /**
 /**
  * This method is attached as a callback for SSL messages only when debug
  * This method is attached as a callback for SSL messages only when debug