Browse Source

Fix gzip compression/decompression over 4 GiB data size (#1002)

* Fix gzip compression/decompression over 4 GiB data size

* Add gzip test for large random data
yosh-matsuda 4 years ago
parent
commit
879dd261c2
2 changed files with 105 additions and 34 deletions
  1. 59 34
      httplib.h
  2. 46 0
      test/test.cc

+ 59 - 34
httplib.h

@@ -2578,28 +2578,40 @@ public:
                 Callback callback) override {
     assert(is_valid_);
 
-    auto flush = last ? Z_FINISH : Z_NO_FLUSH;
+    do {
+      constexpr size_t max_avail_in =
+          std::numeric_limits<decltype(strm_.avail_in)>::max();
 
-    strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
-    strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
+      strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
+          std::min(data_length, max_avail_in));
+      strm_.next_in =
+          const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
 
-    int ret = Z_OK;
+      data_length -= strm_.avail_in;
+      data += strm_.avail_in;
 
-    std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
-    do {
-      strm_.avail_out = static_cast<uInt>(buff.size());
-      strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
+      auto flush = (last && data_length == 0) ? Z_FINISH : Z_NO_FLUSH;
+      int ret = Z_OK;
 
-      ret = deflate(&strm_, flush);
-      if (ret == Z_STREAM_ERROR) { return false; }
+      std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
+      do {
+        strm_.avail_out = static_cast<uInt>(buff.size());
+        strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
 
-      if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
-        return false;
-      }
-    } while (strm_.avail_out == 0);
+        ret = deflate(&strm_, flush);
+        if (ret == Z_STREAM_ERROR) { return false; }
+
+        if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
+          return false;
+        }
+      } while (strm_.avail_out == 0);
+
+      assert((flush == Z_FINISH && ret == Z_STREAM_END) ||
+             (flush == Z_NO_FLUSH && ret == Z_OK));
+      assert(strm_.avail_in == 0);
+
+    } while (data_length > 0);
 
-    assert((last && ret == Z_STREAM_END) || (!last && ret == Z_OK));
-    assert(strm_.avail_in == 0);
     return true;
   }
 
@@ -2633,28 +2645,41 @@ public:
 
     int ret = Z_OK;
 
-    strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
-    strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
+    do {
+      constexpr size_t max_avail_in =
+          std::numeric_limits<decltype(strm_.avail_in)>::max();
+
+      strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
+          std::min(data_length, max_avail_in));
+      strm_.next_in =
+          const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
+
+      data_length -= strm_.avail_in;
+      data += strm_.avail_in;
+
+      std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
+      while (strm_.avail_in > 0) {
+        strm_.avail_out = static_cast<uInt>(buff.size());
+        strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
+
+        ret = inflate(&strm_, Z_NO_FLUSH);
+        assert(ret != Z_STREAM_ERROR);
+        switch (ret) {
+        case Z_NEED_DICT:
+        case Z_DATA_ERROR:
+        case Z_MEM_ERROR: inflateEnd(&strm_); return false;
+        }
 
-    std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
-    while (strm_.avail_in > 0) {
-      strm_.avail_out = static_cast<uInt>(buff.size());
-      strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
-
-      ret = inflate(&strm_, Z_NO_FLUSH);
-      assert(ret != Z_STREAM_ERROR);
-      switch (ret) {
-      case Z_NEED_DICT:
-      case Z_DATA_ERROR:
-      case Z_MEM_ERROR: inflateEnd(&strm_); return false;
+        if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
+          return false;
+        }
       }
 
-      if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
-        return false;
-      }
-    }
+      if (ret != Z_OK && ret != Z_STREAM_END) return false;
 
-    return ret == Z_OK || ret == Z_STREAM_END;
+    } while (data_length > 0);
+
+    return true;
   }
 
 private:

+ 46 - 0
test/test.cc

@@ -2868,6 +2868,52 @@ TEST(GzipDecompressor, ChunkedDecompression) {
   }
   ASSERT_EQ(data, decompressed_data);
 }
+
+TEST(GzipDecompressor, LargeRandomData) {
+
+  // Round-trip test: build random input larger than 4 GiB (random data is
+  // hard to compress, so the compressed output is expected to be large too).
+  std::random_device seed_gen;
+  std::mt19937 random(seed_gen()); // seeded from std::random_device
+  constexpr auto large_size_byte = 4294967296UL;            // 4 GiB
+  constexpr auto data_size = large_size_byte + 134217728UL; // 4 GiB + 128 MiB
+  std::vector<std::uint32_t> data(data_size / sizeof(std::uint32_t));
+  std::generate(data.begin(), data.end(), [&]() { return random(); });
+
+  // Compress the whole >4 GiB buffer in one call, appending each output chunk.
+  std::string compressed_data;
+  compressed_data.reserve(large_size_byte + 536870912UL); // 4 GiB + 512 MiB
+  httplib::detail::gzip_compressor compressor;
+  auto result = compressor.compress(reinterpret_cast<const char *>(data.data()),
+                                    data.size() * sizeof(std::uint32_t), true,
+                                    [&](const char *data, size_t size) {
+                                      compressed_data.insert(
+                                          compressed_data.size(), data, size);
+                                      return true; // always accept the chunk
+                                    });
+  ASSERT_TRUE(result);
+
+  // FIXME: the compressed size is expected to exceed 4 GiB, but that is not
+  // guaranteed, so the size check below stays disabled.
+  // ASSERT_TRUE(compressed_data.size() >= large_size_byte);
+
+  // Decompress everything in one call, appending each output chunk.
+  std::string decompressed_data;
+  decompressed_data.reserve(data_size);
+  httplib::detail::gzip_decompressor decompressor;
+  result = decompressor.decompress(
+      compressed_data.data(), compressed_data.size(),
+      [&](const char *data, size_t size) {
+        decompressed_data.insert(decompressed_data.size(), data, size);
+        return true; // always accept the chunk
+      });
+  ASSERT_TRUE(result);
+
+  // The round trip must reproduce the original size and bytes exactly.
+  ASSERT_EQ(data_size, decompressed_data.size());
+  ASSERT_TRUE(std::memcmp(data.data(), decompressed_data.data(), data_size) ==
+              0);
+}
 #endif
 
 #ifdef CPPHTTPLIB_BROTLI_SUPPORT