Browse Source

Work around incompatibility in <regex> in libc++

libc++ (the implementation of the C++ standard library usually used by
Clang) throws an exception for the regex used by parse_headers before
this patch for certain strings. Work around this by simplifying the
regex and parsing the header lines "by hand" partially. I have repro'd
this problem with Xcode 11.1 which I believe uses libc++ version 8.

This may be a bug in libc++ as I can't see why the regex would result in
asymptotic run-time complexity for any strings. However, it may take a
while for libc++ to be fixed and for everyone to migrate to it, so it
makes sense to work around it in this codebase for now.
Matthew DeVore 6 years ago
parent
commit
bc9251ea49
2 changed files with 72 additions and 21 deletions
  1. 17 3
      httplib.h
  2. 55 18
      test/test.cc

+ 17 - 3
httplib.h

@@ -1566,7 +1566,7 @@ inline bool read_headers(Stream &strm, Headers &headers) {
   // the left or right side of the header value:
   //  - https://stackoverflow.com/questions/50179659/
   //  - https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html
-  static std::regex re(R"((.+?):[\t ]*(.+?)[\t ]*\r\n)");
+  static std::regex re(R"((.+?):[\t ]*(.+))");
 
   const auto bufsiz = 2048;
   char buf[bufsiz];
@@ -1575,9 +1575,23 @@ inline bool read_headers(Stream &strm, Headers &headers) {
 
   for (;;) {
     if (!line_reader.getline()) { return false; }
-    if (!strcmp(line_reader.ptr(), "\r\n")) { break; }
+    const char *end = line_reader.ptr() + line_reader.size();
+    auto erase_last_char = [&](char c) {
+      if (line_reader.ptr() == end || end[-1] != c) {
+        return false;
+      }
+      end--;
+      return true;
+    };
+    if (!erase_last_char('\n')) { continue; }
+    if (!erase_last_char('\r')) { continue; }
+
+    // Blank line indicates end of headers.
+    if (line_reader.ptr() == end) { break; }
+
+    while (erase_last_char(' ') || erase_last_char('\t')) {}
     std::cmatch m;
-    if (std::regex_match(line_reader.ptr(), m, re)) {
+    if (std::regex_match(line_reader.ptr(), end, m, re)) {
       auto key = std::string(m[1]);
       auto val = std::string(m[2]);
       headers.emplace(key, val);

+ 55 - 18
test/test.cc

@@ -1766,6 +1766,30 @@ TEST_F(ServerTest, MultipartFormDataGzip) {
 }
 #endif
 
+// Sends a raw request to a server listening at HOST:PORT.
+static bool send_request(time_t read_timeout_sec, const std::string& req) {
+  auto client_sock =
+      detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5);
+
+  if (client_sock == INVALID_SOCKET) { return false; }
+
+  return detail::process_and_close_socket(
+      true, client_sock, 1, read_timeout_sec, 0,
+      [&](Stream& strm, bool /*last_connection*/,
+          bool &/*connection_close*/) -> bool {
+        if (req.size() !=
+            static_cast<size_t>(strm.write(req.data(), req.size()))) {
+          return false;
+        }
+
+        char buf[512];
+
+        detail::stream_line_reader line_reader(strm, buf, sizeof(buf));
+        while (line_reader.getline()) {}
+        return true;
+      });
+}
+
 TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) {
   Server svr;
   std::string header_value;
@@ -1782,34 +1806,47 @@ TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) {
 
   // Only space and horizontal tab are whitespace. Make sure other whitespace-
   // like characters are not treated the same - use vertical tab and escape.
-  auto client_sock =
-      detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5);
-  ASSERT_TRUE(client_sock != INVALID_SOCKET);
   const std::string req =
       "GET /validate-ws-in-headers HTTP/1.1\r\n"
       "foo: \t \v bar \e\t \r\n"
       "Connection: close\r\n"
       "\r\n";
 
-  bool process_ok = detail::process_and_close_socket(
-      true, client_sock, 1, 5, 0,
-      [&](Stream& strm, bool /*last_connection*/,
-          bool &/*connection_close*/) -> bool {
-        if (req.size() !=
-            static_cast<size_t>(strm.write(req.data(), req.size()))) {
-          return false;
-        }
+  ASSERT_TRUE(send_request(5, req));
+  svr.stop();
+  t.join();
+  EXPECT_EQ(header_value, "\v bar \e");
+}
 
-        char buf[512];
+TEST(ServerRequestParsingTest, ReadHeadersRegexComplexity) {
+  Server svr;
+  svr.Get("/hi",
+          [&](const Request & /*req*/, Response &res) {
+            res.set_content("ok", "text/plain");
+          });
 
-        detail::stream_line_reader line_reader(strm, buf, sizeof(buf));
-        while (line_reader.getline()) {}
-        return true;
-      });
-  ASSERT_TRUE(process_ok);
+  // Server read timeout must be longer than the client read timeout for the
+  // bug to reproduce, probably to force the server to process a request
+  // without a trailing blank line.
+  const time_t client_read_timeout_sec = 1;
+  svr.set_read_timeout(client_read_timeout_sec + 1, 0);
+  bool listen_thread_ok = false;
+  thread t = thread([&] { listen_thread_ok = svr.listen(HOST, PORT); });
+  while (!svr.is_running()) {
+    msleep(1);
+  }
+
+  // A certain header line causes an exception if the header property is parsed
+  // naively with a single regex. This occurs with libc++ but not libstdc++.
+  const std::string req =
+      "GET /hi HTTP/1.1\r\n"
+      " :                                                                      "
+      "                                                                       ";
+
+  ASSERT_TRUE(send_request(client_read_timeout_sec, req));
   svr.stop();
   t.join();
-  EXPECT_EQ(header_value, "\v bar \e");
+  EXPECT_TRUE(listen_thread_ok);
 }
 
 class ServerTestWithAI_PASSIVE : public ::testing::Test {