Browse Source

Added URL parser.

Branimir Karadžić 8 years ago
parent
commit
48d2b7c814
5 changed files with 283 additions and 0 deletions
  1. 56 0
      include/bx/url.h
  2. 3 0
      scripts/bx.lua
  3. 1 0
      src/amalgamated.cpp
  4. 154 0
      src/url.cpp
  5. 69 0
      tests/url_test.cpp

+ 56 - 0
include/bx/url.h

@@ -0,0 +1,56 @@
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#ifndef BX_URL_H_HEADER_GUARD
+#define BX_URL_H_HEADER_GUARD
+
+#include "string.h"
+
+namespace bx
+{
+	/// Identifies the individual components that UrlView::parse extracts from a URL.
+	struct UrlToken
+	{
+		enum Enum
+		{
+			Scheme,   ///< Text before the `://` separator.
+			UserName, ///< Text before `@`, up to `:` when a password follows.
+			Password, ///< Text between `:` and `@` in the user info part.
+			Host,     ///< Host name or address.
+			Port,     ///< Text after the `:` that follows the host.
+			Path,     ///< Path, starting at the first `/` after the host.
+			Query,    ///< Text after `?`, up to `#` or the end of the URL.
+			Fragment, ///< Text after `#`.
+
+			Count     ///< Number of token kinds; sizes the token array in UrlView.
+		};
+	};
+
+	/// Holds one StringView per UrlToken for the most recently parsed URL. NOTE(review): the views presumably reference the parsed string's memory rather than copying it - confirm against StringView.
+	class UrlView
+	{
+	public:
+		/// Constructs the view; the token array is default-constructed.
+		UrlView();
+
+		/// Clears every token.
+		void clear();
+
+		/// Clears the view, then splits `_url` into tokens. Returns false when `_url` has neither `://` nor `/`, when the scheme fails the `isAlpha` check, or when `#` appears before `?`.
+		bool parse(const StringView& _url);
+
+		/// Returns the view stored for `_token`. `_token` must be less than UrlToken::Count; no range check is performed.
+		const StringView& get(UrlToken::Enum _token) const;
+
+	private:
+		StringView m_tokens[UrlToken::Count]; ///< One view per UrlToken::Enum value, indexed by the enum.
+	};
+
+	/// Percent-encodes zero-terminated `_str` into `_buf`, which holds `_bufSize` bytes including the zero terminator (pass a size of at least 1). Alphanumerics and `-`, `_`, `.`, `~` are copied unchanged; other bytes become `%XX` with upper-case hex digits.
+	void urlEncode(const char* _str, char* _buf, uint32_t _bufSize);
+
+} // namespace bx
+
+#endif // BX_URL_H_HEADER_GUARD

+ 3 - 0
scripts/bx.lua

@@ -31,8 +31,10 @@ project "bx"
 			path.join(BX_DIR, "src/crtnone.cpp"),
 			path.join(BX_DIR, "src/debug.cpp"),
 			path.join(BX_DIR, "src/dtoa.cpp"),
+			path.join(BX_DIR, "src/easing.cpp"),
 			path.join(BX_DIR, "src/file.cpp"),
 			path.join(BX_DIR, "src/filepath.cpp"),
+			path.join(BX_DIR, "src/hash.cpp"),
 			path.join(BX_DIR, "src/math.cpp"),
 			path.join(BX_DIR, "src/mutex.cpp"),
 			path.join(BX_DIR, "src/os.cpp"),
@@ -42,6 +44,7 @@ project "bx"
 			path.join(BX_DIR, "src/string.cpp"),
 			path.join(BX_DIR, "src/thread.cpp"),
 			path.join(BX_DIR, "src/timer.cpp"),
+			path.join(BX_DIR, "src/url.cpp"),
 		}
 	else
 		excludes {

+ 1 - 0
src/amalgamated.cpp

@@ -22,3 +22,4 @@
 #include "string.cpp"
 #include "thread.cpp"
 #include "timer.cpp"
+#include "url.cpp"

+ 154 - 0
src/url.cpp

@@ -0,0 +1,154 @@
+/*
+ * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bnet#license-bsd-2-clause
+ */
+
+#include <bx/url.h>
+
+namespace bx
+{
+	UrlView::UrlView() // Nothing to do here: the m_tokens array is default-constructed by the language.
+	{
+	}
+
+	/// Clears every token slot so the view no longer describes any URL.
+	void UrlView::clear()
+	{
+		StringView* token     = m_tokens;
+		StringView* const end = m_tokens + UrlToken::Count;
+
+		// Walk the token array front to back, clearing one slot at a time.
+		while (token != end)
+		{
+			token->clear();
+			++token;
+		}
+	}
+
+	/// Splits `_url` into scheme, user name, password, host, port, path, query
+	/// and fragment tokens.
+	///
+	/// The accepted shape is roughly
+	/// `[scheme://][user[:password]@]host[:port][/path[?query][#fragment]]`.
+	///
+	/// Parsing fails when:
+	///  - the input contains neither `://` nor a `/` after the authority start
+	///    (for example a bare host name),
+	///  - the text before `://` does not pass the `isAlpha` check,
+	///  - a `#` appears before a `?` in the path part.
+	///
+	/// @param _url URL text to tokenize.
+	/// @returns true on success. On failure every token is left cleared.
+	bool UrlView::parse(const StringView& _url)
+	{
+		// Drop the result of any previous parse.
+		clear();
+
+		const char* start = _url.getPtr();
+		const char* term  = _url.getTerm();
+
+		// The authority (user info, host, port) starts right after "://" when a
+		// scheme separator exists, otherwise at the very beginning of the input.
+		const char* schemeEnd = strFind(StringView(start, term), "://");
+		const char* hostStart = NULL != schemeEnd ? schemeEnd+3 : start;
+		const char* pathStart = strFind(StringView(hostStart, term), '/');
+
+		// Without a scheme separator and without a path there is nothing that
+		// distinguishes the input from arbitrary text.
+		if (NULL == schemeEnd
+		&&  NULL == pathStart)
+		{
+			return false;
+		}
+
+		if (NULL != schemeEnd
+		&& (NULL == pathStart || pathStart > schemeEnd) )
+		{
+			StringView scheme(start, schemeEnd);
+
+			if (!isAlpha(scheme) )
+			{
+				// No token has been stored yet, so the view is still cleared.
+				return false;
+			}
+
+			m_tokens[UrlToken::Scheme].set(scheme);
+		}
+
+		if (NULL != pathStart)
+		{
+			const char* queryStart    = strFind(StringView(pathStart, term), '?');
+			const char* fragmentStart = strFind(StringView(pathStart, term), '#');
+
+			// A '#' located before a '?' is rejected. When there is no '?' at
+			// all, queryStart is NULL and this comparison is false.
+			if (NULL != fragmentStart
+			&&  fragmentStart < queryStart)
+			{
+				// The scheme may already have been stored above; clear it so a
+				// failed parse never leaves a partially filled view behind.
+				clear();
+				return false;
+			}
+
+			// The path ends at whichever delimiter comes first: '?', '#', or
+			// the end of the input.
+			m_tokens[UrlToken::Path].set(pathStart
+				, NULL != queryStart    ? queryStart
+				: NULL != fragmentStart ? fragmentStart
+				: term
+				);
+
+			if (NULL != queryStart)
+			{
+				// Skip the '?' itself; the query runs up to '#' or the end.
+				m_tokens[UrlToken::Query].set(queryStart+1
+					, NULL != fragmentStart ? fragmentStart
+					: term
+					);
+			}
+
+			if (NULL != fragmentStart)
+			{
+				// Skip the '#' itself; the fragment runs to the end.
+				m_tokens[UrlToken::Fragment].set(fragmentStart+1, term);
+			}
+
+			// Everything that follows looks only at the authority part, which
+			// ends where the path begins.
+			term = pathStart;
+		}
+
+		// Optional "user[:password]@" prefix of the authority.
+		const char* userPassEnd   = strFind(StringView(hostStart, term), '@');
+		const char* userPassStart = NULL != userPassEnd ? hostStart : NULL;
+		hostStart = NULL != userPassEnd ? userPassEnd+1 : hostStart;
+
+		// Optional ":port" suffix of the authority.
+		const char* portStart = strFind(StringView(hostStart, term), ':');
+
+		m_tokens[UrlToken::Host].set(hostStart, NULL != portStart ? portStart : term);
+
+		if (NULL != portStart)
+		{
+			m_tokens[UrlToken::Port].set(portStart+1, term);
+		}
+
+		if (NULL != userPassStart)
+		{
+			// Inside the user info, the first ':' separates user name and password.
+			const char* passStart = strFind(StringView(userPassStart, userPassEnd), ':');
+
+			m_tokens[UrlToken::UserName].set(userPassStart
+				, NULL != passStart ? passStart
+				: userPassEnd
+				);
+
+			if (NULL != passStart)
+			{
+				m_tokens[UrlToken::Password].set(passStart+1, userPassEnd);
+			}
+		}
+
+		return true;
+	}
+
+	/// Returns the view stored for `_token`.
+	///
+	/// No range check is performed, so `_token` must be one of the UrlToken::Enum
+	/// values below UrlToken::Count.
+	const StringView& UrlView::get(UrlToken::Enum _token) const
+	{
+		const StringView& token = m_tokens[_token];
+		return token;
+	}
+
+	/// Returns the upper-case hexadecimal digit for the low four bits of `_nibble`.
+	/// Higher bits are masked off, so callers may pass an unmasked (and possibly
+	/// sign-extended) value.
+	static char toHex(char _nibble)
+	{
+		return "0123456789ABCDEF"[_nibble&0xf];
+	}
+
+	/// Percent-encodes the zero-terminated string `_str` into `_buf`.
+	///
+	/// Alphanumeric characters and `-`, `_`, `.`, `~` are copied unchanged; every
+	/// other byte is written as `%XX` using upper-case hexadecimal digits.
+	/// See https://en.wikipedia.org/wiki/Percent-encoding
+	///
+	/// The output is always zero-terminated when `_bufSize` is non-zero. If the
+	/// buffer is too small, encoding stops at the first character that no longer
+	/// fits, so the result is a clean prefix of the full encoding and never
+	/// contains a partial escape sequence.
+	///
+	/// @param _str     Zero-terminated input string.
+	/// @param _buf     Destination buffer.
+	/// @param _bufSize Size of `_buf` in bytes, including the zero terminator.
+	///                 When it is 0 nothing is written.
+	void urlEncode(const char* _str, char* _buf, uint32_t _bufSize)
+	{
+		if (0 == _bufSize)
+		{
+			// No room even for the terminator. Decrementing the size here would
+			// wrap around and disable every bounds check below.
+			return;
+		}
+
+		const uint32_t capacity = _bufSize - 1; // keep one byte for the zero terminator
+
+		uint32_t ii = 0;
+		for (char ch = *_str++
+			; '\0' != ch && ii < capacity
+			; ch = *_str++
+			)
+		{
+			if (isAlphaNum(ch)
+			||  ch == '-'
+			||  ch == '_'
+			||  ch == '.'
+			||  ch == '~')
+			{
+				_buf[ii++] = ch;
+			}
+			else
+			{
+				if (capacity - ii < 3)
+				{
+					// The three-character escape does not fit. Stop here rather
+					// than skipping this character and appending later ones.
+					break;
+				}
+
+				_buf[ii++] = '%';
+				_buf[ii++] = toHex(ch>>4); // high nibble; toHex masks the sign-extended bits
+				_buf[ii++] = toHex(ch);    // low nibble
+			}
+		}
+
+		_buf[ii] = '\0';
+	}
+
+} // namespace bx

+ 69 - 0
tests/url_test.cpp

@@ -0,0 +1,69 @@
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#include "test.h"
+#include <bx/string.h>
+#include <bx/url.h>
+
+struct UrlTest // One table-driven test case for bx::UrlView::parse.
+{
+	bool result; // Expected return value of parse() for `url`.
+	const char* url; // Input URL handed to parse().
+	const char* tokens[bx::UrlToken::Count]; // Expected text of each token, indexed by bx::UrlToken::Enum; only checked when parse() succeeds.
+};
+
+// URLs paired with the tokens bx::UrlView::parse is expected to produce.
+// Token order follows bx::UrlToken::Enum:
+// scheme, user name, password, host, port, path, query, fragment.
+static const UrlTest s_urlTest[] =
+{
+	// Every component present.
+	{ true
+	, "scheme://username:password@host.rs:80/this/is/path/index.php?query=\"value\"#fragment",
+	{ "scheme", "username", "password", "host.rs", "80", "/this/is/path/index.php", "query=\"value\"", "fragment" }
+	},
+	// Scheme, host and root path only.
+	{ true
+	, "scheme://host.rs/",
+	{ "scheme", "", "", "host.rs", "", "/", "", "" },
+	},
+	// Host followed by a port.
+	{ true
+	, "scheme://host.rs:1389/",
+	{ "scheme", "", "", "host.rs", "1389", "/", "", "" },
+	},
+	// No scheme: the input starts directly with the host.
+	{ true
+	, "host.rs/abvgd.html",
+	{ "", "", "", "host.rs", "", "/abvgd.html", "", "" },
+	},
+	// Dotted numeric address as host.
+	{ true
+	, "https://192.168.0.1:8080/",
+	{ "https", "", "", "192.168.0.1", "8080", "/", "", "" },
+	},
+
+	// Empty host; the ':' inside the path must not be taken for a port separator.
+	{ true
+	, "file:///d:/tmp/archive.tar.gz",
+	{ "file", "", "", "", "", "/d:/tmp/archive.tar.gz", "", "" },
+	},
+};
+
+// Parses every entry of s_urlTest and, for entries that parse successfully,
+// compares each resulting token with the expected text from the table.
+TEST_CASE("tokenizeUrl", "")
+{
+	bx::UrlView view;
+
+	const UrlTest* const end = s_urlTest + BX_COUNTOF(s_urlTest);
+	for (const UrlTest* test = s_urlTest; test != end; ++test)
+	{
+		const bool parsed = view.parse(test->url);
+		REQUIRE(test->result == parsed);
+
+		if (!parsed)
+		{
+			// Tokens are only compared after a successful parse.
+			continue;
+		}
+
+		for (uint32_t idx = 0; idx < bx::UrlToken::Count; ++idx)
+		{
+			const bx::UrlToken::Enum token = bx::UrlToken::Enum(idx);
+
+			// Debugging aid, kept disabled:
+//			char tmp[1024];
+//			strCopy(tmp, BX_COUNTOF(tmp), view.get(token) );
+//			printf("`%s`, expected: `%s`\n", tmp, test->tokens[idx]);
+
+			REQUIRE(0 == bx::strCmp(test->tokens[idx], view.get(token) ) );
+		}
+	}
+}