| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916 |
- //
- // URI.cpp
- //
- // $Id: //poco/1.4/Foundation/src/URI.cpp#5 $
- //
- // Library: Foundation
- // Package: URI
- // Module: URI
- //
- // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
- // and Contributors.
- //
- // SPDX-License-Identifier: BSL-1.0
- //
- #include "Poco/URI.h"
- #include "Poco/NumberFormatter.h"
- #include "Poco/Exception.h"
- #include "Poco/String.h"
- #include "Poco/NumberParser.h"
- #include "Poco/Path.h"
- namespace Poco {
- const std::string URI::RESERVED_PATH = "?#";
- const std::string URI::RESERVED_QUERY = "?#/";
- const std::string URI::RESERVED_FRAGMENT = "";
- const std::string URI::ILLEGAL = "%<>{}|\\\"^`";
- URI::URI():
- _port(0)
- {
- }
- URI::URI(const std::string& uri):
- _port(0)
- {
- parse(uri);
- }
- URI::URI(const char* uri):
- _port(0)
- {
- parse(std::string(uri));
- }
-
- URI::URI(const std::string& scheme, const std::string& pathEtc):
- _scheme(scheme),
- _port(0)
- {
- toLowerInPlace(_scheme);
- _port = getWellKnownPort();
- std::string::const_iterator beg = pathEtc.begin();
- std::string::const_iterator end = pathEtc.end();
- parsePathEtc(beg, end);
- }
-
- URI::URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc):
- _scheme(scheme)
- {
- toLowerInPlace(_scheme);
- std::string::const_iterator beg = authority.begin();
- std::string::const_iterator end = authority.end();
- parseAuthority(beg, end);
- beg = pathEtc.begin();
- end = pathEtc.end();
- parsePathEtc(beg, end);
- }
- URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query):
- _scheme(scheme),
- _path(path),
- _query(query)
- {
- toLowerInPlace(_scheme);
- std::string::const_iterator beg = authority.begin();
- std::string::const_iterator end = authority.end();
- parseAuthority(beg, end);
- }
- URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment):
- _scheme(scheme),
- _path(path),
- _query(query),
- _fragment(fragment)
- {
- toLowerInPlace(_scheme);
- std::string::const_iterator beg = authority.begin();
- std::string::const_iterator end = authority.end();
- parseAuthority(beg, end);
- }
- URI::URI(const URI& uri):
- _scheme(uri._scheme),
- _userInfo(uri._userInfo),
- _host(uri._host),
- _port(uri._port),
- _path(uri._path),
- _query(uri._query),
- _fragment(uri._fragment)
- {
- }
-
- URI::URI(const URI& baseURI, const std::string& relativeURI):
- _scheme(baseURI._scheme),
- _userInfo(baseURI._userInfo),
- _host(baseURI._host),
- _port(baseURI._port),
- _path(baseURI._path),
- _query(baseURI._query),
- _fragment(baseURI._fragment)
- {
- resolve(relativeURI);
- }
- URI::URI(const Path& path):
- _scheme("file"),
- _port(0)
- {
- Path absolutePath(path);
- absolutePath.makeAbsolute();
- _path = absolutePath.toString(Path::PATH_UNIX);
- }
- URI::~URI()
- {
- }
- URI& URI::operator = (const URI& uri)
- {
- if (&uri != this)
- {
- _scheme = uri._scheme;
- _userInfo = uri._userInfo;
- _host = uri._host;
- _port = uri._port;
- _path = uri._path;
- _query = uri._query;
- _fragment = uri._fragment;
- }
- return *this;
- }
-
- URI& URI::operator = (const std::string& uri)
- {
- clear();
- parse(uri);
- return *this;
- }
- URI& URI::operator = (const char* uri)
- {
- clear();
- parse(std::string(uri));
- return *this;
- }
- void URI::swap(URI& uri)
- {
- std::swap(_scheme, uri._scheme);
- std::swap(_userInfo, uri._userInfo);
- std::swap(_host, uri._host);
- std::swap(_port, uri._port);
- std::swap(_path, uri._path);
- std::swap(_query, uri._query);
- std::swap(_fragment, uri._fragment);
- }
- void URI::clear()
- {
- _scheme.clear();
- _userInfo.clear();
- _host.clear();
- _port = 0;
- _path.clear();
- _query.clear();
- _fragment.clear();
- }
- std::string URI::toString() const
- {
- std::string uri;
- if (isRelative())
- {
- encode(_path, RESERVED_PATH, uri);
- }
- else
- {
- uri = _scheme;
- uri += ':';
- std::string auth = getAuthority();
- if (!auth.empty() || _scheme == "file")
- {
- uri.append("//");
- uri.append(auth);
- }
- if (!_path.empty())
- {
- if (!auth.empty() && _path[0] != '/')
- uri += '/';
- encode(_path, RESERVED_PATH, uri);
- }
- else if (!_query.empty() || !_fragment.empty())
- {
- uri += '/';
- }
- }
- if (!_query.empty())
- {
- uri += '?';
- uri.append(_query);
- }
- if (!_fragment.empty())
- {
- uri += '#';
- encode(_fragment, RESERVED_FRAGMENT, uri);
- }
- return uri;
- }
- void URI::setScheme(const std::string& scheme)
- {
- _scheme = scheme;
- toLowerInPlace(_scheme);
- if (_port == 0)
- _port = getWellKnownPort();
- }
-
- void URI::setUserInfo(const std::string& userInfo)
- {
- _userInfo.clear();
- decode(userInfo, _userInfo);
- }
-
- void URI::setHost(const std::string& host)
- {
- _host = host;
- }
- unsigned short URI::getPort() const
- {
- if (_port == 0)
- return getWellKnownPort();
- else
- return _port;
- }
- void URI::setPort(unsigned short port)
- {
- _port = port;
- }
-
- std::string URI::getAuthority() const
- {
- std::string auth;
- if (!_userInfo.empty())
- {
- auth.append(_userInfo);
- auth += '@';
- }
- if (_host.find(':') != std::string::npos)
- {
- auth += '[';
- auth += _host;
- auth += ']';
- }
- else auth.append(_host);
- if (_port && !isWellKnownPort())
- {
- auth += ':';
- NumberFormatter::append(auth, _port);
- }
- return auth;
- }
-
- void URI::setAuthority(const std::string& authority)
- {
- _userInfo.clear();
- _host.clear();
- _port = 0;
- std::string::const_iterator beg = authority.begin();
- std::string::const_iterator end = authority.end();
- parseAuthority(beg, end);
- }
-
- void URI::setPath(const std::string& path)
- {
- _path.clear();
- decode(path, _path);
- }
-
- void URI::setRawQuery(const std::string& query)
- {
- _query = query;
- }
- void URI::setQuery(const std::string& query)
- {
- _query.clear();
- encode(query, RESERVED_QUERY, _query);
- }
- void URI::addQueryParameter(const std::string& param, const std::string& val)
- {
- std::string reserved(RESERVED_QUERY);
- reserved += "=&";
- if (!_query.empty()) _query += '&';
- encode(param, reserved, _query);
- _query += '=';
- encode(val, reserved, _query);
- }
- std::string URI::getQuery() const
- {
- std::string query;
- decode(_query, query);
- return query;
- }
- URI::QueryParameters URI::getQueryParameters() const
- {
- QueryParameters result;
- std::string::const_iterator it(_query.begin());
- std::string::const_iterator end(_query.end());
- while (it != end)
- {
- std::string name;
- std::string value;
- while (it != end && *it != '=' && *it != '&')
- {
- if (*it == '+')
- name += ' ';
- else
- name += *it;
- ++it;
- }
- if (it != end && *it == '=')
- {
- ++it;
- while (it != end && *it != '&')
- {
- if (*it == '+')
- value += ' ';
- else
- value += *it;
- ++it;
- }
- }
- std::string decodedName;
- std::string decodedValue;
- URI::decode(name, decodedName);
- URI::decode(value, decodedValue);
- result.push_back(std::make_pair(decodedName, decodedValue));
- if (it != end && *it == '&') ++it;
- }
- return result;
- }
- void URI::setQueryParameters(const QueryParameters& params)
- {
- _query.clear();
- for (QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
- {
- addQueryParameter(it->first, it->second);
- }
- }
- void URI::setFragment(const std::string& fragment)
- {
- _fragment.clear();
- decode(fragment, _fragment);
- }
- void URI::setPathEtc(const std::string& pathEtc)
- {
- _path.clear();
- _query.clear();
- _fragment.clear();
- std::string::const_iterator beg = pathEtc.begin();
- std::string::const_iterator end = pathEtc.end();
- parsePathEtc(beg, end);
- }
-
- std::string URI::getPathEtc() const
- {
- std::string pathEtc;
- encode(_path, RESERVED_PATH, pathEtc);
- if (!_query.empty())
- {
- pathEtc += '?';
- pathEtc += _query;
- }
- if (!_fragment.empty())
- {
- pathEtc += '#';
- encode(_fragment, RESERVED_FRAGMENT, pathEtc);
- }
- return pathEtc;
- }
- std::string URI::getPathAndQuery() const
- {
- std::string pathAndQuery;
- encode(_path, RESERVED_PATH, pathAndQuery);
- if (!_query.empty())
- {
- pathAndQuery += '?';
- pathAndQuery += _query;
- }
- return pathAndQuery;
- }
-
- void URI::resolve(const std::string& relativeURI)
- {
- URI parsedURI(relativeURI);
- resolve(parsedURI);
- }
- void URI::resolve(const URI& relativeURI)
- {
- if (!relativeURI._scheme.empty())
- {
- _scheme = relativeURI._scheme;
- _userInfo = relativeURI._userInfo;
- _host = relativeURI._host;
- _port = relativeURI._port;
- _path = relativeURI._path;
- _query = relativeURI._query;
- removeDotSegments();
- }
- else
- {
- if (!relativeURI._host.empty())
- {
- _userInfo = relativeURI._userInfo;
- _host = relativeURI._host;
- _port = relativeURI._port;
- _path = relativeURI._path;
- _query = relativeURI._query;
- removeDotSegments();
- }
- else
- {
- if (relativeURI._path.empty())
- {
- if (!relativeURI._query.empty())
- _query = relativeURI._query;
- }
- else
- {
- if (relativeURI._path[0] == '/')
- {
- _path = relativeURI._path;
- removeDotSegments();
- }
- else
- {
- mergePath(relativeURI._path);
- }
- _query = relativeURI._query;
- }
- }
- }
- _fragment = relativeURI._fragment;
- }
- bool URI::isRelative() const
- {
- return _scheme.empty();
- }
- bool URI::empty() const
- {
- return _scheme.empty() && _host.empty() && _path.empty() && _query.empty() && _fragment.empty();
- }
-
- bool URI::operator == (const URI& uri) const
- {
- return equals(uri);
- }
- bool URI::operator == (const std::string& uri) const
- {
- URI parsedURI(uri);
- return equals(parsedURI);
- }
- bool URI::operator != (const URI& uri) const
- {
- return !equals(uri);
- }
- bool URI::operator != (const std::string& uri) const
- {
- URI parsedURI(uri);
- return !equals(parsedURI);
- }
- bool URI::equals(const URI& uri) const
- {
- return _scheme == uri._scheme
- && _userInfo == uri._userInfo
- && _host == uri._host
- && getPort() == uri.getPort()
- && _path == uri._path
- && _query == uri._query
- && _fragment == uri._fragment;
- }
-
- void URI::normalize()
- {
- removeDotSegments(!isRelative());
- }
- void URI::removeDotSegments(bool removeLeading)
- {
- if (_path.empty()) return;
-
- bool leadingSlash = *(_path.begin()) == '/';
- bool trailingSlash = *(_path.rbegin()) == '/';
- std::vector<std::string> segments;
- std::vector<std::string> normalizedSegments;
- getPathSegments(segments);
- for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
- {
- if (*it == "..")
- {
- if (!normalizedSegments.empty())
- {
- if (normalizedSegments.back() == "..")
- normalizedSegments.push_back(*it);
- else
- normalizedSegments.pop_back();
- }
- else if (!removeLeading)
- {
- normalizedSegments.push_back(*it);
- }
- }
- else if (*it != ".")
- {
- normalizedSegments.push_back(*it);
- }
- }
- buildPath(normalizedSegments, leadingSlash, trailingSlash);
- }
- void URI::getPathSegments(std::vector<std::string>& segments)
- {
- getPathSegments(_path, segments);
- }
- void URI::getPathSegments(const std::string& path, std::vector<std::string>& segments)
- {
- std::string::const_iterator it = path.begin();
- std::string::const_iterator end = path.end();
- std::string seg;
- while (it != end)
- {
- if (*it == '/')
- {
- if (!seg.empty())
- {
- segments.push_back(seg);
- seg.clear();
- }
- }
- else seg += *it;
- ++it;
- }
- if (!seg.empty())
- segments.push_back(seg);
- }
- void URI::encode(const std::string& str, const std::string& reserved, std::string& encodedStr)
- {
- for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
- {
- char c = *it;
- if ((c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- c == '-' || c == '_' ||
- c == '.' || c == '~')
- {
- encodedStr += c;
- }
- else if (c <= 0x20 || c >= 0x7F || ILLEGAL.find(c) != std::string::npos || reserved.find(c) != std::string::npos)
- {
- encodedStr += '%';
- encodedStr += NumberFormatter::formatHex((unsigned) (unsigned char) c, 2);
- }
- else encodedStr += c;
- }
- }
-
- void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpace)
- {
- bool inQuery = false;
- std::string::const_iterator it = str.begin();
- std::string::const_iterator end = str.end();
- while (it != end)
- {
- char c = *it++;
- if (c == '?') inQuery = true;
- // spaces may be encoded as plus signs in the query
- if (inQuery && plusAsSpace && c == '+') c = ' ';
- else if (c == '%')
- {
- if (it == end) throw SyntaxException("URI encoding: no hex digit following percent sign", str);
- char hi = *it++;
- if (it == end) throw SyntaxException("URI encoding: two hex digits must follow percent sign", str);
- char lo = *it++;
- if (hi >= '0' && hi <= '9')
- c = hi - '0';
- else if (hi >= 'A' && hi <= 'F')
- c = hi - 'A' + 10;
- else if (hi >= 'a' && hi <= 'f')
- c = hi - 'a' + 10;
- else throw SyntaxException("URI encoding: not a hex digit");
- c *= 16;
- if (lo >= '0' && lo <= '9')
- c += lo - '0';
- else if (lo >= 'A' && lo <= 'F')
- c += lo - 'A' + 10;
- else if (lo >= 'a' && lo <= 'f')
- c += lo - 'a' + 10;
- else throw SyntaxException("URI encoding: not a hex digit");
- }
- decodedStr += c;
- }
- }
- bool URI::isWellKnownPort() const
- {
- return _port == getWellKnownPort();
- }
- unsigned short URI::getWellKnownPort() const
- {
- if (_scheme == "ftp")
- return 21;
- else if (_scheme == "ssh")
- return 22;
- else if (_scheme == "telnet")
- return 23;
- else if (_scheme == "http")
- return 80;
- else if (_scheme == "nntp")
- return 119;
- else if (_scheme == "ldap")
- return 389;
- else if (_scheme == "https")
- return 443;
- else if (_scheme == "rtsp")
- return 554;
- else if (_scheme == "sip")
- return 5060;
- else if (_scheme == "sips")
- return 5061;
- else if (_scheme == "xmpp")
- return 5222;
- else
- return 0;
- }
- void URI::parse(const std::string& uri)
- {
- std::string::const_iterator it = uri.begin();
- std::string::const_iterator end = uri.end();
- if (it == end) return;
- if (*it != '/' && *it != '.' && *it != '?' && *it != '#')
- {
- std::string scheme;
- while (it != end && *it != ':' && *it != '?' && *it != '#' && *it != '/') scheme += *it++;
- if (it != end && *it == ':')
- {
- ++it;
- if (it == end) throw SyntaxException("URI scheme must be followed by authority or path", uri);
- setScheme(scheme);
- if (*it == '/')
- {
- ++it;
- if (it != end && *it == '/')
- {
- ++it;
- parseAuthority(it, end);
- }
- else --it;
- }
- parsePathEtc(it, end);
- }
- else
- {
- it = uri.begin();
- parsePathEtc(it, end);
- }
- }
- else parsePathEtc(it, end);
- }
- void URI::parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- std::string userInfo;
- std::string part;
- while (it != end && *it != '/' && *it != '?' && *it != '#')
- {
- if (*it == '@')
- {
- userInfo = part;
- part.clear();
- }
- else part += *it;
- ++it;
- }
- std::string::const_iterator pbeg = part.begin();
- std::string::const_iterator pend = part.end();
- parseHostAndPort(pbeg, pend);
- _userInfo = userInfo;
- }
- void URI::parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- if (it == end) return;
- std::string host;
- if (*it == '[')
- {
- // IPv6 address
- ++it;
- while (it != end && *it != ']') host += *it++;
- if (it == end) throw SyntaxException("unterminated IPv6 address");
- ++it;
- }
- else
- {
- while (it != end && *it != ':') host += *it++;
- }
- if (it != end && *it == ':')
- {
- ++it;
- std::string port;
- while (it != end) port += *it++;
- if (!port.empty())
- {
- int nport = 0;
- if (NumberParser::tryParse(port, nport) && nport > 0 && nport < 65536)
- _port = (unsigned short) nport;
- else
- throw SyntaxException("bad or invalid port number", port);
- }
- else _port = getWellKnownPort();
- }
- else _port = getWellKnownPort();
- _host = host;
- toLowerInPlace(_host);
- }
- void URI::parsePath(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- std::string path;
- while (it != end && *it != '?' && *it != '#') path += *it++;
- decode(path, _path);
- }
- void URI::parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- if (it == end) return;
- if (*it != '?' && *it != '#')
- parsePath(it, end);
- if (it != end && *it == '?')
- {
- ++it;
- parseQuery(it, end);
- }
- if (it != end && *it == '#')
- {
- ++it;
- parseFragment(it, end);
- }
- }
- void URI::parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- _query.clear();
- while (it != end && *it != '#') _query += *it++;
- }
- void URI::parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end)
- {
- std::string fragment;
- while (it != end) fragment += *it++;
- decode(fragment, _fragment);
- }
- void URI::mergePath(const std::string& path)
- {
- std::vector<std::string> segments;
- std::vector<std::string> normalizedSegments;
- bool addLeadingSlash = false;
- if (!_path.empty())
- {
- getPathSegments(segments);
- bool endsWithSlash = *(_path.rbegin()) == '/';
- if (!endsWithSlash && !segments.empty())
- segments.pop_back();
- addLeadingSlash = _path[0] == '/';
- }
- getPathSegments(path, segments);
- addLeadingSlash = addLeadingSlash || (!path.empty() && path[0] == '/');
- bool hasTrailingSlash = (!path.empty() && *(path.rbegin()) == '/');
- bool addTrailingSlash = false;
- for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
- {
- if (*it == "..")
- {
- addTrailingSlash = true;
- if (!normalizedSegments.empty())
- normalizedSegments.pop_back();
- }
- else if (*it != ".")
- {
- addTrailingSlash = false;
- normalizedSegments.push_back(*it);
- }
- else addTrailingSlash = true;
- }
- buildPath(normalizedSegments, addLeadingSlash, hasTrailingSlash || addTrailingSlash);
- }
- void URI::buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash)
- {
- _path.clear();
- bool first = true;
- for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
- {
- if (first)
- {
- first = false;
- if (leadingSlash)
- _path += '/';
- else if (_scheme.empty() && (*it).find(':') != std::string::npos)
- _path.append("./");
- }
- else _path += '/';
- _path.append(*it);
- }
- if (trailingSlash)
- _path += '/';
- }
- } // namespace Poco
|