URL.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. /*
  2. * This source file is part of RmlUi, the HTML/CSS Interface Middleware
  3. *
  4. * For the latest information, see http://github.com/mikke89/RmlUi
  5. *
  6. * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
  7. * Copyright (c) 2019-2023 The RmlUi Team, and contributors
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining a copy
  10. * of this software and associated documentation files (the "Software"), to deal
  11. * in the Software without restriction, including without limitation the rights
  12. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13. * copies of the Software, and to permit persons to whom the Software is
  14. * furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included in
  17. * all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. * THE SOFTWARE.
  26. *
  27. */
  28. #include "../../Include/RmlUi/Core/URL.h"
  29. #include "../../Include/RmlUi/Core/Log.h"
  30. #include "../../Include/RmlUi/Core/StringUtilities.h"
  31. #include <stdio.h>
  32. #include <string.h>
  33. namespace Rml {
  // Protocol that URL::SetURL assigns when the URL string contains no ':' separator.
  const char* DEFAULT_PROTOCOL = "file";
  35. URL::URL()
  36. {
  37. port = 0;
  38. url_dirty = false;
  39. }
  40. URL::URL(const String& _url)
  41. {
  42. port = 0;
  43. RMLUI_VERIFY(SetURL(_url));
  44. }
  45. URL::URL(const char* _url)
  46. {
  47. port = 0;
  48. RMLUI_VERIFY(SetURL(_url));
  49. }
  50. URL::~URL() {}
  51. bool URL::SetURL(const String& _url)
  52. {
  53. url_dirty = false;
  54. url = _url;
  55. // Make sure an Empty URL is completely Empty.
  56. if (url.empty())
  57. {
  58. protocol.clear();
  59. login.clear();
  60. password.clear();
  61. host.clear();
  62. port = 0;
  63. path.clear();
  64. file_name.clear();
  65. extension.clear();
  66. return true;
  67. }
  68. // Find the protocol. This consists of the string appearing before the
  69. // '://' token (ie, file://, http://).
  70. const char* host_begin = strchr(_url.c_str(), ':');
  71. if (nullptr != host_begin)
  72. {
  73. protocol = String(_url.c_str(), host_begin);
  74. if (0 != strncmp(host_begin, "://", 3))
  75. {
  76. char malformed_terminator[4] = {0, 0, 0, 0};
  77. strncpy(malformed_terminator, host_begin, 3);
  78. Log::Message(Log::LT_ERROR, "Malformed protocol identifier found in URL %s; expected %s://, found %s%s.\n", _url.c_str(),
  79. protocol.c_str(), protocol.c_str(), malformed_terminator);
  80. return false;
  81. }
  82. host_begin += 3;
  83. }
  84. else
  85. {
  86. protocol = DEFAULT_PROTOCOL;
  87. host_begin = _url.c_str();
  88. }
  89. // We only want to look for a host if a protocol was specified.
  90. const char* path_begin;
  91. if (host_begin != _url.c_str())
  92. {
  93. // Find the host. This is the string appearing after the protocol or after
  94. // the username:password combination, and terminated either with a colon,
  95. // if a port is specified, or a forward slash if there is no port.
  96. // Check for a login pair
  97. const char* at_symbol = strchr(host_begin, '@');
  98. if (at_symbol)
  99. {
  100. String login_password;
  101. login_password = String(host_begin, at_symbol);
  102. host_begin = at_symbol + 1;
  103. const char* password_ptr = strchr(login_password.c_str(), ':');
  104. if (password_ptr)
  105. {
  106. login = String(login_password.c_str(), password_ptr);
  107. password = String(password_ptr + 1);
  108. }
  109. else
  110. {
  111. login = login_password;
  112. }
  113. }
  114. // Get the host portion
  115. path_begin = strchr(host_begin, '/');
  116. // Search for the colon in the host name, which will indicate a port.
  117. const char* port_begin = strchr(host_begin, ':');
  118. if (nullptr != port_begin && (nullptr == path_begin || port_begin < path_begin))
  119. {
  120. if (1 != sscanf(port_begin, ":%d", &port))
  121. {
  122. Log::Message(Log::LT_ERROR, "Malformed port number found in URL %s.\n", _url.c_str());
  123. return false;
  124. }
  125. host = String(host_begin, port_begin);
  126. // Don't continue if there is no path.
  127. if (nullptr == path_begin)
  128. {
  129. return true;
  130. }
  131. // Increment the path string past the trailing slash.
  132. ++path_begin;
  133. }
  134. else
  135. {
  136. port = -1;
  137. if (nullptr == path_begin)
  138. {
  139. host = host_begin;
  140. return true;
  141. }
  142. else
  143. {
  144. // Assign the host name, then increment the path string past the
  145. // trailing slash.
  146. host = String(host_begin, path_begin);
  147. ++path_begin;
  148. }
  149. }
  150. }
  151. else
  152. {
  153. path_begin = _url.c_str();
  154. }
  155. // Check for parameters
  156. String path_segment;
  157. const char* parameters = strchr(path_begin, '?');
  158. if (parameters)
  159. {
  160. // Pull the path segment out, so further processing doesn't read the parameters
  161. path_segment = String(path_begin, parameters);
  162. path_begin = path_segment.c_str();
  163. // Loop through all parameters, loading them
  164. StringList parameter_list;
  165. StringUtilities::ExpandString(parameter_list, parameters + 1, '&');
  166. for (size_t i = 0; i < parameter_list.size(); i++)
  167. {
  168. // Split into key and value
  169. StringList key_value;
  170. StringUtilities::ExpandString(key_value, parameter_list[i], '=');
  171. key_value[0] = UrlDecode(key_value[0]);
  172. if (key_value.size() == 2)
  173. this->parameters[key_value[0]] = UrlDecode(key_value[1]);
  174. else
  175. this->parameters[key_value[0]] = "";
  176. }
  177. }
  178. // Find the path. This is the string appearing after the host, terminated
  179. // by the last forward slash.
  180. const char* file_name_begin = strrchr(path_begin, '/');
  181. if (nullptr == file_name_begin)
  182. {
  183. // No path!
  184. file_name_begin = path_begin;
  185. path = "";
  186. }
  187. else
  188. {
  189. // Copy the path including the trailing slash.
  190. path = String(path_begin, ++file_name_begin);
  191. // Normalise the path, stripping any ../'s from it
  192. size_t parent_dir_pos = String::npos;
  193. while ((parent_dir_pos = path.find("/../")) != String::npos && parent_dir_pos != 0)
  194. {
  195. // Find the start of the parent directory.
  196. size_t parent_dir_start_pos = path.rfind('/', parent_dir_pos - 1);
  197. if (parent_dir_start_pos == String::npos)
  198. parent_dir_start_pos = 0;
  199. else
  200. parent_dir_start_pos += 1;
  201. // Strip out the parent dir and the /..
  202. path.erase(parent_dir_start_pos, parent_dir_pos - parent_dir_start_pos + 4);
  203. // We've altered the URL, mark it dirty
  204. url_dirty = true;
  205. }
  206. }
  207. // Find the file name. This is the string after the trailing slash of the
  208. // path, and just before the extension.
  209. const char* extension_begin = strrchr(file_name_begin, '.');
  210. if (nullptr == extension_begin)
  211. {
  212. file_name = file_name_begin;
  213. extension = "";
  214. }
  215. else
  216. {
  217. file_name = String(file_name_begin, extension_begin);
  218. extension = extension_begin + 1;
  219. }
  220. return true;
  221. }
  222. const String& URL::GetURL() const
  223. {
  224. if (url_dirty)
  225. ConstructURL();
  226. return url;
  227. }
  228. bool URL::SetProtocol(const String& _protocol)
  229. {
  230. protocol = _protocol;
  231. url_dirty = true;
  232. return true;
  233. }
  234. const String& URL::GetProtocol() const
  235. {
  236. return protocol;
  237. }
  238. bool URL::SetLogin(const String& _login)
  239. {
  240. login = _login;
  241. url_dirty = true;
  242. return true;
  243. }
  244. const String& URL::GetLogin() const
  245. {
  246. return login;
  247. }
  248. bool URL::SetPassword(const String& _password)
  249. {
  250. password = _password;
  251. url_dirty = true;
  252. return true;
  253. }
  254. const String& URL::GetPassword() const
  255. {
  256. return password;
  257. }
  258. bool URL::SetHost(const String& _host)
  259. {
  260. host = _host;
  261. url_dirty = true;
  262. return true;
  263. }
  264. const String& URL::GetHost() const
  265. {
  266. return host;
  267. }
  268. bool URL::SetPort(int _port)
  269. {
  270. port = _port;
  271. url_dirty = true;
  272. return true;
  273. }
  274. int URL::GetPort() const
  275. {
  276. return port;
  277. }
  278. bool URL::SetPath(const String& _path)
  279. {
  280. path = _path;
  281. url_dirty = true;
  282. return true;
  283. }
  284. bool URL::PrefixPath(const String& prefix)
  285. {
  286. // If there's no trailing slash on the end of the prefix, add one.
  287. if (!prefix.empty() && prefix[prefix.size() - 1] != '/')
  288. path = prefix + "/" + path;
  289. else
  290. path = prefix + path;
  291. url_dirty = true;
  292. return true;
  293. }
  294. const String& URL::GetPath() const
  295. {
  296. return path;
  297. }
  298. bool URL::SetFileName(const String& _file_name)
  299. {
  300. file_name = _file_name;
  301. url_dirty = true;
  302. return true;
  303. }
  304. const String& URL::GetFileName() const
  305. {
  306. return file_name;
  307. }
  308. bool URL::SetExtension(const String& _extension)
  309. {
  310. extension = _extension;
  311. url_dirty = true;
  312. return true;
  313. }
  314. const String& URL::GetExtension() const
  315. {
  316. return extension;
  317. }
  318. const URL::Parameters& URL::GetParameters() const
  319. {
  320. return parameters;
  321. }
  322. void URL::SetParameter(const String& key, const String& value)
  323. {
  324. parameters[key] = value;
  325. url_dirty = true;
  326. }
  327. void URL::SetParameters(const Parameters& _parameters)
  328. {
  329. parameters = _parameters;
  330. url_dirty = true;
  331. }
  332. void URL::ClearParameters()
  333. {
  334. parameters.clear();
  335. }
  336. String URL::GetPathedFileName() const
  337. {
  338. String pathed_file_name = path;
  339. // Append the file name.
  340. pathed_file_name += file_name;
  341. // Append the extension.
  342. if (!extension.empty())
  343. {
  344. pathed_file_name += ".";
  345. pathed_file_name += extension;
  346. }
  347. return pathed_file_name;
  348. }
  349. String URL::GetQueryString() const
  350. {
  351. String query_string;
  352. int count = 0;
  353. for (Parameters::const_iterator itr = parameters.begin(); itr != parameters.end(); ++itr)
  354. {
  355. query_string += (count == 0) ? "" : "&";
  356. query_string += UrlEncode((*itr).first);
  357. query_string += "=";
  358. query_string += UrlEncode((*itr).second);
  359. count++;
  360. }
  361. return query_string;
  362. }
  363. bool URL::operator<(const URL& rhs) const
  364. {
  365. if (url_dirty)
  366. ConstructURL();
  367. if (rhs.url_dirty)
  368. rhs.ConstructURL();
  369. return url < rhs.url;
  370. }
  371. void URL::ConstructURL() const
  372. {
  373. url = "";
  374. // Append the protocol.
  375. if (!protocol.empty() && !host.empty())
  376. {
  377. url = protocol;
  378. url += "://";
  379. }
  380. // Append login and password
  381. if (!login.empty())
  382. {
  383. url += login;
  384. if (!password.empty())
  385. {
  386. url += ":";
  387. url += password;
  388. }
  389. url += "@";
  390. }
  391. RMLUI_ASSERTMSG(password.empty() || (!password.empty() && !login.empty()), "Can't have a password without a login!");
  392. // Append the host.
  393. url += host;
  394. // Only check ports if there is some host/protocol part
  395. if (!url.empty())
  396. {
  397. if (port > 0)
  398. {
  399. RMLUI_ASSERTMSG(!host.empty(), "Can't have a port without a host!");
  400. constexpr size_t port_buffer_size = 16;
  401. char port_string[port_buffer_size];
  402. snprintf(port_string, port_buffer_size, ":%d/", port);
  403. url += port_string;
  404. }
  405. else
  406. {
  407. url += "/";
  408. }
  409. }
  410. // Append the path.
  411. if (!path.empty())
  412. {
  413. url += path;
  414. }
  415. // Append the file name.
  416. url += file_name;
  417. // Append the extension.
  418. if (!extension.empty())
  419. {
  420. url += ".";
  421. url += extension;
  422. }
  423. // Append parameters
  424. if (!parameters.empty())
  425. {
  426. url += "?";
  427. url += GetQueryString();
  428. }
  429. url_dirty = false;
  430. }
  431. String URL::UrlEncode(const String& value)
  432. {
  433. String encoded;
  434. constexpr size_t hex_buffer_size = 4;
  435. char hex[hex_buffer_size] = {0, 0, 0, 0};
  436. encoded.clear();
  437. const char* value_c = value.c_str();
  438. for (String::size_type i = 0; value_c[i]; i++)
  439. {
  440. char c = value_c[i];
  441. if (IsUnreservedChar(c))
  442. encoded += c;
  443. else
  444. {
  445. snprintf(hex, hex_buffer_size, "%%%02X", c);
  446. encoded += hex;
  447. }
  448. }
  449. return encoded;
  450. }
  451. String URL::UrlDecode(const String& value)
  452. {
  453. String decoded;
  454. decoded.clear();
  455. const char* value_c = value.c_str();
  456. String::size_type value_len = value.size();
  457. for (String::size_type i = 0; i < value_len; i++)
  458. {
  459. char c = value_c[i];
  460. if (c == '+')
  461. {
  462. decoded += ' ';
  463. }
  464. else if (c == '%')
  465. {
  466. char* endp;
  467. String t = value.substr(i + 1, 2);
  468. int ch = strtol(t.c_str(), &endp, 16);
  469. if (*endp == '\0')
  470. decoded += char(ch);
  471. else
  472. decoded += t;
  473. i += 2;
  474. }
  475. else
  476. {
  477. decoded += c;
  478. }
  479. }
  480. return decoded;
  481. }
  482. bool URL::IsUnreservedChar(const char in)
  483. {
  484. switch (in)
  485. {
  486. case '0':
  487. case '1':
  488. case '2':
  489. case '3':
  490. case '4':
  491. case '5':
  492. case '6':
  493. case '7':
  494. case '8':
  495. case '9':
  496. case 'a':
  497. case 'b':
  498. case 'c':
  499. case 'd':
  500. case 'e':
  501. case 'f':
  502. case 'g':
  503. case 'h':
  504. case 'i':
  505. case 'j':
  506. case 'k':
  507. case 'l':
  508. case 'm':
  509. case 'n':
  510. case 'o':
  511. case 'p':
  512. case 'q':
  513. case 'r':
  514. case 's':
  515. case 't':
  516. case 'u':
  517. case 'v':
  518. case 'w':
  519. case 'x':
  520. case 'y':
  521. case 'z':
  522. case 'A':
  523. case 'B':
  524. case 'C':
  525. case 'D':
  526. case 'E':
  527. case 'F':
  528. case 'G':
  529. case 'H':
  530. case 'I':
  531. case 'J':
  532. case 'K':
  533. case 'L':
  534. case 'M':
  535. case 'N':
  536. case 'O':
  537. case 'P':
  538. case 'Q':
  539. case 'R':
  540. case 'S':
  541. case 'T':
  542. case 'U':
  543. case 'V':
  544. case 'W':
  545. case 'X':
  546. case 'Y':
  547. case 'Z':
  548. case '-':
  549. case '.':
  550. case '_':
  551. case '~': return true;
  552. default: break;
  553. }
  554. return false;
  555. }
  556. } // namespace Rml