StringUtilities.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. #include "../../Include/RmlUi/Core/StringUtilities.h"
  2. #include "../../Include/RmlUi/Core/Log.h"
  3. #include <algorithm>
  4. #include <limits.h>
  5. #include <sstream>
  6. #include <stdarg.h>
  7. #include <stdio.h>
  8. #include <string.h>
  9. namespace Rml {
  10. static int FormatString(String& string, const char* format, va_list argument_list)
  11. {
  12. constexpr size_t InternalBufferSize = 256;
  13. char buffer[InternalBufferSize];
  14. char* buffer_ptr = buffer;
  15. size_t max_size = InternalBufferSize;
  16. int length = 0;
  17. for (int i = 0; i < 2; i++)
  18. {
  19. va_list argument_list_copy;
  20. va_copy(argument_list_copy, argument_list);
  21. length = vsnprintf(buffer_ptr, max_size, format, argument_list_copy);
  22. va_end(argument_list_copy);
  23. if (length < 0)
  24. {
  25. RMLUI_ERRORMSG("Error while formatting string");
  26. return 0;
  27. }
  28. if (i > 0)
  29. {
  30. RMLUI_ASSERT(string.size() == (size_t)length);
  31. break;
  32. }
  33. if ((size_t)length < max_size)
  34. {
  35. string = buffer_ptr;
  36. break;
  37. }
  38. string.resize((size_t)length);
  39. max_size = (size_t)length + 1;
  40. buffer_ptr = &(*string.begin()); // C++17 Upgrade: Replace with string.data()
  41. }
  42. return length;
  43. }
  44. int FormatString(String& string, const char* format, ...)
  45. {
  46. va_list argument_list;
  47. va_start(argument_list, format);
  48. int result = FormatString(string, format, argument_list);
  49. va_end(argument_list);
  50. return result;
  51. }
  52. String CreateString(const char* format, ...)
  53. {
  54. String result;
  55. va_list argument_list;
  56. va_start(argument_list, format);
  57. FormatString(result, format, argument_list);
  58. va_end(argument_list);
  59. return result;
  60. }
  // Converts an ASCII upper-case letter to lower case; every other character is returned unchanged.
  static inline char CharToLower(char c)
  {
      const bool is_ascii_upper = (c >= 'A' && c <= 'Z');
      return is_ascii_upper ? char(c + ('a' - 'A')) : c;
  }
  67. String StringUtilities::ToLower(String string)
  68. {
  69. std::transform(string.begin(), string.end(), string.begin(), &CharToLower);
  70. return string;
  71. }
  72. String StringUtilities::ToUpper(String string)
  73. {
  74. std::transform(string.begin(), string.end(), string.begin(), [](char c) {
  75. if (c >= 'a' && c <= 'z')
  76. c -= char('a' - 'A');
  77. return c;
  78. });
  79. return string;
  80. }
  81. RMLUICORE_API String StringUtilities::EncodeRml(const String& string)
  82. {
  83. String result;
  84. result.reserve(string.size());
  85. for (char c : string)
  86. {
  87. switch (c)
  88. {
  89. case '<': result += "&lt;"; break;
  90. case '>': result += "&gt;"; break;
  91. case '&': result += "&amp;"; break;
  92. case '"': result += "&quot;"; break;
  93. default: result += c; break;
  94. }
  95. }
  96. return result;
  97. }
  98. String StringUtilities::DecodeRml(const String& s)
  99. {
  100. String result;
  101. result.reserve(s.size());
  102. for (size_t i = 0; i < s.size();)
  103. {
  104. if (s[i] == '&')
  105. {
  106. if (s[i + 1] == 'l' && s[i + 2] == 't' && s[i + 3] == ';')
  107. {
  108. result += "<";
  109. i += 4;
  110. continue;
  111. }
  112. else if (s[i + 1] == 'g' && s[i + 2] == 't' && s[i + 3] == ';')
  113. {
  114. result += ">";
  115. i += 4;
  116. continue;
  117. }
  118. else if (s[i + 1] == 'a' && s[i + 2] == 'm' && s[i + 3] == 'p' && s[i + 4] == ';')
  119. {
  120. result += "&";
  121. i += 5;
  122. continue;
  123. }
  124. else if (s[i + 1] == 'q' && s[i + 2] == 'u' && s[i + 3] == 'o' && s[i + 4] == 't' && s[i + 5] == ';')
  125. {
  126. result += "\"";
  127. i += 6;
  128. continue;
  129. }
  130. else if (s[i + 1] == '#')
  131. {
  132. size_t start = i + 2;
  133. if (s[i + 2] == 'x')
  134. {
  135. start++;
  136. size_t j = 0;
  137. for (; j < 8; j++)
  138. {
  139. const auto& c = s[start + j];
  140. if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')))
  141. break;
  142. }
  143. if (j > 0 && s[start + j] == ';')
  144. {
  145. String tmp = s.substr(start, j);
  146. const char* begin = tmp.c_str();
  147. char* end;
  148. unsigned long code_point = strtoul(begin, &end, 16);
  149. if (code_point != 0 && code_point != ULONG_MAX)
  150. {
  151. result += ToUTF8(static_cast<Character>(code_point));
  152. i = start + (end - begin) + 1;
  153. continue;
  154. }
  155. }
  156. }
  157. else
  158. {
  159. size_t j = 0;
  160. for (; j < 8; j++)
  161. {
  162. const auto& c = s[start + j];
  163. if (!(c >= '0' && c <= '9'))
  164. break;
  165. }
  166. if (j > 0 && s[start + j] == ';')
  167. {
  168. String tmp = s.substr(start, j);
  169. const char* begin = tmp.c_str();
  170. char* end;
  171. unsigned long code_point = strtoul(begin, &end, 10);
  172. if (code_point != 0 && code_point != ULONG_MAX)
  173. {
  174. result += ToUTF8(static_cast<Character>(code_point));
  175. i = start + (end - begin) + 1;
  176. continue;
  177. }
  178. }
  179. }
  180. }
  181. }
  182. result += s[i];
  183. i += 1;
  184. }
  185. return result;
  186. }
  187. String StringUtilities::Replace(String subject, const String& search, const String& replace)
  188. {
  189. size_t pos = 0;
  190. while ((pos = subject.find(search, pos)) != String::npos)
  191. {
  192. subject.replace(pos, search.length(), replace);
  193. pos += replace.length();
  194. }
  195. return subject;
  196. }
  197. String StringUtilities::Replace(String subject, char search, char replace)
  198. {
  199. const size_t size = subject.size();
  200. for (size_t i = 0; i < size; i++)
  201. {
  202. if (subject[i] == search)
  203. subject[i] = replace;
  204. }
  205. return subject;
  206. }
  207. void StringUtilities::ExpandString(StringList& string_list, const String& string, const char delimiter, bool ignore_repeated_delimiters)
  208. {
  209. char quote = 0;
  210. bool last_char_delimiter = true;
  211. const char* ptr = string.c_str();
  212. const char* start_ptr = nullptr;
  213. const char* end_ptr = ptr;
  214. size_t num_delimiter_values = std::count(string.begin(), string.end(), delimiter);
  215. if (num_delimiter_values == 0)
  216. {
  217. string_list.push_back(StripWhitespace(string));
  218. return;
  219. }
  220. string_list.reserve(string_list.size() + num_delimiter_values + 1);
  221. while (*ptr)
  222. {
  223. // Switch into quote mode if the last char was a delimeter ( excluding whitespace )
  224. // and we're not already in quote mode
  225. if (last_char_delimiter && !quote && (*ptr == '"' || *ptr == '\''))
  226. {
  227. quote = *ptr;
  228. }
  229. // Switch out of quote mode if we encounter a quote that hasn't been escaped
  230. else if (*ptr == quote && *(ptr - 1) != '\\')
  231. {
  232. quote = 0;
  233. }
  234. // If we encounter a delimiter while not in quote mode, add the item to the list
  235. else if (*ptr == delimiter && !quote)
  236. {
  237. if (start_ptr)
  238. string_list.emplace_back(start_ptr, end_ptr + 1);
  239. else if (!ignore_repeated_delimiters)
  240. string_list.emplace_back();
  241. last_char_delimiter = true;
  242. start_ptr = nullptr;
  243. }
  244. // Otherwise if its not white space or we're in quote mode, advance the pointers
  245. else if (!IsWhitespace(*ptr) || quote)
  246. {
  247. if (!start_ptr)
  248. start_ptr = ptr;
  249. end_ptr = ptr;
  250. last_char_delimiter = false;
  251. }
  252. ptr++;
  253. }
  254. // If there's data pending, add it.
  255. if (start_ptr)
  256. string_list.emplace_back(start_ptr, end_ptr + 1);
  257. }
  258. void StringUtilities::ExpandString(StringList& string_list, const String& string, const char delimiter, char quote_character, char unquote_character,
  259. bool ignore_repeated_delimiters)
  260. {
  261. int quote_mode_depth = 0;
  262. const char* ptr = string.c_str();
  263. const char* start_ptr = nullptr;
  264. const char* end_ptr = ptr;
  265. while (*ptr)
  266. {
  267. // Increment the quote depth for each quote character encountered
  268. if (*ptr == quote_character)
  269. {
  270. ++quote_mode_depth;
  271. }
  272. // And decrement it for every unquote character
  273. else if (*ptr == unquote_character)
  274. {
  275. --quote_mode_depth;
  276. }
  277. // If we encounter a delimiter while not in quote mode, add the item to the list
  278. if (*ptr == delimiter && quote_mode_depth == 0)
  279. {
  280. if (start_ptr)
  281. string_list.emplace_back(start_ptr, end_ptr + 1);
  282. else if (!ignore_repeated_delimiters)
  283. string_list.emplace_back();
  284. start_ptr = nullptr;
  285. }
  286. // Otherwise if its not white space or we're in quote mode, advance the pointers
  287. else if (!IsWhitespace(*ptr) || quote_mode_depth > 0)
  288. {
  289. if (!start_ptr)
  290. start_ptr = ptr;
  291. end_ptr = ptr;
  292. }
  293. ptr++;
  294. }
  295. // If there's data pending, add it.
  296. if (start_ptr)
  297. string_list.emplace_back(start_ptr, end_ptr + 1);
  298. }
  299. void StringUtilities::JoinString(String& string, const StringList& string_list, const char delimiter)
  300. {
  301. for (size_t i = 0; i < string_list.size(); i++)
  302. {
  303. string += string_list[i];
  304. if (delimiter != '\0' && i < string_list.size() - 1)
  305. string += delimiter;
  306. }
  307. }
  308. String StringUtilities::StripWhitespace(const String& string)
  309. {
  310. return StripWhitespace(StringView(string));
  311. }
  312. RMLUICORE_API String StringUtilities::StripWhitespace(StringView string)
  313. {
  314. const char* start = string.begin();
  315. const char* end = string.end();
  316. while (start < end && IsWhitespace(*start))
  317. start++;
  318. while (end > start && IsWhitespace(*(end - 1)))
  319. end--;
  320. if (start < end)
  321. return String(start, end);
  322. return String();
  323. }
  324. void StringUtilities::TrimTrailingDotZeros(String& string)
  325. {
  326. size_t new_size = string.size();
  327. for (size_t i = string.size() - 1; i < string.size(); i--)
  328. {
  329. if (string[i] == '.')
  330. {
  331. new_size = i;
  332. break;
  333. }
  334. else if (string[i] == '0')
  335. new_size = i;
  336. else
  337. break;
  338. }
  339. if (new_size < string.size())
  340. string.resize(new_size);
  341. }
  342. bool StringUtilities::StartsWith(StringView string, StringView start)
  343. {
  344. if (string.size() < start.size())
  345. return false;
  346. StringView substring(string.begin(), string.begin() + start.size());
  347. return substring == start;
  348. }
  349. bool StringUtilities::EndsWith(StringView string, StringView end)
  350. {
  351. if (string.size() < end.size())
  352. return false;
  353. StringView substring(string.end() - end.size(), string.end());
  354. return substring == end;
  355. }
  356. bool StringUtilities::StringCompareCaseInsensitive(const StringView lhs, const StringView rhs)
  357. {
  358. if (lhs.size() != rhs.size())
  359. return false;
  360. const char* left = lhs.begin();
  361. const char* right = rhs.begin();
  362. const char* const left_end = lhs.end();
  363. for (; left != left_end; ++left, ++right)
  364. {
  365. if (CharToLower(*left) != CharToLower(*right))
  366. return false;
  367. }
  368. return true;
  369. }
  370. Character StringUtilities::ToCharacter(const char* p, const char* p_end)
  371. {
  372. RMLUI_ASSERTMSG(p && p != p_end, "ToCharacter expects a valid, non-empty input string");
  373. if ((*p & (1 << 7)) == 0)
  374. return static_cast<Character>(*p);
  375. int num_bytes = 0;
  376. int code = 0;
  377. if ((*p & 0b1110'0000) == 0b1100'0000)
  378. {
  379. num_bytes = 2;
  380. code = (*p & 0b0001'1111);
  381. }
  382. else if ((*p & 0b1111'0000) == 0b1110'0000)
  383. {
  384. num_bytes = 3;
  385. code = (*p & 0b0000'1111);
  386. }
  387. else if ((*p & 0b1111'1000) == 0b1111'0000)
  388. {
  389. num_bytes = 4;
  390. code = (*p & 0b0000'0111);
  391. }
  392. else
  393. {
  394. // Invalid begin byte
  395. return Character::Null;
  396. }
  397. if (p_end - p < num_bytes)
  398. return Character::Null;
  399. for (int i = 1; i < num_bytes; i++)
  400. {
  401. const char byte = *(p + i);
  402. if ((byte & 0b1100'0000) != 0b1000'0000)
  403. {
  404. // Invalid continuation byte
  405. return Character::Null;
  406. }
  407. code = ((code << 6) | (byte & 0b0011'1111));
  408. }
  409. return static_cast<Character>(code);
  410. }
  411. size_t StringUtilities::BytesUTF8(Character character)
  412. {
  413. char32_t c = (char32_t)character;
  414. if (c < 0x80)
  415. return 1;
  416. else if (c < 0x800)
  417. return 2;
  418. else if (c < 0x10000)
  419. return 3;
  420. else if (c <= 0x10FFFF)
  421. return 4;
  422. else
  423. // Invalid character.
  424. return 0;
  425. }
  426. String StringUtilities::ToUTF8(Character character)
  427. {
  428. return ToUTF8(&character, 1);
  429. }
  430. String StringUtilities::ToUTF8(const Character* characters, int num_characters)
  431. {
  432. String result;
  433. result.reserve(num_characters);
  434. bool invalid_character = false;
  435. for (int i = 0; i < num_characters; i++)
  436. {
  437. char32_t c = (char32_t)characters[i];
  438. constexpr int l3 = 0b0000'0111;
  439. constexpr int l4 = 0b0000'1111;
  440. constexpr int l5 = 0b0001'1111;
  441. constexpr int l6 = 0b0011'1111;
  442. constexpr int h1 = 0b1000'0000;
  443. constexpr int h2 = 0b1100'0000;
  444. constexpr int h3 = 0b1110'0000;
  445. constexpr int h4 = 0b1111'0000;
  446. if (c < 0x80)
  447. result += (char)c;
  448. else if (c < 0x800)
  449. result += {char(((c >> 6) & l5) | h2), char((c & l6) | h1)};
  450. else if (c < 0x10000)
  451. result += {char(((c >> 12) & l4) | h3), char(((c >> 6) & l6) | h1), char((c & l6) | h1)};
  452. else if (c <= 0x10FFFF)
  453. result += {char(((c >> 18) & l3) | h4), char(((c >> 12) & l6) | h1), char(((c >> 6) & l6) | h1), char((c & l6) | h1)};
  454. else
  455. invalid_character = true;
  456. }
  457. if (invalid_character)
  458. Log::Message(Log::LT_WARNING, "One or more invalid code points encountered while encoding to UTF-8.");
  459. return result;
  460. }
  461. size_t StringUtilities::LengthUTF8(StringView string_view)
  462. {
  463. const char* const p_end = string_view.end();
  464. // Skip any continuation bytes at the beginning
  465. const char* p = string_view.begin();
  466. size_t num_continuation_bytes = 0;
  467. while (p != p_end)
  468. {
  469. if ((*p & 0b1100'0000) == 0b1000'0000)
  470. ++num_continuation_bytes;
  471. ++p;
  472. }
  473. return string_view.size() - num_continuation_bytes;
  474. }
  475. int StringUtilities::ConvertCharacterOffsetToByteOffset(StringView string, int character_offset)
  476. {
  477. if (character_offset >= (int)string.size())
  478. return (int)string.size();
  479. int character_count = 0;
  480. for (auto it = StringIteratorU8(string.begin(), string.begin(), string.end()); it; ++it)
  481. {
  482. character_count += 1;
  483. if (character_count > character_offset)
  484. return (int)it.offset();
  485. }
  486. return (int)string.size();
  487. }
  488. int StringUtilities::ConvertByteOffsetToCharacterOffset(StringView string, int byte_offset)
  489. {
  490. int character_count = 0;
  491. for (auto it = StringIteratorU8(string.begin(), string.begin(), string.end()); it; ++it)
  492. {
  493. if (it.offset() >= byte_offset)
  494. break;
  495. character_count += 1;
  496. }
  497. return character_count;
  498. }
  499. StringView::StringView()
  500. {
  501. const char* empty_string = "";
  502. p_begin = empty_string;
  503. p_end = empty_string;
  504. }
  505. StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
  506. {
  507. RMLUI_ASSERT(p_end >= p_begin);
  508. }
  509. StringView::StringView(const String& string) : p_begin(string.data()), p_end(string.data() + string.size()) {}
  510. StringView::StringView(const String& string, size_t offset) : p_begin(string.data() + offset), p_end(string.data() + string.size()) {}
  511. StringView::StringView(const String& string, size_t offset, size_t count) :
  512. p_begin(string.data() + offset), p_end(string.data() + std::min<size_t>(offset + count, string.size()))
  513. {}
  514. bool StringView::operator==(const StringView& other) const
  515. {
  516. return size() == other.size() && strncmp(p_begin, other.p_begin, size()) == 0;
  517. }
  518. StringIteratorU8::StringIteratorU8(const char* p_begin, const char* p, const char* p_end) : view(p_begin, p_end), p(p) {}
  519. StringIteratorU8::StringIteratorU8(StringView string) : view(string), p(view.begin()) {}
  520. StringIteratorU8::StringIteratorU8(const String& string) : view(string), p(string.data()) {}
  521. StringIteratorU8::StringIteratorU8(const String& string, size_t offset) : view(string), p(string.data() + offset) {}
  522. StringIteratorU8::StringIteratorU8(const String& string, size_t offset, size_t count) : view(string, 0, offset + count), p(string.data() + offset) {}
  523. StringIteratorU8& StringIteratorU8::operator++()
  524. {
  525. RMLUI_ASSERT(p < view.end());
  526. ++p;
  527. SeekForward();
  528. return *this;
  529. }
  530. StringIteratorU8& StringIteratorU8::operator--()
  531. {
  532. RMLUI_ASSERT(p >= view.begin());
  533. --p;
  534. SeekBack();
  535. return *this;
  536. }
  537. inline void StringIteratorU8::SeekBack()
  538. {
  539. p = StringUtilities::SeekBackwardUTF8(p, view.begin());
  540. }
  541. inline void StringIteratorU8::SeekForward()
  542. {
  543. p = StringUtilities::SeekForwardUTF8(p, view.end());
  544. }
  545. } // namespace Rml