stringAPI.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2026 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #ifndef DFPSR_API_STRING
  24. #define DFPSR_API_STRING
  25. #include <cstdint>
  26. #include "../base/Callback.h"
  27. #include "bufferAPI.h"
  28. #include "../base/SafePointer.h"
  29. #include "../base/DsrTraits.h"
  30. #include "../collection/List.h"
  31. #include "../settings.h"
  32. // Define DSR_INTERNAL_ACCESS before any include to get internal access to exposed types
  33. #ifdef DSR_INTERNAL_ACCESS
  34. #define IMPL_ACCESS public
  35. #else
  36. #define IMPL_ACCESS protected
  37. #endif
  38. namespace dsr {
  // The character type used by every string in this API; an alias for the 32-bit char32_t.
  using DsrChar = char32_t;
  // Text files support loading UTF-8/16 BE/LE with BOM or Latin-1 without BOM
  // Each enumerator names one supported file encoding; the comment after it lists the code points it can represent.
  enum class CharacterEncoding {
      Raw_Latin1, // U+00 to U+FF
      BOM_UTF8, // U+00000000 to U+0010FFFF
      BOM_UTF16BE, // U+00000000 to U+0000D7FF, U+0000E000 to U+0010FFFF
      BOM_UTF16LE // U+00000000 to U+0000D7FF, U+0000E000 to U+0010FFFF
  };
  // Carriage-return is removed when loading text files to prevent getting double lines
  // A line-feed without a line-feed character is nonsense
  // NOTE(review): the line above presumably means "a carriage-return without a line-feed character" — confirm with the author.
  // LineEncoding allows re-adding carriage-return before or after each line-break when saving
  enum class LineEncoding {
      CrLf, // Microsoft Windows compatible (Can also be read on other platforms by ignoring carriage return)
      Lf // Linux and Macintosh compatible (Might not work on non-portable text editors on Microsoft Windows)
  };
  // Forward declaration, so that types declared before the String class can refer to it.
  class String;
  55. // Helper type for strings.
  56. struct Impl_CharacterView {
  57. DsrChar *data = nullptr;
  58. intptr_t length = 0;
  59. Impl_CharacterView() {}
  60. Impl_CharacterView(Handle<DsrChar> characters)
  61. : data(characters.getUnsafe()), length(characters.getElementCount()) {}
  62. Impl_CharacterView(const DsrChar *data, intptr_t length)
  63. : data(const_cast<DsrChar *>(data)), length(length) {
  64. if (data == nullptr) this->length = 0;
  65. }
  66. inline DsrChar *getUnchecked() const {
  67. return const_cast<DsrChar*>(this->data);
  68. }
  69. inline DsrChar operator [] (intptr_t index) const {
  70. if (index < 0 || index >= this->length) {
  71. return U'\0';
  72. } else {
  73. return this->data[index];
  74. }
  75. }
  76. inline void writeCharacter(intptr_t index, DsrChar character) {
  77. if (index < 0 || index >= this->length) {
  78. // TODO: Throw an error without causing bottomless recursion.
  79. } else {
  80. this->data[index] = character;
  81. }
  82. }
  83. inline SafePointer<DsrChar> getSafe(const char *name) const {
  84. return SafePointer<DsrChar>(name, this->getUnchecked(), this->length * sizeof(DsrChar));
  85. }
  86. };
  87. // Replacing String with a ReadableString reference for input arguments can make passing of U"" literals faster,
  88. // because String is not allowed to assume anything about how long the literal will be available.
  89. // Unlike String, it cannot be constructed from a "" literal, because it is not allowed to heap allocate new memory
  90. // for the conversion, only hold existing buffers alive with reference counting when casted from String.
  91. class ReadableString {
  92. IMPL_ACCESS:
  93. // A reference counted pointer to the buffer to allow passing strings around without having to clone the buffer each time
  94. // ReadableString only uses it for reference counting but String use it for reallocating
  95. Handle<DsrChar> characters;
  96. // Pointing to a subset of the buffer or memory that is not shared.
  97. Impl_CharacterView view;
  98. public:
  99. // Returning the character by value prevents writing to memory that might be a constant literal or shared with other strings
  100. inline DsrChar operator[] (intptr_t index) const {
  101. return this->view[index];
  102. }
  103. public:
  104. // Empty string U""
  105. ReadableString() {}
  106. // Implicit casting from U"text"
  107. ReadableString(const DsrChar *content);
  108. ReadableString(Handle<DsrChar> characters, Impl_CharacterView view)
  109. : characters(characters), view(view) {}
  110. // Destructor.
  111. ~ReadableString() {}
  112. // Copy constructor.
  113. ReadableString(const ReadableString& source)
  114. : characters(source.characters), view(source.view) {}
  115. // Move constructor.
  116. ReadableString(ReadableString &&source) noexcept
  117. : characters(source.characters), view(source.view) {
  118. source.characters = Handle<DsrChar>();
  119. source.view = Impl_CharacterView();
  120. }
  121. // Copy assignment.
  122. ReadableString& operator = (const ReadableString& source) {
  123. if (this != &source) {
  124. this->characters = source.characters;
  125. this->view = source.view;
  126. }
  127. return *this;
  128. };
  129. // Move assignment.
  130. ReadableString& operator = (ReadableString &&source) {
  131. if (this != &source) {
  132. this->characters = source.characters;
  133. this->view = source.view;
  134. source.characters = Handle<DsrChar>();
  135. source.view = Impl_CharacterView();
  136. }
  137. return *this;
  138. }
  139. };
  140. // A safe and simple string type
  141. // Can be constructed from ascii literals "", but U"" will preserve unicode characters.
  142. // Can be used without ReadableString, but ReadableString can be wrapped over U"" literals without allocation
  143. // UTF-32
  144. // Endianness is native
  145. // No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
  146. class String : public ReadableString {
  147. public:
  148. // Constructors.
  149. String();
  150. #ifndef BAN_IMPLICIT_ASCII_CONVERSION
  151. String(const char* source);
  152. #endif
  153. String(const DsrChar* source);
  154. // Destructor.
  155. ~String() {}
  156. // Copy constructor.
  157. String(const ReadableString& source) : ReadableString(source) {}
  158. String(const String& source) : ReadableString(source) {}
  159. // Move constructor.
  160. String(ReadableString &&source) noexcept : ReadableString(std::move(source)) {}
  161. String(String &&source) noexcept : ReadableString(std::move(source)) {}
  162. // Copy assignment.
  163. String& operator = (const String& source) {
  164. if (this != &source) {
  165. this->characters = source.characters;
  166. this->view = source.view;
  167. }
  168. return *this;
  169. };
  170. // Move assignment.
  171. String& operator = (String &&source) {
  172. if (this != &source) {
  173. this->characters = source.characters;
  174. this->view = source.view;
  175. source.characters = Handle<DsrChar>();
  176. source.view = Impl_CharacterView();
  177. }
  178. return *this;
  179. }
  180. };
  // Used as format tags around numbers passed to string_append or string_combine
  // New types can implement printing to String by making wrappers from this class
  // Types inheriting from Printable are accepted by string_toStreamIndented, which calls toStreamIndented on them.
  class Printable {
  public:
      // The method for appending the printable object into the target string
      //   target: The string receiving the text.
      //   indentation: Text supplied by the caller for indenting the output. How it is applied is up to the implementing class.
      //   Returns a String reference (presumably target, to allow chaining — confirm in the implementations).
      virtual String& toStreamIndented(String& target, const ReadableString& indentation) const = 0;
      // Non-virtual convenience methods, defined outside of this header.
      // NOTE(review): presumably these forward to toStreamIndented, using empty indentation
      //               and a new String where none is given — confirm in the implementation.
      String& toStream(String& target) const;
      String toStringIndented(const ReadableString& indentation) const;
      String toString() const;
      // Virtual destructor, defined outside of this header.
      virtual ~Printable();
  };
  192. // Used to generate fixed size ascii strings, which is useful when heap allocations are not possible
  193. // or you need a safe format until you know which encoding a system call needs to support Unicode.
  194. template <intptr_t SIZE>
  195. struct FixedAscii {
  196. char characters[SIZE];
  197. // Create a fixed size ascii string from a null terminated ascii string.
  198. // Crops if text is too long.
  199. FixedAscii(const char *text) {
  200. bool terminated = false;
  201. for (intptr_t i = 0; i < SIZE - 1; i++) {
  202. char c = text[i];
  203. if (c == '\0') {
  204. terminated = true;
  205. }
  206. if (terminated) {
  207. this->characters[i] = '\0';
  208. } else if (c > 127) {
  209. this->characters[i] = '?';
  210. } else {
  211. this->characters[i] = c;
  212. }
  213. }
  214. this->characters[SIZE - 1] = '\0';
  215. }
  216. FixedAscii(const ReadableString &text) {
  217. bool terminated = false;
  218. for (intptr_t i = 0; i < SIZE - 1; i++) {
  219. char c = text[i];
  220. if (c == '\0') {
  221. terminated = true;
  222. }
  223. if (terminated) {
  224. this->characters[i] = '\0';
  225. } else if (c > 127) {
  226. this->characters[i] = '?';
  227. } else {
  228. this->characters[i] = c;
  229. }
  230. }
  231. this->characters[SIZE - 1] = '\0';
  232. }
  233. const char * getPointer() const {
  234. return characters;
  235. }
  236. };
  // Helper functions to resolve ambiguity without constexpr if statements in C++ 14.
  // These are the non-template functions that string_toStreamIndented dispatches to, one per category of value.
  // They are defined outside of this header.
  // NOTE(review): presumably each one appends indentation followed by a text representation of value to target
  //               and returns target — confirm in the implementation.
  String& impl_toStreamIndented_ascii(String& target, const char *value, const ReadableString& indentation);
  String& impl_toStreamIndented_utf32(String& target, const char32_t *value, const ReadableString& indentation);
  String& impl_toStreamIndented_readable(String& target, const ReadableString &value, const ReadableString& indentation);
  String& impl_toStreamIndented_double(String& target, const double &value, const ReadableString& indentation);
  String& impl_toStreamIndented_int64(String& target, const int64_t &value, const ReadableString& indentation);
  String& impl_toStreamIndented_uint64(String& target, const uint64_t &value, const ReadableString& indentation);
  244. // Resolving ambiguity without access to constexpr in if statements by disabling type safety with unsafeCast.
  245. template <typename T, DSR_ENABLE_IF(
  246. DSR_UTF32_LITERAL(T)
  247. #ifndef BAN_IMPLICIT_ASCII_CONVERSION
  248. || DSR_ASCII_LITERAL(T)
  249. #endif
  250. || DSR_INHERITS_FROM(T, Printable)
  251. || DSR_SAME_TYPE(T, String)
  252. || DSR_SAME_TYPE(T, ReadableString)
  253. || DSR_SAME_TYPE(T, float)
  254. || DSR_SAME_TYPE(T, double)
  255. || DSR_SAME_TYPE(T, char)
  256. || DSR_SAME_TYPE(T, char32_t)
  257. || DSR_SAME_TYPE(T, bool)
  258. || DSR_SAME_TYPE(T, short)
  259. || DSR_SAME_TYPE(T, int)
  260. || DSR_SAME_TYPE(T, long)
  261. || DSR_SAME_TYPE(T, long long)
  262. || DSR_SAME_TYPE(T, unsigned short)
  263. || DSR_SAME_TYPE(T, unsigned int)
  264. || DSR_SAME_TYPE(T, unsigned long)
  265. || DSR_SAME_TYPE(T, unsigned long long)
  266. || DSR_SAME_TYPE(T, uint8_t)
  267. || DSR_SAME_TYPE(T, uint16_t)
  268. || DSR_SAME_TYPE(T, uint32_t)
  269. || DSR_SAME_TYPE(T, uint64_t)
  270. || DSR_SAME_TYPE(T, int8_t)
  271. || DSR_SAME_TYPE(T, int16_t)
  272. || DSR_SAME_TYPE(T, int32_t)
  273. || DSR_SAME_TYPE(T, int64_t))>
  274. inline String& string_toStreamIndented(String& target, const T &value, const ReadableString& indentation) {
  275. if (DSR_UTF32_LITERAL(T)) {
  276. impl_toStreamIndented_utf32(target, unsafeCast<char32_t*>(value), indentation);
  277. #ifndef BAN_IMPLICIT_ASCII_CONVERSION
  278. } else if (DSR_ASCII_LITERAL(T)) {
  279. impl_toStreamIndented_ascii(target, unsafeCast<char*>(value), indentation);
  280. #endif
  281. } else if (DSR_INHERITS_FROM(T, Printable)) {
  282. unsafeCast<Printable>(value).toStreamIndented(target, indentation);
  283. } else if (DSR_SAME_TYPE(T, String)) {
  284. impl_toStreamIndented_readable(target, unsafeCast<String>(value), indentation);
  285. } else if (DSR_SAME_TYPE(T, ReadableString)) {
  286. impl_toStreamIndented_readable(target, unsafeCast<ReadableString>(value), indentation);
  287. } else if (DSR_SAME_TYPE(T, float)) {
  288. impl_toStreamIndented_double(target, (double)unsafeCast<float>(value), indentation);
  289. } else if (DSR_SAME_TYPE(T, double)) {
  290. impl_toStreamIndented_double(target, unsafeCast<double>(value), indentation);
  291. } else if (DSR_SAME_TYPE(T, char)) {
  292. impl_toStreamIndented_readable(target, indentation, U"");
  293. string_appendChar(target, unsafeCast<char>(value));
  294. } else if (DSR_SAME_TYPE(T, char32_t)) {
  295. impl_toStreamIndented_readable(target, indentation, U"");
  296. string_appendChar(target, unsafeCast<char32_t>(value));
  297. } else if (DSR_SAME_TYPE(T, bool)) {
  298. impl_toStreamIndented_utf32(target, unsafeCast<bool>(value) ? U"true" : U"false", indentation);
  299. } else if (DSR_SAME_TYPE(T, uint8_t)) {
  300. impl_toStreamIndented_uint64(target, (uint64_t)unsafeCast<uint8_t>(value), indentation);
  301. } else if (DSR_SAME_TYPE(T, uint16_t)) {
  302. impl_toStreamIndented_uint64(target, (uint64_t)unsafeCast<uint16_t>(value), indentation);
  303. } else if (DSR_SAME_TYPE(T, uint32_t)) {
  304. impl_toStreamIndented_uint64(target, (uint64_t)unsafeCast<uint32_t>(value), indentation);
  305. } else if (DSR_SAME_TYPE(T, uint64_t)) {
  306. impl_toStreamIndented_uint64(target, unsafeCast<uint64_t>(value), indentation);
  307. } else if (DSR_SAME_TYPE(T, int8_t)) {
  308. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<int8_t>(value), indentation);
  309. } else if (DSR_SAME_TYPE(T, int16_t)) {
  310. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<int16_t>(value), indentation);
  311. } else if (DSR_SAME_TYPE(T, int32_t)) {
  312. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<int32_t>(value), indentation);
  313. } else if (DSR_SAME_TYPE(T, int64_t)) {
  314. impl_toStreamIndented_int64(target, unsafeCast<int64_t>(value), indentation);
  315. } else if (DSR_SAME_TYPE(T, short)) {
  316. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<short>(value), indentation);
  317. } else if (DSR_SAME_TYPE(T, int)) {
  318. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<int>(value), indentation);
  319. } else if (DSR_SAME_TYPE(T, long)) {
  320. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<long>(value), indentation);
  321. } else if (DSR_SAME_TYPE(T, long long)) {
  322. static_assert(sizeof(long long) == 8, "You need to implement integer printing for integers larger than 64 bits, or printing long long will be truncated!");
  323. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<long long>(value), indentation);
  324. } else if (DSR_SAME_TYPE(T, unsigned short)) {
  325. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<unsigned short>(value), indentation);
  326. } else if (DSR_SAME_TYPE(T, unsigned int)) {
  327. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<unsigned int>(value), indentation);
  328. } else if (DSR_SAME_TYPE(T, unsigned long)) {
  329. impl_toStreamIndented_int64(target, (int64_t)unsafeCast<unsigned long>(value), indentation);
  330. } else if (DSR_SAME_TYPE(T, unsigned long long)) {
  331. static_assert(sizeof(unsigned long long) == 8, "You need to implement integer printing for integers larger than 64 bits, or printing unsigned long long will be truncated!");
  332. impl_toStreamIndented_int64(target, (uint64_t)unsafeCast<unsigned long long>(value), indentation);
  333. }
  334. return target;
  335. }
  336. template<typename T>
  337. String string_toStringIndented(const T& source, const ReadableString& indentation) {
  338. String result;
  339. string_toStreamIndented(result, source, indentation);
  340. return result;
  341. }
  342. template<typename T>
  343. String string_toString(const T& source) {
  344. String result;
  345. string_toStreamIndented(result, source, U"");
  346. return result;
  347. }
  // ---------------- Procedural API ----------------
  // Sets the target string's length to zero.
  // Because this opens up to appending new text where sub-strings may already share the buffer,
  // this operation will reallocate the buffer if shared with other strings.
  void string_clear(String& target);
  // Post-condition: Returns the length of source, counted in characters.
  // Example: string_length(U"ABC") == 3
  intptr_t string_length(const ReadableString& source);
  // Post-condition: Returns the base-zero index of source's first occurrence of toFind, starting from startIndex. Returns -1 if not found.
  // startIndex defaults to zero, so that the search begins at the first character.
  // Example: string_findFirst(U"ABCABCABC", U'A') == 0
  // Example: string_findFirst(U"ABCABCABC", U'B') == 1
  // Example: string_findFirst(U"ABCABCABC", U'C') == 2
  // Example: string_findFirst(U"ABCABCABC", U'D') == -1
  intptr_t string_findFirst(const ReadableString& source, DsrChar toFind, intptr_t startIndex = 0);
  // Post-condition: Returns the base-zero index of source's last occurrence of toFind. Returns -1 if not found.
  // Example: string_findLast(U"ABCABCABC", U'A') == 6
  // Example: string_findLast(U"ABCABCABC", U'B') == 7
  // Example: string_findLast(U"ABCABCABC", U'C') == 8
  // Example: string_findLast(U"ABCABCABC", U'D') == -1
  intptr_t string_findLast(const ReadableString& source, DsrChar toFind);
  // The functions below return sub-strings as ReadableString, using base-zero character indices.
  // NOTE(review): how indices outside of source are handled is not specified in this header — confirm in the implementation.
  // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to before the character at exclusiveEnd
  // Example: string_exclusiveRange(U"0123456789", 2, 4) == U"23"
  ReadableString string_exclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t exclusiveEnd);
  // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to after the character at inclusiveEnd
  // Example: string_inclusiveRange(U"0123456789", 2, 4) == U"234"
  ReadableString string_inclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t inclusiveEnd);
  // Post-condition: Returns a sub-string of source from the start to before the character at exclusiveEnd
  // Example: string_before(U"0123456789", 5) == U"01234"
  ReadableString string_before(const ReadableString& source, intptr_t exclusiveEnd);
  // Post-condition: Returns a sub-string of source from the start to after the character at inclusiveEnd
  // Example: string_until(U"0123456789", 5) == U"012345"
  ReadableString string_until(const ReadableString& source, intptr_t inclusiveEnd);
  // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to the end
  // Example: string_from(U"0123456789", 5) == U"56789"
  ReadableString string_from(const ReadableString& source, intptr_t inclusiveStart);
  // Post-condition: Returns a sub-string of source from after the character at exclusiveStart to the end
  // Example: string_after(U"0123456789", 5) == U"6789"
  ReadableString string_after(const ReadableString& source, intptr_t exclusiveStart);
  // Split source into a list of strings.
  // Post-condition:
  //   Returns a list of strings from source by splitting along separator.
  //   If removeWhiteSpace is true then surrounding white-space will be removed, otherwise all white-space is kept.
  //   The separating characters are excluded from the resulting strings.
  //   The number of strings returned in the list will equal the number of separating characters plus one, so the result may contain empty strings.
  //   Each string in the list clones content to its own dynamic buffer. Use string_split_callback if you don't need long term storage.
  List<String> string_split(const ReadableString& source, DsrChar separator, bool removeWhiteSpace = false);
  // Split a string without needing a list to store the result.
  // Use string_splitCount on the same source and separator if you need to know the element count in advance.
  // Side-effects:
  //   Calls action for each sub-string divided by separator in source given as the separatedText argument.
  // removeWhiteSpace defaults to false.
  void string_split_callback(Callback<void(ReadableString separatedText)> action, const ReadableString& source, DsrChar separator, bool removeWhiteSpace = false);
  399. // An alternative overload for having a very long lambda at the end.
  400. inline void string_split_callback(const ReadableString& source, DsrChar separator, bool removeWhiteSpace, Callback<void(ReadableString separatedText)> action) {
  401. string_split_callback(action, source, separator, removeWhiteSpace);
  402. }
  // Split source using separator, only to return the number of splits.
  // Useful for pre-allocation.
  intptr_t string_splitCount(const ReadableString& source, DsrChar separator);
  // Post-condition: Returns the upper case version of character if it is a lower case character, otherwise returning character as is.
  DsrChar character_upperCase(DsrChar character);
  // Post-condition: Returns the lower case version of character if it is an upper case character, otherwise returning character as is.
  DsrChar character_lowerCase(DsrChar character);
  // The character classes below are written as grammar rules, where | separates the alternatives.
  // Post-condition: Returns true iff c is a digit.
  //   Digit <- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
  bool character_isDigit(DsrChar c);
  // Post-condition: Returns true iff c is an integer character.
  //   IntegerCharacter <- '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
  bool character_isIntegerCharacter(DsrChar c);
  // Post-condition: Returns true iff c is a value character.
  //   ValueCharacter <- '.' | '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
  bool character_isValueCharacter(DsrChar c);
  // Post-condition: Returns true iff c is a white-space character.
  //   WhiteSpace <- ' ' | '\t' | '\v' | '\f' | '\n' | '\r'
  // Null terminators are excluded, because it's reserved for out of bound results.
  bool character_isWhiteSpace(DsrChar c);
  // Post-condition: Returns true iff source is a valid integer. IntegerAllowingWhiteSpace is also allowed iff allowWhiteSpace is true.
  //   UnsignedInteger <- Digit+
  //   Integer <- UnsignedInteger | '-' UnsignedInteger
  //   IntegerAllowingWhiteSpace <- WhiteSpace* Integer WhiteSpace*
  // allowWhiteSpace defaults to true.
  bool string_isInteger(const ReadableString& source, bool allowWhiteSpace = true);
  // Post-condition: Returns true iff source is a valid integer or decimal number. DoubleAllowingWhiteSpace is also allowed iff allowWhiteSpace is true.
  //   UnsignedDouble <- Digit+ | Digit* '.' Digit+
  //   Double <- UnsignedDouble | '-' UnsignedDouble
  //   DoubleAllowingWhiteSpace <- WhiteSpace* Double WhiteSpace*
  // allowWhiteSpace defaults to true.
  // Only dots are allowed as decimals.
  //   Because being able to read files from another country without crashes is a lot more important than a detail that most people don't even notice.
  //   Automatic nationalization made sense when most applications were written in-house before the internet existed.
  bool string_isDouble(const ReadableString& source, bool allowWhiteSpace = true);
  // Pre-condition: source must be a valid integer according to string_isInteger. Otherwise unexpected characters are simply ignored.
  // Post-condition: Returns the integer representation of source.
  //   The result is signed, because the input might unexpectedly have a negation sign.
  //   The result is large, so that one can easily check the range before assigning to a smaller integer type.
  int64_t string_toInteger(const ReadableString& source);
  441. // Side-effect: Appends value as a base ten integer at the end of target.
  442. void string_fromUnsigned(String& target, uint64_t value);
  443. // Post-condition: Returns value written as a base ten integer.
  444. inline String string_fromUnsigned(int64_t value) {
  445. String result; string_fromUnsigned(result, value); return result;
  446. }
  447. // Side-effect: Appends value as a base ten integer at the end of target.
  448. void string_fromSigned(String& target, int64_t value, DsrChar negationCharacter = U'-');
  449. // Post-condition: Returns value written as a base ten integer.
  450. inline String string_fromSigned(int64_t value, DsrChar negationCharacter = U'-') {
  451. String result; string_fromSigned(result, value, negationCharacter); return result;
  452. }
  // Pre-condition: source must be a valid double according to string_isDouble. Otherwise unexpected characters are simply ignored.
  // Post-condition: Returns the double precision floating-point representation of source.
  // According to string_isDouble, only dots are accepted as the decimal separator.
  double string_toDouble(const ReadableString& source);
  456. // Side-effect: Appends value as a base ten decimal number at the end of target.
  457. void string_fromDouble(String& target, double value, int decimalCount = 6, bool removeTrailingZeroes = true, DsrChar decimalCharacter = U'.', DsrChar negationCharacter = U'-');
  458. // Post-condition: Returns value written as a base ten decimal number.
  459. inline String string_fromDouble(double value, int decimalCount = 6, bool removeTrailingZeroes = true, DsrChar decimalCharacter = U'.', DsrChar negationCharacter = U'-') {
  460. String result; string_fromDouble(result, value, decimalCount, removeTrailingZeroes, decimalCharacter, negationCharacter); return result;
  461. }
  // When BAN_IMPLICIT_ASCII_CONVERSION is defined, this is the only constructor for creating a String from "" instead of U"".
  String string_fromAscii(const char *text);
  // Loading will try to find a byte order mark and can handle UTF-8 and UTF-16.
  //   Failure to find a byte order mark will assume that the file's content is raw Latin-1,
  //   because automatic detection would cause random behaviour.
  // For portability, carriage return characters are removed,
  //   but will be generated again using the default CrLf line encoding of string_save.
  // Post-condition:
  //   Returns the content of the file referred to by filename.
  //   If mustExist is true, then failure to load will throw an exception.
  //   If mustExist is false, then failure to load will return an empty string.
  // If you want to handle files that are not found in a different way,
  //   it is easy to use buffer_load and string_loadFromMemory separately.
  String string_load(const ReadableString& filename, bool mustExist = true);
  // Decode a text file from a buffer, which can be loaded using buffer_load.
  String string_loadFromMemory(Buffer fileContent);
  // Decode a null terminated string without BOM, by specifying which format it was encoded in.
  // Pre-conditions:
  //   data does not start with any byte-order-mark (BOM).
  //   data must be null terminated with '\0' in whatever format is being used. Otherwise you may have random crashes
  // Post-condition:
  //   Returns a string decoded from the raw data.
  String string_dangerous_decodeFromData(const void* data, CharacterEncoding encoding);
  // Side-effect: Saves content to filename using the selected character and line encodings.
  // Post-condition: Returns true on success and false on failure.
  // Do not add carriage return characters yourself into strings, for these will be added automatically in the CrLf mode.
  // The internal String type should only use UTF-32 with single line feeds for breaking lines.
  //   This makes text processing algorithms a lot cleaner when a character or line break is always one element.
  // UTF-8 with BOM is default by being both compact and capable of storing 21 bits of unicode.
  bool string_save(const ReadableString& filename, const ReadableString& content,
      CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
      LineEncoding lineEncoding = LineEncoding::CrLf
  );
  // Encode the string and keep the raw buffer instead of saving it to a file.
  // Disabling writeByteOrderMark can be done when the result is casted to a native string for platform specific APIs, where a BOM is not allowed.
  // Enabling writeNullTerminator should be done when using the result as a pointer, so that the length is known when the buffer does not have padding.
  // Defaults: UTF-8 character encoding, CrLf line encoding, with byte order mark, without null terminator.
  Buffer string_saveToMemory(const ReadableString& content,
      CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
      LineEncoding lineEncoding = LineEncoding::CrLf,
      bool writeByteOrderMark = true,
      bool writeNullTerminator = false
  );
  // Post-condition: Returns true iff strings a and b are exactly equal.
  // This is the comparison used by the == and != operators for ReadableString.
  bool string_match(const ReadableString& a, const ReadableString& b);
  // Post-condition: Returns true iff strings a and b are roughly equal using a case insensitive match.
  bool string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b);
  508. // While string_match should be preferred over == for code readability and consistency with string_caseInsensitiveMatch,
  509. // the equality operator might be called automatically from template methods when a template type is a string.
  510. inline bool operator==(const ReadableString& a, const ReadableString& b) { return string_match(a, b); }
  511. inline bool operator!=(const ReadableString& a, const ReadableString& b) { return !string_match(a, b); }
  512. // Post-condition: Returns text converted to upper case.
  513. String string_upperCase(const ReadableString &text);
// Post-condition: Returns a new string with text converted to lower case.
String string_lowerCase(const ReadableString &text);
// Trims white-space from both ends of text.
// Post-condition: Returns a sub-set of text without surrounding white-space (space, tab and carriage-return).
ReadableString string_removeOuterWhiteSpace(const ReadableString &text);
// Post-condition: Returns rawText wrapped in a quote.
//   Special characters are included using escape characters, so that one can quote multiple lines but store it easily.
//   The result can be converted back using string_unmangleQuote.
String string_mangleQuote(const ReadableString &rawText);
// Pre-condition: mangledText must be enclosed in double quotes and special characters must use escape characters (tabs in quotes are okay though).
// Post-condition: Returns mangledText with quotes removed and escape tokens interpreted.
String string_unmangleQuote(const ReadableString& mangledText);
// Post-condition: Returns the number of strings using the same buffer as text, including text itself.
uintptr_t string_getBufferUseCount(const ReadableString& text);
// Safely ensures that at least minimumLength characters can be held in the buffer of target.
void string_reserve(String& target, intptr_t minimumLength);
// Append/push one character to the end of target.
//   Use this for characters, to avoid having value converted as an integer into a string of digits.
void string_appendChar(String& target, DsrChar value);
  530. // Append elements
  531. inline void string_append(String& target) {}
  532. template<typename HEAD, typename... TAIL>
  533. inline void string_append(String& target, HEAD head, TAIL&&... tail) {
  534. string_toStreamIndented(target, head, U"");
  535. string_append(target, tail...);
  536. }
  537. // Combine a number of strings, characters and numbers
  538. // If an input type is rejected, create a Printable object to wrap around it
  539. template<typename... ARGS>
  540. inline String string_combine(ARGS&&... args) {
  541. String result;
  542. string_append(result, args...);
  543. return result;
  544. }
  545. // ---------------- Infix syntax ----------------
  546. // Operations
  547. inline String operator+ (const ReadableString& a, const ReadableString& b) { return string_combine(a, b); }
  548. inline String operator+ (const DsrChar* a, const ReadableString& b) { return string_combine(a, b); }
  549. inline String operator+ (const ReadableString& a, const DsrChar* b) { return string_combine(a, b); }
  550. inline String operator+ (const String& a, const String& b) { return string_combine(a, b); }
  551. inline String operator+ (const DsrChar* a, const String& b) { return string_combine(a, b); }
  552. inline String operator+ (const String& a, const DsrChar* b) { return string_combine(a, b); }
  553. inline String operator+ (const String& a, const ReadableString& b) { return string_combine(a, b); }
  554. inline String operator+ (const ReadableString& a, const String& b) { return string_combine(a, b); }
// ---------------- Message handling ----------------
// The category of a message, given together with the text to string_sendMessage and to message handlers.
enum class MessageType {
	Error, // Terminate as quickly as possible after saving and informing the user.
	Warning, // Inform the user but let the caller continue.
	StandardPrinting, // Print text to the terminal.
	DebugPrinting // Print debug information to the terminal, if debug mode is active.
};
// Get a reference to the thread-local buffer used for printing messages.
//   Can be combined with string_clear, string_append and string_sendMessage to send long messages in a thread-safe way.
//   Usage: Clear the buffer, fill it with the message and then send it.
String &string_getPrintBuffer();
// Send a message of the given type.
//   The message is given to the handler assigned using string_assignMessageHandler, so do not call this from within such a handler.
void string_sendMessage(const ReadableString &message, MessageType type);
// Send a message directly to the default message handler, ignoring string_assignMessageHandler.
//   This is the function to call from within a custom message handler, where string_sendMessage would cause infinite recursion.
void string_sendMessage_default(const ReadableString &message, MessageType type);
// Assign a custom message handler, so that action receives the messages sent using string_sendMessage.
// Pre-condition:
//   The action function must throw an exception or terminate the program when given an error, otherwise string_sendMessage will throw an exception about failing to do so.
//   Do not call string_sendMessage directly or indirectly from within action, use string_sendMessage_default instead to avoid infinite recursion.
// Terminating the program as soon as possible is ideal, but one might want to save a backup or show what went wrong in a graphical interface before terminating.
// Do not throw and catch errors as if they were warnings, because throwing and catching creates a partial transaction, potentially violating type invariants.
//   Better to use warnings and let the sender of the warning figure out how to abort the action safely.
void string_assignMessageHandler(Callback<void(const ReadableString &message, MessageType type)> action);
// Undo string_assignMessageHandler, so that any messages will be handled the default way again.
void string_unassignMessageHandler();
  580. // Throw an error, which must terminate the application or throw an error
  581. template<typename... ARGS>
  582. void throwError(ARGS... args) {
  583. String *target = &(string_getPrintBuffer());
  584. string_clear(*target);
  585. string_append(*target, args...);
  586. string_sendMessage(*target, MessageType::Error);
  587. }
  588. // Send a warning, which might throw an exception, terminate the application or anything else that the application requests using string_handleMessages
  589. template<typename... ARGS>
  590. void sendWarning(ARGS... args) {
  591. String *target = &(string_getPrintBuffer());
  592. string_clear(*target);
  593. string_append(*target, args...);
  594. string_sendMessage(*target, MessageType::Warning);
  595. }
  596. // Print information to the terminal or something else listening for messages using string_handleMessages
  597. template<typename... ARGS>
  598. void printText(ARGS... args) {
  599. String *target = &(string_getPrintBuffer());
  600. string_clear(*target);
  601. string_append(*target, args...);
  602. string_sendMessage(*target, MessageType::StandardPrinting);
  603. }
  604. // Debug messages are automatically disabled in release mode, so that you don't have to worry about accidentally releasing a program with poor performance from constantly printing to the terminal
  605. // Useful for selectively printing the most important information accumulated over time
  606. // Less useful for profiling, because the debug mode is slower than the release mode
  607. #ifdef NDEBUG
  608. // Supress debugText in release mode
  609. template<typename... ARGS>
  610. void debugText(ARGS... args) {}
  611. #else
  612. // Print debugText in debug mode
  613. template<typename... ARGS>
  614. void debugText(ARGS... args) {
  615. String *target = &(string_getPrintBuffer());
  616. string_clear(*target);
  617. string_append(*target, args...);
  618. string_sendMessage(*target, MessageType::DebugPrinting);
  619. }
  620. #endif
  621. }
  622. #endif