// text.h
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2019 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #ifndef DFPSR_BASE_TEXT
  24. #define DFPSR_BASE_TEXT
  25. #include <stdint.h>
  26. #include <string>
  27. // TODO: Try to hide in the implementation
  28. #include <iostream>
  29. #include <sstream>
  30. #include "Buffer.h"
  31. #include "../collection/List.h"
  32. namespace dsr {
  33. using DsrChar = char32_t;
  34. class ReadableString {
  35. protected:
  36. // A local pointer to the sub-allocation
  37. const char32_t* readSection = nullptr;
  38. // The length of the current string in characters
  39. int sectionLength = 0;
  40. public:
  41. int length() const;
  42. DsrChar read(int index) const;
  43. DsrChar operator[] (int index) const;
  44. public:
  45. // Empty string
  46. ReadableString();
  47. // Destructor
  48. virtual ~ReadableString();
  49. // UTF-32 litteral from U""
  50. // WARNING! May crash if content is freed, even if ReadableString is freed before
  51. // ReadableString may share its buffer with sub-strings of the same type
  52. ReadableString(const DsrChar *content);
  53. protected:
  54. // Returns true iff the range is safely inside of the string
  55. bool checkBound(int start, int length, bool warning = true) const;
  56. // Internal constructor
  57. ReadableString(const DsrChar *content, int sectionLength);
  58. // Create a string from an existing string
  59. // When there's no reference counter, it's important that the memory remains allocated until the application terminates
  60. // Just like when reading elements in a for loop, out-of-range only causes an exception if length > 0
  61. // Length lesser than 1 will always return an empty string
  62. virtual ReadableString getRange(int start, int length) const;
  63. public:
  64. // Converting to unknown character encoding using only the ascii character subset
  65. // A bug in GCC linking forces these to be virtual
  66. virtual std::ostream& toStream(std::ostream& out) const;
  67. virtual std::string toStdString() const;
  68. public:
  69. // Get the index of the first character in content matching toFind, or -1 if it doesn't exist.
  70. int findFirst(DsrChar toFind, int startIndex = 0) const;
  71. // Get the index of the last character in content matching toFind, or -1 if it doesn't exist.
  72. int findLast(DsrChar toFind) const;
  73. // Exclusive intervals represent the divisions between characters |⁰ A |¹ B |² C |³...
  74. // 0..2 of "ABC" then equals "AB", which has length 2 just like the index difference
  75. // 0..3 gets the whole "ABC" range, by starting from zero and ending with the character count
  76. ReadableString exclusiveRange(int inclusiveStart, int exclusiveEnd) const;
  77. // Inclusive intervals represent whole characters | A⁰ | B¹ | C² |...
  78. // 0..2 of "ABC" then equals "ABC", by taking character 0 (A), 1 (B) and 2 (C)
  79. ReadableString inclusiveRange(int inclusiveStart, int inclusiveEnd) const;
  80. // Simplified ranges
  81. ReadableString before(int exclusiveEnd) const;
  82. ReadableString until(int inclusiveEnd) const;
  83. ReadableString from(int inclusiveStart) const;
  84. ReadableString after(int exclusiveStart) const;
  85. // Split into a list of strings without allocating any new text buffers
  86. // The result can be kept after the original string has been freed, because the buffer is reference counted
  87. List<ReadableString> split(DsrChar separator) const;
  88. // Value conversion
  89. int64_t toInteger() const;
  90. double toDouble() const;
  91. };
  // Forward declaration; the full class is defined further down in this header.
  class String;

  // Reusable conversion methods.
  // Each takes the String to write into (target) and the number to convert (value).
  void uintToString_arabic(String& target, uint64_t value);
  void intToString_arabic(String& target, int64_t value);
  void doubleToString_arabic(String& target, double value);
  97. // Used as format tags around numbers passed to string_append or string_combine
  98. // New types can implement printing to String by making wrappers from this class
  99. class Printable {
  100. public:
  101. // The method for appending the printable object into the target string
  102. virtual String& toStreamIndented(String& target, const ReadableString& indentation) const = 0;
  103. String& toStream(String& target) const;
  104. String toStringIndented(const ReadableString& indentation) const;
  105. String toString() const;
  106. std::ostream& toStreamIndented(std::ostream& out, const ReadableString& indentation) const;
  107. std::ostream& toStream(std::ostream& out) const;
  108. std::string toStdString() const;
  109. virtual ~Printable();
  110. };
  111. // A safe and simple string type
  112. // Can be constructed from ascii litterals "", but U"" is more universal
  113. // Can be used without ReadableString, but ReadableString can be wrapped over U"" litterals without allocation
  114. // UTF-32
  115. // Endianness is native
  116. // No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
  117. class String : public ReadableString {
  118. protected:
  119. // A reference counted pointer to the buffer, just to keep the allocation
  120. std::shared_ptr<Buffer> buffer;
  121. // Same as readSection, but with write access
  122. char32_t* writeSection = nullptr;
  123. public:
  124. // The number of DsrChar characters that can be contained in the allocation before reaching the buffer's end
  125. // This doesn't imply that it's always okay to write to the remaining space, because the buffer may be shared
  126. int capacity();
  127. protected:
  128. // Internal constructor
  129. String(std::shared_ptr<Buffer> buffer, DsrChar *content, int sectionLength);
  130. // Create a string from the existing buffer without allocating any heap memory
  131. ReadableString getRange(int start, int length) const override;
  132. private:
  133. // Replaces the buffer with a new buffer holding at least newLength characters
  134. // Guarantees that the new buffer is not shared by other strings, so that it may be written to freely
  135. void reallocateBuffer(int32_t newLength, bool preserve);
  136. // Call before writing to the buffer
  137. // This hides that Strings share buffers when assigning by value or taking partial strings
  138. void cloneIfShared();
  139. void expand(int32_t newLength, bool affectUsedLength);
  140. public:
  141. // Constructors
  142. String();
  143. String(const char* source);
  144. String(const char32_t* source);
  145. String(const std::string& source);
  146. String(const ReadableString& source);
  147. String(const String& source);
  148. public:
  149. // Ensures safely that at least minimumLength characters can he held in the buffer
  150. void reserve(int32_t minimumLength);
  151. // Extend the String using more text
  152. void append(const char* source);
  153. void append(const ReadableString& source);
  154. void append(const char32_t* source);
  155. void append(const std::string& source);
  156. // Extend the String using another character
  157. void appendChar(DsrChar source);
  158. public:
  159. // Access
  160. void write(int index, DsrChar value);
  161. void clear();
  162. };
  163. // Define this overload for non-virtual source types that cannot inherit from Printable
  164. String& string_toStreamIndented(String& target, const Printable& source, const ReadableString& indentation);
  165. String& string_toStreamIndented(String& target, const char* value, const ReadableString& indentation);
  166. String& string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation);
  167. String& string_toStreamIndented(String& target, const char32_t* value, const ReadableString& indentation);
  168. String& string_toStreamIndented(String& target, const std::string& value, const ReadableString& indentation);
  169. String& string_toStreamIndented(String& target, const float& value, const ReadableString& indentation);
  170. String& string_toStreamIndented(String& target, const double& value, const ReadableString& indentation);
  171. String& string_toStreamIndented(String& target, const int64_t& value, const ReadableString& indentation);
  172. String& string_toStreamIndented(String& target, const uint64_t& value, const ReadableString& indentation);
  173. String& string_toStreamIndented(String& target, const int32_t& value, const ReadableString& indentation);
  174. String& string_toStreamIndented(String& target, const uint32_t& value, const ReadableString& indentation);
  175. String& string_toStreamIndented(String& target, const int16_t& value, const ReadableString& indentation);
  176. String& string_toStreamIndented(String& target, const uint16_t& value, const ReadableString& indentation);
  177. String& string_toStreamIndented(String& target, const int8_t& value, const ReadableString& indentation);
  178. String& string_toStreamIndented(String& target, const uint8_t& value, const ReadableString& indentation);
  179. // Procedural API
  180. // Post-condition:
  181. // Returns the content of the file referred to be filename.
  182. // If mustExist is true, then failure to load will throw an exception.
  183. // If mustExist is false, then failure to load will return an empty string.
  184. String string_load(const ReadableString& filename, bool mustExist = true);
  185. // Side-effect: Saves content to filename.
  186. void string_save(const ReadableString& filename, const ReadableString& content);
  187. // Post-condition: Returns true iff strings a and b are exactly equal.
  188. bool string_match(const ReadableString& a, const ReadableString& b);
  189. // Post-condition: Returns true iff strings a and b are roughly equal using a case insensitive match.
  190. bool string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b);
  191. // Post-condition: Returns text converted to upper case.
  192. String string_upperCase(const ReadableString &text);
  193. // Post-condition: Returns text converted to lower case.
  194. String string_lowerCase(const ReadableString &text);
  195. // Post-condition: Returns a clone of text without any white-space (space, tab and carriage-return).
  196. String string_removeAllWhiteSpace(const ReadableString &text);
  197. // Post-condition: Returns a sub-set of text without surrounding white-space (space, tab and carriage-return).
  198. // Unlike string_removeAllWhiteSpace, string_removeOuterWhiteSpace does not require allocating a new buffer.
  199. ReadableString string_removeOuterWhiteSpace(const ReadableString &text);
  200. // Pre-condition: Content must contain an integer, or unexpected things may happen.
  201. // Post-condition: Returns the numerical integer value of content while ignoring any forbidden characters.
  202. // Examples:
  203. // string_parseInteger(U"-25") == -25 // Good case
  204. // string_parseInteger(U" -25 ") == -25 // Still works
  205. // string_parseInteger(U" 10x10 ") == 1010 // Any digits are simply added in order while ignoring the rest
  206. int64_t string_parseInteger(const ReadableString& content);
  207. // Post-condition: Returns the double-precision floating-point approximation of content's numerical value
  208. double string_parseDouble(const ReadableString& content);
  209. // Post-condition: Returns rawText wrapped in a quote.
  210. // Special characters are included using escape characters, so that one can quote multiple lines but store it easily.
  211. String string_mangleQuote(const ReadableString &rawText);
  212. // Pre-condition: mangledText must be enclosed in double quotes and special characters must use escape characters (tabs in quotes are okay though).
  213. // Post-condition: Returns mangledText with quotes removed and excape tokens interpreted.
  214. String string_unmangleQuote(const ReadableString& mangledText);
  215. // Append one element
  216. template<typename TYPE>
  217. inline void string_append(String& target, TYPE value) {
  218. string_toStream(target, value);
  219. }
  220. // Append multiple elements
  221. template<typename HEAD, typename... TAIL>
  222. inline void string_append(String& target, HEAD head, TAIL... tail) {
  223. string_append(target, head);
  224. string_append(target, tail...);
  225. }
  226. // Combine a number of strings, characters and numbers
  227. // If an input type is rejected, create a Printable object to wrap around it
  228. template<typename... ARGS>
  229. inline String string_combine(ARGS... args) {
  230. String result;
  231. string_append(result, args...);
  232. return result;
  233. }
  234. // Operations
  235. inline String operator+ (const ReadableString& a, const ReadableString& b) { return string_combine(a, b); }
  236. inline String operator+ (const char32_t* a, const ReadableString& b) { return string_combine(a, b); }
  237. inline String operator+ (const ReadableString& a, const char32_t* b) { return string_combine(a, b); }
  238. inline String operator+ (const String& a, const String& b) { return string_combine(a, b); }
  239. inline String operator+ (const char32_t* a, const String& b) { return string_combine(a, b); }
  240. inline String operator+ (const String& a, const char32_t* b) { return string_combine(a, b); }
  241. inline String operator+ (const String& a, const ReadableString& b) { return string_combine(a, b); }
  242. inline String operator+ (const ReadableString& a, const String& b) { return string_combine(a, b); }
  // Print information.
  // Combines all arguments into one String and writes it to std::cout.
  template <typename... ARGS>
  void printText(ARGS... args)
  {
    string_combine(args...).toStream(std::cout);
  }
  // Use for text printing that is useful when debugging but should not be given out in a release.
  //   Debug mode (NDEBUG not defined): forwards the arguments to printText.
  //   Release mode (NDEBUG defined): the body is empty, so nothing is printed.
  template <typename... ARGS>
  void debugText(ARGS... args)
  {
  #ifndef NDEBUG
    printText(args...);
  #endif
  }
  259. // Raise an exception
  260. // Only catch errors to display useful error messages, emergency backups or crash logs before terminating
  261. // Further execution after a partial transaction will break object invariants
  262. void throwErrorMessage(const String& message);
  263. template<typename... ARGS>
  264. void throwError(ARGS... args) {
  265. String result = string_combine(args...);
  266. throwErrorMessage(result);
  267. }
  268. // ---------------- Overloaded serialization ----------------
  269. // Templates reused for all types
  270. // The source must inherit from Printable or have its own string_feedIndented overload
  271. template<typename T>
  272. String& string_toStream(String& target, const T& source) {
  273. return string_toStreamIndented(target, source, U"");
  274. }
  275. template<typename T>
  276. String string_toStringIndented(const T& source, const ReadableString& indentation) {
  277. String result;
  278. string_toStreamIndented(result, source, indentation);
  279. return result;
  280. }
  281. template<typename T>
  282. String string_toString(const T& source) {
  283. return string_toStringIndented(source, U"");
  284. }
  285. template<typename T>
  286. std::ostream& string_toStreamIndented(std::ostream& target, const T& source, const ReadableString& indentation) {
  287. return target << string_toStringIndented(source, indentation);
  288. }
  // Prints source without indentation into a std::ostream.
  //   target: The stream to write to.
  //   source: The value to print; it is serialized with string_toString first.
  // Returns the stream reference handed back by ReadableString::toStream.
  template<typename T>
  std::ostream& string_toStream(std::ostream& target, const T& source) {
    // Write through the toStream member (the same route printText takes) instead of operator<<,
    // because this header declares no operator<< accepting a String.
    return string_toString(source).toStream(target);
  }
  293. // ---------------- Below uses hard-coded portability for specific operating systems ----------------
  294. // Get a path separator for the target operating system.
  295. const char32_t* file_separator();
  296. }
  297. #endif