|
|
@@ -59,6 +59,9 @@ enum class LineEncoding {
|
|
|
Lf // Linux and Macintosh compatible (Might not work on non-portable text editors on Microsoft Windows)
|
|
|
};
|
|
|
|
|
|
+// Replacing String with a ReadableString reference for input arguments can make passing of U"" literals faster.
|
|
|
+// Unlike String, it cannot be constructed from a "" literal, because UTF-32 is used internally.
|
|
|
+// Trying to assign a String to a ReadableString by value will fail, because String contains more members.
|
|
|
class ReadableString {
|
|
|
IMPL_ACCESS:
|
|
|
// A local pointer to the sub-allocation
|
|
|
@@ -71,25 +74,18 @@ public:
|
|
|
// Returning the character by value prevents writing to memory that might be a constant literal or shared with other strings
|
|
|
DsrChar operator[] (int64_t index) const;
|
|
|
public:
|
|
|
- // Empty string
|
|
|
+ // Empty string U""
|
|
|
ReadableString();
|
|
|
+ // Implicit casting from U"text"
|
|
|
+ // Do not use ReadableString for heap-allocated memory that might be freed during the string's lifetime!
|
|
|
+ // String can handle dynamic memory and should be used in that case.
|
|
|
+ ReadableString(const DsrChar *content);
|
|
|
// Destructor
|
|
|
virtual ~ReadableString();
|
|
|
- // UTF-32 litteral from U""
|
|
|
- // WARNING! May crash if content is freed, even if ReadableString is freed before
|
|
|
- // ReadableString may share its buffer with sub-strings of the same type
|
|
|
- ReadableString(const DsrChar *content);
|
|
|
IMPL_ACCESS:
|
|
|
- // Returns true iff the range is safely inside of the string
|
|
|
- bool checkBound(int64_t start, int64_t length, bool warning = true) const;
|
|
|
// Internal constructor
|
|
|
ReadableString(const DsrChar *content, int64_t length);
|
|
|
public:
|
|
|
- // Create a string from an existing string
|
|
|
- // When there's no reference counter, it's important that the memory remains allocated until the application terminates
|
|
|
- // Just like when reading elements in a for loop, out-of-range only causes an exception if length > 0
|
|
|
- // Length lesser than 1 will always return an empty string
|
|
|
- virtual ReadableString getRange(int64_t start, int64_t length) const;
|
|
|
// Converting to unknown character encoding using only the ascii character subset
|
|
|
// A bug in GCC linking forces these to be virtual
|
|
|
virtual std::ostream& toStream(std::ostream& out) const;
|
|
|
@@ -114,8 +110,8 @@ public:
|
|
|
};
|
|
|
|
|
|
// A safe and simple string type
|
|
|
-// Can be constructed from ascii litterals "", but U"" is more universal
|
|
|
-// Can be used without ReadableString, but ReadableString can be wrapped over U"" litterals without allocation
|
|
|
+// Can be constructed from ascii literals "", but U"" will preserve unicode characters.
|
|
|
+// Can be used without ReadableString, but ReadableString can be wrapped over U"" literals without allocation
|
|
|
// UTF-32
|
|
|
// Endianness is native
|
|
|
// No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
|
|
|
@@ -127,13 +123,9 @@ IMPL_ACCESS:
|
|
|
char32_t* writeSection = nullptr;
|
|
|
// Internal constructor
|
|
|
String(Buffer buffer, DsrChar *content, int64_t length);
|
|
|
-public:
|
|
|
// The number of DsrChar characters that can be contained in the allocation before reaching the buffer's end
|
|
|
// This doesn't imply that it's always okay to write to the remaining space, because the buffer may be shared
|
|
|
int64_t capacity();
|
|
|
- // Create a string from the existing buffer without allocating any heap memory
|
|
|
- ReadableString getRange(int64_t start, int64_t length) const override;
|
|
|
-IMPL_ACCESS:
|
|
|
// Replaces the buffer with a new buffer holding at least newLength characters
|
|
|
// Guarantees that the new buffer is not shared by other strings, so that it may be written to freely
|
|
|
void reallocateBuffer(int64_t newLength, bool preserve);
|
|
|
@@ -245,12 +237,23 @@ ReadableString string_from(const ReadableString& source, int64_t inclusiveStart)
|
|
|
// Example: string_after(U"0123456789", 5) == U"6789"
|
|
|
ReadableString string_after(const ReadableString& source, int64_t exclusiveStart);
|
|
|
|
|
|
+// TODO: Should string_split and string_split_inPlace be removed now that string_split_callback is both safer and faster?
|
|
|
+// This would remove the dependency on List, in case that one wants a different container.
|
|
|
+
|
|
|
+// Warning!
|
|
|
+// A ReadableString generated by splitting a String must not be used after that String's lifetime has ended.
|
|
|
+// ReadableString does not allocate any heap memory but is only a view for data allocated elsewhere.
|
|
|
+// Use string_split_callback if you want something safer.
|
|
|
// Post-condition:
|
|
|
// Returns a list of strings from source by splitting along separator.
|
|
|
// The separating characters are excluded from the resulting strings.
|
|
|
// The number of strings returned in the list will equal the number of separating characters plus one, so the result may contain empty strings.
|
|
|
// Each string in the list reuses memory from the input string using reference counting, but the list itself will be allocated.
|
|
|
List<ReadableString> string_split(const ReadableString& source, DsrChar separator);
|
|
|
+// Warning!
|
|
|
+// A ReadableString generated by splitting a String must not be used after that String's lifetime has ended.
|
|
|
+// ReadableString does not allocate any heap memory but is only a view for data allocated elsewhere.
|
|
|
+// Use string_split_callback if you want something safer.
|
|
|
// Use string_split_inPlace instead of string_split if you want to reuse the memory of an existing list.
|
|
|
// It will then only allocate when running out of buffer space.
|
|
|
// Side-effects:
|