Browse Source

Removed bound error messages by clamping sub-strings.

David Piuva 5 years ago
parent
commit
5c8c17ebed
3 changed files with 33 additions and 61 deletions
  1. 8 40
      Source/DFPSR/base/text.cpp
  2. 21 18
      Source/DFPSR/base/text.h
  3. 4 3
      Source/test/tests/StringTest.cpp

+ 8 - 40
Source/DFPSR/base/text.cpp

@@ -639,24 +639,6 @@ Buffer dsr::string_saveToMemory(const ReadableString& content, CharacterEncoding
 	return result;
 }
 
-bool ReadableString::checkBound(int64_t start, int64_t length, bool warning) const {
-	if (start < 0 || start + length > this->length) {
-		if (warning) {
-			String message;
-			string_append(message, U"\n");
-			string_append(message, U" _____________________ Sub-string bound exception! _____________________\n");
-			string_append(message, U"/\n");
-			string_append(message, U"|  Characters from ", start, U" to ", (start + length - 1), U" are out of bound!\n");
-			string_append(message, U"|  In source string of 0..", (this->length - 1), U".\n");
-			string_append(message, U"\\_______________________________________________________________________\n");
-			throwError(message);
-		}
-		return false;
-	} else {
-		return true;
-	}
-}
-
 DsrChar ReadableString::operator[] (int64_t index) const {
 	if (index < 0 || index >= this->length) {
 		return U'\0';
@@ -697,26 +679,6 @@ int64_t String::capacity() {
 	}
 }
 
-ReadableString ReadableString::getRange(int64_t start, int64_t length) const {
-	if (length < 1) {
-		return ReadableString();
-	} else if (this->checkBound(start, length)) {
-		return ReadableString(&(this->readSection[start]), length);
-	} else {
-		return ReadableString();
-	}
-}
-
-ReadableString String::getRange(int64_t start, int64_t length) const {
-	if (length < 1) {
-		return ReadableString();
-	} else if (this->checkBound(start, length)) {
-		return String(this->buffer, &(this->writeSection[start]), length);
-	} else {
-		return ReadableString();
-	}
-}
-
 static int32_t getNewBufferSize(int32_t minimumSize) {
 	if (minimumSize <= 128) {
 		return 128;
@@ -1005,11 +967,17 @@ int64_t dsr::string_findLast(const ReadableString& source, DsrChar toFind) {
 }
 
 ReadableString dsr::string_exclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t exclusiveEnd) {
-	return source.getRange(inclusiveStart, exclusiveEnd - inclusiveStart);
+	// Return empty string for each complete miss, and for empty or reversed ranges (start >= end) that would otherwise produce a negative length
+	if (inclusiveStart >= exclusiveEnd || inclusiveStart >= source.length || exclusiveEnd <= 0) { return ReadableString(); }
+	// Automatically clamping to valid range, so that partially overlapping requests return the part that exists
+	if (inclusiveStart < 0) { inclusiveStart = 0; }
+	if (exclusiveEnd > source.length) { exclusiveEnd = source.length; }
+	// Return the overlapping interval, whose length can no longer be negative because start < end held before clamping
+	return ReadableString(&(source.readSection[inclusiveStart]), exclusiveEnd - inclusiveStart);
 }
 
 ReadableString dsr::string_inclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t inclusiveEnd) {
-	return source.getRange(inclusiveStart, inclusiveEnd + 1 - inclusiveStart);
+	return string_exclusiveRange(source, inclusiveStart, inclusiveEnd + 1); // An inclusive end of N is an exclusive end of N + 1, so bound handling is delegated to string_exclusiveRange
 }
 
 ReadableString dsr::string_before(const ReadableString& source, int64_t exclusiveEnd) {

+ 21 - 18
Source/DFPSR/base/text.h

@@ -59,6 +59,9 @@ enum class LineEncoding {
 	Lf // Linux and Macintosh compatible (Might not work on non-portable text editors on Microsoft Windows)
 };
 
+// Replacing String with a ReadableString reference for input arguments can make passing of U"" literals faster.
+//   Unlike String, it cannot be constructed from a "" literal, because UTF-32 is used internally.
+//   Trying to assign String onto ReadableString by value will fail because String contains more members
 class ReadableString {
 IMPL_ACCESS:
 	// A local pointer to the sub-allocation
@@ -71,25 +74,18 @@ public:
 	// Returning the character by value prevents writing to memory that might be a constant literal or shared with other strings
 	DsrChar operator[] (int64_t index) const;
 public:
-	// Empty string
+	// Empty string U""
 	ReadableString();
+	// Implicit casting from U"text"
+	//   Do not use ReadableString for heap allocations that might be freed during the string's life!
+	//   String can handle dynamic memory and should be used in that case.
+	ReadableString(const DsrChar *content);
 	// Destructor
 	virtual ~ReadableString();
-	// UTF-32 litteral from U""
-	// WARNING! May crash if content is freed, even if ReadableString is freed before
-	//          ReadableString may share its buffer with sub-strings of the same type
-	ReadableString(const DsrChar *content);
 IMPL_ACCESS:
-	// Returns true iff the range is safely inside of the string
-	bool checkBound(int64_t start, int64_t length, bool warning = true) const;
 	// Internal constructor
 	ReadableString(const DsrChar *content, int64_t length);
 public:
-	// Create a string from an existing string
-	// When there's no reference counter, it's important that the memory remains allocated until the application terminates
-	// Just like when reading elements in a for loop, out-of-range only causes an exception if length > 0
-	//   Length lesser than 1 will always return an empty string
-	virtual ReadableString getRange(int64_t start, int64_t length) const;
 	// Converting to unknown character encoding using only the ascii character subset
 	// A bug in GCC linking forces these to be virtual
 	virtual std::ostream& toStream(std::ostream& out) const;
@@ -114,8 +110,8 @@ public:
 };
 
 // A safe and simple string type
-//   Can be constructed from ascii litterals "", but U"" is more universal
-//   Can be used without ReadableString, but ReadableString can be wrapped over U"" litterals without allocation
+//   Can be constructed from ascii literals "", but U"" will preserve unicode characters.
+//   Can be used without ReadableString, but ReadableString can be wrapped over U"" literals without allocation
 //   UTF-32
 //     Endianness is native
 //     No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
@@ -127,13 +123,9 @@ IMPL_ACCESS:
 	char32_t* writeSection = nullptr;
 	// Internal constructor
 	String(Buffer buffer, DsrChar *content, int64_t length);
-public:
 	// The number of DsrChar characters that can be contained in the allocation before reaching the buffer's end
 	//   This doesn't imply that it's always okay to write to the remaining space, because the buffer may be shared
 	int64_t capacity();
-	// Create a string from the existing buffer without allocating any heap memory
-	ReadableString getRange(int64_t start, int64_t length) const override;
-IMPL_ACCESS:
 	// Replaces the buffer with a new buffer holding at least newLength characters
 	// Guarantees that the new buffer is not shared by other strings, so that it may be written to freely
 	void reallocateBuffer(int64_t newLength, bool preserve);
@@ -245,12 +237,23 @@ ReadableString string_from(const ReadableString& source, int64_t inclusiveStart)
 //   Example: string_after(U"0123456789", 5) == U"6789"
 ReadableString string_after(const ReadableString& source, int64_t exclusiveStart);
 
+// TODO: Should string_split and string_split_inPlace be removed now that string_split_callback is both safer and faster?
+//       This would remove the dependency on List, in case that one wants a different container.
+
+// Warning!
+//   Do not use a ReadableString generated by splitting a String past the String's lifetime.
+//   ReadableString does not allocate any heap memory but is only a view for data allocated elsewhere.
+//   Use string_split_callback if you want something safer.
 // Post-condition:
 //   Returns a list of strings from source by splitting along separator.
 // The separating characters are excluded from the resulting strings.
 // The number of strings returned in the list will equal the number of separating characters plus one, so the result may contain empty strings.
 // Each string in the list reuses memory from the input string using reference counting, but the list itself will be allocated.
 List<ReadableString> string_split(const ReadableString& source, DsrChar separator);
+// Warning!
+//   Do not use a ReadableString generated by splitting a String past the String's lifetime.
+//   ReadableString does not allocate any heap memory but is only a view for data allocated elsewhere.
+//   Use string_split_callback if you want something safer.
 // Use string_split_inPlace instead of string_split if you want to reuse the memory of an existing list.
 //   It will then only allocate when running out of buffer space.
 // Side-effects:

+ 4 - 3
Source/test/tests/StringTest.cpp

@@ -73,9 +73,10 @@ START_TEST(String)
 		ASSERT_MATCH(dsr::string_exclusiveRange(efgh, 3, 4), U"h");
 		ASSERT_MATCH(dsr::string_combine(string_from(abcd, 2), string_before(efgh, 2)), U"cdef");
 		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, 0, 0), U""); // No size returns nothing
-		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, -1, -2), U""); // A negative size doesn't have to be inside
-		ASSERT_CRASH(dsr::string_inclusiveRange(abcd, -1, -1)); // Index below bound expected
-		ASSERT_CRASH(dsr::string_inclusiveRange(abcd, 4, 4)); // Index above bound expected
+		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, -670214452, 2), U"ab"); // Reading out of bound is clamped
+		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, 2, 985034841), U"cd"); // Reading out of bound is clamped
+		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, 4, 764), U""); // Completely out of bound returns nothing
+		ASSERT_MATCH(dsr::string_exclusiveRange(abcd, -631, 0), U""); // Completely out of bound returns nothing
 	}
 	{ // Processing
 		dsr::String buffer = U"Garbage";