Browse Source

Made safety checks for manually identifying numbers before parsing.

David Piuva 5 years ago
parent
commit
aa4d1184ce

+ 75 - 7
Source/DFPSR/base/text.cpp

@@ -45,10 +45,6 @@ static char toAscii(DsrChar c) {
 	}
 }
 
-static bool isWhiteSpace(DsrChar c) {
-	return c <= U' ' || c == U'\t' || c == U'\r';
-}
-
 String& Printable::toStream(String& target) const {
 	return this->toStreamIndented(target, U"");
 }
@@ -146,7 +142,7 @@ String dsr::string_removeAllWhiteSpace(const ReadableString &text) {
 	result.reserve(text.length());
 	for (int i = 0; i < text.length(); i++) {
 		DsrChar c = text[i];
-		if (!isWhiteSpace(c)) {
+		if (!character_isWhiteSpace(c)) {
 			result.appendChar(c);
 		}
 	}
@@ -158,14 +154,14 @@ ReadableString dsr::string_removeOuterWhiteSpace(const ReadableString &text) {
 	int last = -1;
 	for (int i = 0; i < text.length(); i++) {
 		DsrChar c = text[i];
-		if (!isWhiteSpace(c)) {
+		if (!character_isWhiteSpace(c)) {
 			first = i;
 			break;
 		}
 	}
 	for (int i = text.length() - 1; i >= 0; i--) {
 		DsrChar c = text[i];
-		if (!isWhiteSpace(c)) {
+		if (!character_isWhiteSpace(c)) {
 			last = i;
 			break;
 		}
@@ -773,3 +769,75 @@ ReadableString dsr::string_after(const ReadableString& source, int exclusiveStar
 	return string_from(source, exclusiveStart + 1);
 }
 
+bool dsr::character_isDigit(DsrChar c) { // True only for the digits '0' to '9'.
+	return U'0' <= c && c <= U'9';
+}
+
+bool dsr::character_isIntegerCharacter(DsrChar c) { // True for a digit or the negation sign.
+	return character_isDigit(c) || c == U'-';
+}
+
+bool dsr::character_isValueCharacter(DsrChar c) { // True for a digit, the negation sign or the decimal dot.
+	return character_isIntegerCharacter(c) || c == U'.';
+}
+
+bool dsr::character_isWhiteSpace(DsrChar c) { // '\t', '\n', '\v', '\f' and '\r' are the consecutive codes 9 to 13, so one range plus the space covers the set.
+	return (c >= U'\t' && c <= U'\r') || c == U' ';
+}
+
+// Macros for implementing regular expressions with a greedy approach consuming the first match
+//   They read from a local named source, advance a local named readIndex, and FORCED/PLUS return false from the enclosing function on a mismatch
+//   Optional accepts 0 or 1 occurrence, Forced accepts exactly 1 occurrence
+//   Star accepts 0..N occurrences, Plus accepts 1..N occurrences
+//   CHARACTER_* compares against one given character, PATTERN_* calls character_is##PATTERN on the current character
+#define CHARACTER_OPTIONAL(CHARACTER) if (source[readIndex] == CHARACTER) { readIndex++; }
+#define CHARACTER_FORCED(CHARACTER) if (source[readIndex] == CHARACTER) { readIndex++; } else { return false; }
+#define CHARACTER_STAR(CHARACTER) while (source[readIndex] == CHARACTER) { readIndex++; }
+#define CHARACTER_PLUS(CHARACTER) CHARACTER_FORCED(CHARACTER) CHARACTER_STAR(CHARACTER)
+#define PATTERN_OPTIONAL(PATTERN) if (character_is##PATTERN(source[readIndex])) { readIndex++; }
+#define PATTERN_FORCED(PATTERN) if (character_is##PATTERN(source[readIndex])) { readIndex++; } else { return false; }
+#define PATTERN_STAR(PATTERN) while (character_is##PATTERN(source[readIndex])) { readIndex++; }
+#define PATTERN_PLUS(PATTERN) PATTERN_FORCED(PATTERN) PATTERN_STAR(PATTERN)
+
+// Returns true iff the whole of source matches WhiteSpace* '-'? Digit+ WhiteSpace*, where the white-space is only accepted when allowWhiteSpace is true. Greedy matching is enough, because the pattern has no ambiguity.
+bool dsr::string_isInteger(const ReadableString& source, bool allowWhiteSpace) {
+	int readIndex = 0;
+	if (allowWhiteSpace) {
+		PATTERN_STAR(WhiteSpace);
+	}
+	CHARACTER_OPTIONAL(U'-');
+	// At least one digit required, and only digits, because another '-' after the optional sign is not part of an integer
+	PATTERN_PLUS(Digit);
+	if (allowWhiteSpace) {
+		PATTERN_STAR(WhiteSpace);
+	}
+	return readIndex == source.length(); // Anything left unconsumed is trailing garbage, so the text is not an integer
+}
+
+// Returns true iff source is a number. With a decimal dot, the whole of source must match WhiteSpace* '-'? Digit* '.' Digit+ WhiteSpace* (white-space only when allowWhiteSpace is true). Without one, the decision is delegated to string_isInteger.
+bool dsr::string_isDouble(const ReadableString& source, bool allowWhiteSpace) {
+	// Solving the UnsignedDouble <- Digit+ | Digit* '.' Digit+ ambiguity is done easiest by checking if there's a decimal first, so that Digit* never consumes the digits that Digit+ needs
+	if (string_findFirst(source, U'.') == -1) {
+		// No decimal detected
+		return string_isInteger(source, allowWhiteSpace);
+	} else {
+		int readIndex = 0;
+		if (allowWhiteSpace) {
+			PATTERN_STAR(WhiteSpace);
+		}
+		// Double <- UnsignedDouble | '-' UnsignedDouble
+		CHARACTER_OPTIONAL(U'-');
+		// UnsignedDouble <- Digit* '.' Digit+
+		// Any number of integer digits, but no further negation signs
+		PATTERN_STAR(Digit);
+		// Only dot for decimal
+		CHARACTER_FORCED(U'.')
+		// At least one decimal digit
+		PATTERN_PLUS(Digit);
+		if (allowWhiteSpace) {
+			PATTERN_STAR(WhiteSpace);
+		}
+		return readIndex == source.length(); // Anything left unconsumed is trailing garbage, so the text is not a number
+	}
+}
+

+ 29 - 1
Source/DFPSR/base/text.h

@@ -238,10 +238,38 @@ List<ReadableString> string_split(const ReadableString& source, DsrChar separato
 //   If appendResult is true, the result is appended to the existing target list.
 void string_split_inPlace(List<ReadableString> &target, const ReadableString& source, DsrChar separator, bool appendResult = false);
 
+// Post-condition: Returns true iff c is a digit.
+//   Digit <- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+bool character_isDigit(DsrChar c);
+// Post-condition: Returns true iff c is an integer character.
+//   IntegerCharacter <- '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+bool character_isIntegerCharacter(DsrChar c);
+// Post-condition: Returns true iff c is a value character.
+//   ValueCharacter <- '.' | '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+bool character_isValueCharacter(DsrChar c);
+// Post-condition: Returns true iff c is a white-space character.
+//   WhiteSpace <- ' ' | '\t' | '\v' | '\f' | '\n' | '\r'
+//   Null terminators are excluded, because it's reserved for out of bound results.
+bool character_isWhiteSpace(DsrChar c);
+// Post-condition: Returns true iff source is a valid integer. IntegerAllowingWhiteSpace is also allowed iff allowWhiteSpace is true.
+//   UnsignedInteger <- Digit+
+//   Integer <- UnsignedInteger | '-' UnsignedInteger
+//   IntegerAllowingWhiteSpace <- WhiteSpace* Integer WhiteSpace*
+bool string_isInteger(const ReadableString& source, bool allowWhiteSpace = true);
+// Post-condition: Returns true iff source is a valid integer or decimal number. DoubleAllowingWhiteSpace is also allowed iff allowWhiteSpace is true.
+//   UnsignedDouble <- Digit+ | Digit* '.' Digit+
+//   Double <- UnsignedDouble | '-' UnsignedDouble
+//   DoubleAllowingWhiteSpace <- WhiteSpace* Double WhiteSpace*
+// Only dots are allowed as decimals.
+//   Because being able to read files from another country without crashes is a lot more important than a detail that most people don't even notice.
+//   Automatic nationalization made sense when most applications were written in-house before the internet existed.
+bool string_isDouble(const ReadableString& source, bool allowWhiteSpace = true);
+// Pre-condition: source must be a valid integer according to string_isInteger. Otherwise unexpected characters are simply ignored.
 // Post-condition: Returns the integer representation of source.
 // The result is signed, because the input might unexpectedly have a negation sign.
 // The result is large, so that one can easily check the range before assigning to a smaller integer type.
 int64_t string_toInteger(const ReadableString& source);
+// Pre-condition: source must be a valid double according to string_isDouble. Otherwise unexpected characters are simply ignored.
 // Post-condition: Returns the double precision floating-point representation of source.
 double string_toDouble(const ReadableString& source);
 
@@ -263,7 +291,7 @@ String string_upperCase(const ReadableString &text);
 // Post-condition: Returns text converted to lower case.
 String string_lowerCase(const ReadableString &text);
 
-// Post-condition: Returns a clone of text without any white-space (space, tab and carriage-return).
+// Post-condition: Returns a clone of text without any white-space (space, tab, vertical tab, form feed, line feed and carriage-return).
 String string_removeAllWhiteSpace(const ReadableString &text);
 // Post-condition: Returns a sub-set of text without surrounding white-space (space, tab and carriage-return).
 // Unlike string_removeAllWhiteSpace, string_removeOuterWhiteSpace does not require allocating a new buffer.

+ 7 - 2
Source/DFPSR/persistent/atomic/PersistentInteger.cpp

@@ -28,8 +28,13 @@ using namespace dsr;
 PERSISTENT_DEFINITION(PersistentInteger)
 
 bool PersistentInteger::assignValue(const ReadableString &text) {
-	this->value = string_toInteger(text);
-	return true; // TODO: Discriminate bad input
+	// Guard: text that is not an integer resets the value to zero and reports failure
+	if (!string_isInteger(text)) {
+		this->value = 0;
+		return false;
+	}
+	this->value = string_toInteger(text);
+	return true;
 }
 
 String& PersistentInteger::toStreamIndented(String& out, const ReadableString& indentation) const {

+ 5 - 12
Source/SDK/sandbox/sprite/spriteAPI.cpp

@@ -952,32 +952,25 @@ void sprite_generateFromModel(ImageRgbaU8& targetAtlas, String& targetConfigText
 	}
 }
 
-static bool isDigit(DsrChar c) {
-	return c >= U'0' && c <= U'9';
-}
-static bool isValue(DsrChar c) {
-	return c == U'-' || c == U'.' || isDigit(c);
-}
-
 // Allowing the last decimals to deviate a bit because floating-point operations are rounded differently between computers
 static bool approximateTextMatch(const ReadableString &a, const ReadableString &b, double tolerance = 0.00002) {
 	int readerA = 0, readerB = 0;
 	while (readerA < string_length(a) && readerB < string_length(b)) {
 		DsrChar charA = a[readerA];
 		DsrChar charB = b[readerB];
-		if (isValue(charA) && isValue(charB)) {
+		if (character_isValueCharacter(charA) && character_isValueCharacter(charB)) {
 			// Scan forward on both sides while consuming content and comparing the actual value
 			int startA = readerA;
 			int startB = readerB;
 			// Only move forward on valid characters
 			if (a[readerA] == U'-') { readerA++; }
 			if (b[readerB] == U'-') { readerB++; }
-			while (isDigit(a[readerA])) { readerA++; }
-			while (isDigit(b[readerB])) { readerB++; }
+			while (character_isDigit(a[readerA])) { readerA++; }
+			while (character_isDigit(b[readerB])) { readerB++; }
 			if (a[readerA] == U'.') { readerA++; }
 			if (b[readerB] == U'.') { readerB++; }
-			while (isDigit(a[readerA])) { readerA++; }
-			while (isDigit(b[readerB])) { readerB++; }
+			while (character_isDigit(a[readerA])) { readerA++; }
+			while (character_isDigit(b[readerB])) { readerB++; }
 			// Approximate values
 			double valueA = string_toDouble(string_exclusiveRange(a, startA, readerA));
 			double valueB = string_toDouble(string_exclusiveRange(b, startB, readerB));

+ 88 - 1
Source/test/tests/StringTest.cpp

@@ -29,6 +29,19 @@ START_TEST(String)
 		ASSERT_EQUAL(string_length(U"abc"), 3);
 		ASSERT_EQUAL(string_length(U"0123456789"), 10);
 	}
+	{ // Reading characters
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[0], U'A');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[1], U'B');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[2], U'C');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[3], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[10], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[1000000], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[-1], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"ABC")[-1000000], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"")[-1], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"")[0], U'\0');
+		ASSERT_EQUAL(dsr::ReadableString(U"")[1], U'\0');
+	}
 	{ // Comparison
 		dsr::ReadableString litA = U"Testing \u0444";
 		dsr::ReadableString litB = U"Testing ф";
@@ -79,6 +92,80 @@ START_TEST(String)
 		dsr::String values = dsr::string_combine(U"x = ", x, U", y = ", y, U", z = ", z);
 		ASSERT_MATCH(values, U"x = 0, y = -123456, z = 100200300400500600");
 	}
+	{ // Identifying numbers
+		ASSERT_EQUAL(dsr::character_isDigit(U'0' - 1), false);
+		ASSERT_EQUAL(dsr::character_isDigit(U'0'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'1'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'2'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'3'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'4'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'5'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'6'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'7'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'8'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'9'), true);
+		ASSERT_EQUAL(dsr::character_isDigit(U'9' + 1), false);
+		ASSERT_EQUAL(dsr::character_isDigit(U'a'), false);
+		ASSERT_EQUAL(dsr::character_isDigit(U' '), false);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'-'), true);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'0' - 1), false);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'0'), true);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'9'), true);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'9' + 1), false);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U'a'), false);
+		ASSERT_EQUAL(dsr::character_isIntegerCharacter(U' '), false);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'-'), true);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'.'), true);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'0' - 1), false);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'0'), true);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'9'), true);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'9' + 1), false);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U'a'), false);
+		ASSERT_EQUAL(dsr::character_isValueCharacter(U' '), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U' '), true);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'\t'), true);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'\r'), true);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'\0'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'a'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'1'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'('), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U')'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'.'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U','), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'-'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'_'), false);
+		ASSERT_EQUAL(dsr::character_isWhiteSpace(U'|'), false);
+		ASSERT_EQUAL(dsr::string_isInteger(U"0"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U"1"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U"-0"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U"-1"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U"0", false), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U" 0 "), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U" 0 ", false), false);
+		ASSERT_EQUAL(dsr::string_isInteger(U" 123"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U"-123"), true);
+		ASSERT_EQUAL(dsr::string_isInteger(U""), false);
+		ASSERT_EQUAL(dsr::string_isDouble(U"0"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-0"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"1.1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-1.1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U".1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-.1"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"0", false), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U" 0 "), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U" 0 ", false), false);
+		ASSERT_EQUAL(dsr::string_isDouble(U" 123"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-123"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"0.5"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-0.5"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U".5"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-.5"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"0.54321"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U"-0.54321"), true);
+		ASSERT_EQUAL(dsr::string_isDouble(U""), false);
+	}
 	// Upper case
 	ASSERT_MATCH(dsr::string_upperCase(U"a"), U"A");
 	ASSERT_MATCH(dsr::string_upperCase(U"aB"), U"AB");
@@ -96,7 +183,7 @@ START_TEST(String)
 	// Complete white space removal
 	ASSERT_MATCH(dsr::string_removeAllWhiteSpace(U" "), U"");
 	ASSERT_MATCH(dsr::string_removeAllWhiteSpace(U" abc\n	"), U"abc");
-	ASSERT_MATCH(dsr::string_removeAllWhiteSpace(U" a \f sentence \r surrounded	\n by space	\a"), U"asentencesurroundedbyspace");
+	ASSERT_MATCH(dsr::string_removeAllWhiteSpace(U" a \f sentence \r surrounded	\n by spa\vce	\t"), U"asentencesurroundedbyspace");
 	// White space removal by pointing to a section of the original input
 	ASSERT_MATCH(dsr::string_removeOuterWhiteSpace(U" "), U"");
 	ASSERT_MATCH(dsr::string_removeOuterWhiteSpace(U"  abc  "), U"abc");