Browse Source

Got rid of the buggy towlower and towupper functions from std and began implementing character conversion that actually works for the Latin-1 Unicode subset.

David Piuva 2 months ago
parent
commit
437ca534af
3 changed files with 36 additions and 2 deletions
  1. 26 2
      Source/DFPSR/api/stringAPI.cpp
  2. 4 0
      Source/DFPSR/api/stringAPI.h
  3. 6 0
      Source/test/tests/StringTest.cpp

+ 26 - 2
Source/DFPSR/api/stringAPI.cpp

@@ -114,11 +114,35 @@ bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableStr
 	}
 }
 
+DsrChar dsr::character_upperCase(DsrChar character) { // Maps a lower case letter in a-z, à-ö or ø-þ to its upper case counterpart; every other code point is returned unchanged.
+	if (U'a' <= character && character <= U'z') { // a (97) to z (122)
+		return character - (U'a' - U'A'); // The upper case ASCII letters sit 32 code points below the lower case ones.
+	} else if (U'à' <= character && character <= U'ö') { // à (224) to ö (246), stopping before the division sign ÷ (247), which is not a letter.
+		return character - (U'à' - U'À'); // Same distance of 32 as for ASCII: à (224) becomes À (192).
+	} else if (U'ø' <= character && character <= U'þ') { // ø (248) to þ (254), so ÿ (255) is not converted by this range.
+		return character - (U'ø' - U'Ø'); // ø (248) becomes Ø (216).
+	} else {
+		return character; // Not one of the handled lower case letters; this includes ß (223) and ÿ (255).
+	}
+}
+
+DsrChar dsr::character_lowerCase(DsrChar character) { // Maps an upper case letter in A-Z, À-Ö or Ø-Þ to its lower case counterpart; every other code point is returned unchanged.
+	if (U'A' <= character && character <= U'Z') { // A (65) to Z (90)
+		return character + (U'a' - U'A'); // The lower case ASCII letters sit 32 code points above the upper case ones.
+	} else if (U'À' <= character && character <= U'Ö') { // À (192) to Ö (214), stopping before the multiplication sign × (215), which is not a letter.
+		return character + (U'à' - U'À'); // Same distance of 32 as for ASCII: À (192) becomes à (224).
+	} else if (U'Ø' <= character && character <= U'Þ') { // Ø (216) to Þ (222), so ß (223) is left as it is.
+		return character + (U'ø' - U'Ø'); // Ø (216) becomes ø (248).
+	} else {
+		return character; // Not one of the handled upper case letters.
+	}
+}
+
 String dsr::string_upperCase(const ReadableString &text) { // Builds a new string by passing each character of text through the per-character upper case conversion.
 	String result;
 	string_reserve(result, text.view.length); // Presumably pre-allocates room for the result, which receives one character per input character.
 	for (intptr_t i = 0; i < text.view.length; i++) {
-		string_appendChar(result, towupper(text[i]));
+		string_appendChar(result, character_upperCase(text[i]));
 	}
 	return result;
 }
@@ -127,7 +151,7 @@ String dsr::string_lowerCase(const ReadableString &text) {
 	String result;
 	string_reserve(result, text.view.length);
 	for (intptr_t i = 0; i < text.view.length; i++) {
-		string_appendChar(result, towlower(text[i]));
+		string_appendChar(result, character_lowerCase(text[i]));
 	}
 	return result;
 }

+ 4 - 0
Source/DFPSR/api/stringAPI.h

@@ -419,6 +419,10 @@ inline void string_split_callback(const ReadableString& source, DsrChar separato
 // Useful for pre-allocation.
 intptr_t string_splitCount(const ReadableString& source, DsrChar separator);
 
+// Post-condition: Returns the upper case version of character if it is a lower case letter in a-z, à-ö or ø-þ, otherwise returning character as is (including ß and ÿ).
+DsrChar character_upperCase(DsrChar character);
+// Post-condition: Returns the lower case version of character if it is an upper case letter in A-Z, À-Ö or Ø-Þ, otherwise returning character as is.
+DsrChar character_lowerCase(DsrChar character);
 // Post-condition: Returns true iff c is a digit.
 //   Digit <- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
 bool character_isDigit(DsrChar c);

+ 6 - 0
Source/test/tests/StringTest.cpp

@@ -185,6 +185,9 @@ START_TEST(String)
 	ASSERT_EQUAL(dsr::string_upperCase(U"abc1"), U"ABC1");
 	ASSERT_EQUAL(dsr::string_upperCase(U"Abc12"), U"ABC12");
 	ASSERT_EQUAL(dsr::string_upperCase(U"ABC123"), U"ABC123");
+	ASSERT_EQUAL(dsr::string_upperCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+	ASSERT_EQUAL(dsr::string_upperCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ");
+	ASSERT_EQUAL(dsr::string_upperCase(U"øùúûüýþØÙÚÛÜÝÞ"), U"ØÙÚÛÜÝÞØÙÚÛÜÝÞ");
 	// Lower case
 	ASSERT_EQUAL(dsr::string_lowerCase(U"a"), U"a");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"aB"), U"ab");
@@ -192,6 +195,9 @@ START_TEST(String)
 	ASSERT_EQUAL(dsr::string_lowerCase(U"abc1"), U"abc1");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"Abc12"), U"abc12");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"ABC123"), U"abc123");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"àáâãäåæçèéêëìíîïðñòóôõöàáâãäåæçèéêëìíîïðñòóôõö");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"øùúûüýþØÙÚÛÜÝÞ"), U"øùúûüýþøùúûüýþ");
 	// White space removal by pointing to a section of the original input
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U" "), U"");
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U"  abc  "), U"abc");