Browse Source

Used the new character case functions for string matching, and extended case conversion to handle Unicode characters up to Ÿ (376).

David Piuva 2 months ago
parent
commit
e85519929a
2 changed files with 45 additions and 27 deletions
  1. 39 25
      Source/DFPSR/api/stringAPI.cpp
  2. 6 2
      Source/test/tests/StringTest.cpp

+ 39 - 25
Source/DFPSR/api/stringAPI.cpp

@@ -88,31 +88,7 @@ String Printable::toString() const {
 
 
 Printable::~Printable() {}
 Printable::~Printable() {}
 
 
-bool dsr::string_match(const ReadableString& a, const ReadableString& b) {
-	if (a.view.length != b.view.length) {
-		return false;
-	} else {
-		for (intptr_t i = 0; i < a.view.length; i++) {
-			if (a[i] != b[i]) {
-				return false;
-			}
-		}
-		return true;
-	}
-}
-
-bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b) {
-	if (a.view.length != b.view.length) {
-		return false;
-	} else {
-		for (intptr_t i = 0; i < a.view.length; i++) {
-			if (towupper(a[i]) != towupper(b[i])) {
-				return false;
-			}
-		}
-		return true;
-	}
-}
+// TODO: Handle ʼn (329) and the remaining Unicode characters after Ÿ (376).
 
 
 DsrChar dsr::character_upperCase(DsrChar character) {
 DsrChar dsr::character_upperCase(DsrChar character) {
 	if (U'a' <= character && character <= U'z') { // a (97) to z (122) Ascii
 	if (U'a' <= character && character <= U'z') { // a (97) to z (122) Ascii
@@ -121,8 +97,14 @@ DsrChar dsr::character_upperCase(DsrChar character) {
 		return character - (U'à' - U'À');
 		return character - (U'à' - U'À');
 	} else if (U'ø' <= character && character <= U'þ') { // ø (248) to þ (254) Latin-1
 	} else if (U'ø' <= character && character <= U'þ') { // ø (248) to þ (254) Latin-1
 		return character - (U'ø' - U'Ø');
 		return character - (U'ø' - U'Ø');
+	} else if (character == U'ÿ') { // ÿ (255)
+		return U'Ÿ'; // Ÿ (376)
 	} else if (U'Ā' <= character && character <= U'ķ') { // Ā (256) to ķ (311)
 	} else if (U'Ā' <= character && character <= U'ķ') { // Ā (256) to ķ (311)
 		return character & ~DsrChar(1);
 		return character & ~DsrChar(1);
+	} else if (U'Ĺ' <= character && character <= U'ň' && !(character & 1)) { // Even from Ĺ (313) to ň (328)
+		return character - 1;
+	} else if (U'Ŋ' <= character && character <= U'ŷ') { // Ŋ (330) to ŷ (375)
+		return character & ~DsrChar(1);
 	} else {
 	} else {
 		return character;
 		return character;
 	}
 	}
@@ -135,8 +117,14 @@ DsrChar dsr::character_lowerCase(DsrChar character) {
 		return character + (U'à' - U'À');
 		return character + (U'à' - U'À');
 	} else if (U'Ø' <= character && character <= U'Þ') { // Ø (216) to Þ (222) Latin-1
 	} else if (U'Ø' <= character && character <= U'Þ') { // Ø (216) to Þ (222) Latin-1
 		return character + (U'ø' - U'Ø');
 		return character + (U'ø' - U'Ø');
+	} else if (character == U'Ÿ') { // Ÿ (376)
+		return U'ÿ'; // ÿ (255)
 	} else if (U'Ā' <= character && character <= U'ķ') { // Ā (256) to ķ (311)
 	} else if (U'Ā' <= character && character <= U'ķ') { // Ā (256) to ķ (311)
 		return character | DsrChar(1);
 		return character | DsrChar(1);
+	} else if (U'Ĺ' <= character && character <= U'ň' && character & 1) { // Odd from Ĺ (313) to ň (328)
+		return character + 1;
+	} else if (U'Ŋ' <= character && character <= U'ŷ') { // Ŋ (330) to ŷ (375)
+		return character | DsrChar(1);
 	} else {
 	} else {
 		return character;
 		return character;
 	}
 	}
@@ -160,6 +148,32 @@ String dsr::string_lowerCase(const ReadableString &text) {
 	return result;
 	return result;
 }
 }
 
 
+bool dsr::string_match(const ReadableString& a, const ReadableString& b) {
+	if (a.view.length != b.view.length) {
+		return false;
+	} else {
+		for (intptr_t i = 0; i < a.view.length; i++) {
+			if (a[i] != b[i]) {
+				return false;
+			}
+		}
+		return true;
+	}
+}
+
+bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b) {
+	if (a.view.length != b.view.length) {
+		return false;
+	} else {
+		for (intptr_t i = 0; i < a.view.length; i++) {
+			if (character_upperCase(a[i]) != character_upperCase(b[i])) {
+				return false;
+			}
+		}
+		return true;
+	}
+}
+
 static intptr_t findFirstNonWhite(const ReadableString &text) {
 static intptr_t findFirstNonWhite(const ReadableString &text) {
 	for (intptr_t i = 0; i < text.view.length; i++) {
 	for (intptr_t i = 0; i < text.view.length; i++) {
 		DsrChar c = text[i];
 		DsrChar c = text[i];

+ 6 - 2
Source/test/tests/StringTest.cpp

@@ -187,8 +187,10 @@ START_TEST(String)
 	ASSERT_EQUAL(dsr::string_upperCase(U"ABC123"), U"ABC123");
 	ASSERT_EQUAL(dsr::string_upperCase(U"ABC123"), U"ABC123");
 	ASSERT_EQUAL(dsr::string_upperCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ");
 	ASSERT_EQUAL(dsr::string_upperCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ");
 	ASSERT_EQUAL(dsr::string_upperCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ");
 	ASSERT_EQUAL(dsr::string_upperCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ");
-	ASSERT_EQUAL(dsr::string_upperCase(U"øùúûüýþØÙÚÛÜÝÞ"), U"ØÙÚÛÜÝÞØÙÚÛÜÝÞ");
+	ASSERT_EQUAL(dsr::string_upperCase(U"ÿøùúûüýþŸØÙÚÛÜÝÞ"), U"ŸØÙÚÛÜÝÞŸØÙÚÛÜÝÞ");
 	ASSERT_EQUAL(dsr::string_upperCase(U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ"), U"ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ");
 	ASSERT_EQUAL(dsr::string_upperCase(U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ"), U"ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ");
+	ASSERT_EQUAL(dsr::string_upperCase(U"313 to 328 ĺļľŀłńņň ĹĻĽĿŁŃŅŇ"), U"313 TO 328 ĹĻĽĿŁŃŅŇ ĹĻĽĿŁŃŅŇ");
+	ASSERT_EQUAL(dsr::string_upperCase(U"330 to 375 ŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷ ŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶ"), U"330 TO 375 ŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶ ŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶ");
 	// Lower case
 	// Lower case
 	ASSERT_EQUAL(dsr::string_lowerCase(U"a"), U"a");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"a"), U"a");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"aB"), U"ab");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"aB"), U"ab");
@@ -198,8 +200,10 @@ START_TEST(String)
 	ASSERT_EQUAL(dsr::string_lowerCase(U"ABC123"), U"abc123");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"ABC123"), U"abc123");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"!%& abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ"), U"!%& abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"àáâãäåæçèéêëìíîïðñòóôõöàáâãäåæçèéêëìíîïðñòóôõö");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"àáâãäåæçèéêëìíîïðñòóôõöÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ"), U"àáâãäåæçèéêëìíîïðñòóôõöàáâãäåæçèéêëìíîïðñòóôõö");
-	ASSERT_EQUAL(dsr::string_lowerCase(U"øùúûüýþØÙÚÛÜÝÞ"), U"øùúûüýþøùúûüýþ");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"ÿøùúûüýþŸØÙÚÛÜÝÞ"), U"ÿøùúûüýþÿøùúûüýþ");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ"), U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ");
 	ASSERT_EQUAL(dsr::string_lowerCase(U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶ"), U"āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ ĸ āăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"313 to 328 ĺļľŀłńņň ĹĻĽĿŁŃŅŇ"), U"313 to 328 ĺļľŀłńņň ĺļľŀłńņň");
+	ASSERT_EQUAL(dsr::string_lowerCase(U"330 to 375 ŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷ ŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶ"), U"330 to 375 ŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷ ŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷ");
 	// White space removal by pointing to a section of the original input
 	// White space removal by pointing to a section of the original input
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U" "), U"");
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U" "), U"");
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U"  abc  "), U"abc");
 	ASSERT_EQUAL(dsr::string_removeOuterWhiteSpace(U"  abc  "), U"abc");