فهرست منبع

Include unicode check in case conversions.

ToLower and ToUpper now include Unicode case checks, to avoid creating new strings when the input is already in the correct case.
Brucey 9 ماه پیش
والد
کامیت
3b0da54f68
2 فایلهای تغییر یافته به همراه 189 افزوده شده و 14 حذف شده
  1. 62 14
      blitz.mod/blitz_string.c
  2. 127 0
      blitz.mod/tests/test.bmx

+ 62 - 14
blitz.mod/blitz_string.c

@@ -849,13 +849,37 @@ BBString *bbStringToLower( BBString *str ){
 	int n = 0;
 	
 	while (n < str->length) {
-		int c=str->buf[n];
-		// ascii upper or other unicode char
-		if (c >= 192 || (c>='A' && c<='Z')) {
-			break;
-		}
-		++n;
-	}
+        int c = str->buf[n];
+        if (c < 192) {
+            // ASCII character
+            if (c >= 'A' && c <= 'Z') {
+                // Found an uppercase ASCII character
+                break;
+            }
+        } else {
+            // Unicode character
+            // Check if the character is an uppercase Unicode character
+            int lo = 0, hi = (3828 / 4) - 1; // sizeof(bbToLowerData) = 3828
+            int is_upper = 0;
+            while (lo <= hi) {
+                int mid = (lo + hi) / 2;
+                int upper = bbToLowerData[mid * 2];
+                if (c < upper) {
+                    hi = mid - 1;
+                } else if (c > upper) {
+                    lo = mid + 1;
+                } else {
+                    // Found an uppercase Unicode character
+                    is_upper = 1;
+                    break;
+                }
+            }
+            if (is_upper) {
+                break;
+            }
+        }
+        ++n;
+    }
 	
 	if (n == str->length) {
 		return str;
@@ -896,13 +920,37 @@ BBString *bbStringToUpper( BBString *str ){
 	int n = 0;
 	
 	while (n < str->length) {
-		int c=str->buf[n];
-		// ascii lower or other unicode char
-		if (c >= 181 || (c>='a' && c<='z')) {
-			break;
-		}
-		++n;
-	}
+        int c = str->buf[n];
+        if (c < 181) {
+            // ASCII character
+            if (c >= 'a' && c <= 'z') {
+                // Found a lowercase ASCII character
+                break;
+            }
+        } else {
+            // Unicode character
+            // Check if the character is a lowercase Unicode character
+            int lo = 0, hi = (3860 / 4) - 1; // sizeof(bbToUpperData) = 3860
+            int is_lower = 0;
+            while (lo <= hi) {
+                int mid = (lo + hi) / 2;
+                int lower = bbToUpperData[mid * 2];
+                if (c < lower) {
+                    hi = mid - 1;
+                } else if (c > lower) {
+                    lo = mid + 1;
+                } else {
+                    // Found a lowercase Unicode character
+                    is_lower = 1;
+                    break;
+                }
+            }
+            if (is_lower) {
+                break;
+            }
+        }
+        ++n;
+    }
 	
 	if (n == str->length) {
 		return str;

+ 127 - 0
blitz.mod/tests/test.bmx

@@ -10,6 +10,15 @@ Type TStringTest Extends TTest
 	Field bigUnicode:UInt[] = [$10300, $10301, $10302, $10303, $10304, $10305, 0]
 	Field unicode:Int[] = [1055, 1088, 1080, 1074, 1077, 1090]
 	Field utf8:Byte[] = [208, 159, 209, 128, 208, 184, 208, 178, 208, 181, 209, 130, 0]
+
+	Const HELLO_UPPER:String = "HELLO"
+	Const HELLO_LOWER:String = "hello"
+	Const UMLAUT_UPPER:String = "123ÄÖÜABC"
+	Const UMLAUT_LOWER:String = "123äöüabc"
+	Const ARABIC_UPPER:String = "123كلمة"
+	Const ARABIC_LOWER:String = "123كلمة"
+	Const CYRILLIC_UPPER:String = "123БУДИНОК"
+	Const CYRILLIC_LOWER:String = "123будинок"
 	
 	Method setup() { before }
 	End Method
@@ -33,6 +42,124 @@ Type TStringTest Extends TTest
 
 	End Method
 
+	Method testASCIIToLower() { test }
+		Local s:String = HELLO_UPPER
+		assertEquals(HELLO_LOWER, s.ToLower())
+
+		Local obj:Object = HELLO_LOWER
+		Local obj1:Object = HELLO_LOWER.ToLower()
+
+		assertTrue(obj = obj1, "Already lowercase ASCII strings should return the same object")
+
+	End Method
+
+	Method testASCIIToUpper() { test }
+		Local s:String = HELLO_LOWER
+		assertEquals(HELLO_UPPER, s.ToUpper())
+
+		Local obj:Object = HELLO_UPPER
+		Local obj1:Object = HELLO_UPPER.ToUpper()
+
+		assertTrue(obj = obj1, "Already uppercase ASCII strings should return the same object")
+
+	End Method
+
+	Method testUnicodeToLower() { test }
+
+		Local s:String = UMLAUT_UPPER
+		assertEquals(UMLAUT_LOWER, s.ToLower())
+
+		Local obj:Object = UMLAUT_LOWER
+		Local obj1:Object = UMLAUT_LOWER.ToLower()
+
+		assertTrue(obj = obj1, "Already lowercase Unicode strings should return the same object")
+
+	End Method
+
+	Method testUnicodeToUpper() { test }
+
+		Local s:String = UMLAUT_LOWER
+		assertEquals(UMLAUT_UPPER, s.ToUpper())
+
+		Local obj:Object = UMLAUT_UPPER
+		Local obj1:Object = UMLAUT_UPPER.ToUpper()
+
+		assertTrue(obj = obj1, "Already uppercase Unicode strings should return the same object")
+
+	End Method
+
+	Method testArabicToLower() { test }
+
+		Local s:String = ARABIC_UPPER
+		assertEquals(ARABIC_LOWER, s.ToLower(), "Arabic lower case")
+
+		Local obj:Object = ARABIC_LOWER
+		Local obj1:Object = ARABIC_LOWER.ToLower()
+
+		assertTrue(obj = obj1, "Already lowercase Arabic strings should return the same object")
+
+	End Method
+
+	Method testArabicToUpper() { test }
+
+		Local s:String = ARABIC_LOWER
+		assertEquals(ARABIC_UPPER, s.ToUpper(), "Arabic upper case")
+
+		Local obj:Object = ARABIC_UPPER
+		Local obj1:Object = ARABIC_UPPER.ToUpper()
+
+		assertTrue(obj = obj1, "Already uppercase Arabic strings should return the same object")
+
+	End Method
+
+	Method testArabicUpperToLower() { test }
+
+		Local s:String = ARABIC_UPPER
+		assertEquals(ARABIC_UPPER, s.ToLower(), "Arabic lower case and upper case should be the same")
+
+		Local obj:Object = ARABIC_UPPER
+		Local obj1:Object = ARABIC_UPPER.ToLower()
+
+		assertTrue(obj = obj1, "Uppercase Arabic strings should return the same object when lowered")
+
+	End Method
+
+	Method testArabicLowerToUpper() { test }
+
+		Local s:String = ARABIC_LOWER
+		assertEquals(ARABIC_LOWER, s.ToUpper(), "Arabic upper case and lower case should be the same")
+
+		Local obj:Object = ARABIC_LOWER
+		Local obj1:Object = ARABIC_LOWER.ToUpper()
+
+		assertTrue(obj = obj1, "Lowercase Arabic strings should return the same object when uppered")
+
+	End Method
+
+	Method testCyrillicToLower() { test }
+
+		Local s:String = CYRILLIC_UPPER
+		assertEquals(CYRILLIC_LOWER, s.ToLower(), "Cyrillic lower case")
+
+		Local obj:Object = CYRILLIC_LOWER
+		Local obj1:Object = CYRILLIC_LOWER.ToLower()
+
+		assertTrue(obj = obj1, "Already lowercase Cyrillic strings should return the same object")
+
+	End Method
+
+	Method testCyrrilicToUpper() { test }
+
+		Local s:String = CYRILLIC_LOWER
+		assertEquals(CYRILLIC_UPPER, s.ToUpper(), "Cyrillic upper case")
+
+		Local obj:Object = CYRILLIC_UPPER
+		Local obj1:Object = CYRILLIC_UPPER.ToUpper()
+
+		assertTrue(obj = obj1, "Already uppercase Cyrillic strings should return the same object")
+
+	End Method
+
 End Type
 
 Struct STestStruct