Browse Source

2005-11-14 Atsushi Enomoto <[email protected]>

	* SimpleCollator.cs :
	  - Added CompareOrdinalIgnoreCase() for NET_2_0 RTM.
	  - Removed an extra parameter from LastIndexOfSortKey().
	  - LastIndexOf() should use GetTailContraction for the source string.
	    And then, target could match in the middle of the possible
	    "replacement contraction" of the source string, so use
	    LastIndexOfSortKey() to catch them.
	  - Fixed GetTailContraction() that caused index out of range.

	* CompareInfoTest.cs : added more tests for some specific cultures
	  (say, Japanese) that have replacement contraction in ASCII range
	  and caused incorrect results for LastIndexOf() and all dependent
	  members.


svn path=/trunk/mcs/; revision=53003
Atsushi Eno 20 years ago
parent
commit
ebe83beef3

+ 11 - 0
mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog

@@ -1,3 +1,14 @@
+2005-11-14  Atsushi Enomoto  <[email protected]>
+
+	* SimpleCollator.cs :
+	  - Added CompareOrdinalIgnoreCase() for NET_2_0 RTM.
+	  - Reduced extra parameter from LastIndexOfSortKey().
+	  - LastIndexOf() should use GetTailContraction for the source string.
+	    And then, target could match in the middle of the possible
+	    "replacement contraction" of the source string, so use
+	    LastIndexOfSortKey() to catch them.
+	  - Fixed GetTailContraction() that caused index out of range.
+
 2005-11-11  Atsushi Enomoto  <[email protected]>
 
 	* Makefile : Now use MONO_DISABLE_MANAGED_COLLATION.

+ 42 - 17
mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs

@@ -187,7 +187,7 @@ namespace Mono.Globalization.Unicode
 
 /*
 // dump tailoring table
-Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
+Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
 culture.LCID, contractions.Length, level2Maps.Length);
 foreach (Contraction c in contractions) {
 foreach (char cc in c.Source)
@@ -313,20 +313,22 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
 		Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
 		{
+			if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
+				throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
 			for (int i = 0; i < clist.Length; i++) {
 				Contraction ct = clist [i];
-				int diff = ct.Source [0] - s [end];
+				int diff = ct.Source [0] - s [end + 1];
 				if (diff > 0)
 					return null; // it's already sorted
 				else if (diff < 0)
 					continue;
 				char [] chars = ct.Source;
-				if (start - end + 1 < chars.Length)
-					continue;
+
 				bool match = true;
-				int offset = start - chars.Length + 1;
+				if (chars.Length > start - end)
+					continue;
 				for (int n = 0; n < chars.Length; n++)
-					if (s [offset + n] != chars [n]) {
+					if (s [start - n] != chars [chars.Length - 1 - n]) {
 						match = false;
 						break;
 					}
@@ -667,6 +669,23 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 				len1 == min ? - 1 : 1;
 		}
 
+		// Ordinal, case-insensitive comparison of s1 [idx1, idx1 + len1) against s2 [idx2, idx2 + len2).
+		// Returns the difference of the first differing folded chars; on a tie the shorter range sorts first.
+		private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1, string s2, int idx2, int len2)
+		{
+			int min = len1 < len2 ? len1 : len2;
+			if (idx1 < 0 || idx2 < 0 || idx1 + min > s1.Length || idx2 + min > s2.Length)
+				throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
+			TextInfo ti = invariant.textInfo;
+			for (int i = 0; i < min; i++) {
+				// Fold to invariant UPPER case (not lower) so chars between 'Z' and 'a' order as in .NET.
+				int diff = ti.ToUpper (s1 [idx1 + i]) - ti.ToUpper (s2 [idx2 + i]);
+				if (diff != 0)
+					return diff;
+			}
+			return len1 == len2 ? 0 : len1 == min ? - 1 : 1;
+		}
+
 		public unsafe int Compare (string s1, int idx1, int len1,
 			string s2, int idx2, int len2, CompareOptions options)
 		{
@@ -681,6 +700,10 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 //				return 0;
 			if (options == CompareOptions.Ordinal)
 				return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
+#if NET_2_0
+			if (options == CompareOptions.OrdinalIgnoreCase)
+				return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
+#endif
 
 #if false // stable easy version, depends on GetSortKey().
 			SortKey sk1 = GetSortKey (s1, idx1, len1, options);
@@ -1156,7 +1179,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 			return IsSuffix (src, target, src.Length - 1, src.Length, opt);
 		}
 
-		public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
+		public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
 		{
 			if (target.Length == 0)
 				return true;
@@ -1448,7 +1471,8 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 			ClearBuffer (sk1, 4);
 			ClearBuffer (sk2, 4);
 
-			// If target is contraction, then use string search.
+			// If target is a replacement contraction, then use 
+			// string search.
 			Contraction ct = GetContraction (target);
 			if (ct != null) {
 				if (ct.Replacement != null)
@@ -1460,7 +1484,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 						sk2 [bi] = ct.SortKey [bi];
 					return LastIndexOfSortKey (opt, s, start,
 						start, length, sk2,
-						char.MinValue, -1, true,
+						-1, true,
 						checkedFlags, ref prev, sk1);
 				}
 			}
@@ -1472,14 +1496,14 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 					targetSortKey [2] = Level2 (ti, ExtenderType.None);
 				targetSortKey [3] = Uni.Level3 (ti);
 				return LastIndexOfSortKey (opt, s, start, start,
-					length, targetSortKey, target,
+					length, targetSortKey,
 					ti, !Uni.HasSpecialWeight ((char) ti),
 					checkedFlags, ref prev, sk1);
 			}
 		}
 
 		// Searches target byte[] keydata
-		unsafe int LastIndexOfSortKey (COpt opt, string s, int start, int orgStart, int length, byte* sortkey, char target, int ti, bool noLv4, byte* checkedFlags, ref PreviousInfo prev, byte* sk)
+		unsafe int LastIndexOfSortKey (COpt opt, string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, byte* checkedFlags, ref PreviousInfo prev, byte* sk)
 		{
 			int end = start - length;
 			int idx = start;
@@ -1541,7 +1565,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 						start, length, checkedFlags,
 						targetSortKey, ref prev, sk1, sk2);
 				else
-					idx = LastIndexOfSortKey (opt, s, start, orgStart, length, sk, tc, ti, noLv4, checkedFlags, ref prev, sk1);
+					idx = LastIndexOfSortKey (opt, s, start, orgStart, length, sk, ti, noLv4, checkedFlags, ref prev, sk1);
 				if (idx < 0)
 					return -1;
 				length -= start - idx;
@@ -1736,7 +1760,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 				idx--;
 			}
 			if (ext == ExtenderType.None)
-				ct = GetContraction (s, idx, end);
+				ct = GetTailContraction (s, idx, end);
 			// if lv4 exists, it never matches contraction
 			if (ct != null) {
 				idx -= ct.Source.Length;
@@ -1751,10 +1775,11 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
 					// Here is the core of LAMESPEC
 					// described at the top of the source.
 					int dummy = ct.Replacement.Length - 1;
-					return MatchesBackward (opt, 
-						ct.Replacement, ref dummy,
-						dummy, -1, ti, sortkey, noLv4,
-						checkedFlags, ref prev, charSortKey);
+					return 0 <= LastIndexOfSortKey (opt,
+						ct.Replacement, dummy, dummy,
+						ct.Replacement.Length, sortkey,
+						ti, noLv4, checkedFlags,
+						ref prev, charSortKey);
 				}
 			} else if (ext == ExtenderType.None) {
 				if (si < 0)

+ 7 - 0
mcs/class/corlib/Test/System.Globalization/ChangeLog

@@ -1,3 +1,10 @@
+2005-11-14  Atsushi Enomoto  <[email protected]>
+
+	* CompareInfoTest.cs : added more tests for some specific cultures
+	  (say, Japanese) that have replacement contraction in ASCII range
+	  and caused incorrect results for LastIndexOf() and all dependent
+	  members.
+
 2005-10-14  Ben Maurer  <[email protected]>
 
 	* DateTimeFormatInfoTest.cs: New test to make sure we do not

+ 17 - 0
mcs/class/corlib/Test/System.Globalization/CompareInfoTest.cs

@@ -785,6 +785,9 @@ public class CompareInfoTest : Assertion
 		AssertLastIndexOf ("#8-2", 1, "UAEE", '\u00C6');
 		AssertLastIndexOf ("#9", -1, "UA", '\u00C6');
 		AssertLastIndexOf ("#10", -1, "UE", '\u00C6');
+		AssertLastIndexOf ("#11", 0, "\\", '\\');
+		AssertEquals ("#11en", 0, new CultureInfo ("en").CompareInfo.LastIndexOf ("\\", '\\'));
+		AssertEquals ("#11ja", 0, new CultureInfo ("ja").CompareInfo.LastIndexOf ("\\", '\\'));
 	}
 
 	[Test]
@@ -821,6 +824,9 @@ public class CompareInfoTest : Assertion
 		AssertIsPrefix ("#14", true, "\uff21\uff21", "\uff21", CompareOptions.None);
 		AssertIsPrefix ("#15", true, "\uff21\uff21", "\u3007\uff21", CompareOptions.None);
 		AssertIsPrefix ("#16", true, "\uff21\uff21", "\uff21\u3007", CompareOptions.None);
+		AssertIsPrefix ("#17", true, "\\b\\a a", "\\b\\a a");
+		Assert ("#17en", new CultureInfo ("en").CompareInfo.IsPrefix ("\\b\\a a", "\\b\\a a"));
+		Assert ("#17ja", new CultureInfo ("ja").CompareInfo.IsPrefix ("\\b\\a a", "\\b\\a a"));
 	}
 
 	[Test]
@@ -889,6 +895,10 @@ public class CompareInfoTest : Assertion
 		// empty suffix always matches the source.
 		AssertIsSuffix ("#17", true, "", "");
 		AssertIsSuffix ("#18", true, "/test.css", "");
+		AssertIsSuffix ("#19", true, "/test.css", "/test.css");
+		AssertIsSuffix ("#20", true, "\\b\\a a", "\\b\\a a");
+		Assert ("#20en", new CultureInfo ("en").CompareInfo.IsSuffix ("\\b\\a a", "\\b\\a a"));
+		Assert ("#20ja", new CultureInfo ("ja").CompareInfo.IsSuffix ("\\b\\a a", "\\b\\a a"));
 	}
 
 	[Test]
@@ -929,6 +939,10 @@ public class CompareInfoTest : Assertion
 		AssertIndexOf ("#16", -1, String.Empty, "\u3007");
 		AssertIndexOf ("#17", 0, "A", "\u3007");
 		AssertIndexOf ("#18", 0, "ABC", "\u3007");
+
+		AssertIndexOf ("#19", 0, "\\b\\a a", "\\b\\a a");
+		AssertEquals ("#19en", 0, new CultureInfo ("en").CompareInfo.IndexOf ("\\b\\a a", "\\b\\a a"));
+		AssertEquals ("#19ja", 0, new CultureInfo ("ja").CompareInfo.IndexOf ("\\b\\a a", "\\b\\a a"));
 	}
 
 	[Test]
@@ -981,6 +995,9 @@ public class CompareInfoTest : Assertion
 		AssertLastIndexOf ("#16", 1, "\uff21\uff21", "\uff21\u3007", CompareOptions.None);
 		AssertLastIndexOf ("#17", 1, "\uff21\uff21", "\u3007", CompareOptions.None);
 		AssertLastIndexOf ("#18", 1, "\u3007\uff21", "\uff21", CompareOptions.None);
+		AssertLastIndexOf ("#19", 0, "\\b\\a a", "\\b\\a a");
+		AssertEquals ("#19en", 0, new CultureInfo ("en").CompareInfo.LastIndexOf ("\\b\\a a", "\\b\\a a"));
+		AssertEquals ("#19ja", 0, new CultureInfo ("ja").CompareInfo.LastIndexOf ("\\b\\a a", "\\b\\a a"));
 	}
 
 	[Test]