Pārlūkot izejas kodu

missing methods for UnicodeString #8298 #8175

Aleksandr Kuzmenko 6 gadi atpakaļ
vecāks
revīzija
49e3465874
2 mainītis faili ar 316 papildinājumiem un 0 dzēšanām
  1. 229 0
      std/UnicodeString.hx
  2. 87 0
      tests/unit/src/unitstd/UnicodeString.unit.hx

+ 229 - 0
std/UnicodeString.hx

@@ -178,6 +178,235 @@ abstract UnicodeString(String) from String to String {
 		return null;
 	}
 
+	/**
+		Returns the position of the leftmost occurrence of `str` within `this`
+		String. Positions are counted in Unicode code points, so a surrogate
+		pair counts as a single character.
+
+		If `startIndex` is given, the search is performed within the substring
+		of `this` String starting from `startIndex` (if `startIndex` is positive
+		or 0) or `max(this.length + startIndex, 0)` (if `startIndex` is negative).
+
+		If `startIndex` exceeds `this.length`, -1 is returned.
+
+		Otherwise the search is performed within `this` String. In either case,
+		the returned position is relative to the beginning of `this` String.
+
+		If `str` cannot be found, or if `str` is empty, -1 is returned.
+	**/
+	public function indexOf(str:String, ?startIndex:Int):Int {
+		if(startIndex == null) {
+			startIndex = 0;
+		} else if(startIndex < 0) {
+			// May still be negative; every position then satisfies `>= startIndex`.
+			startIndex = (this:UnicodeString).length + startIndex;
+		}
+
+		// An empty needle has never produced a match here; keep that explicit
+		// instead of relying on reads past the end of `str`.
+		if(str.length == 0) {
+			return -1;
+		}
+
+		var unicodeOffset = 0;
+		var nativeOffset = 0;
+		while(nativeOffset < this.length) {
+			if(unicodeOffset >= startIndex) {
+				// Try to match the whole needle starting at this code point.
+				// Every candidate position gets a fresh attempt, so a failed
+				// partial match can never hide a match that starts inside it
+				// (e.g. looking for "aab" in "aaab").
+				var haystackOffset = nativeOffset;
+				var matchingOffset = 0;
+				while(matchingOffset < str.length && haystackOffset < this.length) {
+					var c2 = StringTools.utf16CodePointAt(str, matchingOffset);
+					if(StringTools.utf16CodePointAt(this, haystackOffset) != c2) {
+						break;
+					}
+					// Equal code points occupy the same number of UTF-16 units.
+					var step = (c2 >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+					matchingOffset += step;
+					haystackOffset += step;
+				}
+				if(matchingOffset >= str.length) {
+					return unicodeOffset;
+				}
+			}
+
+			// Advance by one code point: two native units for a surrogate pair.
+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+			unicodeOffset++;
+		}
+		return -1;
+	}
+
+	/**
+		Returns the position of the rightmost occurrence of `str` within `this`
+		String. Positions are counted in Unicode code points, so a surrogate
+		pair counts as a single character.
+
+		If `startIndex` is given, the search is performed within the substring
+		of `this` String from 0 to `startIndex + str.length`, i.e. only matches
+		that begin at or before `startIndex` are considered. A negative
+		`startIndex` is treated as 0. Otherwise the search is performed within
+		`this` String. In either case, the returned position is relative to the
+		beginning of `this` String.
+
+		If `str` cannot be found, or if `str` is empty, -1 is returned.
+	**/
+	public function lastIndexOf(str:String, ?startIndex:Int):Int {
+		if(startIndex == null) {
+			// The native length is never smaller than the code point count,
+			// so this places no limit on the search.
+			startIndex = this.length;
+		} else if(startIndex < 0) {
+			startIndex = 0;
+		}
+
+		// An empty needle has never produced a match here; keep that explicit
+		// instead of relying on reads past the end of `str`.
+		if(str.length == 0) {
+			return -1;
+		}
+
+		var unicodeOffset = 0;
+		var nativeOffset = 0;
+		var result = -1;
+		while(nativeOffset < this.length && unicodeOffset <= startIndex) {
+			// Try to match the whole needle starting at this code point.
+			// Every candidate position gets a fresh attempt, so overlapping
+			// occurrences (e.g. "яя" in "яяя") are all seen and the last one wins.
+			var haystackOffset = nativeOffset;
+			var matchingOffset = 0;
+			while(matchingOffset < str.length && haystackOffset < this.length) {
+				var c2 = StringTools.utf16CodePointAt(str, matchingOffset);
+				if(StringTools.utf16CodePointAt(this, haystackOffset) != c2) {
+					break;
+				}
+				// Equal code points occupy the same number of UTF-16 units.
+				var step = (c2 >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+				matchingOffset += step;
+				haystackOffset += step;
+			}
+			if(matchingOffset >= str.length) {
+				result = unicodeOffset;
+			}
+
+			// Advance by one code point: two native units for a surrogate pair.
+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+			unicodeOffset++;
+		}
+		return result;
+	}
+
+	/**
+		Returns `len` characters of `this` String, starting at position `pos`.
+		Positions and lengths are counted in Unicode code points, so a surrogate
+		pair counts as a single character.
+
+		If `len` is omitted, all characters from position `pos` to the end of
+		`this` String are included.
+
+		If `pos` is negative, its value is calculated from the end of `this`
+		String by `this.length + pos`. If this yields a negative value, 0 is
+		used instead.
+
+		If the calculated position + `len` exceeds `this.length`, the characters
+		from that position to the end of `this` String are returned.
+
+		If `len` is negative, `this.length + len` is used instead. If `len` is 0,
+		or is still not positive after that adjustment, the empty String `""`
+		is returned.
+	**/
+	public function substr(pos:Int, ?len:Int):String {
+		if(pos < 0) {
+			pos = (this:UnicodeString).length + pos;
+			if(pos < 0) {
+				pos = 0;
+			}
+		}
+		// Only normalize `len` when it was actually supplied. Comparing an
+		// omitted (null) `len` against 0 must not be relied upon: where null
+		// compares as `<= 0` the call would return "" instead of the tail.
+		if(len != null) {
+			if(len < 0) {
+				len = (this:UnicodeString).length + len;
+			}
+			if(len <= 0) {
+				return "";
+			}
+		}
+
+		var unicodeOffset = 0;
+		var nativeOffset = 0;
+		// Native (UTF-16 unit) offset of the first requested character; -1 until reached.
+		var fromOffset = -1;
+		var subLength = 0;
+		while(nativeOffset < this.length) {
+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
+
+			if(unicodeOffset >= pos) {
+				if(fromOffset < 0) {
+					if(len == null) {
+						// No length limit: everything from here on is the result.
+						return this.substr(nativeOffset);
+					}
+					fromOffset = nativeOffset;
+				}
+				subLength++;
+				if(subLength >= len) {
+					// Include the second unit when the last character is a surrogate pair.
+					var lastOffset = (c < StringTools.MIN_SURROGATE_CODE_POINT ? nativeOffset : nativeOffset + 1);
+					return this.substr(fromOffset, lastOffset - fromOffset + 1);
+				}
+			}
+
+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+			unicodeOffset++;
+		}
+		// Ran out of characters: return what is available from `pos`, if anything.
+		return (fromOffset < 0 ? "" : this.substr(fromOffset));
+	}
+
+	/**
+		Extracts the characters of `this` String located between `startIndex`
+		(inclusive) and `endIndex` (exclusive). Both indexes are counted in
+		Unicode code points, so a surrogate pair counts as a single character.
+
+		Negative values of `startIndex` or `endIndex` are replaced by 0.
+
+		When `startIndex` is greater than `endIndex`, the two are exchanged.
+
+		When `endIndex` is omitted, or (after a possible exchange) lies beyond
+		the end of `this` String, the result extends to the end of `this` String.
+
+		When the (possibly exchanged) `startIndex` lies beyond the end of `this`
+		String, or both indexes are equal, the empty String `""` is returned.
+	**/
+	public function substring(startIndex:Int, ?endIndex:Int):String {
+		var first = (startIndex < 0 ? 0 : startIndex);
+		// Number of code points to extract; only meaningful when `endIndex` is given.
+		var wanted = 0;
+		if(endIndex != null) {
+			var last:Int = (endIndex < 0 ? 0 : endIndex);
+			if(first == last) {
+				return "";
+			}
+			if(first > last) {
+				var swap = first;
+				first = last;
+				last = swap;
+			}
+			wanted = last - first;
+		}
+
+		// Phase 1: walk over the `first` leading code points to find the
+		// native (UTF-16 unit) offset where the result begins.
+		var unit = 0;
+		var skipped = 0;
+		while(unit < this.length && skipped < first) {
+			unit += (StringTools.utf16CodePointAt(this, unit) >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+			skipped++;
+		}
+		if(unit >= this.length) {
+			// The start position is at or beyond the end of the string.
+			return "";
+		}
+		if(endIndex == null) {
+			return this.substr(unit);
+		}
+
+		// Phase 2: consume `wanted` code points, or as many as are left.
+		var begin = unit;
+		var taken = 0;
+		while(unit < this.length) {
+			unit += (StringTools.utf16CodePointAt(this, unit) >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
+			taken++;
+			if(taken >= wanted) {
+				return this.substr(begin, unit - begin);
+			}
+		}
+		return this.substr(begin);
+	}
+
 	function get_length():Int {
 		var l = 0;
 		for(c in new StringIteratorUnicode(this)) {

+ 87 - 0
tests/unit/src/unitstd/UnicodeString.unit.hx

@@ -28,6 +28,93 @@ s.charCodeAt(2) == codes[2];
 s.charCodeAt(3) == null;
 s.charCodeAt(-1) == null;
 
+// indexOf: expected positions are counted in code points, not UTF-16 units
+var s:UnicodeString = "𠜎zяяw"; // "𠜎" lies outside the BMP but is a single character at index 0
+s.indexOf("𠜎") == 0;
+s.indexOf("z") == 1;
+s.indexOf("я") == 2;
+s.indexOf("zя") == 1;
+s.indexOf("w") == 4;
+s.indexOf("яw") == 3; // the first "я" starts a partial match that fails on "w"
+s.indexOf("f") == -1;
+s.indexOf("я", 0) == 2;
+s.indexOf("я", 1) == 2;
+s.indexOf("я", 2) == 2;
+s.indexOf("я", 3) == 3;
+s.indexOf("я", 4) == -1;
+s.indexOf("я", 40) == -1; // startIndex beyond the end of the string
+s.indexOf("я", -1) == -1; // negative startIndex counts back from the end
+s.indexOf("я", -2) == 3;
+s.indexOf("я", -3) == 2;
+s.indexOf("я", -4) == 2;
+s.indexOf("я", -5) == 2;
+s.indexOf("я", -50) == 2; // far below zero: the whole string is searched
+
+// lastIndexOf: expected positions are counted in code points, not UTF-16 units
+var s:UnicodeString = "𠜎zяяw";
+s.lastIndexOf("𠜎") == 0;
+s.lastIndexOf("z") == 1;
+s.lastIndexOf("я") == 3;
+s.lastIndexOf("zя") == 1;
+s.lastIndexOf("яw") == 3;
+s.lastIndexOf("f") == -1;
+s.lastIndexOf("я", 0) == -1; // only matches starting at or before startIndex count
+s.lastIndexOf("я", 1) == -1;
+s.lastIndexOf("я", 2) == 2;
+s.lastIndexOf("я", 3) == 3;
+s.lastIndexOf("я", 4) == 3;
+s.lastIndexOf("я", 40) == 3; // startIndex beyond the end of the string
+s.lastIndexOf("я", -1) == -1; // negative startIndex is treated as 0
+s.lastIndexOf("𠜎z", -1) == 0;
+
+// substr: pos and len are counted in code points, not UTF-16 units
+var s:UnicodeString = "𠜎zяяw";
+s.substr(0) == "𠜎zяяw";
+s.substr(1) == "zяяw";
+s.substr(5) == ""; // pos equal to the number of characters
+s.substr(4) == "w";
+s.substr(3) == "яw";
+s.substr(-1) == "w"; // negative pos counts back from the end
+s.substr(-2) == "яw";
+s.substr(-4) == "zяяw";
+s.substr(-5) == "𠜎zяяw";
+s.substr(-100) == "𠜎zяяw"; // far below zero: clamped to 0
+s.substr(0, 0) == "";
+s.substr(0, 1) == "𠜎"; // one character, two UTF-16 units
+s.substr(0, 2) == "𠜎z";
+s.substr(0, 100) == "𠜎zяяw";
+s.substr(0, -1) == "𠜎zяя"; // negative len is taken as length + len
+s.substr(0, -2) == "𠜎zя";
+s.substr(0, -100) == "";
+
+// substring: indexes are counted in code points; negatives clamp to 0 and reversed bounds are swapped
+var s:UnicodeString = "𠜎zяяw";
+s.substring(0, 0) == "";
+s.substring(0, 1) == "𠜎";
+s.substring(1, 0) == "𠜎"; // swapped to (0, 1)
+s.substring(0, 2) == "𠜎z";
+s.substring(2, 0) == "𠜎z";
+s.substring(-1, 0) == "";
+s.substring(0, -1) == "";
+s.substring(-1, -1) == "";
+s.substring(-1, 1) == "𠜎";
+s.substring(1, -1) == "𠜎";
+s.substring(-1, 2) == "𠜎z";
+s.substring(2, -1) == "𠜎z";
+s.substring(0) == "𠜎zяяw";
+s.substring(1) == "zяяw";
+s.substring(2) == "яяw";
+s.substring(0, -1) == ""; // NOTE(review): repeats the same assertion made earlier in this section
+s.substring(5, 0) == "𠜎zяяw";
+s.substring(0, 100) == "𠜎zяяw";
+s.substring(100, 120) == "";
+s.substring(100, 0) == "𠜎zяяw";
+s.substring(120, 100) == "";
+s.substring(1, 4) == "zяя";
+s.substring(4, 1) == "zяя";
+
+var s = new UnicodeString("𠜎zя"); // re-declare `s` for the operator tests that follow
+
 // @:op(UnicodeString)
 var s2 = new UnicodeString("𠜎z");
 s != s2;