6 rokov pred · 49e3465874
--- a/std/UnicodeString.hx
+++ b/std/UnicodeString.hx
@@ -178,6 +178,235 @@ abstract UnicodeString(String) from String to String {
 
				 		return null;
			
 
				 	}
			
 
				 
			
 
				+	/**
			
 
				+		Returns the position of the leftmost occurrence of `str` within `this`
			
 
				+		String.
			
 
				+
			
 
				+		If `startIndex` is given, the search is performed within the substring
			
 
				+		of `this` String starting from `startIndex` (if `startIndex` is positive
			
 
				+		or 0) or `max(this.length + startIndex, 0)` (if `startIndex` is negative).
			
 
				+
			
 
				+		If `startIndex` exceeds `this.length`, -1 is returned.
			
 
				+
			
 
				+		Otherwise the search is performed within `this` String. In either case,
			
 
				+		the returned position is relative to the beginning of `this` String.
			
 
				+
			
 
				+		If `str` cannot be found, -1 is returned.
			
 
				+	**/
			
 
				+	public function indexOf(str:String, ?startIndex:Int):Int {
				+		// Finds the leftmost occurrence of `str`, with `startIndex` and the
				+		// returned position both measured in code points (not UTF-16 units).
				+		// Returns -1 when there is no occurrence at or after `startIndex`.
				+		if(startIndex == null) {
				+			startIndex = 0;
				+		} else if(startIndex < 0) {
				+			// Negative values count back from the end, in code points. The sum
				+			// may still be negative, which simply means "search from the start".
				+			startIndex = (this:UnicodeString).length + startIndex;
				+		}
				+		// The previous implementation never reported a match for an empty
				+		// needle; keep that result explicitly instead of reading past the
				+		// end of `str`.
				+		if(str.length == 0) {
				+			return -1;
				+		}
				+
				+		var unicodeOffset = 0;
				+		var nativeOffset = 0;
				+		while(nativeOffset < this.length) {
				+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
				+
				+			if(unicodeOffset >= startIndex) {
				+				// Try to match the whole of `str` beginning at this code point.
				+				// Every candidate start is tested independently, so a failed
				+				// partial match cannot hide a later, overlapping occurrence
				+				// (e.g. "aab" inside "aaab").
				+				var thisOffset = nativeOffset;
				+				var matchingOffset = 0;
				+				while(matchingOffset < str.length && thisOffset < this.length) {
				+					var c1 = StringTools.utf16CodePointAt(this, thisOffset);
				+					if(c1 != StringTools.utf16CodePointAt(str, matchingOffset)) {
				+						break;
				+					}
				+					// Equal code points occupy the same number of UTF-16 units.
				+					var step = (c1 >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+					thisOffset += step;
				+					matchingOffset += step;
				+				}
				+				if(matchingOffset >= str.length) {
				+					return unicodeOffset;
				+				}
				+			}
				+
				+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+			unicodeOffset++;
				+		}
				+		return -1;
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns the position of the rightmost occurrence of `str` within `this`
			
 
				+		String.
			
 
				+
			
 
				+		If `startIndex` is given, the search is performed within the substring
			
 
				+		of `this` String from 0 to `startIndex + str.length`. Otherwise the search
			
 
				+		is performed within `this` String. In either case, the returned position
			
 
				+		is relative to the beginning of `this` String.
			
 
				+
			
 
				+		If `str` cannot be found, -1 is returned.
			
 
				+	**/
			
 
				+	public function lastIndexOf(str:String, ?startIndex:Int):Int {
				+		// Finds the rightmost occurrence of `str` that begins at a code point
				+		// index <= `startIndex`. The returned position is in code points;
				+		// -1 is returned when there is no such occurrence.
				+		if(startIndex == null) {
				+			// The native length is never smaller than the number of code
				+			// points, so it works as "no upper limit".
				+			startIndex = this.length;
				+		} else if(startIndex < 0) {
				+			startIndex = 0;
				+		}
				+		// The previous implementation never reported a match for an empty
				+		// needle; keep that result explicitly instead of reading past the
				+		// end of `str`.
				+		if(str.length == 0) {
				+			return -1;
				+		}
				+
				+		var unicodeOffset = 0;
				+		var nativeOffset = 0;
				+		var result = -1;
				+		while(nativeOffset < this.length && unicodeOffset <= startIndex) {
				+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
				+
				+			// Test a full match starting at this code point. Each candidate
				+			// start is checked on its own, so overlapping occurrences
				+			// (e.g. "aa" inside "aaa") are all seen and the last one wins.
				+			var thisOffset = nativeOffset;
				+			var matchingOffset = 0;
				+			while(matchingOffset < str.length && thisOffset < this.length) {
				+				var c1 = StringTools.utf16CodePointAt(this, thisOffset);
				+				if(c1 != StringTools.utf16CodePointAt(str, matchingOffset)) {
				+					break;
				+				}
				+				// Equal code points occupy the same number of UTF-16 units.
				+				var step = (c1 >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+				thisOffset += step;
				+				matchingOffset += step;
				+			}
				+			if(matchingOffset >= str.length) {
				+				result = unicodeOffset;
				+			}
				+
				+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+			unicodeOffset++;
				+		}
				+		return result;
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns `len` characters of `this` String, starting at position `pos`.
			
 
				+
			
 
				+		If `len` is omitted, all characters from position `pos` to the end of
			
 
				+		`this` String are included.
			
 
				+
			
 
				+		If `pos` is negative, its value is calculated from the end of `this`
			
 
				+		String by `this.length + pos`. If this yields a negative value, 0 is
			
 
				+		used instead.
			
 
				+
			
 
				+		If the calculated position + `len` exceeds `this.length`, the characters
			
 
				+		from that position to the end of `this` String are returned.
			
 
				+
			
 
				+		If `len` is negative, the result is unspecified.
			
 
				+	**/
			
 
				+	public function substr(pos:Int, ?len:Int):String {
				+		// Returns `len` code points of `this` starting at code point `pos`,
				+		// or everything from `pos` to the end when `len` is omitted.
				+		if(pos < 0) {
				+			// Negative positions count back from the end, clamped to 0.
				+			pos = (this:UnicodeString).length + pos;
				+			if(pos < 0) {
				+				pos = 0;
				+			}
				+		}
				+		// `len` is optional. Only normalise it when it was actually passed:
				+		// comparing `null` with 0 is target dependent and could make
				+		// `substr(pos)` return "" instead of the rest of the string.
				+		if(len != null) {
				+			if(len < 0) {
				+				// A negative length is counted from the end of the string.
				+				len = (this:UnicodeString).length + len;
				+			}
				+			if(len <= 0) {
				+				return "";
				+			}
				+		}
				+
				+		var unicodeOffset = 0;
				+		var nativeOffset = 0;
				+		var fromOffset = -1;
				+		var subLength = 0;
				+		while(nativeOffset < this.length) {
				+			var c = StringTools.utf16CodePointAt(this, nativeOffset);
				+
				+			if(unicodeOffset >= pos) {
				+				if(fromOffset < 0) {
				+					if(len == null) {
				+						// No length given: the native tail from here is the answer.
				+						return this.substr(nativeOffset);
				+					}
				+					fromOffset = nativeOffset;
				+				}
				+				subLength++;
				+				if(subLength >= len) {
				+					// Include the second UTF-16 unit when the last code point
				+					// is stored as a surrogate pair.
				+					var lastOffset = (c < StringTools.MIN_SURROGATE_CODE_POINT ? nativeOffset : nativeOffset + 1);
				+					return this.substr(fromOffset, lastOffset - fromOffset + 1);
				+				}
				+			}
				+
				+			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+			unicodeOffset++;
				+		}
				+		// `pos` beyond the end yields "", otherwise `len` ran past the end.
				+		return (fromOffset < 0 ? "" : this.substr(fromOffset));
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns the part of `this` String from `startIndex` to but not including `endIndex`.
			
 
				+
			
 
				+		If `startIndex` or `endIndex` are negative, 0 is used instead.
			
 
				+
			
 
				+		If `startIndex` exceeds `endIndex`, they are swapped.
			
 
				+
			
 
				+		If the (possibly swapped) `endIndex` is omitted or exceeds
			
 
				+		`this.length`, `this.length` is used instead.
			
 
				+
			
 
				+		If the (possibly swapped) `startIndex` exceeds `this.length`, the empty
			
 
				+		String `""` is returned.
			
 
				+	**/
			
 
				+	public function substring(startIndex:Int, ?endIndex:Int):String {
				+		// Returns the code points in [startIndex, endIndex); both bounds are
				+		// code point indices. Negative bounds become 0 and out-of-order
				+		// bounds are swapped.
				+		if(startIndex < 0) {
				+			startIndex = 0;
				+		}
				+		if(endIndex != null) {
				+			if(endIndex < 0) {
				+				endIndex = 0;
				+			}
				+			if(startIndex == endIndex) {
				+				return "";
				+			}
				+			if(startIndex > endIndex) {
				+				var lower = endIndex;
				+				endIndex = startIndex;
				+				startIndex = lower;
				+			}
				+		}
				+
				+		var codePointIndex = 0;
				+		var unitIndex = 0;
				+		var firstUnit = -1;
				+		var taken = 0;
				+		while(unitIndex < this.length) {
				+			var codePoint = StringTools.utf16CodePointAt(this, unitIndex);
				+			// Number of UTF-16 units occupied by the current code point.
				+			var width = (codePoint >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
				+
				+			if(codePointIndex >= startIndex) {
				+				if(firstUnit < 0) {
				+					if(endIndex == null) {
				+						// Open-ended range: the native tail is the result.
				+						return this.substr(unitIndex);
				+					}
				+					firstUnit = unitIndex;
				+				}
				+				taken++;
				+				if(taken >= endIndex - startIndex) {
				+					// Cut right after the current code point.
				+					return this.substr(firstUnit, unitIndex + width - firstUnit);
				+				}
				+			}
				+
				+			unitIndex += width;
				+			codePointIndex++;
				+		}
				+		if(firstUnit < 0) {
				+			return "";
				+		}
				+		return this.substr(firstUnit);
				+	}
			
 
				+
			
 
				 	function get_length():Int {
			
 
				 		var l = 0;
			
 
				 		for(c in new StringIteratorUnicode(this)) {
			
--- a/tests/unit/src/unitstd/UnicodeString.unit.hx
+++ b/tests/unit/src/unitstd/UnicodeString.unit.hx
@@ -28,6 +28,93 @@ s.charCodeAt(2) == codes[2];
 
				 s.charCodeAt(3) == null;
			
 
				 s.charCodeAt(-1) == null;
			
 
				 
			
 
				+// indexOf: positions and startIndex are code point indices ("𠜎" is one code point outside the BMP); negative startIndex counts from the end
			
 
				+var s:UnicodeString = "𠜎zяяw";
			
 
				+s.indexOf("𠜎") == 0;
			
 
				+s.indexOf("z") == 1;
			
 
				+s.indexOf("я") == 2;
			
 
				+s.indexOf("zя") == 1;
			
 
				+s.indexOf("w") == 4;
			
 
				+s.indexOf("яw") == 3;
			
 
				+s.indexOf("f") == -1;
			
 
				+s.indexOf("я", 0) == 2;
			
 
				+s.indexOf("я", 1) == 2;
			
 
				+s.indexOf("я", 2) == 2;
			
 
				+s.indexOf("я", 3) == 3;
			
 
				+s.indexOf("я", 4) == -1;
			
 
				+s.indexOf("я", 40) == -1;
			
 
				+s.indexOf("я", -1) == -1;
			
 
				+s.indexOf("я", -2) == 3;
			
 
				+s.indexOf("я", -3) == 2;
			
 
				+s.indexOf("я", -4) == 2;
			
 
				+s.indexOf("я", -5) == 2;
			
 
				+s.indexOf("я", -50) == 2;
			
 
				+
			
 
				+// lastIndexOf: startIndex (in code points) is the last position where a match may begin; a negative startIndex behaves like 0
			
 
				+var s:UnicodeString = "𠜎zяяw";
			
 
				+s.lastIndexOf("𠜎") == 0;
			
 
				+s.lastIndexOf("z") == 1;
			
 
				+s.lastIndexOf("я") == 3;
			
 
				+s.lastIndexOf("zя") == 1;
			
 
				+s.lastIndexOf("яw") == 3;
			
 
				+s.lastIndexOf("f") == -1;
			
 
				+s.lastIndexOf("я", 0) == -1;
			
 
				+s.lastIndexOf("я", 1) == -1;
			
 
				+s.lastIndexOf("я", 2) == 2;
			
 
				+s.lastIndexOf("я", 3) == 3;
			
 
				+s.lastIndexOf("я", 4) == 3;
			
 
				+s.lastIndexOf("я", 40) == 3;
			
 
				+s.lastIndexOf("я", -1) == -1;
			
 
				+s.lastIndexOf("𠜎z", -1) == 0;
			
 
				+
			
 
				+// substr: pos and len are counted in code points; negative pos counts from the end, and these cases expect a negative len to be counted from the end as well
			
 
				+var s:UnicodeString = "𠜎zяяw";
			
 
				+s.substr(0) == "𠜎zяяw";
			
 
				+s.substr(1) == "zяяw";
			
 
				+s.substr(5) == "";
			
 
				+s.substr(4) == "w";
			
 
				+s.substr(3) == "яw";
			
 
				+s.substr(-1) == "w";
			
 
				+s.substr(-2) == "яw";
			
 
				+s.substr(-4) == "zяяw";
			
 
				+s.substr(-5) == "𠜎zяяw";
			
 
				+s.substr(-100) == "𠜎zяяw";
			
 
				+s.substr(0, 0) == "";
			
 
				+s.substr(0, 1) == "𠜎";
			
 
				+s.substr(0, 2) == "𠜎z";
			
 
				+s.substr(0, 100) == "𠜎zяяw";
			
 
				+s.substr(0, -1) == "𠜎zяя";
			
 
				+s.substr(0, -2) == "𠜎zя";
			
 
				+s.substr(0, -100) == "";
			
 
				+
			
 
				+// substring: indices are code points; negative indices are treated as 0 and startIndex/endIndex are swapped when given out of order
			
 
				+var s:UnicodeString = "𠜎zяяw";
			
 
				+s.substring(0, 0) == "";
			
 
				+s.substring(0, 1) == "𠜎";
			
 
				+s.substring(1, 0) == "𠜎";
			
 
				+s.substring(0, 2) == "𠜎z";
			
 
				+s.substring(2, 0) == "𠜎z";
			
 
				+s.substring(-1, 0) == "";
			
 
				+s.substring(0, -1) == "";
			
 
				+s.substring(-1, -1) == "";
			
 
				+s.substring(-1, 1) == "𠜎";
			
 
				+s.substring(1, -1) == "𠜎";
			
 
				+s.substring(-1, 2) == "𠜎z";
			
 
				+s.substring(2, -1) == "𠜎z";
			
 
				+s.substring(0) == "𠜎zяяw";
			
 
				+s.substring(1) == "zяяw";
			
 
				+s.substring(2) == "яяw";
			
 
				+s.substring(0, -1) == "";
			
 
				+s.substring(5, 0) == "𠜎zяяw";
			
 
				+s.substring(0, 100) == "𠜎zяяw";
			
 
				+s.substring(100, 120) == "";
			
 
				+s.substring(100, 0) == "𠜎zяяw";
			
 
				+s.substring(120, 100) == "";
			
 
				+s.substring(1, 4) == "zяя";
			
 
				+s.substring(4, 1) == "zяя";
			
 
				+
			
 
				+var s = new UnicodeString("𠜎zя");
			
 
				+
			
 
				 // @:op(UnicodeString)
			
 
				 var s2 = new UnicodeString("𠜎z");
			
 
				 s != s2;