Ver código fonte

Make String.indexOf actually conform to some specification (#11569)

* check spec

* fix eval and UnicodeString

* fix python

* fix neko

* maybe fix PHP

* maybe fix lua?

somehow I doubt it

* fix order

* deal with "" edge case

* try to get the documentation right

* maybe like this?
Simon Krajewski 1 ano atrás
pai
commit
0e71626ba3

+ 2 - 1
src/macro/eval/evalStdLib.ml

@@ -2250,9 +2250,10 @@ module StdString = struct
 		let str = this str in
 		let this = this vthis in
 		let i = default_int startIndex 0 in
+		let i = max 0 i in
 		try
 			if str.slength = 0 then
-				vint (max 0 (min i this.slength))
+				vint (min i this.slength)
 			else begin
 				let i =
 					if i >= this.slength then raise Not_found

+ 10 - 8
std/String.hx

@@ -74,18 +74,20 @@ extern class String {
 	function charCodeAt(index:Int):Null<Int>;
 
 	/**
-		Returns the position of the leftmost occurrence of `str` within `this`
-		String.
+		Returns the position of the leftmost occurrence of `str` within `this` String.
 
-		If `startIndex` is given, the search is performed within the substring
-		of `this` String starting from `startIndex`.
+		If `str` is the empty String `""`, then:
+			* If `startIndex` is not specified or < 0, 0 is returned.
+			* If `startIndex >= this.length`, `this.length` is returned.
+			* Otherwise, `startIndex` is returned.
 
-		If `startIndex` exceeds `this.length`, -1 is returned.
+		Otherwise, if `startIndex` is not specified or < 0, it is treated as 0.
 
-		If `startIndex` is negative, the result is unspecifed.
+		If `startIndex >= this.length`, -1 is returned.
 
-		Otherwise the search is performed within `this` String. In either case,
-		the returned position is relative to the beginning of `this` String.
+		Otherwise the search is performed within the substring of `this` String starting
+		at `startIndex`. If `str` is found, the position of its first character in `this`
+		String relative to position 0 is returned.
 
 		If `str` cannot be found, -1 is returned.
 	**/

+ 9 - 17
std/UnicodeString.hx

@@ -188,27 +188,19 @@ abstract UnicodeString(String) from String to String {
 	}
 
 	/**
-		Returns the position of the leftmost occurrence of `str` within `this`
-		String.
-
-		If `startIndex` is given, the search is performed within the substring
-		of `this` String starting from `startIndex` (if `startIndex` is posivite
-		or 0) or `max(this.length + startIndex, 0)` (if `startIndex` is negative).
-
-		If `startIndex` exceeds `this.length`, -1 is returned.
-
-		Otherwise the search is performed within `this` String. In either case,
-		the returned position is relative to the beginning of `this` String.
-
-		If `str` cannot be found, -1 is returned.
+		@see String.indexOf
 	**/
 	public function indexOf(str:String, ?startIndex:Int):Int {
-		if (startIndex == null) {
-			startIndex = 0;
+		var startIndex:Int = if (startIndex == null || startIndex < 0) {
+			0;
 		} else {
-			if (startIndex < 0) {
-				startIndex = (this : UnicodeString).length + startIndex;
+			startIndex;
+		}
+		if (str.length == 0) {
+			if (startIndex > length) {
+				return length;
 			}
+			return startIndex;
 		}
 
 		var unicodeOffset = 0;

+ 2 - 4
std/lua/_std/String.hx

@@ -62,7 +62,7 @@ class String {
 		return BaseString.lower(this);
 
 	public function indexOf(str:String, ?startIndex:Int):Int {
-		if (startIndex == null)
+		if (startIndex == null || startIndex < 0)
 			startIndex = 1;
 		else
 			startIndex += 1;
@@ -79,9 +79,7 @@ class String {
 	static function indexOfEmpty(s:String, startIndex:Int):Int {
 		var length = BaseString.len(s);
 		if (startIndex < 0) {
-			startIndex = length + startIndex;
-			if (startIndex < 0)
-				startIndex = 0;
+			startIndex = 0;
 		}
 		return startIndex > length ? length : startIndex;
 	}

+ 1 - 1
std/neko/_std/String.hx

@@ -57,8 +57,8 @@
 	}
 
 	public function indexOf(str:String, ?startIndex:Int):Int {
+		var startIndex = startIndex == null || startIndex < 0 ? 0 : startIndex;
 		if (str.length == 0) {
-			var startIndex = startIndex == null ? 0 : startIndex;
 			var min = startIndex > length ? length : startIndex;
 			return min < 0 ? 0 : min;
 		}

+ 3 - 12
std/php/Boot.hx

@@ -773,19 +773,10 @@ private class HxString {
 		if (search.length == 0) {
 			return Global.max(0, Global.min(startIndex == null ? 0 : startIndex, str.length));
 		}
-		if (startIndex == null) {
+		if (startIndex == null || startIndex < 0) {
 			startIndex = 0;
-		} else {
-			var length = str.length;
-			if (startIndex < 0) {
-				startIndex += length;
-				if (startIndex < 0) {
-					startIndex = 0;
-				}
-			}
-			if (startIndex >= length && search != '') {
-				return -1;
-			}
+		} else if (startIndex >= str.length) {
+			return -1;
 		}
 		var index:EitherType<Int, Bool> = if (search == '') {
 			var length = str.length;

+ 17 - 14
std/python/internal/StringImpl.hx

@@ -45,15 +45,15 @@ class StringImpl {
 	public static function lastIndexOf(s:String, str:String, ?startIndex:Int):Int {
 		if (str == "") {
 			var i = startIndex == null ? s.length : startIndex;
-			return UBuiltins.max(0, UBuiltins.min(i,  s.length));
-		}
-		else if (startIndex == null) {
+			return UBuiltins.max(0, UBuiltins.min(i, s.length));
+		} else if (startIndex == null) {
 			return Syntax.callField(s, "rfind", str, 0, s.length);
-		} else if(str == "") {
+		} else if (str == "") {
 			var length = s.length;
-			if(startIndex < 0) {
+			if (startIndex < 0) {
 				startIndex = length + startIndex;
-				if(startIndex < 0) startIndex = 0;
+				if (startIndex < 0)
+					startIndex = 0;
 			}
 			return startIndex > length ? length : startIndex;
 		} else {
@@ -79,23 +79,26 @@ class StringImpl {
 	}
 
 	@:ifFeature("dynamic_read.indexOf", "anon_optional_read.indexOf", "python.internal.StringImpl.indexOf")
-	public static function indexOf (s:String, str:String, ?startIndex:Int) {
+	public static function indexOf(s:String, str:String, ?startIndex:Int) {
 		if (str == "") {
 			var i = startIndex == null ? 0 : startIndex;
-			return UBuiltins.max(0, UBuiltins.min(i,  s.length));
-		}
-		else if (startIndex == null)
+			return UBuiltins.max(0, UBuiltins.min(i, s.length));
+		} else if (startIndex == null || startIndex < 0)
 			return Syntax.callField(s, "find", str);
-		else
+		else if (startIndex >= s.length) {
+			return -1;
+		} else {
 			return indexOfImpl(s, str, startIndex);
+		}
 	}
 
 	static function indexOfImpl(s:String, str:String, startIndex:Int) {
-		if(str == "") {
+		if (str == "") {
 			var length = s.length;
-			if(startIndex < 0) {
+			if (startIndex < 0) {
 				startIndex = length + startIndex;
-				if(startIndex < 0) startIndex = 0;
+				if (startIndex < 0)
+					startIndex = 0;
 			}
 			return startIndex > length ? length : startIndex;
 		}

+ 0 - 2
tests/unit/src/unit/issues/Issue5271.hx

@@ -27,9 +27,7 @@ class Issue5271 extends unit.Test {
 		eq(3, "dog".indexOf("", 3));
 		eq(3, "dog".indexOf("", 4));
 		eq(3, "dog".indexOf("", 10));
-		#if !lua
 		eq(0, "dog".indexOf("", -1));
-		#end
 
 		eq(-1, "dogdog".indexOf("cat"));
 		eq(3, "dogcat".indexOf("cat"));

+ 46 - 37
tests/unit/src/unitstd/String.unit.hx

@@ -2,21 +2,18 @@
 var str = "foo";
 var str2 = new String(str);
 str == str2;
-
 // toUpperCase
 "foo".toUpperCase() == "FOO";
 "_bar".toUpperCase() == "_BAR";
 "123b".toUpperCase() == "123B";
 "".toUpperCase() == "";
 "A".toUpperCase() == "A";
-
 // toLowerCase
 "FOO".toLowerCase() == "foo";
 "_BAR".toLowerCase() == "_bar";
 "123B".toLowerCase() == "123b";
 "".toLowerCase() == "";
 "a".toLowerCase() == "a";
-
 // charAt
 var s = "foo1bar";
 s.charAt(0) == "f";
@@ -27,11 +24,10 @@ s.charAt(4) == "b";
 s.charAt(5) == "a";
 s.charAt(6) == "r";
 s.charAt(7) == "";
-s.charAt( -1) == "";
+s.charAt(-1) == "";
 "".charAt(0) == "";
 "".charAt(1) == "";
-"".charAt( -1) == "";
-
+"".charAt(-1) == "";
 // charCodeAt
 var s = "foo1bar";
 s.charCodeAt(0) == 102;
@@ -42,8 +38,7 @@ s.charCodeAt(4) == 98;
 s.charCodeAt(5) == 97;
 s.charCodeAt(6) == 114;
 s.charCodeAt(7) == null;
-s.charCodeAt( -1) == null;
-
+s.charCodeAt(-1) == null;
 // code
 "f".code == 102;
 "o".code == 111;
@@ -51,7 +46,6 @@ s.charCodeAt( -1) == null;
 "b".code == 98;
 "a".code == 97;
 "r".code == 114;
-
 // indexOf
 var s = "foo1bar";
 s.indexOf("") == 0;
@@ -62,25 +56,47 @@ s.indexOf("b") == 4;
 s.indexOf("a") == 5;
 s.indexOf("r") == 6;
 s.indexOf("z") == -1;
-//s.indexOf(null) == -1;
-//s.indexOf(null, 1) == -1;
-//s.indexOf(null, -1) == -1;
+// empty string
+s.indexOf("") == 0;
+s.indexOf("", -1) == 0;
+s.indexOf("", 0) == 0;
+s.indexOf("", 1) == 1;
+s.indexOf("", 2) == 2;
+s.indexOf("", 3) == 3;
+s.indexOf("", 4) == 4;
+s.indexOf("", 5) == 5;
+s.indexOf("", 6) == 6;
+s.indexOf("", 7) == 7;
+s.indexOf("", 8) == 7;
+// negative startIndex
+s.indexOf("f", -1) == 0;
+s.indexOf("o", -1) == 1;
+s.indexOf("1", -1) == 3;
+s.indexOf("b", -1) == 4;
+s.indexOf("a", -1) == 5;
+s.indexOf("r", -1) == 6;
+s.indexOf("z", -1) == -1;
+// startIndex >= length
+s.indexOf("f", 7) == -1;
+s.indexOf("o", 7) == -1;
+s.indexOf("1", 7) == -1;
+s.indexOf("b", 7) == -1;
+s.indexOf("a", 7) == -1;
+s.indexOf("r", 7) == -1;
+s.indexOf("z", 7) == -1;
+// s.indexOf(null) == -1;
+// s.indexOf(null, 1) == -1;
+// s.indexOf(null, -1) == -1;
 s.indexOf("foo") == 0;
 s.indexOf("oo") == 1;
-//s.indexOf("bart") == -1;
-//s.indexOf("r", -1) == -1;
-//s.indexOf("r", -10) == -1;
+// s.indexOf("bart") == -1;
 s.indexOf("", 2) == 2;
 s.indexOf("", 200) == s.length;
 s.indexOf("o", 1) == 1;
 s.indexOf("o", 2) == 2;
 s.indexOf("o", 3) == -1;
-//s.indexOf("", -10) == 0;
-//s.indexOf("", 7) == 7; // see #8117
-//s.indexOf("", 8) == -1; // see #8117
 s.indexOf("r", 7) == -1;
 s.indexOf("r", 8) == -1;
-
 // lastIndexOf
 var s = "foofoofoobarbar";
 s.lastIndexOf("") == s.length;
@@ -95,9 +111,9 @@ s.lastIndexOf("barb") == 9;
 s.lastIndexOf("barb", 12) == 9;
 s.lastIndexOf("barb", 13) == 9;
 s.lastIndexOf("z") == -1;
-//s.lastIndexOf(null) == -1;
-//s.lastIndexOf(null, 1) == -1;
-//s.lastIndexOf(null, 14) == -1;
+// s.lastIndexOf(null) == -1;
+// s.lastIndexOf(null, 1) == -1;
+// s.lastIndexOf(null, 14) == -1;
 s.lastIndexOf("", 2) == 2;
 s.lastIndexOf("", 200) == s.length;
 s.lastIndexOf("r", 14) == 14;
@@ -111,14 +127,14 @@ s.lastIndexOf("bar", 9) == 9;
 s.lastIndexOf("bar", 8) == -1;
 s.lastIndexOf("a", s.length) == 13;
 s.lastIndexOf("a", s.length + 9000) == 13;
-
 // split
 var s = "xfooxfooxxbarxbarxx";
-s.split("x") == ["", "foo", "foo", "", "bar", "bar", "",""];
-s.split("xx") == ["xfooxfoo","barxbar",""];
-s.split("") == ["x", "f", "o", "o", "x", "f", "o", "o", "x", "x", "b", "a", "r", "x", "b", "a", "r", "x", "x"];
+s.split("x") == ["", "foo", "foo", "", "bar", "bar", "", ""];
+s.split("xx") == ["xfooxfoo", "barxbar", ""];
+s.split("") == [
+	"x", "f", "o", "o", "x", "f", "o", "o", "x", "x", "b", "a", "r", "x", "b", "a", "r", "x", "x"
+];
 s.split("z") == ["xfooxfooxxbarxbarxx"];
-
 // substr
 var s = "xfooxfooxxbarxbarxx";
 s.substr(0) == "xfooxfooxxbarxbarxx";
@@ -130,17 +146,16 @@ s.substr(-1) == "x";
 s.substr(-2) == "xx";
 s.substr(-18) == "fooxfooxxbarxbarxx";
 s.substr(-19) == "xfooxfooxxbarxbarxx";
-s.substr( -100) == "xfooxfooxxbarxbarxx";
+s.substr(-100) == "xfooxfooxxbarxbarxx";
 s.substr(0, 0) == "";
 s.substr(0, 1) == "x";
 s.substr(0, 2) == "xf";
 s.substr(0, 100) == "xfooxfooxxbarxbarxx";
 s.substr(0, -1) == "xfooxfooxxbarxbarx";
 s.substr(0, -2) == "xfooxfooxxbarxbar";
-//s.substr(1, -2) == "fooxfooxxbarxbar";
-//s.substr(2, -2) == "ooxfooxxbarxbar";
+// s.substr(1, -2) == "fooxfooxxbarxbar";
+// s.substr(2, -2) == "ooxfooxxbarxbar";
 s.substr(0, -100) == "";
-
 // substring
 var s = "xfooxfooxxbarxbarxx";
 s.substring(0, 0) == "";
@@ -165,31 +180,25 @@ s.substring(100, 0) == "xfooxfooxxbarxbarxx";
 s.substring(120, 100) == "";
 s.substring(5, 8) == "foo";
 s.substring(8, 5) == "foo";
-
 // fromCharCode
 String.fromCharCode(65) == "A";
-
 // ensure int strings compared as strings, not parsed ints (issue #3734)
 ("3" > "11") == true;
 (" 3" < "3") == true;
-
 // string comparison (see #8332)
 ("a" < "b") == true;
 ("a" <= "b") == true;
 ("a" > "b") == false;
 ("a" >= "b") == false;
-
 #if target.unicode
 ("𠜎zя" > "abя") == true;
 ("𠜎zя" >= "abя") == true;
 ("𠜎zя" < "abя") == false;
 ("𠜎zя" <= "abя") == false;
-
 #if target.utf16
 // since U+10002 in UTF16 is D800 DC02
 ("\u{FF61}" < "\u{10002}") == false;
 #else
 ("\u{FF61}" < "\u{10002}") == true;
 #end
-
 #end

+ 22 - 27
tests/unit/src/unitstd/UnicodeString.unit.hx

@@ -4,30 +4,26 @@ var codes = [132878, 122, 1103];
 
 // length
 s.length == codes.length;
-
 // // toUpperCase, toLowerCase
 // var turkishLower = "ğüşıiöç";
 // var turkishUpper = "ĞÜŞIİÖÇ";
 // turkishUpper == turkishLower.toUpperCase();
 // turkishLower == turkishUpper.toLowerCase();
-
 // charAt
 s.charAt(0) == "𠜎";
 s.charAt(1) == "z";
 s.charAt(2) == "я";
 s.charAt(3) == "";
-s.charAt( -1) == "";
-("":UnicodeString).charAt(0) == "";
-("":UnicodeString).charAt(1) == "";
-("":UnicodeString).charAt( -1) == "";
-
+s.charAt(-1) == "";
+("" : UnicodeString).charAt(0) == "";
+("" : UnicodeString).charAt(1) == "";
+("" : UnicodeString).charAt(-1) == "";
 // charCodeAt
 s.charCodeAt(0) == codes[0];
 s.charCodeAt(1) == codes[1];
 s.charCodeAt(2) == codes[2];
 s.charCodeAt(3) == null;
 s.charCodeAt(-1) == null;
-
 // indexOf
 var s:UnicodeString = "𠜎zяяw";
 s.indexOf("𠜎") == 0;
@@ -43,15 +39,22 @@ s.indexOf("я", 2) == 2;
 s.indexOf("я", 3) == 3;
 s.indexOf("я", 4) == -1;
 s.indexOf("я", 40) == -1;
-#if !lua // TODO https://github.com/HaxeFoundation/haxe/pull/8370
-s.indexOf("я", -1) == -1;
-s.indexOf("я", -2) == 3;
+s.indexOf("я", -1) == 2;
+s.indexOf("я", -2) == 2;
 s.indexOf("я", -3) == 2;
 s.indexOf("я", -4) == 2;
 s.indexOf("я", -5) == 2;
 s.indexOf("я", -50) == 2;
-#end
-
+// empty string
+s.indexOf("") == 0;
+s.indexOf("", -1) == 0;
+s.indexOf("", 0) == 0;
+s.indexOf("", 1) == 1;
+s.indexOf("", 2) == 2;
+s.indexOf("", 3) == 3;
+s.indexOf("", 4) == 4;
+s.indexOf("", 5) == 5;
+s.indexOf("", 6) == 5;
 // lastIndexOf
 var s:UnicodeString = "𠜎zяяw";
 s.lastIndexOf("𠜎") == 0;
@@ -66,7 +69,6 @@ s.lastIndexOf("я", 2) == 2;
 s.lastIndexOf("я", 3) == 3;
 s.lastIndexOf("я", 4) == 3;
 s.lastIndexOf("я", 40) == 3;
-
 // substr
 var s:UnicodeString = "𠜎zяяw";
 s.substr(0) == "𠜎zяяw";
@@ -86,7 +88,6 @@ s.substr(0, 100) == "𠜎zяяw";
 s.substr(0, -1) == "𠜎zяя";
 s.substr(0, -2) == "𠜎zя";
 s.substr(0, -100) == "";
-
 // substring
 var s:UnicodeString = "𠜎zяяw";
 s.substring(0, 0) == "";
@@ -112,7 +113,6 @@ s.substring(100, 0) == "𠜎zяяw";
 s.substring(120, 100) == "";
 s.substring(1, 4) == "zяя";
 s.substring(4, 1) == "zяя";
-
 var s = new UnicodeString("𠜎zя");
 
 // @:op(UnicodeString)
@@ -126,7 +126,6 @@ s2 <= s;
 (s + s2).length == s.length + s2.length;
 var s3 = s;
 (s3 += s2).length == s.length + s2.length;
-
 // @:op(String)
 var s2 = "abя";
 s != s2;
@@ -135,19 +134,16 @@ s > s2;
 s >= s2;
 s2 < s;
 s2 <= s;
-(s + s2).length == s.length + (s2:UnicodeString).length;
+(s + s2).length == s.length + (s2 : UnicodeString).length;
 var s3 = s;
-(s3 += s2).length == s.length + (s2:UnicodeString).length;
-
+(s3 += s2).length == s.length + (s2 : UnicodeString).length;
 // iterator
-aeq(codes, [for(c in s) c]);
-
+aeq(codes, [for (c in s) c]);
 // keyValueIterator
-var keys = [for(i in 0...codes.length) i];
-var actualKeyCodes = [for(i => c in s) [i, c]];
+var keys = [for (i in 0...codes.length) i];
+var actualKeyCodes = [for (i => c in s) [i, c]];
 aeq(keys, actualKeyCodes.map(a -> a[0]));
 aeq(codes, actualKeyCodes.map(a -> a[1]));
-
 // validate
 UnicodeString.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61"), UTF8) == true;
 UnicodeString.validate(haxe.io.Bytes.ofHex("ed9fbf"), UTF8) == true;
@@ -158,7 +154,6 @@ UnicodeString.validate(haxe.io.Bytes.ofHex("c0af"), UTF8) == false; // overlong
 UnicodeString.validate(haxe.io.Bytes.ofHex("eda080"), UTF8) == false; // surrogate byte sequence
 UnicodeString.validate(haxe.io.Bytes.ofHex("edbfbf"), UTF8) == false; // surrogate byte sequence
 UnicodeString.validate(haxe.io.Bytes.ofHex("f4908080"), UTF8) == false; // U+110000
-
 #else
 1 == 1;
-#end
+#end