Browse Source

`Std.parseInt()` fixes and optimisations (#10664)

* [tests] Add more parseInt and parseFloat tests

- Label tests
- Including some tests from #4132
- Test trailing text after hexadecimal
- Add test to ensure parseInt does not handle scientific notation
- Enable whitespace test for neko, hl, and c++
- Add test for all whitespace
- Unspecify parseInt case where `0x` is not followed by digits
- Test invalid sign usage

* [php] Disable a parseInt test

* [cs,java,jvm,python] Optimise parseInt

- Each part of the Int string is handled separately so we don't have
a single loop with every single check
- Now it also properly handles trailing text after a hexadecimal value

* [js,flash] Cleanup Std.parseInt

js:
- Allow + sign when using hexadecimal notation
- Since ES5, we don't have to worry about JavaScript's parseInt allowing
octal so we can avoid the extra checks when targeting ES5+
- Fix #9198

flash:
- Copy js' implementation of Std.parseInt to flash (no need to worry
about octal as it was only supported before flash 9)

* [php] Fix and optimise Std.parseInt

- Allow + in hexadecimal notation
- Ignore all whitespace before checking for hexadecimal prefix
- Use native php functions instead of the slower charAt()
  - Add str_starts_with extern, and polyfill it for php 7

* [lua] Fix parseInt issues

- Handle all whitespace properly
- Do not crash when hexadecimal number is preceded by two signs
- Do not crash when hexadecimal prefix is followed by a non-hexadecimal
character

* [eval] Fix handling of plus signs in Std.parseInt

- Do not allow multiple signs
- Do not allow + followed by whitespace

* [eval] Allow all whitespace in `Std.parseFloat`

* [tests] Disable a parseFloat test for eval

* [docs] Update parseInt and parseFloat docs

* [tests] Add test for #7028
tobil4sk 3 năm trước cách đây
mục cha
commit
45f547f079

+ 23 - 8
src/core/numeric.ml

@@ -45,11 +45,15 @@ let float_repres f =
 			Printf.sprintf "%.18g" f
 			Printf.sprintf "%.18g" f
 		in valid_float_lexeme float_val
 		in valid_float_lexeme float_val
 
 
+let is_whitespace code =
+	code > 8 && code < 14
+
 let parse_float s =
 let parse_float s =
 	let rec loop sp i =
 	let rec loop sp i =
 		if i = String.length s then (if sp = 0 then s else String.sub s sp (i - sp)) else
 		if i = String.length s then (if sp = 0 then s else String.sub s sp (i - sp)) else
 		match String.unsafe_get s i with
 		match String.unsafe_get s i with
-		| ' ' | '\t' when sp = i -> loop (sp + 1) (i + 1)
+		| ' ' when sp = i -> loop (sp + 1) (i + 1)
+		| c when sp = i && is_whitespace (Char.code c) -> loop (sp + 1) (i + 1)
 		| '0'..'9' | '-' | '+' | 'e' | 'E' | '.' -> loop sp (i + 1)
 		| '0'..'9' | '-' | '+' | 'e' | 'E' | '.' -> loop sp (i + 1)
 		| _ -> String.sub s sp (i - sp)
 		| _ -> String.sub s sp (i - sp)
 	in
 	in
@@ -64,14 +68,25 @@ let parse_int s =
 			| '0'..'9' | 'a'..'f' | 'A'..'F' -> loop_hex sp (i + 1)
 			| '0'..'9' | 'a'..'f' | 'A'..'F' -> loop_hex sp (i + 1)
 			| _ -> String.sub s sp (i - sp)
 			| _ -> String.sub s sp (i - sp)
 	in
 	in
-	let rec loop sp i digits_count =
+	let rec loop_dec sp i =
 		if i = String.length s then (if sp = 0 then s else String.sub s sp (i - sp)) else
 		if i = String.length s then (if sp = 0 then s else String.sub s sp (i - sp)) else
 		match String.unsafe_get s i with
 		match String.unsafe_get s i with
-		| '0'..'9' -> loop sp (i + 1) (digits_count + 1)
-		| ' ' | '+' when sp = i -> loop (sp + 1) (i + 1) digits_count
-		| c when sp = i && Char.code c > 8 && Char.code c < 14 -> loop (sp + 1) (i + 1) digits_count
-		| '-' when i = sp -> loop sp (i + 1) digits_count
-		| ('x' | 'X') when digits_count = 1 && String.get s (i - 1) = '0' -> loop_hex sp (i + 1)
+		| '0'..'9' -> loop_dec sp (i + 1)
 		| _ -> String.sub s sp (i - sp)
 		| _ -> String.sub s sp (i - sp)
 	in
 	in
-	Int32.of_string (loop 0 0 0)
+	let handle_digits sp i =
+		if i + 1 < String.length s && String.get s i = '0' &&
+			(String.get s (i + 1) = 'x' || String.get s (i + 1) = 'X')
+		then loop_hex sp (i + 2)
+		else loop_dec sp i
+	in
+	let rec loop sp =
+		if sp = String.length s then "" else
+		match String.unsafe_get s sp with
+		| ' ' -> loop (sp + 1)
+		| '+' -> handle_digits (sp + 1) (sp + 1)
+		| '-' -> handle_digits sp (sp + 1)
+		| c when is_whitespace (Char.code c) -> loop (sp + 1)
+		| _ -> handle_digits sp sp
+	in
+	Int32.of_string (loop 0)

+ 17 - 12
std/Std.hx

@@ -97,21 +97,23 @@ extern class Std {
 
 
 		Leading whitespaces are ignored.
 		Leading whitespaces are ignored.
 
 
-		If `x` starts with 0x or 0X, hexadecimal notation is recognized where the following digits may
-		contain 0-9 and A-F.
+		`x` may optionally start with a + or - to denote a positive or negative value respectively.
 
 
-		Otherwise `x` is read as decimal number with 0-9 being allowed characters. `x` may also start with
-		a - to denote a negative value.
+		If the optional sign is followed by 0x or 0X, hexadecimal notation is recognized where the following
+		digits may contain 0-9 and A-F. Both the prefix and digits are case insensitive.
 
 
-		In decimal mode, parsing continues until an invalid character is detected, in which case the
-		result up to that point is returned. For hexadecimal notation, the effect of invalid characters
-		is unspecified.
+		Otherwise `x` is read as decimal number with 0-9 being allowed characters. Octal and binary
+		notations are not supported.
+
+		Parsing continues until an invalid character is detected, in which case the result up to
+		that point is returned. However, in decimal mode, the effect of `x` containing an e or E
+		is unspecified, and it may be taken as scientific exponential notation.
 
 
-		Leading 0s that are not part of the 0x/0X hexadecimal notation are ignored, which means octal
-		notation is not supported.
+		If `x` is `null`, the result is `null`.
+		If `x` cannot be parsed as integer or is empty, the result is `null`.
 
 
-		If `x` is null, the result is unspecified.
-		If `x` cannot be parsed as integer, the result is `null`.
+		If `x` starts with a hexadecimal prefix which is not followed by at least one valid hexadecimal
+		digit, the result is unspecified.
 	**/
 	**/
 	static function parseInt(x:String):Null<Int>;
 	static function parseInt(x:String):Null<Int>;
 
 
@@ -119,9 +121,12 @@ extern class Std {
 		Converts a `String` to a `Float`.
 		Converts a `String` to a `Float`.
 
 
 		The parsing rules for `parseInt` apply here as well, with the exception of invalid input
 		The parsing rules for `parseInt` apply here as well, with the exception of invalid input
-		resulting in a `NaN` value instead of null.
+		resulting in a `NaN` value instead of `null`. Also, hexadecimal support is **not** specified.
 
 
 		Additionally, decimal notation may contain a single `.` to denote the start of the fractions.
 		Additionally, decimal notation may contain a single `.` to denote the start of the fractions.
+
+		It may also end with `e` or `E` followed by optional minus or plus sign and a sequence of
+		digits (defines exponent to base 10).
 	**/
 	**/
 	static function parseFloat(x:String):Float;
 	static function parseFloat(x:String):Float;
 
 

+ 62 - 40
std/cs/_std/Std.hx

@@ -23,6 +23,8 @@
 import cs.Boot;
 import cs.Boot;
 import cs.Lib;
 import cs.Lib;
 
 
+using StringTools;
+
 @:coreApi @:nativeGen class Std {
 @:coreApi @:nativeGen class Std {
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
 	public static inline function is(v:Dynamic, t:Dynamic):Bool {
 	public static inline function is(v:Dynamic, t:Dynamic):Bool {
@@ -79,54 +81,74 @@ import cs.Lib;
 		return cast x;
 		return cast x;
 	}
 	}
 
 
+	static inline function isSpaceChar(code:Int):Bool
+		return (code > 8 && code < 14) || code == 32;
+
+	static inline function isHexPrefix(cur:Int, next:Int):Bool
+		return cur == '0'.code && (next == 'x'.code || next == 'X'.code);
+
+	static inline function isDecimalDigit(code:Int):Bool
+		return '0'.code <= code && code <= '9'.code;
+
+	static inline function isHexadecimalDigit(code:Int):Bool
+		return isDecimalDigit(code) || ('a'.code <= code && code <= 'f'.code) || ('A'.code <= code && code <= 'F'.code);
+
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
 		if (x == null)
 		if (x == null)
 			return null;
 			return null;
 
 
-		var base = 10;
-		var len = x.length;
-		var foundCount = 0;
-		var sign = 0;
-		var firstDigitIndex = 0;
-		var lastDigitIndex = -1;
-		var previous = 0;
-
-		for(i in 0...len) {
-			var c = StringTools.fastCodeAt(x, i);
-			switch c {
-				case _ if((c > 8 && c < 14) || c == 32):
-					if(foundCount > 0) {
-						return null;
-					}
-					continue;
-				case '-'.code if(foundCount == 0):
-					sign = -1;
-				case '+'.code if(foundCount == 0):
-					sign = 1;
-				case '0'.code if(foundCount == 0 || (foundCount == 1 && sign != 0)):
-				case 'x'.code | 'X'.code if(previous == '0'.code && ((foundCount == 1 && sign == 0) || (foundCount == 2 && sign != 0))):
-					base = 16;
-				case _ if('0'.code <= c && c <= '9'.code):
-				case _ if(base == 16 && (('a'.code <= c && c <= 'z'.code) || ('A'.code <= c && c <= 'Z'.code))):
-				case _:
-					break;
-			}
-			if((foundCount == 0 && sign == 0) || (foundCount == 1 && sign != 0)) {
-				firstDigitIndex = i;
+		final len = x.length;
+		var index = 0;
+
+		inline function hasIndex(index:Int)
+			return index < len;
+
+		// skip whitespace
+		while (hasIndex(index)) {
+			if (!isSpaceChar(x.unsafeCodeAt(index)))
+				break;
+			++index;
+		}
+
+		// handle sign
+		final isNegative = hasIndex(index) && {
+			final sign = x.unsafeCodeAt(index);
+			if (sign == '-'.code || sign == '+'.code) {
+				++index;
 			}
 			}
-			foundCount++;
-			lastDigitIndex = i;
-			previous = c;
+			sign == '-'.code;
 		}
 		}
-		if(firstDigitIndex <= lastDigitIndex) {
-			var digits = x.substring(firstDigitIndex, lastDigitIndex + 1);
-			return try {
-				(sign == -1 ? -1 : 1) * cs.system.Convert.ToInt32(digits, base);
-			} catch(e:cs.system.FormatException) {
-				null;
+
+		// handle base
+		final isHexadecimal = hasIndex(index + 1) && isHexPrefix(x.unsafeCodeAt(index), x.unsafeCodeAt(index + 1));
+		if (isHexadecimal)
+			index += 2; // skip prefix
+
+		// handle digits
+		final firstInvalidIndex = {
+			var cur = index;
+			if (isHexadecimal) {
+				while (hasIndex(cur)) {
+					if (!isHexadecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
+			} else {
+				while (hasIndex(cur)) {
+					if (!isDecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
 			}
 			}
+			cur;
 		}
 		}
-		return null;
+
+		// no valid digits
+		if (index == firstInvalidIndex)
+			return null;
+
+		final result = cs.system.Convert.ToInt32(x.substring(index, firstInvalidIndex), if (isHexadecimal) 16 else 10);
+		return if (isNegative) -result else result;
 	}
 	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {

+ 6 - 0
std/flash/Lib.hx

@@ -100,4 +100,10 @@ class Lib {
 		str = str.split("\\").join("\\\\");
 		str = str.split("\\").join("\\\\");
 		flash.external.ExternalInterface.call("console." + type, str);
 		flash.external.ExternalInterface.call("console." + type, str);
 	}
 	}
+
+	public static var parseInt(get, never):(string:String, ?radix:Int) -> Float;
+
+	extern static inline function get_parseInt():(string:String, ?radix:Int) -> Float {
+		return untyped __global__["parseInt"];
+	}
 }
 }

+ 6 - 7
std/flash/_std/Std.hx

@@ -49,13 +49,12 @@ import flash.Boot;
 		return untyped __int__(x);
 		return untyped __int__(x);
 	}
 	}
 
 
-	public static function parseInt(x:String):Null<Int>
-		untyped {
-			var v = __global__["parseInt"](x);
-			if (__global__["isNaN"](v))
-				return null;
-			return v;
-		}
+	public static function parseInt(x:String):Null<Int> {
+		final v = flash.Lib.parseInt(x);
+		if (Math.isNaN(v))
+			return null;
+		return cast v;
+	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {
 		return untyped __global__["parseFloat"](x);
 		return untyped __global__["parseFloat"](x);

+ 62 - 40
std/java/_std/Std.hx

@@ -23,6 +23,8 @@
 import java.Boot;
 import java.Boot;
 import java.Lib;
 import java.Lib;
 
 
+using StringTools;
+
 @:coreApi @:nativeGen class Std {
 @:coreApi @:nativeGen class Std {
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
 	public static inline function is(v:Dynamic, t:Dynamic):Bool {
 	public static inline function is(v:Dynamic, t:Dynamic):Bool {
@@ -63,54 +65,74 @@ import java.Lib;
 		return cast x;
 		return cast x;
 	}
 	}
 
 
+	static inline function isSpaceChar(code:Int):Bool
+		return (code > 8 && code < 14) || code == 32;
+
+	static inline function isHexPrefix(cur:Int, next:Int):Bool
+		return cur == '0'.code && (next == 'x'.code || next == 'X'.code);
+
+	static inline function isDecimalDigit(code:Int):Bool
+		return '0'.code <= code && code <= '9'.code;
+
+	static inline function isHexadecimalDigit(code:Int):Bool
+		return isDecimalDigit(code) || ('a'.code <= code && code <= 'f'.code) || ('A'.code <= code && code <= 'F'.code);
+
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
 		if (x == null)
 		if (x == null)
 			return null;
 			return null;
 
 
-		var base = 10;
-		var len = x.length;
-		var foundCount = 0;
-		var sign = 0;
-		var firstDigitIndex = 0;
-		var lastDigitIndex = -1;
-		var previous = 0;
-
-		for(i in 0...len) {
-			var c = StringTools.fastCodeAt(x, i);
-			switch c {
-				case _ if((c > 8 && c < 14) || c == 32):
-					if(foundCount > 0) {
-						return null;
-					}
-					continue;
-				case '-'.code if(foundCount == 0):
-					sign = -1;
-				case '+'.code if(foundCount == 0):
-					sign = 1;
-				case '0'.code if(foundCount == 0 || (foundCount == 1 && sign != 0)):
-				case 'x'.code | 'X'.code if(previous == '0'.code && ((foundCount == 1 && sign == 0) || (foundCount == 2 && sign != 0))):
-					base = 16;
-				case _ if('0'.code <= c && c <= '9'.code):
-				case _ if(base == 16 && (('a'.code <= c && c <= 'z'.code) || ('A'.code <= c && c <= 'Z'.code))):
-				case _:
-					break;
-			}
-			if((foundCount == 0 && sign == 0) || (foundCount == 1 && sign != 0)) {
-				firstDigitIndex = i;
+		final len = x.length;
+		var index = 0;
+
+		inline function hasIndex(index:Int)
+			return index < len;
+
+		// skip whitespace
+		while (hasIndex(index)) {
+			if (!isSpaceChar(x.unsafeCodeAt(index)))
+				break;
+			++index;
+		}
+
+		// handle sign
+		final isNegative = hasIndex(index) && {
+			final sign = x.unsafeCodeAt(index);
+			if (sign == '-'.code || sign == '+'.code) {
+				++index;
 			}
 			}
-			foundCount++;
-			lastDigitIndex = i;
-			previous = c;
+			sign == '-'.code;
 		}
 		}
-		if(firstDigitIndex <= lastDigitIndex) {
-			var digits = x.substring(firstDigitIndex + (base == 16 ? 2 : 0), lastDigitIndex + 1);
-			return try {
-				(sign == -1 ? -1 : 1) * java.lang.Integer.parseInt(digits, base);
-			} catch(e:java.lang.NumberFormatException) {
-				null;
+
+		// handle base
+		final isHexadecimal = hasIndex(index + 1) && isHexPrefix(x.unsafeCodeAt(index), x.unsafeCodeAt(index + 1));
+		if (isHexadecimal)
+			index += 2; // skip prefix
+
+		// handle digits
+		final firstInvalidIndex = {
+			var cur = index;
+			if (isHexadecimal) {
+				while (hasIndex(cur)) {
+					if (!isHexadecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
+			} else {
+				while (hasIndex(cur)) {
+					if (!isDecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
 			}
 			}
+			cur;
 		}
 		}
-		return null;
+
+		// no valid digits
+		if (index == firstInvalidIndex)
+			return null;
+
+		final result = java.lang.Integer.parseInt(x.substring(index, firstInvalidIndex), if (isHexadecimal) 16 else 10);
+		return if (isNegative) -result else result;
 	}
 	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {

+ 16 - 9
std/js/_std/Std.hx

@@ -54,17 +54,24 @@ import js.Syntax;
 
 
 	@:pure
 	@:pure
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
-		if(x != null) {
-			for(i in 0...x.length) {
-				var c = StringTools.fastCodeAt(x, i);
-				if(c <= 8 || (c >= 14 && c != ' '.code && c != '-'.code)) {
-					var nc = StringTools.fastCodeAt(x, i + 1);
-					var v = js.Lib.parseInt(x, (nc == "x".code || nc == "X".code) ? 16 : 10);
-					return Math.isNaN(v) ? null : cast v;
-				}
+		#if (js_es >= 5)
+		final v = js.Lib.parseInt(x);
+		#else
+		// before ES5, octal was supported in some implementations, so we need to explicitly use base 10 or 16
+		if (x == null)
+			return null;
+		var v:Float = Math.NaN;
+		for (i => c in StringTools.keyValueIterator(x)) {
+			if ((c <= 8 || c >= 14) && !(c == ' '.code || c == '-'.code || c == '+'.code)) {
+				final nc = js.Syntax.code("{0}[{1}]", x, i + 1);
+				v = js.Lib.parseInt(x, c == '0'.code && (nc == "x" || nc == "X") ? 16 : 10);
+				break;
 			}
 			}
 		}
 		}
-		return null;
+		#end
+		if (Math.isNaN(v))
+			return null;
+		return cast v;
 	}
 	}
 
 
 	public static inline function parseFloat(x:String):Float {
 	public static inline function parseFloat(x:String):Float {

+ 62 - 41
std/jvm/_std/Std.hx

@@ -22,6 +22,8 @@
 
 
 import jvm.Jvm;
 import jvm.Jvm;
 
 
+using StringTools;
+
 @:coreApi
 @:coreApi
 class Std {
 class Std {
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
 	@:deprecated('Std.is is deprecated. Use Std.isOfType instead.')
@@ -63,55 +65,74 @@ class Std {
 		return cast x;
 		return cast x;
 	}
 	}
 
 
+	static inline function isSpaceChar(code:Int):Bool
+		return (code > 8 && code < 14) || code == 32;
+
+	static inline function isHexPrefix(cur:Int, next:Int):Bool
+		return cur == '0'.code && (next == 'x'.code || next == 'X'.code);
+
+	static inline function isDecimalDigit(code:Int):Bool
+		return '0'.code <= code && code <= '9'.code;
+
+	static inline function isHexadecimalDigit(code:Int):Bool
+		return isDecimalDigit(code) || ('a'.code <= code && code <= 'f'.code) || ('A'.code <= code && code <= 'F'.code);
+
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
-		if (x == null) {
+		if (x == null)
 			return null;
 			return null;
+
+		final len = x.length;
+		var index = 0;
+
+		inline function hasIndex(index:Int)
+			return index < len;
+
+		// skip whitespace
+		while (hasIndex(index)) {
+			if (!isSpaceChar(x.unsafeCodeAt(index)))
+				break;
+			++index;
 		}
 		}
 
 
-		var base = 10;
-		var len = x.length;
-		var foundCount = 0;
-		var sign = 0;
-		var firstDigitIndex = 0;
-		var lastDigitIndex = -1;
-		var previous = 0;
-
-		for (i in 0...len) {
-			var c = StringTools.fastCodeAt(x, i);
-			switch c {
-				case _ if ((c > 8 && c < 14) || c == 32):
-					if (foundCount > 0) {
-						return null;
-					}
-					continue;
-				case '-'.code if (foundCount == 0):
-					sign = -1;
-				case '+'.code if (foundCount == 0):
-					sign = 1;
-				case '0'.code if (foundCount == 0 || (foundCount == 1 && sign != 0)):
-				case 'x'.code | 'X'.code if (previous == '0'.code && ((foundCount == 1 && sign == 0) || (foundCount == 2 && sign != 0))):
-					base = 16;
-				case _ if ('0'.code <= c && c <= '9'.code):
-				case _ if (base == 16 && (('a'.code <= c && c <= 'z'.code) || ('A'.code <= c && c <= 'Z'.code))):
-				case _:
-					break;
-			}
-			if ((foundCount == 0 && sign == 0) || (foundCount == 1 && sign != 0)) {
-				firstDigitIndex = i;
+		// handle sign
+		final isNegative = hasIndex(index) && {
+			final sign = x.unsafeCodeAt(index);
+			if (sign == '-'.code || sign == '+'.code) {
+				++index;
 			}
 			}
-			foundCount++;
-			lastDigitIndex = i;
-			previous = c;
+			sign == '-'.code;
 		}
 		}
-		if (firstDigitIndex <= lastDigitIndex) {
-			var digits = x.substring(firstDigitIndex + (base == 16 ? 2 : 0), lastDigitIndex + 1);
-			return try {
-				(sign == -1 ? -1 : 1) * java.lang.Integer.parseInt(digits, base);
-			} catch (e:java.lang.NumberFormatException) {
-				null;
+
+		// handle base
+		final isHexadecimal = hasIndex(index + 1) && isHexPrefix(x.unsafeCodeAt(index), x.unsafeCodeAt(index + 1));
+		if (isHexadecimal)
+			index += 2; // skip prefix
+
+		// handle digits
+		final firstInvalidIndex = {
+			var cur = index;
+			if (isHexadecimal) {
+				while (hasIndex(cur)) {
+					if (!isHexadecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
+			} else {
+				while (hasIndex(cur)) {
+					if (!isDecimalDigit(x.unsafeCodeAt(cur)))
+						break;
+					++cur;
+				}
 			}
 			}
+			cur;
 		}
 		}
-		return null;
+
+		// no valid digits
+		if (index == firstInvalidIndex)
+			return null;
+
+		final result = java.lang.Integer.parseInt(x.substring(index, firstInvalidIndex), if (isHexadecimal) 16 else 10);
+		return if (isNegative) -result else result;
 	}
 	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {

+ 11 - 15
std/lua/_std/Std.hx

@@ -58,22 +58,18 @@ import lua.NativeStringTools;
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
 		if (x == null)
 		if (x == null)
 			return null;
 			return null;
-		var hexMatch = NativeStringTools.match(x, "^[ \t\r\n]*([%-+]*0[xX][%da-fA-F]*)");
-		if (hexMatch != null) {
-			var sign = switch StringTools.fastCodeAt(hexMatch, 0) {
-				case '-'.code: -1;
-				case '+'.code: 1;
-				case _: 0;
-			}
-			return (sign == -1 ? -1 : 1) * lua.Lua.tonumber(hexMatch.substr(sign == 0 ? 2 : 3), 16);
-		} else {
-			var intMatch = NativeStringTools.match(x, "^ *[%-+]?%d*");
-			if (intMatch != null) {
-				return lua.Lua.tonumber(intMatch);
-			} else {
-				return null;
-			}
+		untyped {
+			__lua__("local sign, numString = {0}", NativeStringTools.match(x, "^%s*([%-+]?)0[xX]([%da-fA-F]*)"));
+			if (numString != null)
+				return switch sign {
+					case '-': -lua.Lua.tonumber(numString, 16);
+					case _: lua.Lua.tonumber(numString, 16);
+				}
 		}
 		}
+		final intMatch = NativeStringTools.match(x, "^%s*[%-+]?%d*");
+		if (intMatch == null)
+			return null;
+		return lua.Lua.tonumber(intMatch);
 	}
 	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {

+ 10 - 0
std/php/Global.hx

@@ -390,6 +390,11 @@ extern class Global {
 	**/
 	**/
 	static function strcmp(str1:String, str2:String):Int;
 	static function strcmp(str1:String, str2:String):Int;
 
 
+	/**
+		@see https://www.php.net/manual/en/function.strspn.php
+	**/
+	static function strspn(string:String, characters:String, offset:Int = 0, ?length:Int):Int;
+
 	/**
 	/**
 		@see http://php.net/manual/en/function.strtr.php
 		@see http://php.net/manual/en/function.strtr.php
 	**/
 	**/
@@ -407,6 +412,11 @@ extern class Global {
 	static function str_replace(search:EitherType<String, NativeArray>, replace:EitherType<String, NativeArray>, subject:EitherType<String, NativeArray>,
 	static function str_replace(search:EitherType<String, NativeArray>, replace:EitherType<String, NativeArray>, subject:EitherType<String, NativeArray>,
 		?count:Int):EitherType<String, NativeArray>;
 		?count:Int):EitherType<String, NativeArray>;
 
 
+	/**
+		@see https://www.php.net/manual/en/function.str-starts-with.php
+	**/
+	static function str_starts_with(haystack:String, needle:String):Bool;
+
 	/**
 	/**
 		@see http://php.net/manual/en/function.explode.php
 		@see http://php.net/manual/en/function.explode.php
 	**/
 	**/

+ 8 - 0
std/php/_polyfills.php

@@ -60,4 +60,12 @@ namespace { //Namespace declaration is required because this file is included un
 		}
 		}
 	}
 	}
 
 
+	/**
+	 * @see https://www.php.net/manual/en/function.str-starts-with.php
+	 */
+	if (!function_exists('str_starts_with')) {
+		function str_starts_with($str, $start) {
+    		return (@substr_compare($str, $start, 0, strlen($start))==0);
+		}
+	}
 }
 }

+ 13 - 14
std/php/_std/Std.hx

@@ -52,22 +52,21 @@ import php.Syntax;
 	}
 	}
 
 
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
-		if (Global.is_numeric(x)) {
-			return Global.intval(x, 10);
-		} else {
-			x = Global.ltrim(x);
-			var firstCharIndex = (x.charAt(0) == '-' ? 1 : 0);
-			var firstCharCode = x.charCodeAt(firstCharIndex);
-			if (!isDigitCode(firstCharCode)) {
+		x = Global.ltrim(x, " \t\n\x0b\x0c\r");
+		final digitsOnly = Global.ltrim(x, "+-");
+		if (Global.str_starts_with(digitsOnly, '0x') || Global.str_starts_with(digitsOnly, '0X')) {
+			final val = Global.intval(x, 16); // hexadecimal
+			// if the value was 0, ensure there is only a maximum of one + or - sign
+			if (val == 0 && digitsOnly.length + 1 < x.length)
 				return null;
 				return null;
-			}
-			var secondChar = x.charAt(firstCharIndex + 1);
-			if (secondChar == 'x' || secondChar == 'X') {
-				return Global.intval(x, 0);
-			} else {
-				return Global.intval(x, 10);
-			}
+			return val;
 		}
 		}
+		final val = Global.intval(x, 10);
+		// if the value was 0, make sure it wasn't because the string had no valid digits
+		// last check ensures there is only a maximum of one + or - sign
+		if (val == 0 && (Global.strspn(digitsOnly, "0123456789", 0, 1) == 0 || digitsOnly.length + 1 < x.length))
+			return null;
+		return val;
 	}
 	}
 
 
 	public static function parseFloat(x:String):Float {
 	public static function parseFloat(x:String):Float {

+ 60 - 43
std/python/_std/Std.hx

@@ -130,57 +130,74 @@ import python.Syntax;
 		}
 		}
 	}
 	}
 
 
+	static inline function isSpaceChar(char:String):Bool
+		return Syntax.isIn(char, " \n\r\t\x0b\x0c");
+
+	static inline function isHexPrefix(cur:String, next:String):Bool
+		return cur == '0' && (next == 'x' || next == 'X');
+
+	static inline function isDecimalDigit(char:String):Bool
+		return Syntax.isIn(char, "0123456789");
+
+	static inline function isHexadecimalDigit(char:String):Bool
+		return Syntax.isIn(char, "0123456789abcdefABCDEF");
+
 	public static function parseInt(x:String):Null<Int> {
 	public static function parseInt(x:String):Null<Int> {
 		if (x == null)
 		if (x == null)
 			return null;
 			return null;
-		try {
-			return UBuiltins.int(x);
-		} catch (e:Dynamic) {
-			var base = 10;
-			var len = x.length;
-			var foundCount = 0;
-			var sign = 0;
-			var firstDigitIndex = 0;
-			var lastDigitIndex = -1;
-			var previous = 0;
-
-			for(i in 0...len) {
-				var c = StringTools.fastCodeAt(x, i);
-				switch c {
-					case _ if((c > 8 && c < 14) || c == 32):
-						if(foundCount > 0) {
-							return null;
-						}
-						continue;
-					case '-'.code if(foundCount == 0):
-						sign = -1;
-					case '+'.code if(foundCount == 0):
-						sign = 1;
-					case '0'.code if(foundCount == 0 || (foundCount == 1 && sign != 0)):
-					case 'x'.code | 'X'.code if(previous == '0'.code && ((foundCount == 1 && sign == 0) || (foundCount == 2 && sign != 0))):
-						base = 16;
-					case _ if('0'.code <= c && c <= '9'.code):
-					case _ if(base == 16 && (('a'.code <= c && c <= 'z'.code) || ('A'.code <= c && c <= 'Z'.code))):
-					case _:
+
+		final len = x.length;
+		var index = 0;
+
+		inline function hasIndex(index:Int)
+			return index < len;
+
+		// skip whitespace
+		while (hasIndex(index)) {
+			if (!isSpaceChar(Syntax.arrayAccess(x, index)))
+				break;
+			++index;
+		}
+
+		// handle sign
+		final isNegative = hasIndex(index) && {
+			final sign = Syntax.arrayAccess(x, index);
+			if (sign == '-' || sign == '+') {
+				++index;
+			}
+			sign == '-';
+		}
+
+		// handle base
+		final isHexadecimal = hasIndex(index + 1) && isHexPrefix(Syntax.arrayAccess(x, index), Syntax.arrayAccess(x, index + 1));
+		if (isHexadecimal)
+			index += 2; // skip prefix
+
+		// handle digits
+		final firstInvalidIndex = {
+			var cur = index;
+			if (isHexadecimal) {
+				while (hasIndex(cur)) {
+					if (!isHexadecimalDigit(Syntax.arrayAccess(x, cur)))
 						break;
 						break;
+					++cur;
 				}
 				}
-				if((foundCount == 0 && sign == 0) || (foundCount == 1 && sign != 0)) {
-					firstDigitIndex = i;
-				}
-				foundCount++;
-				lastDigitIndex = i;
-				previous = c;
-			}
-			if(firstDigitIndex <= lastDigitIndex) {
-				var digits = x.substring(firstDigitIndex, lastDigitIndex + 1);
-				return try {
-					(sign == -1 ? -1 : 1) * UBuiltins.int(digits, base);
-				} catch(e:Dynamic) {
-					null;
+			} else {
+				while (hasIndex(cur)) {
+					if (!isDecimalDigit(Syntax.arrayAccess(x, cur)))
+						break;
+					++cur;
 				}
 				}
 			}
 			}
-			return null;
+			cur;
 		}
 		}
+
+		// no valid digits
+		if (index == firstInvalidIndex)
+			return null;
+
+		final result = python.internal.UBuiltins.int(x.substring(index, firstInvalidIndex), if (isHexadecimal) 16 else 10);
+		return if (isNegative) -result else result;
 	}
 	}
 
 
 	static function shortenPossibleNumber(x:String):String {
 	static function shortenPossibleNumber(x:String):String {

+ 9 - 0
tests/unit/src/unit/issues/Issue9198.hx

@@ -0,0 +1,9 @@
+package unit.issues;
+
+import unit.Test;
+
+class Issue9198 extends Test {
+	public function test() {
+		eq(null, Std.parseInt("axolotl"));
+	}
+}

+ 92 - 29
tests/unit/src/unitstd/Std.unit.hx

@@ -71,59 +71,122 @@ Std.int(0.7) == 0;
 Std.int(0.2) == 0;
 Std.int(0.2) == 0;
 
 
 // parseInt
 // parseInt
+
+// general
 Std.parseInt("0") == 0;
 Std.parseInt("0") == 0;
 Std.parseInt("-1") == -1;
 Std.parseInt("-1") == -1;
-Std.parseInt("   5") == 5;
+// preceding zeroes
 Std.parseInt("0001") == 1;
 Std.parseInt("0001") == 1;
 Std.parseInt("0010") == 10;
 Std.parseInt("0010") == 10;
-Std.parseInt("100") == 100;
-Std.parseInt("-100") == -100;
+// trailing text
 Std.parseInt("100x123") == 100;
 Std.parseInt("100x123") == 100;
 Std.parseInt("12foo13") == 12;
 Std.parseInt("12foo13") == 12;
-Std.parseInt("") == null;
-Std.parseInt("abcd") == null;
-Std.parseInt("a10") == null;
-Std.parseInt(null) == null;
-Std.parseInt("0xFF") == 255;
+#if !php // https://github.com/HaxeFoundation/haxe/issues/10617
+Std.parseInt("23e2") == 23;
+#end
+Std.parseInt("0x10z") == 16;
+Std.parseInt("0x10x123") == 16;
+Std.parseInt("0xff\n") == 255;
+// hexadecimals
+Std.parseInt("0xff") == 255;
 Std.parseInt("0x123") == 291;
 Std.parseInt("0x123") == 291;
 Std.parseInt("0XFF") == 255;
 Std.parseInt("0XFF") == 255;
 Std.parseInt("0X123") == 291;
 Std.parseInt("0X123") == 291;
 Std.parseInt("0X01") == 1;
 Std.parseInt("0X01") == 1;
 Std.parseInt("0x01") == 1;
 Std.parseInt("0x01") == 1;
-#if !neko //sorry, neko
-#if !hl //see https://github.com/HaxeFoundation/hashlink/issues/330
-#if !cpp //see https://github.com/HaxeFoundation/hxcpp/issues/869
-Std.parseInt('  	-0x10') == -16;
-#end
-#end
-#end
-
-#if !neko
+// signs
+Std.parseInt("123") == 123;
 Std.parseInt("+123") == 123;
 Std.parseInt("+123") == 123;
+Std.parseInt("-123") == -123;
+Std.parseInt("0xa0") == 160;
+Std.parseInt("+0xa0") == 160;
 Std.parseInt("-0xa0") == -160;
 Std.parseInt("-0xa0") == -160;
-#end
+// whitespace: space, horizontal tab, newline, vertical tab, form feed, and carriage return
+Std.parseInt("   5") == 5;
+Std.parseInt(" \t\n\x0b\x0c\r16") == 16;
+Std.parseInt(" \t\n\x0b\x0c\r0xa") == 10;
+// whitespace and signs
+Std.parseInt('  	16') == 16;
+Std.parseInt('  	-16') == -16;
+Std.parseInt('  	+16') == 16;
+Std.parseInt('  	0x10') == 16;
+Std.parseInt('  	-0x10') == -16;
+Std.parseInt('  	+0x10') == 16;
+// binary and octal unsupported
+Std.parseInt("010") == 10;
+Std.parseInt("0b10") == 0;
+// null
+Std.parseInt(null) == null;
+// no number
+Std.parseInt("") == null;
+Std.parseInt("abcd") == null;
+Std.parseInt("a10") == null;
+// invalid use of signs
+Std.parseInt("++123") == null;
+Std.parseInt("+-123") == null;
+Std.parseInt("-+123") == null;
+Std.parseInt("--123") == null;
+Std.parseInt("+ 123") == null;
+Std.parseInt("- 123") == null;
+Std.parseInt("++0x123") == null;
+Std.parseInt("+-0x123") == null;
+Std.parseInt("-+0x123") == null;
+Std.parseInt("--0x123") == null;
+Std.parseInt("+ 0x123") == null;
+Std.parseInt("- 0x123") == null;
+// hexadecimal prefix with no number
+unspec(Std.parseInt.bind("0x"));
+unspec(Std.parseInt.bind("0x C"));
+unspec(Std.parseInt.bind("0x+A"));
 
 
 // parseFloat
 // parseFloat
+
+// general
 Std.parseFloat("0") == 0.;
 Std.parseFloat("0") == 0.;
-Std.parseFloat("   5.3") == 5.3;
+Std.parseFloat("0.0") == 0.;
+// preceding zeroes
 Std.parseFloat("0001") == 1.;
 Std.parseFloat("0001") == 1.;
-Std.parseFloat("100.45") == 100.45;
-Std.parseFloat("-100.01") == -100.01;
+Std.parseFloat("0010") == 10.;
+// trailing text
 Std.parseFloat("100x123") == 100.;
 Std.parseFloat("100x123") == 100.;
-Math.isNaN(Std.parseFloat("")) == true;
-Math.isNaN(Std.parseFloat("abcd")) == true;
-Math.isNaN(Std.parseFloat("a10")) == true;
-Math.isNaN(Std.parseFloat(null)) == true;
+Std.parseFloat("12foo13") == 12.;
 Std.parseFloat("5.3 ") == 5.3;
 Std.parseFloat("5.3 ") == 5.3;
-Std.parseFloat("0.0") == 0.;
 Std.parseFloat("5.3 1") == 5.3;
 Std.parseFloat("5.3 1") == 5.3;
+// signs
+Std.parseFloat("123.45") == 123.45;
+Std.parseFloat("+123.45") == 123.45;
+Std.parseFloat("-123.45") == -123.45;
+// whitespace: space, horizontal tab, newline, vertical tab, form feed, and carriage return
+Std.parseFloat("   5.2") == 5.2;
+Std.parseFloat(" \t\n\x0b\x0c\r1.6") == 1.6;
+// whitespace and signs
+Std.parseFloat('  	1.6') == 1.6;
+Std.parseFloat('  	-1.6') == -1.6;
+Std.parseFloat('  	+1.6') == 1.6;
+// exponent
+Std.parseFloat("2.426670815e12") == 2.426670815e12;
+Std.parseFloat("2.426670815E12") == 2.426670815e12;
 Std.parseFloat("2.426670815e+12") == 2.426670815e+12;
 Std.parseFloat("2.426670815e+12") == 2.426670815e+12;
 Std.parseFloat("2.426670815E+12") == 2.426670815e+12;
 Std.parseFloat("2.426670815E+12") == 2.426670815e+12;
 Std.parseFloat("2.426670815e-12") == 2.426670815e-12;
 Std.parseFloat("2.426670815e-12") == 2.426670815e-12;
 Std.parseFloat("2.426670815E-12") == 2.426670815e-12;
 Std.parseFloat("2.426670815E-12") == 2.426670815e-12;
-// Std.parseInt("0x C") == 0;
-// Std.parseInt("0x+A") == 0;
-Std.parseFloat("    \t42.2") == 42.2;
+#if !interp
+Std.parseFloat("6e") == 6;
+Std.parseFloat("6E") == 6;
+#end
+// null
+Math.isNaN(Std.parseFloat(null)) == true;
+// no number
+Math.isNaN(Std.parseFloat("")) == true;
+Math.isNaN(Std.parseFloat("abcd")) == true;
+Math.isNaN(Std.parseFloat("a10")) == true;
+// invalid use of signs
+Math.isNaN(Std.parseFloat("++12.3")) == true;
+Math.isNaN(Std.parseFloat("+-12.3")) == true;
+Math.isNaN(Std.parseFloat("-+12.3")) == true;
+Math.isNaN(Std.parseFloat("--12.3")) == true;
+Math.isNaN(Std.parseFloat("+ 12.3")) == true;
+Math.isNaN(Std.parseFloat("- 12.3")) == true;
 
 
 // random
 // random
 var x = Std.random(2);
 var x = Std.random(2);