6 years ago · b49724d594
--- a/src/macro/eval/evalHash.ml
+++ b/src/macro/eval/evalHash.ml
@@ -49,7 +49,6 @@ let key_haxe_ds_ObjectMap = hash "haxe.ds.ObjectMap"
 
															 let key_haxe_macro_Position = hash "haxe.macro.Position"
														
 
															 let key_haxe_macro_LazyType = hash "haxe.macro.LazyType"
														
 
															 let key_haxe_macro_TypeDecl = hash "haxe.macro.TypeDecl"
														
 
															-let key_haxe_Utf8 = hash "haxe.Utf8"
														
 
															 let key_haxe_macro_Ref = hash "haxe.macro.Ref"
														
 
															 let key_haxe_io_Error = hash "haxe.io.Error"
														
 
															 let key_haxe_io_Bytes = hash "haxe.io.Bytes"
														
--- a/src/macro/eval/evalStdLib.ml
+++ b/src/macro/eval/evalStdLib.ml
@@ -3114,11 +3114,6 @@ let init_constructors builtins =
 
															 			| _ -> assert false
														
 
															 		);
														
 
															 	add key_StringBuf (fun _ -> encode_instance key_StringBuf ~kind:(IBuffer (VStringBuffer.create())));
														
 
															-	add key_haxe_Utf8
														
 
															-		(fun vl -> match vl with
														
 
															-			| [size] -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create (default_int size 0)))
														
 
															-			| _ -> assert false
														
 
															-		);
														
 
															 	add key_haxe_ds_StringMap (fun _ -> encode_string_map_direct (StringHashtbl.create ()));
														
 
															 	add key_haxe_ds_IntMap (fun _ -> encode_int_map_direct (IntHashtbl.create ()));
														
 
															 	add key_haxe_ds_ObjectMap (fun _ -> encode_object_map_direct (Obj.magic (ValueHashtbl.create 0)));
														
@@ -3209,7 +3204,6 @@ let init_empty_constructors builtins =
 
															 	Hashtbl.add h key_Date (fun () -> encode_instance key_Date ~kind:(IDate 0.));
														
 
															 	Hashtbl.add h key_EReg (fun () -> encode_instance key_EReg ~kind:(IRegex {r = Pcre.regexp ""; r_rex_string = create_ascii "~//"; r_global = false; r_string = ""; r_groups = [||]}));
														
 
															 	Hashtbl.add h key_String (fun () -> encode_string "");
														
 
															-	Hashtbl.add h key_haxe_Utf8 (fun () -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create 0)));
														
 
															 	Hashtbl.add h key_haxe_ds_StringMap (fun () -> encode_instance key_haxe_ds_StringMap ~kind:(IStringMap (StringHashtbl.create ())));
														
 
															 	Hashtbl.add h key_haxe_ds_IntMap (fun () -> encode_instance key_haxe_ds_IntMap ~kind:(IIntMap (IntHashtbl.create ())));
														
 
															 	Hashtbl.add h key_haxe_ds_ObjectMap (fun () -> encode_instance key_haxe_ds_ObjectMap ~kind:(IObjectMap (Obj.magic (ValueHashtbl.create 0))));
														
--- a/std/StringTools.hx
+++ b/std/StringTools.hx
@@ -553,4 +553,14 @@ class StringTools {
 
															 	private static var _urlDecode = neko.Lib.load("std","url_decode",1);
														
 
															 	#end
														
 
															+	#if utf16
														
 
															+	static inline var MIN_SURROGATE_CODE_POINT = 65536;
														
 
															+	static inline function utf16CodePointAt(s:String, index:Int):Int {
														
 
															+		var c = StringTools.fastCodeAt(s, index);
														
 
															+		if (c >= 0xD800 && c <= 0xDBFF) {
														
 
															+			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, index + 1) & 0x3FF);
														
 
															+		}
														
 
															+		return c;
														
 
															+	}
														
 
															+	#end
														
 
															 }
														
--- a/std/UnicodeString.hx
+++ b/std/UnicodeString.hx
@@ -0,0 +1,215 @@
 
															+/*
														
 
															+ * Copyright (C)2005-2019 Haxe Foundation
														
 
															+ *
														
 
															+ * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															+ * copy of this software and associated documentation files (the "Software"),
														
 
															+ * to deal in the Software without restriction, including without limitation
														
 
															+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															+ * and/or sell copies of the Software, and to permit persons to whom the
														
 
															+ * Software is furnished to do so, subject to the following conditions:
														
 
															+ *
														
 
															+ * The above copyright notice and this permission notice shall be included in
														
 
															+ * all copies or substantial portions of the Software.
														
 
															+ *
														
 
															+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															+ * DEALINGS IN THE SOFTWARE.
														
 
															+ */
														
 
															+
														
 
															+import haxe.io.Bytes;
														
 
															+import haxe.io.Encoding;
														
 
															+import haxe.iterators.StringIteratorUnicode;
														
 
															+import haxe.iterators.StringKeyValueIteratorUnicode;
														
 
															+
														
 
															+/**
														
 
															+	This abstract provides consistent cross-target unicode support.
														
 
															+
														
 
															+	@see https://haxe.org/manual/std-UnicodeString.html
														
 
															+**/
														
 
															+@:forward
														
 
															+@:access(StringTools)
														
 
															+abstract UnicodeString(String) from String to String {
														
 
															+
														
 
															+	/**
														
 
															+		Tells if `b` is a correctly encoded UTF8 byte sequence.
														
 
															+	**/
														
 
															+	static public function validate(b:Bytes, encoding:Encoding) : Bool {
														
 
															+		switch(encoding) {
														
 
															+			case RawNative: throw "UnicodeString.validate: RawNative encoding is not supported";
														
 
															+			case UTF8:
														
 
															+				var data = b.getData();
														
 
															+				var pos = 0;
														
 
															+				var max = b.length;
														
 
															+				while( pos < max) {
														
 
															+					var c:Int = Bytes.fastGet(data, pos++);
														
 
															+					if(c < 0x80) {
														
 
															+					} else if(c < 0xC2) {
														
 
															+						return false;
														
 
															+					} else if(c < 0xE0) {
														
 
															+						if(pos + 1 > max) {
														
 
															+							return false;
														
 
															+						}
														
 
															+						var c2:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c2 < 0x80 || c2 > 0xBF) {
														
 
															+							return false;
														
 
															+						}
														
 
															+					} else if(c < 0xF0) {
														
 
															+						if(pos + 2 > max) {
														
 
															+							return false;
														
 
															+						}
														
 
															+						var c2:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c == 0xE0) {
														
 
															+							if(c2 < 0xA0 || c2 > 0xBF) return false;
														
 
															+						} else {
														
 
															+							if(c2 < 0x80 || c2 > 0xBF) return false;
														
 
															+						}
														
 
															+						var c3:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c3 < 0x80 || c3 > 0xBF) {
														
 
															+							return false;
														
 
															+						}
														
 
															+						c = (c << 16) | (c2 << 8) | c3;
														
 
															+						if(0xEDA080 <= c && c <= 0xEDBFBF) { //surrogate pairs
														
 
															+							return false;
														
 
															+						}
														
 
															+					} else if(c > 0xF4) {
														
 
															+						return false;
														
 
															+					} else {
														
 
															+						if(pos + 3 > max) {
														
 
															+							return false;
														
 
															+						}
														
 
															+						var c2:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c == 0xF0) {
														
 
															+							if(c2 < 0x90 || c2 > 0xBF) return false;
														
 
															+						} else if(c == 0xF4) {
														
 
															+							if(c2 < 0x80 || c2 > 0x8F) return false;
														
 
															+						} else {
														
 
															+							if(c2 < 0x80 || c2 > 0xBF) return false;
														
 
															+						}
														
 
															+						var c3:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c3 < 0x80 || c3 > 0xBF) {
														
 
															+							return false;
														
 
															+						}
														
 
															+						var c4:Int = Bytes.fastGet(data, pos++);
														
 
															+						if(c4 < 0x80 || c4 > 0xBF) {
														
 
															+							return false;
														
 
															+						}
														
 
															+					}
														
 
															+				}
														
 
															+				return true;
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+#if (target.unicode)
														
 
															+
														
 
															+	/**
														
 
															+		The number of characters in `this` String.
														
 
															+	**/
														
 
															+	public var length(get,never):Int;
														
 
															+
														
 
															+	/**
														
 
															+		Creates an instance of UnicodeString.
														
 
															+	**/
														
 
															+	public inline function new(string:String):Void {
														
 
															+		this = string;
														
 
															+	}
														
 
															+
														
 
															+	/**
														
 
															+		Returns the character at position `index` of `this` String.
														
 
															+
														
 
															+		If `index` is negative or exceeds `this.length`, the empty String `""`
														
 
															+		is returned.
														
 
															+	**/
														
 
															+	#if !utf16 inline #end
														
 
															+	public function charAt(index:Int):String {
														
 
															+		#if utf16
														
 
															+			if(index < 0) return '';
														
 
															+			var unicodeOffset = 0;
														
 
															+			var nativeOffset = 0;
														
 
															+			while(nativeOffset < this.length) {
														
 
															+				var c = StringTools.utf16CodePointAt(this, nativeOffset++);
														
 
															+				if(unicodeOffset == index) {
														
 
															+					return String.fromCharCode(c);
														
 
															+				}
														
 
															+				if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
														
 
															+					nativeOffset++;
														
 
															+				}
														
 
															+				unicodeOffset++;
														
 
															+			}
														
 
															+			return '';
														
 
															+		#else
														
 
															+			return this.charAt(index);
														
 
															+		#end
														
 
															+	}
														
 
															+
														
 
															+	/**
														
 
															+		Returns the character code at position `index` of `this` String.
														
 
															+
														
 
															+		If `index` is negative or exceeds `this.length`, `null` is returned.
														
 
															+	**/
														
 
															+	#if !utf16 inline #end
														
 
															+	public function charCodeAt(index:Int):Null<Int> {
														
 
															+		#if utf16
														
 
															+			if(index < 0) return null;
														
 
															+			var unicodeOffset = 0;
														
 
															+			var nativeOffset = 0;
														
 
															+			while(nativeOffset < this.length) {
														
 
															+				var c = StringTools.utf16CodePointAt(this, nativeOffset++);
														
 
															+				if(unicodeOffset == index) {
														
 
															+					return c;
														
 
															+				}
														
 
															+				if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
														
 
															+					nativeOffset++;
														
 
															+				}
														
 
															+				unicodeOffset++;
														
 
															+			}
														
 
															+			return null;
														
 
															+		#else
														
 
															+			return this.charCodeAt(index);
														
 
															+		#end
														
 
															+	}
														
 
															+
														
 
															+	/**
														
 
															+		Returns an iterator of the unicode code points.
														
 
															+	**/
														
 
															+	public inline function iterator():StringIteratorUnicode {
														
 
															+		return new StringIteratorUnicode(this);
														
 
															+	}
														
 
															+
														
 
															+	/**
														
 
															+		Returns an iterator of the code point indices and unicode code points.
														
 
															+	**/
														
 
															+	public inline function keyValueIterator():StringKeyValueIteratorUnicode {
														
 
															+		return new StringKeyValueIteratorUnicode(this);
														
 
															+	}
														
 
															+
														
 
															+	#if !utf16 inline #end
														
 
															+	function get_length():Int {
														
 
															+		#if utf16
														
 
															+			var l = 0;
														
 
															+			for(c in new StringIteratorUnicode(this)) {
														
 
															+				l++;
														
 
															+			}
														
 
															+			return l;
														
 
															+		#else
														
 
															+			return this.length;
														
 
															+		#end
														
 
															+	}
														
 
															+
														
 
															+#end
														
 
															+
														
 
															+	@:op(A < B) static function lt(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A <= B) static function lte(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A > B) static function gt(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A >= B) static function gte(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A == B) static function eq(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A != B) static function neq(a:UnicodeString, b:UnicodeString):Bool;
														
 
															+	@:op(A + B) static function add(a:UnicodeString, b:UnicodeString):UnicodeString;
														
 
															+	@:op(A += B) static function assignAdd(a:UnicodeString, b:UnicodeString):UnicodeString;
														
 
															+
														
 
															+	@:op(A + B) @:commutative static function add(a:UnicodeString, b:String):UnicodeString;
														
 
															+	@:op(A += B) @:commutative static function assignAdd(a:UnicodeString, b:String):UnicodeString;
														
 
															+}
														
--- a/std/cpp/_std/haxe/Utf8.hx
+++ b/std/cpp/_std/haxe/Utf8.hx
@@ -1,87 +0,0 @@
 
															-/*
														
 
															- * Copyright (C)2005-2019 Haxe Foundation
														
 
															- *
														
 
															- * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															- * copy of this software and associated documentation files (the "Software"),
														
 
															- * to deal in the Software without restriction, including without limitation
														
 
															- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															- * and/or sell copies of the Software, and to permit persons to whom the
														
 
															- * Software is furnished to do so, subject to the following conditions:
														
 
															- *
														
 
															- * The above copyright notice and this permission notice shall be included in
														
 
															- * all copies or substantial portions of the Software.
														
 
															- *
														
 
															- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															- * DEALINGS IN THE SOFTWARE.
														
 
															- */
														
 
															-package haxe;
														
 
															-
														
 
															-using cpp.NativeString;
														
 
															-
														
 
															-@:coreApi
														
 
															-class Utf8
														
 
															-{
														
 
															-   var __s:Array<Int>;
														
 
															-
														
 
															-	public function new( ?size : Null<Int> ) : Void {
														
 
															-      __s = new Array<Int>();
														
 
															-      if (size!=null && size>0)
														
 
															-         cpp.NativeArray.reserve(__s,size);
														
 
															-	}
														
 
															-
														
 
															-	public function addChar( c : Int ) : Void {
														
 
															-      __s.push(c);
														
 
															-	}
														
 
															-
														
 
															-	public function toString() : String {
														
 
															-		return untyped __global__.__hxcpp_char_array_to_utf8_string(__s);
														
 
															-	}
														
 
															-
														
 
															-   // Incoming string is array of bytes containing possibly invalid utf8 chars
														
 
															-   // Result is the same string with the bytes expanded into utf8 sequences
														
 
															-	public static function encode( s : String ) : String {
														
 
															-		return untyped __global__.__hxcpp_char_bytes_to_utf8_string(s);
														
 
															-	}
														
 
															-
														
 
															-   // Incoming string is array of bytes representing valid utf8 chars
														
 
															-   // Result is a string containing the compressed bytes
														
 
															-	public static function decode( s : String ) : String {
														
 
															-		return untyped __global__.__hxcpp_utf8_string_to_char_bytes(s);
														
 
															-	}
														
 
															-
														
 
															-	public #if !cppia inline #end static function iter( s : String, chars : Int -> Void ) : Void {
														
 
															-      var src = s.c_str();
														
 
															-      var end = src.add( s.length );
														
 
															-
														
 
															-      while(src.lt(end))
														
 
															-         chars(src.ptr.utf8DecodeAdvance());
														
 
															-	}
														
 
															-
														
 
															-	public static function charCodeAt( s : String, index : Int ) : Int {
														
 
															-      return s.utf8CharCodeAt(index);
														
 
															-	}
														
 
															-
														
 
															-	public static function validate( s : String ) : Bool {
														
 
															-      return s.utf8IsValid();
														
 
															-	}
														
 
															-
														
 
															-	public static function length( s : String ) : Int {
														
 
															-      return s.utf8Length();
														
 
															-	}
														
 
															-
														
 
															-	public static function compare( a : String, b : String ) : Int {
														
 
															-      return a.compare(b);
														
 
															-	}
														
 
															-
														
 
															-	public static function sub( s : String, pos : Int, len : Int ) : String {
														
 
															-      return s.utf8Sub(pos,len);
														
 
															-	}
														
 
															-
														
 
															-}
														
 
															-
														
 
															-
														
--- a/std/cpp/cppia/HostClasses.hx
+++ b/std/cpp/cppia/HostClasses.hx
@@ -92,7 +92,6 @@ class HostClasses
 
															    "haxe.Unserializer",
														
 
															    "haxe.Resource",
														
 
															    "haxe.Template",
														
 
															-   "haxe.Utf8",
														
 
															    "haxe.Log",
														
 
															    "haxe.zip.Compress",
														
 
															    "haxe.zip.Uncompress",
														
@@ -124,7 +123,6 @@ class HostClasses
 
															    "haxe.CallStack",
														
 
															    "haxe.Resource",
														
 
															-   "haxe.Utf8",
														
 
															    "haxe.Int64",
														
 
															    "haxe.Int32",
														
 
															    "haxe.Serializer",
														
--- a/std/eval/_std/haxe/Utf8.hx
+++ b/std/eval/_std/haxe/Utf8.hx
@@ -1,37 +0,0 @@
 
															-/*
														
 
															- * Copyright (C)2005-2019 Haxe Foundation
														
 
															- *
														
 
															- * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															- * copy of this software and associated documentation files (the "Software"),
														
 
															- * to deal in the Software without restriction, including without limitation
														
 
															- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															- * and/or sell copies of the Software, and to permit persons to whom the
														
 
															- * Software is furnished to do so, subject to the following conditions:
														
 
															- *
														
 
															- * The above copyright notice and this permission notice shall be included in
														
 
															- * all copies or substantial portions of the Software.
														
 
															- *
														
 
															- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															- * DEALINGS IN THE SOFTWARE.
														
 
															- */
														
 
															-package haxe;
														
 
															-
														
 
															-@:coreApi
														
 
															-extern class Utf8 {
														
 
															-	public function new( ?size : Int ):Void;
														
 
															-	public function addChar( c : Int ) : Void;
														
 
															-	public function toString() : String;
														
 
															-	public static function iter( s : String, chars : Int -> Void ):Void;
														
 
															-	public static function encode( s : String ) : String;
														
 
															-	public static function decode( s : String ) : String;
														
 
															-	public static function charCodeAt( s : String, index : Int ) : Int;
														
 
															-	public static function validate( s : String ) : Bool;
														
 
															-	public static function length( s : String ) : Int;
														
 
															-	public static function compare( a : String, b : String ) : Int;
														
 
															-	public static function sub( s : String, pos : Int, len : Int ) : String;
														
 
															-}
														
--- a/std/haxe/Utf8.hx
+++ b/std/haxe/Utf8.hx
@@ -1,112 +0,0 @@
 
															-/*
														
 
															- * Copyright (C)2005-2019 Haxe Foundation
														
 
															- *
														
 
															- * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															- * copy of this software and associated documentation files (the "Software"),
														
 
															- * to deal in the Software without restriction, including without limitation
														
 
															- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															- * and/or sell copies of the Software, and to permit persons to whom the
														
 
															- * Software is furnished to do so, subject to the following conditions:
														
 
															- *
														
 
															- * The above copyright notice and this permission notice shall be included in
														
 
															- * all copies or substantial portions of the Software.
														
 
															- *
														
 
															- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															- * DEALINGS IN THE SOFTWARE.
														
 
															- */
														
 
															-package haxe;
														
 
															-
														
 
															-/**
														
 
															-	Since not all platforms guarantee that `String` always uses UTF-8 encoding, you
														
 
															-	can use this cross-platform API to perform operations on such strings.
														
 
															-**/
														
 
															-class Utf8 {
														
 
															-
														
 
															-	var __b : String;
														
 
															-
														
 
															-	/**
														
 
															-		Allocate a new Utf8 buffer using an optional bytes size.
														
 
															-	**/
														
 
															-	public function new( ?size : Int ) {
														
 
															-		__b = "";
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Add the given UTF8 character code to the buffer.
														
 
															-	**/
														
 
															-	public inline function addChar( c : Int ) : Void {
														
 
															-		__b += String.fromCharCode(c);
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Returns the buffer converted to a String.
														
 
															-	**/
														
 
															-	public inline function toString() : String {
														
 
															-		return __b;
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Call the `chars` function for each UTF8 char of the string.
														
 
															-	**/
														
 
															-	public static function iter( s : String, chars : Int -> Void ) {
														
 
															-		for( i in 0...s.length )
														
 
															-			chars(s.charCodeAt(i));
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Encode the input ISO string into the corresponding UTF8 one.
														
 
															-	**/
														
 
															-	public static function encode( s : String ) : String {
														
 
															-		throw "Not implemented";
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Decode an UTF8 string back to an ISO string.
														
 
															-		Throw an exception if a given UTF8 character is not supported by the decoder.
														
 
															-	**/
														
 
															-	public static function decode( s : String ) : String {
														
 
															-		throw "Not implemented";
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Similar to `String.charCodeAt` but uses the UTF8 character position.
														
 
															-	**/
														
 
															-	public static inline function charCodeAt( s : String, index : Int ) : Int {
														
 
															-		return s.charCodeAt(index);
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Tells if the String is correctly encoded as UTF8.
														
 
															-	**/
														
 
															-	public static inline function validate( s : String ) : Bool {
														
 
															-		return true;
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Returns the number of UTF8 chars of the String.
														
 
															-	**/
														
 
															-	#if js extern #end
														
 
															-	public static inline function length( s : String ) : Int {
														
 
															-		return s.length;
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		Compare two UTF8 strings, character by character.
														
 
															-	**/
														
 
															-	public static function compare( a : String, b : String ) : Int {
														
 
															-		return a > b ? 1 : (a == b ? 0 : -1);
														
 
															-	}
														
 
															-
														
 
															-	/**
														
 
															-		This is similar to `String.substr` but the `pos` and `len` parts are considering UTF8 characters.
														
 
															-	**/
														
 
															-	public static inline function sub( s : String, pos : Int, len : Int ) : String {
														
 
															-		return s.substr(pos,len);
														
 
															-	}
														
 
															-
														
 
															-}
														
--- a/std/haxe/format/JsonPrinter.hx
+++ b/std/haxe/format/JsonPrinter.hx
@@ -185,8 +185,8 @@ class JsonPrinter {
 
															 	}
														
 
															 	function quote( s : String ) {
														
 
															-		#if (neko || php || cpp)
														
 
															-		if( s.length != haxe.Utf8.length(s) ) {
														
 
															+		#if neko
														
 
															+		if( s.length != neko.Utf8.length(s) ) {
														
 
															 			quoteUtf8(s);
														
 
															 			return;
														
 
															 		}
														
@@ -236,10 +236,10 @@ class JsonPrinter {
 
															 		addChar('"'.code);
														
 
															 	}
														
 
															-	#if (neko || php || cpp)
														
 
															+	#if neko
														
 
															 	function quoteUtf8( s : String ) {
														
 
															-		var u = new haxe.Utf8();
														
 
															-		haxe.Utf8.iter(s,function(c) {
														
 
															+		var u = new neko.Utf8();
														
 
															+		neko.Utf8.iter(s,function(c) {
														
 
															 			switch( c ) {
														
 
															 			case '\\'.code, '"'.code: u.addChar('\\'.code); u.addChar(c);
														
 
															 			case '\n'.code: u.addChar('\\'.code); u.addChar('n'.code);
														
--- a/std/haxe/iterators/StringIteratorUnicode.hx
+++ b/std/haxe/iterators/StringIteratorUnicode.hx
@@ -50,14 +50,17 @@ class StringIteratorUnicode {
 
															 	/**
														
 
															 		See `Iterator.next`
														
 
															 	**/
														
 
															+	@:access(StringTools)
														
 
															 	public inline function next() {
														
 
															-		var c = StringTools.fastCodeAt(s, offset++);
														
 
															 		#if utf16
														
 
															-		if (c >= 0xD800 && c <= 0xDBFF) {
														
 
															-			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, offset++) & 0x3FF);
														
 
															+		var c = StringTools.utf16CodePointAt(s, offset++);
														
 
															+		if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
														
 
															+			offset++;
														
 
															 		}
														
 
															-		#end
														
 
															 		return c;
														
 
															+		#else
														
 
															+		return StringTools.fastCodeAt(s, offset++);
														
 
															+		#end
														
 
															 	}
														
 
															 	/**
														
--- a/std/haxe/iterators/StringKeyValueIteratorUnicode.hx
+++ b/std/haxe/iterators/StringKeyValueIteratorUnicode.hx
@@ -53,14 +53,17 @@ class StringKeyValueIteratorUnicode {
 
															 	/**
														
 
															 		See `Iterator.next`
														
 
															 	**/
														
 
															+	@:access(StringTools)
														
 
															 	public inline function next() {
														
 
															-		var c = StringTools.fastCodeAt(s, byteOffset++);
														
 
															 		#if utf16
														
 
															-		if (c >= 0xD800 && c <= 0xDBFF) {
														
 
															-			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, byteOffset++) & 0x3FF);
														
 
															+		var c = StringTools.utf16CodePointAt(s, byteOffset++);
														
 
															+		if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
														
 
															+			byteOffset++;
														
 
															 		}
														
 
															-		#end
														
 
															 		return { key: charOffset++, value: c };
														
 
															+		#else
														
 
															+		return { key: charOffset++, value: StringTools.fastCodeAt(s, byteOffset++) };
														
 
															+		#end
														
 
															 	}
														
 
															 	/**
														
--- a/std/neko/_std/haxe/Utf8.hx
+++ b/std/neko/_std/haxe/Utf8.hx
@@ -19,7 +19,7 @@
 
															  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															  * DEALINGS IN THE SOFTWARE.
														
 
															  */
														
 
															-package haxe;
														
 
															+package neko;
														
 
															 @:coreApi
														
 
															 class Utf8 {
														
--- a/std/php/_std/haxe/Utf8.hx
+++ b/std/php/_std/haxe/Utf8.hx
@@ -1,88 +0,0 @@
 
															-/*
														
 
															- * Copyright (C)2005-2019 Haxe Foundation
														
 
															- *
														
 
															- * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															- * copy of this software and associated documentation files (the "Software"),
														
 
															- * to deal in the Software without restriction, including without limitation
														
 
															- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															- * and/or sell copies of the Software, and to permit persons to whom the
														
 
															- * Software is furnished to do so, subject to the following conditions:
														
 
															- *
														
 
															- * The above copyright notice and this permission notice shall be included in
														
 
															- * all copies or substantial portions of the Software.
														
 
															- *
														
 
															- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															- * DEALINGS IN THE SOFTWARE.
														
 
															- */
														
 
															-package haxe;
														
 
															-
														
 
															-import php.Global;
														
 
															-
														
 
															-@:coreApi
														
 
															-class Utf8 {
														
 
															-
														
 
															-	var __b : String;
														
 
															-
														
 
															-	public function new( ?size : Int ) : Void {
														
 
															-		__b = '';
														
 
															-	}
														
 
															-
														
 
															-	public function addChar( c : Int ) : Void {
														
 
															-		__b += uchr(c);
														
 
															-	}
														
 
															-
														
 
															-	public function toString() : String {
														
 
															-		return __b;
														
 
															-	}
														
 
															-
														
 
															-	public static function encode( s : String ) : String {
														
 
															-		return Global.utf8_encode(s);
														
 
															-	}
														
 
															-
														
 
															-	public static function decode( s : String ) : String {
														
 
															-		return Global.utf8_decode(s);
														
 
															-	}
														
 
															-
														
 
															-	public static function iter(s : String, chars : Int -> Void ) : Void {
														
 
															-		var len = length(s);
														
 
															-		for(i in 0...len) {
														
 
															-			chars(charCodeAt(s, i));
														
 
															-		}
														
 
															-	}
														
 
															-
														
 
															-	public static function charCodeAt( s : String, index : Int ) : Int {
														
 
															-		return uord(sub(s, index, 1));
														
 
															-	}
														
 
															-
														
 
															-	static function uchr(i : Int) : String {
														
 
															-		return Global.mb_convert_encoding(Global.pack('N', i), 'UTF-8', 'UCS-4BE');
														
 
															-	}
														
 
															-
														
 
															-	static function uord(s : String) : Int {
														
 
															-		var c = Global.unpack('N', Global.mb_convert_encoding(s, 'UCS-4BE', 'UTF-8'));
														
 
															-		return c[1];
														
 
															-	}
														
 
															-
														
 
															-	public static function validate( s : String ) : Bool {
														
 
															-		return Global.mb_check_encoding(s, enc);
														
 
															-	}
														
 
															-
														
 
															-	public static function length( s : String ) : Int {
														
 
															-		return Global.mb_strlen(s, enc);
														
 
															-	}
														
 
															-
														
 
															-	public static function compare( a : String, b : String ) : Int {
														
 
															-		return Global.strcmp(a, b);
														
 
															-	}
														
 
															-
														
 
															-	public static function sub( s : String, pos : Int, len : Int ) : String {
														
 
															-		return Global.mb_substr(s, pos, len, enc);
														
 
															-	}
														
 
															-
														
 
															-	private static inline var enc = "UTF-8";
														
 
															-}
														
--- a/tests/unit/src/unit/TestJson.hx
+++ b/tests/unit/src/unit/TestJson.hx
@@ -48,11 +48,6 @@ class TestJson extends Test {
 
															 	// TODO: test pretty-printing (also with objects with skipped function fields!)
														
 
															 	function testHaxeJson() {
														
 
															-		#if php
														
 
															-		// php's haxe.Utf8 uses mbstring
														
 
															-		if (php.Global.extension_loaded("mbstring")) {
														
 
															-		#end
														
 
															-
														
 
															 		var str = haxe.format.JsonPrinter.print( { x : -4500, y : 1.456, a : ["hello", "wor'\"\n\t\rd"], b : function() {} } );
														
 
															 		str = str.substr(1, str.length - 2); // remove {}
														
 
															 		var parts = str.split(",");
														
@@ -96,10 +91,6 @@ class TestJson extends Test {
 
															 		eq(haxe.format.JsonPrinter.print(Math.NaN), "null");
														
 
															 		eq(haxe.format.JsonPrinter.print(function() {}), "\"<fun>\"");
														
 
															 		eq(haxe.format.JsonPrinter.print({a: function() {}, b: 1}), "{\"b\":1}");
														
 
															-
														
 
															-		#if php
														
 
															-		}
														
 
															-		#end
														
 
															 	}
														
 
															 	function test3690() {
														
--- a/tests/unit/src/unit/UnitBuilder.hx
+++ b/tests/unit/src/unit/UnitBuilder.hx
@@ -127,7 +127,7 @@ class UnitBuilder {
 
															 	static public function read(path:String) {
														
 
															 		var p = Context.makePosition( { min:0, max:0, file:path } );
														
 
															 		var file = sys.io.File.getContent(path);
														
 
															-		var code = Context.parseInlineString("{" + file + "}", p);
														
 
															+		var code = Context.parseInlineString("{" + file + "\n}", p);
														
 
															 		function mkBlock(e:Expr) {
														
 
															 			return switch(e.expr) {
														
 
															 				case EBlock(b): b;
														
--- a/tests/unit/src/unitstd/UnicodeString.unit.hx
+++ b/tests/unit/src/unitstd/UnicodeString.unit.hx
@@ -0,0 +1,77 @@
 
															+#if (target.unicode)
														
 
															+var s = new UnicodeString("𠜎zя");
														
 
															+var codes = [132878, 122, 1103];
														
 
															+
														
 
															+// length
														
 
															+s.length == codes.length;
														
 
															+
														
 
															+// // toUpperCase, toLowerCase
														
 
															+// var turkishLower = "ğüşıiöç";
														
 
															+// var turkishUpper = "ĞÜŞIİÖÇ";
														
 
															+// turkishUpper == turkishLower.toUpperCase();
														
 
															+// turkishLower == turkishUpper.toLowerCase();
														
 
															+
														
 
															+// charAt
														
 
															+s.charAt(0) == "𠜎";
														
 
															+s.charAt(1) == "z";
														
 
															+s.charAt(2) == "я";
														
 
															+s.charAt(3) == "";
														
 
															+s.charAt( -1) == "";
														
 
															+("":UnicodeString).charAt(0) == "";
														
 
															+("":UnicodeString).charAt(1) == "";
														
 
															+("":UnicodeString).charAt( -1) == "";
														
 
															+
														
 
															+// charCodeAt
														
 
															+s.charCodeAt(0) == codes[0];
														
 
															+s.charCodeAt(1) == codes[1];
														
 
															+s.charCodeAt(2) == codes[2];
														
 
															+s.charCodeAt(3) == null;
														
 
															+s.charCodeAt(-1) == null;
														
 
															+
														
 
															+// @:op(UnicodeString)
														
 
															+var s2 = new UnicodeString("𠜎z");
														
 
															+s != s2;
														
 
															+!(s == s2);
														
 
															+s > s2;
														
 
															+s >= s2;
														
 
															+s2 < s;
														
 
															+s2 <= s;
														
 
															+(s + s2).length == s.length + s2.length;
														
 
															+var s3 = s;
														
 
															+(s3 += s2).length == s.length + s2.length;
														
 
															+
														
 
															+// @:op(String)
														
 
															+var s2 = "abя";
														
 
															+s != s2;
														
 
															+!(s == s2);
														
 
															+s > s2;
														
 
															+s >= s2;
														
 
															+s2 < s;
														
 
															+s2 <= s;
														
 
															+(s + s2).length == s.length + (s2:UnicodeString).length;
														
 
															+var s3 = s;
														
 
															+(s3 += s2).length == s.length + (s2:UnicodeString).length;
														
 
															+
														
 
															+// iterator
														
 
															+aeq(codes, [for(c in s) c]);
														
 
															+
														
 
															+// keyValueIterator
														
 
															+var keys = [for(i in 0...codes.length) i];
														
 
															+var actualKeyCodes = [for(i => c in s) [i, c]];
														
 
															+aeq(keys, actualKeyCodes.map(a -> a[0]));
														
 
															+aeq(codes, actualKeyCodes.map(a -> a[1]));
														
 
															+
														
 
															+// validate
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61"), UTF8) == true;
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("ed9fbf"), UTF8) == true;
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("ee8080"), UTF8) == true;
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("f48fbfbf"), UTF8) == true;
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("f0a9b8bde381c3ab61"), UTF8) == false;
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("c0af"), UTF8) == false; // overlong sequence
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("eda080"), UTF8) == false; // surrogate byte sequence
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("edbfbf"), UTF8) == false; // surrogate byte sequence
														
 
															+UnicodeString.validate(haxe.io.Bytes.ofHex("f4908080"), UTF8) == false; // U+110000
														
 
															+
														
 
															+#else
														
 
															+1 == 1;
														
 
															+#end
														
--- a/tests/unit/src/unitstd/haxe/Utf8.unit.hx
+++ b/tests/unit/src/unitstd/haxe/Utf8.unit.hx
@@ -1,56 +0,0 @@
 
															-#if false
														
 
															-// disabled tests with outside BMP chars (will be reenabled when we support them)
														
 
															-var str = "あ𠀀い";
														
 
															-haxe.Utf8.length(str) == 3;
														
 
															-haxe.Utf8.charCodeAt(str, 0) == 0x3042;
														
 
															-haxe.Utf8.charCodeAt(str, 1) == 0x20000;
														
 
															-haxe.Utf8.charCodeAt(str, 2) == 0x3044;
														
 
															-var buf = new haxe.Utf8();
														
 
															-buf.addChar(0x3042);
														
 
															-buf.addChar(0x20000);
														
 
															-buf.addChar(0x3044);
														
 
															-buf.toString() == str;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あ𠀀") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "𠀀い") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
														
 
															-#end
														
 
															-
														
 
															-
														
 
															-// same tests with BMP chars (actually UCS2 compliance only)
														
 
															-var str = "あéい";
														
 
															-haxe.Utf8.length(str) == 3;
														
 
															-haxe.Utf8.charCodeAt(str, 0) == 0x3042;
														
 
															-haxe.Utf8.charCodeAt(str, 1) == 0xE9;
														
 
															-haxe.Utf8.charCodeAt(str, 2) == 0x3044;
														
 
															-var big = new haxe.Utf8(10);
														
 
															-big.toString().length == 0;
														
 
															-var buf = new haxe.Utf8();
														
 
															-buf.addChar(0x3042);
														
 
															-buf.addChar(0xE9);
														
 
															-buf.addChar(0x3044);
														
 
															-buf.toString() == str;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あé") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "éい") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
														
 
															-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
														
 
															-
														
 
															-// unspecify outside of range Utf8.sub
														
 
															-// haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
														
 
															-
														
 
															-// #if (neko || php || cpp || lua || macro)
														
 
															-// TODO neko, cpp, macro
														
 
															-#if php
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61").toString()) == true;
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("ed9fbf").toString()) == true;
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("ee8080").toString()) == true;
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f48fbfbf").toString()) == true;
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde381c3ab61").toString()) == false;
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("c0af").toString()) == false; // redundant sequence
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("eda080").toString()) == false; // surrogate byte sequence
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("edbfbf").toString()) == false; // surrogate byte sequence
														
 
															-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f4908080").toString()) == false; // U+110000
														
 
															-#end