6 年之前 · b49724d594
--- a/src/macro/eval/evalHash.ml
+++ b/src/macro/eval/evalHash.ml
@@ -49,7 +49,6 @@ let key_haxe_ds_ObjectMap = hash "haxe.ds.ObjectMap"
 
				 let key_haxe_macro_Position = hash "haxe.macro.Position"
			
 
				 let key_haxe_macro_LazyType = hash "haxe.macro.LazyType"
			
 
				 let key_haxe_macro_TypeDecl = hash "haxe.macro.TypeDecl"
			
 
				-let key_haxe_Utf8 = hash "haxe.Utf8"
			
 
				 let key_haxe_macro_Ref = hash "haxe.macro.Ref"
			
 
				 let key_haxe_io_Error = hash "haxe.io.Error"
			
 
				 let key_haxe_io_Bytes = hash "haxe.io.Bytes"
			
--- a/src/macro/eval/evalStdLib.ml
+++ b/src/macro/eval/evalStdLib.ml
@@ -3114,11 +3114,6 @@ let init_constructors builtins =
 
				 			| _ -> assert false
			
 
				 		);
			
 
				 	add key_StringBuf (fun _ -> encode_instance key_StringBuf ~kind:(IBuffer (VStringBuffer.create())));
			
 
				-	add key_haxe_Utf8
			
 
				-		(fun vl -> match vl with
			
 
				-			| [size] -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create (default_int size 0)))
			
 
				-			| _ -> assert false
			
 
				-		);
			
 
				 	add key_haxe_ds_StringMap (fun _ -> encode_string_map_direct (StringHashtbl.create ()));
			
 
				 	add key_haxe_ds_IntMap (fun _ -> encode_int_map_direct (IntHashtbl.create ()));
			
 
				 	add key_haxe_ds_ObjectMap (fun _ -> encode_object_map_direct (Obj.magic (ValueHashtbl.create 0)));
			
@@ -3209,7 +3204,6 @@ let init_empty_constructors builtins =
 
				 	Hashtbl.add h key_Date (fun () -> encode_instance key_Date ~kind:(IDate 0.));
			
 
				 	Hashtbl.add h key_EReg (fun () -> encode_instance key_EReg ~kind:(IRegex {r = Pcre.regexp ""; r_rex_string = create_ascii "~//"; r_global = false; r_string = ""; r_groups = [||]}));
			
 
				 	Hashtbl.add h key_String (fun () -> encode_string "");
			
 
				-	Hashtbl.add h key_haxe_Utf8 (fun () -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create 0)));
			
 
				 	Hashtbl.add h key_haxe_ds_StringMap (fun () -> encode_instance key_haxe_ds_StringMap ~kind:(IStringMap (StringHashtbl.create ())));
			
 
				 	Hashtbl.add h key_haxe_ds_IntMap (fun () -> encode_instance key_haxe_ds_IntMap ~kind:(IIntMap (IntHashtbl.create ())));
			
 
				 	Hashtbl.add h key_haxe_ds_ObjectMap (fun () -> encode_instance key_haxe_ds_ObjectMap ~kind:(IObjectMap (Obj.magic (ValueHashtbl.create 0))));
			
--- a/std/StringTools.hx
+++ b/std/StringTools.hx
@@ -553,4 +553,14 @@ class StringTools {
 
				 	private static var _urlDecode = neko.Lib.load("std","url_decode",1);
			
 
				 	#end
			
 
				 
			
 
				+	#if utf16
			
 
				+	static inline var MIN_SURROGATE_CODE_POINT = 65536;
			
 
				+	static inline function utf16CodePointAt(s:String, index:Int):Int {
			
 
				+		var c = StringTools.fastCodeAt(s, index);
			
 
				+		if (c >= 0xD800 && c <= 0xDBFF) {
			
 
				+			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, index + 1) & 0x3FF);
			
 
				+		}
			
 
				+		return c;
			
 
				+	}
			
 
				+	#end
			
 
				 }
			
--- a/std/UnicodeString.hx
+++ b/std/UnicodeString.hx
@@ -0,0 +1,215 @@
 
				+/*
			
 
				+ * Copyright (C)2005-2019 Haxe Foundation
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				+ * copy of this software and associated documentation files (the "Software"),
			
 
				+ * to deal in the Software without restriction, including without limitation
			
 
				+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				+ * and/or sell copies of the Software, and to permit persons to whom the
			
 
				+ * Software is furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ */
			
 
				+
			
 
				+import haxe.io.Bytes;
			
 
				+import haxe.io.Encoding;
			
 
				+import haxe.iterators.StringIteratorUnicode;
			
 
				+import haxe.iterators.StringKeyValueIteratorUnicode;
			
 
				+
			
 
				+/**
			
 
				+	This abstract provides consistent cross-target unicode support.
			
 
				+
			
 
				+	@see https://haxe.org/manual/std-UnicodeString.html
			
 
				+**/
			
 
				+@:forward
			
 
				+@:access(StringTools)
			
 
				+abstract UnicodeString(String) from String to String {
			
 
				+
			
 
				+	/**
			
 
				+		Tells if `b` is a correctly encoded UTF8 byte sequence. Throws if `encoding` is `RawNative`.
			
 
				+	**/
			
 
				+	static public function validate(b:Bytes, encoding:Encoding) : Bool {
			
 
				+		switch(encoding) {
			
 
				+			case RawNative: throw "UnicodeString.validate: RawNative encoding is not supported";
			
 
				+			case UTF8:
			
 
				+				var data = b.getData();
			
 
				+				var pos = 0;
			
 
				+				var max = b.length;
			
 
				+				while( pos < max) {
			
 
				+					var c:Int = Bytes.fastGet(data, pos++);
			
 
				+					if(c < 0x80) {
			
 
				+					} else if(c < 0xC2) {
			
 
				+						return false;
			
 
				+					} else if(c < 0xE0) {
			
 
				+						if(pos + 1 > max) {
			
 
				+							return false;
			
 
				+						}
			
 
				+						var c2:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c2 < 0x80 || c2 > 0xBF) {
			
 
				+							return false;
			
 
				+						}
			
 
				+					} else if(c < 0xF0) {
			
 
				+						if(pos + 2 > max) {
			
 
				+							return false;
			
 
				+						}
			
 
				+						var c2:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c == 0xE0) {
			
 
				+							if(c2 < 0xA0 || c2 > 0xBF) return false;
			
 
				+						} else {
			
 
				+							if(c2 < 0x80 || c2 > 0xBF) return false;
			
 
				+						}
			
 
				+						var c3:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c3 < 0x80 || c3 > 0xBF) {
			
 
				+							return false;
			
 
				+						}
			
 
				+						c = (c << 16) | (c2 << 8) | c3;
			
 
				+						if(0xEDA080 <= c && c <= 0xEDBFBF) { //surrogate pairs
			
 
				+							return false;
			
 
				+						}
			
 
				+					} else if(c > 0xF4) {
			
 
				+						return false;
			
 
				+					} else {
			
 
				+						if(pos + 3 > max) {
			
 
				+							return false;
			
 
				+						}
			
 
				+						var c2:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c == 0xF0) {
			
 
				+							if(c2 < 0x90 || c2 > 0xBF) return false;
			
 
				+						} else if(c == 0xF4) {
			
 
				+							if(c2 < 0x80 || c2 > 0x8F) return false;
			
 
				+						} else {
			
 
				+							if(c2 < 0x80 || c2 > 0xBF) return false;
			
 
				+						}
			
 
				+						var c3:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c3 < 0x80 || c3 > 0xBF) {
			
 
				+							return false;
			
 
				+						}
			
 
				+						var c4:Int = Bytes.fastGet(data, pos++);
			
 
				+						if(c4 < 0x80 || c4 > 0xBF) {
			
 
				+							return false;
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+				return true;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+#if (target.unicode)
			
 
				+
			
 
				+	/**
			
 
				+		The number of characters in `this` String.
			
 
				+	**/
			
 
				+	public var length(get,never):Int;
			
 
				+
			
 
				+	/**
			
 
				+		Creates an instance of UnicodeString.
			
 
				+	**/
			
 
				+	public inline function new(string:String):Void {
			
 
				+		this = string;
			
 
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns the character at position `index` of `this` String.
			
 
				+
			
 
				+		If `index` is negative or is not less than `this.length`, the empty String `""`
			
 
				+		is returned.
			
 
				+	**/
			
 
				+	#if !utf16 inline #end
			
 
				+	public function charAt(index:Int):String {
			
 
				+		#if utf16
			
 
				+			if(index < 0) return '';
			
 
				+			var unicodeOffset = 0;
			
 
				+			var nativeOffset = 0;
			
 
				+			while(nativeOffset < this.length) {
			
 
				+				var c = StringTools.utf16CodePointAt(this, nativeOffset++);
			
 
				+				if(unicodeOffset == index) {
			
 
				+					return String.fromCharCode(c);
			
 
				+				}
			
 
				+				if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
			
 
				+					nativeOffset++;
			
 
				+				}
			
 
				+				unicodeOffset++;
			
 
				+			}
			
 
				+			return '';
			
 
				+		#else
			
 
				+			return this.charAt(index);
			
 
				+		#end
			
 
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns the character code at position `index` of `this` String.
			
 
				+
			
 
				+		If `index` is negative or is not less than `this.length`, `null` is returned.
			
 
				+	**/
			
 
				+	#if !utf16 inline #end
			
 
				+	public function charCodeAt(index:Int):Null<Int> {
			
 
				+		#if utf16
			
 
				+			if(index < 0) return null;
			
 
				+			var unicodeOffset = 0;
			
 
				+			var nativeOffset = 0;
			
 
				+			while(nativeOffset < this.length) {
			
 
				+				var c = StringTools.utf16CodePointAt(this, nativeOffset++);
			
 
				+				if(unicodeOffset == index) {
			
 
				+					return c;
			
 
				+				}
			
 
				+				if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
			
 
				+					nativeOffset++;
			
 
				+				}
			
 
				+				unicodeOffset++;
			
 
				+			}
			
 
				+			return null;
			
 
				+		#else
			
 
				+			return this.charCodeAt(index);
			
 
				+		#end
			
 
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns an iterator of the unicode code points.
			
 
				+	**/
			
 
				+	public inline function iterator():StringIteratorUnicode {
			
 
				+		return new StringIteratorUnicode(this);
			
 
				+	}
			
 
				+
			
 
				+	/**
			
 
				+		Returns an iterator of the code point indices and unicode code points.
			
 
				+	**/
			
 
				+	public inline function keyValueIterator():StringKeyValueIteratorUnicode {
			
 
				+		return new StringKeyValueIteratorUnicode(this);
			
 
				+	}
			
 
				+
			
 
				+	#if !utf16 inline #end
			
 
				+	function get_length():Int {
			
 
				+		#if utf16
			
 
				+			var l = 0;
			
 
				+			for(c in new StringIteratorUnicode(this)) {
			
 
				+				l++;
			
 
				+			}
			
 
				+			return l;
			
 
				+		#else
			
 
				+			return this.length;
			
 
				+		#end
			
 
				+	}
			
 
				+
			
 
				+#end
			
 
				+
			
 
				+	@:op(A < B) static function lt(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A <= B) static function lte(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A > B) static function gt(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A >= B) static function gte(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A == B) static function eq(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A != B) static function neq(a:UnicodeString, b:UnicodeString):Bool;
			
 
				+	@:op(A + B) static function add(a:UnicodeString, b:UnicodeString):UnicodeString;
			
 
				+	@:op(A += B) static function assignAdd(a:UnicodeString, b:UnicodeString):UnicodeString;
			
 
				+
			
 
				+	@:op(A + B) @:commutative static function add(a:UnicodeString, b:String):UnicodeString;
			
 
				+	@:op(A += B) @:commutative static function assignAdd(a:UnicodeString, b:String):UnicodeString;
			
 
				+}
			
--- a/std/cpp/_std/haxe/Utf8.hx
+++ b/std/cpp/_std/haxe/Utf8.hx
@@ -1,87 +0,0 @@
 
				-/*
			
 
				- * Copyright (C)2005-2019 Haxe Foundation
			
 
				- *
			
 
				- * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				- * copy of this software and associated documentation files (the "Software"),
			
 
				- * to deal in the Software without restriction, including without limitation
			
 
				- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				- * and/or sell copies of the Software, and to permit persons to whom the
			
 
				- * Software is furnished to do so, subject to the following conditions:
			
 
				- *
			
 
				- * The above copyright notice and this permission notice shall be included in
			
 
				- * all copies or substantial portions of the Software.
			
 
				- *
			
 
				- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				- * DEALINGS IN THE SOFTWARE.
			
 
				- */
			
 
				-package haxe;
			
 
				-
			
 
				-using cpp.NativeString;
			
 
				-
			
 
				-@:coreApi
			
 
				-class Utf8
			
 
				-{
			
 
				-   var __s:Array<Int>;
			
 
				-
			
 
				-	public function new( ?size : Null<Int> ) : Void {
			
 
				-      __s = new Array<Int>();
			
 
				-      if (size!=null && size>0)
			
 
				-         cpp.NativeArray.reserve(__s,size);
			
 
				-	}
			
 
				-
			
 
				-	public function addChar( c : Int ) : Void {
			
 
				-      __s.push(c);
			
 
				-	}
			
 
				-
			
 
				-	public function toString() : String {
			
 
				-		return untyped __global__.__hxcpp_char_array_to_utf8_string(__s);
			
 
				-	}
			
 
				-
			
 
				-   // Incoming string is array of bytes containing possibly invalid utf8 chars
			
 
				-   // Result is the same string with the bytes expanded into utf8 sequences
			
 
				-	public static function encode( s : String ) : String {
			
 
				-		return untyped __global__.__hxcpp_char_bytes_to_utf8_string(s);
			
 
				-	}
			
 
				-
			
 
				-   // Incoming string is array of bytes representing valid utf8 chars
			
 
				-   // Result is a string containing the compressed bytes
			
 
				-	public static function decode( s : String ) : String {
			
 
				-		return untyped __global__.__hxcpp_utf8_string_to_char_bytes(s);
			
 
				-	}
			
 
				-
			
 
				-	public #if !cppia inline #end static function iter( s : String, chars : Int -> Void ) : Void {
			
 
				-      var src = s.c_str();
			
 
				-      var end = src.add( s.length );
			
 
				-
			
 
				-      while(src.lt(end))
			
 
				-         chars(src.ptr.utf8DecodeAdvance());
			
 
				-	}
			
 
				-
			
 
				-	public static function charCodeAt( s : String, index : Int ) : Int {
			
 
				-      return s.utf8CharCodeAt(index);
			
 
				-	}
			
 
				-
			
 
				-	public static function validate( s : String ) : Bool {
			
 
				-      return s.utf8IsValid();
			
 
				-	}
			
 
				-
			
 
				-	public static function length( s : String ) : Int {
			
 
				-      return s.utf8Length();
			
 
				-	}
			
 
				-
			
 
				-	public static function compare( a : String, b : String ) : Int {
			
 
				-      return a.compare(b);
			
 
				-	}
			
 
				-
			
 
				-	public static function sub( s : String, pos : Int, len : Int ) : String {
			
 
				-      return s.utf8Sub(pos,len);
			
 
				-	}
			
 
				-
			
 
				-}
			
 
				-
			
 
				-
			
--- a/std/cpp/cppia/HostClasses.hx
+++ b/std/cpp/cppia/HostClasses.hx
@@ -92,7 +92,6 @@ class HostClasses
 
				    "haxe.Unserializer",
			
 
				    "haxe.Resource",
			
 
				    "haxe.Template",
			
 
				-   "haxe.Utf8",
			
 
				    "haxe.Log",
			
 
				    "haxe.zip.Compress",
			
 
				    "haxe.zip.Uncompress",
			
@@ -124,7 +123,6 @@ class HostClasses
 
				 
			
 
				    "haxe.CallStack",
			
 
				    "haxe.Resource",
			
 
				-   "haxe.Utf8",
			
 
				    "haxe.Int64",
			
 
				    "haxe.Int32",
			
 
				    "haxe.Serializer",
			
--- a/std/eval/_std/haxe/Utf8.hx
+++ b/std/eval/_std/haxe/Utf8.hx
@@ -1,37 +0,0 @@
 
				-/*
			
 
				- * Copyright (C)2005-2019 Haxe Foundation
			
 
				- *
			
 
				- * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				- * copy of this software and associated documentation files (the "Software"),
			
 
				- * to deal in the Software without restriction, including without limitation
			
 
				- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				- * and/or sell copies of the Software, and to permit persons to whom the
			
 
				- * Software is furnished to do so, subject to the following conditions:
			
 
				- *
			
 
				- * The above copyright notice and this permission notice shall be included in
			
 
				- * all copies or substantial portions of the Software.
			
 
				- *
			
 
				- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				- * DEALINGS IN THE SOFTWARE.
			
 
				- */
			
 
				-package haxe;
			
 
				-
			
 
				-@:coreApi
			
 
				-extern class Utf8 {
			
 
				-	public function new( ?size : Int ):Void;
			
 
				-	public function addChar( c : Int ) : Void;
			
 
				-	public function toString() : String;
			
 
				-	public static function iter( s : String, chars : Int -> Void ):Void;
			
 
				-	public static function encode( s : String ) : String;
			
 
				-	public static function decode( s : String ) : String;
			
 
				-	public static function charCodeAt( s : String, index : Int ) : Int;
			
 
				-	public static function validate( s : String ) : Bool;
			
 
				-	public static function length( s : String ) : Int;
			
 
				-	public static function compare( a : String, b : String ) : Int;
			
 
				-	public static function sub( s : String, pos : Int, len : Int ) : String;
			
 
				-}
			
--- a/std/haxe/Utf8.hx
+++ b/std/haxe/Utf8.hx
@@ -1,112 +0,0 @@
 
				-/*
			
 
				- * Copyright (C)2005-2019 Haxe Foundation
			
 
				- *
			
 
				- * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				- * copy of this software and associated documentation files (the "Software"),
			
 
				- * to deal in the Software without restriction, including without limitation
			
 
				- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				- * and/or sell copies of the Software, and to permit persons to whom the
			
 
				- * Software is furnished to do so, subject to the following conditions:
			
 
				- *
			
 
				- * The above copyright notice and this permission notice shall be included in
			
 
				- * all copies or substantial portions of the Software.
			
 
				- *
			
 
				- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				- * DEALINGS IN THE SOFTWARE.
			
 
				- */
			
 
				-package haxe;
			
 
				-
			
 
				-/**
			
 
				-	Since not all platforms guarantee that `String` always uses UTF-8 encoding, you
			
 
				-	can use this cross-platform API to perform operations on such strings.
			
 
				-**/
			
 
				-class Utf8 {
			
 
				-
			
 
				-	var __b : String;
			
 
				-
			
 
				-	/**
			
 
				-		Allocate a new Utf8 buffer using an optional bytes size.
			
 
				-	**/
			
 
				-	public function new( ?size : Int ) {
			
 
				-		__b = "";
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Add the given UTF8 character code to the buffer.
			
 
				-	**/
			
 
				-	public inline function addChar( c : Int ) : Void {
			
 
				-		__b += String.fromCharCode(c);
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Returns the buffer converted to a String.
			
 
				-	**/
			
 
				-	public inline function toString() : String {
			
 
				-		return __b;
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Call the `chars` function for each UTF8 char of the string.
			
 
				-	**/
			
 
				-	public static function iter( s : String, chars : Int -> Void ) {
			
 
				-		for( i in 0...s.length )
			
 
				-			chars(s.charCodeAt(i));
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Encode the input ISO string into the corresponding UTF8 one.
			
 
				-	**/
			
 
				-	public static function encode( s : String ) : String {
			
 
				-		throw "Not implemented";
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Decode an UTF8 string back to an ISO string.
			
 
				-		Throw an exception if a given UTF8 character is not supported by the decoder.
			
 
				-	**/
			
 
				-	public static function decode( s : String ) : String {
			
 
				-		throw "Not implemented";
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Similar to `String.charCodeAt` but uses the UTF8 character position.
			
 
				-	**/
			
 
				-	public static inline function charCodeAt( s : String, index : Int ) : Int {
			
 
				-		return s.charCodeAt(index);
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Tells if the String is correctly encoded as UTF8.
			
 
				-	**/
			
 
				-	public static inline function validate( s : String ) : Bool {
			
 
				-		return true;
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Returns the number of UTF8 chars of the String.
			
 
				-	**/
			
 
				-	#if js extern #end
			
 
				-	public static inline function length( s : String ) : Int {
			
 
				-		return s.length;
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		Compare two UTF8 strings, character by character.
			
 
				-	**/
			
 
				-	public static function compare( a : String, b : String ) : Int {
			
 
				-		return a > b ? 1 : (a == b ? 0 : -1);
			
 
				-	}
			
 
				-
			
 
				-	/**
			
 
				-		This is similar to `String.substr` but the `pos` and `len` parts are considering UTF8 characters.
			
 
				-	**/
			
 
				-	public static inline function sub( s : String, pos : Int, len : Int ) : String {
			
 
				-		return s.substr(pos,len);
			
 
				-	}
			
 
				-
			
 
				-}
			
--- a/std/haxe/format/JsonPrinter.hx
+++ b/std/haxe/format/JsonPrinter.hx
@@ -185,8 +185,8 @@ class JsonPrinter {
 
				 	}
			
 
				 
			
 
				 	function quote( s : String ) {
			
 
				-		#if (neko || php || cpp)
			
 
				-		if( s.length != haxe.Utf8.length(s) ) {
			
 
				+		#if neko
			
 
				+		if( s.length != neko.Utf8.length(s) ) {
			
 
				 			quoteUtf8(s);
			
 
				 			return;
			
 
				 		}
			
@@ -236,10 +236,10 @@ class JsonPrinter {
 
				 		addChar('"'.code);
			
 
				 	}
			
 
				 
			
 
				-	#if (neko || php || cpp)
			
 
				+	#if neko
			
 
				 	function quoteUtf8( s : String ) {
			
 
				-		var u = new haxe.Utf8();
			
 
				-		haxe.Utf8.iter(s,function(c) {
			
 
				+		var u = new neko.Utf8();
			
 
				+		neko.Utf8.iter(s,function(c) {
			
 
				 			switch( c ) {
			
 
				 			case '\\'.code, '"'.code: u.addChar('\\'.code); u.addChar(c);
			
 
				 			case '\n'.code: u.addChar('\\'.code); u.addChar('n'.code);
			
--- a/std/haxe/iterators/StringIteratorUnicode.hx
+++ b/std/haxe/iterators/StringIteratorUnicode.hx
@@ -50,14 +50,17 @@ class StringIteratorUnicode {
 
				 	/**
			
 
				 		See `Iterator.next`
			
 
				 	**/
			
 
				+	@:access(StringTools)
			
 
				 	public inline function next() {
			
 
				-		var c = StringTools.fastCodeAt(s, offset++);
			
 
				 		#if utf16
			
 
				-		if (c >= 0xD800 && c <= 0xDBFF) {
			
 
				-			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, offset++) & 0x3FF);
			
 
				+		var c = StringTools.utf16CodePointAt(s, offset++);
			
 
				+		if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
			
 
				+			offset++;
			
 
				 		}
			
 
				-		#end
			
 
				 		return c;
			
 
				+		#else
			
 
				+		return StringTools.fastCodeAt(s, offset++);
			
 
				+		#end
			
 
				 	}
			
 
				 
			
 
				 	/**
			
--- a/std/haxe/iterators/StringKeyValueIteratorUnicode.hx
+++ b/std/haxe/iterators/StringKeyValueIteratorUnicode.hx
@@ -53,14 +53,17 @@ class StringKeyValueIteratorUnicode {
 
				 	/**
			
 
				 		See `Iterator.next`
			
 
				 	**/
			
 
				+	@:access(StringTools)
			
 
				 	public inline function next() {
			
 
				-		var c = StringTools.fastCodeAt(s, byteOffset++);
			
 
				 		#if utf16
			
 
				-		if (c >= 0xD800 && c <= 0xDBFF) {
			
 
				-			c = ((c -0xD7C0) << 10) | (StringTools.fastCodeAt(s, byteOffset++) & 0x3FF);
			
 
				+		var c = StringTools.utf16CodePointAt(s, byteOffset++);
			
 
				+		if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
			
 
				+			byteOffset++;
			
 
				 		}
			
 
				-		#end
			
 
				 		return { key: charOffset++, value: c };
			
 
				+		#else
			
 
				+		return { key: charOffset++, value: StringTools.fastCodeAt(s, byteOffset++) };
			
 
				+		#end
			
 
				 	}
			
 
				 
			
 
				 	/**
			
--- a/std/neko/_std/haxe/Utf8.hx
+++ b/std/neko/_std/haxe/Utf8.hx
@@ -19,7 +19,7 @@
 
				  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				  * DEALINGS IN THE SOFTWARE.
			
 
				  */
			
 
				-package haxe;
			
 
				+package neko;
			
 
				 
			
 
				 @:coreApi
			
 
				 class Utf8 {
			
--- a/std/php/_std/haxe/Utf8.hx
+++ b/std/php/_std/haxe/Utf8.hx
@@ -1,88 +0,0 @@
 
				-/*
			
 
				- * Copyright (C)2005-2019 Haxe Foundation
			
 
				- *
			
 
				- * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				- * copy of this software and associated documentation files (the "Software"),
			
 
				- * to deal in the Software without restriction, including without limitation
			
 
				- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				- * and/or sell copies of the Software, and to permit persons to whom the
			
 
				- * Software is furnished to do so, subject to the following conditions:
			
 
				- *
			
 
				- * The above copyright notice and this permission notice shall be included in
			
 
				- * all copies or substantial portions of the Software.
			
 
				- *
			
 
				- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				- * DEALINGS IN THE SOFTWARE.
			
 
				- */
			
 
				-package haxe;
			
 
				-
			
 
				-import php.Global;
			
 
				-
			
 
				-@:coreApi
			
 
				-class Utf8 {
			
 
				-
			
 
				-	var __b : String;
			
 
				-
			
 
				-	public function new( ?size : Int ) : Void {
			
 
				-		__b = '';
			
 
				-	}
			
 
				-
			
 
				-	public function addChar( c : Int ) : Void {
			
 
				-		__b += uchr(c);
			
 
				-	}
			
 
				-
			
 
				-	public function toString() : String {
			
 
				-		return __b;
			
 
				-	}
			
 
				-
			
 
				-	public static function encode( s : String ) : String {
			
 
				-		return Global.utf8_encode(s);
			
 
				-	}
			
 
				-
			
 
				-	public static function decode( s : String ) : String {
			
 
				-		return Global.utf8_decode(s);
			
 
				-	}
			
 
				-
			
 
				-	public static function iter(s : String, chars : Int -> Void ) : Void {
			
 
				-		var len = length(s);
			
 
				-		for(i in 0...len) {
			
 
				-			chars(charCodeAt(s, i));
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	public static function charCodeAt( s : String, index : Int ) : Int {
			
 
				-		return uord(sub(s, index, 1));
			
 
				-	}
			
 
				-
			
 
				-	static function uchr(i : Int) : String {
			
 
				-		return Global.mb_convert_encoding(Global.pack('N', i), 'UTF-8', 'UCS-4BE');
			
 
				-	}
			
 
				-
			
 
				-	static function uord(s : String) : Int {
			
 
				-		var c = Global.unpack('N', Global.mb_convert_encoding(s, 'UCS-4BE', 'UTF-8'));
			
 
				-		return c[1];
			
 
				-	}
			
 
				-
			
 
				-	public static function validate( s : String ) : Bool {
			
 
				-		return Global.mb_check_encoding(s, enc);
			
 
				-	}
			
 
				-
			
 
				-	public static function length( s : String ) : Int {
			
 
				-		return Global.mb_strlen(s, enc);
			
 
				-	}
			
 
				-
			
 
				-	public static function compare( a : String, b : String ) : Int {
			
 
				-		return Global.strcmp(a, b);
			
 
				-	}
			
 
				-
			
 
				-	public static function sub( s : String, pos : Int, len : Int ) : String {
			
 
				-		return Global.mb_substr(s, pos, len, enc);
			
 
				-	}
			
 
				-
			
 
				-	private static inline var enc = "UTF-8";
			
 
				-}
			
--- a/tests/unit/src/unit/TestJson.hx
+++ b/tests/unit/src/unit/TestJson.hx
@@ -48,11 +48,6 @@ class TestJson extends Test {
 
				 
			
 
				 	// TODO: test pretty-printing (also with objects with skipped function fields!)
			
 
				 	function testHaxeJson() {
			
 
				-		#if php
			
 
				-		// php's haxe.Utf8 uses mbstring
			
 
				-		if (php.Global.extension_loaded("mbstring")) {
			
 
				-		#end
			
 
				-
			
 
				 		var str = haxe.format.JsonPrinter.print( { x : -4500, y : 1.456, a : ["hello", "wor'\"\n\t\rd"], b : function() {} } );
			
 
				 		str = str.substr(1, str.length - 2); // remove {}
			
 
				 		var parts = str.split(",");
			
@@ -96,10 +91,6 @@ class TestJson extends Test {
 
				 		eq(haxe.format.JsonPrinter.print(Math.NaN), "null");
			
 
				 		eq(haxe.format.JsonPrinter.print(function() {}), "\"<fun>\"");
			
 
				 		eq(haxe.format.JsonPrinter.print({a: function() {}, b: 1}), "{\"b\":1}");
			
 
				-
			
 
				-		#if php
			
 
				-		}
			
 
				-		#end
			
 
				 	}
			
 
				 
			
 
				 	function test3690() {
			
--- a/tests/unit/src/unit/UnitBuilder.hx
+++ b/tests/unit/src/unit/UnitBuilder.hx
@@ -127,7 +127,7 @@ class UnitBuilder {
 
				 	static public function read(path:String) {
			
 
				 		var p = Context.makePosition( { min:0, max:0, file:path } );
			
 
				 		var file = sys.io.File.getContent(path);
			
 
				-		var code = Context.parseInlineString("{" + file + "}", p);
			
 
				+		var code = Context.parseInlineString("{" + file + "\n}", p);
			
 
				 		function mkBlock(e:Expr) {
			
 
				 			return switch(e.expr) {
			
 
				 				case EBlock(b): b;
			
--- a/tests/unit/src/unitstd/UnicodeString.unit.hx
+++ b/tests/unit/src/unitstd/UnicodeString.unit.hx
@@ -0,0 +1,77 @@
 
				+#if (target.unicode)
			
 
				+var s = new UnicodeString("𠜎zя");
			
 
				+var codes = [132878, 122, 1103];
			
 
				+
			
 
				+// length
			
 
				+s.length == codes.length;
			
 
				+
			
 
				+// // toUpperCase, toLowerCase
			
 
				+// var turkishLower = "ğüşıiöç";
			
 
				+// var turkishUpper = "ĞÜŞIİÖÇ";
			
 
				+// turkishUpper == turkishLower.toUpperCase();
			
 
				+// turkishLower == turkishUpper.toLowerCase();
			
 
				+
			
 
				+// charAt
			
 
				+s.charAt(0) == "𠜎";
			
 
				+s.charAt(1) == "z";
			
 
				+s.charAt(2) == "я";
			
 
				+s.charAt(3) == "";
			
 
				+s.charAt( -1) == "";
			
 
				+("":UnicodeString).charAt(0) == "";
			
 
				+("":UnicodeString).charAt(1) == "";
			
 
				+("":UnicodeString).charAt( -1) == "";
			
 
				+
			
 
				+// charCodeAt
			
 
				+s.charCodeAt(0) == codes[0];
			
 
				+s.charCodeAt(1) == codes[1];
			
 
				+s.charCodeAt(2) == codes[2];
			
 
				+s.charCodeAt(3) == null;
			
 
				+s.charCodeAt(-1) == null;
			
 
				+
			
 
				+// @:op(UnicodeString)
			
 
				+var s2 = new UnicodeString("𠜎z");
			
 
				+s != s2;
			
 
				+!(s == s2);
			
 
				+s > s2;
			
 
				+s >= s2;
			
 
				+s2 < s;
			
 
				+s2 <= s;
			
 
				+(s + s2).length == s.length + s2.length;
			
 
				+var s3 = s;
			
 
				+(s3 += s2).length == s.length + s2.length;
			
 
				+
			
 
				+// @:op(String)
			
 
				+var s2 = "abя";
			
 
				+s != s2;
			
 
				+!(s == s2);
			
 
				+s > s2;
			
 
				+s >= s2;
			
 
				+s2 < s;
			
 
				+s2 <= s;
			
 
				+(s + s2).length == s.length + (s2:UnicodeString).length;
			
 
				+var s3 = s;
			
 
				+(s3 += s2).length == s.length + (s2:UnicodeString).length;
			
 
				+
			
 
				+// iterator
			
 
				+aeq(codes, [for(c in s) c]);
			
 
				+
			
 
				+// keyValueIterator
			
 
				+var keys = [for(i in 0...codes.length) i];
			
 
				+var actualKeyCodes = [for(i => c in s) [i, c]];
			
 
				+aeq(keys, actualKeyCodes.map(a -> a[0]));
			
 
				+aeq(codes, actualKeyCodes.map(a -> a[1]));
			
 
				+
			
 
				+// validate
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61"), UTF8) == true;
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("ed9fbf"), UTF8) == true;
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("ee8080"), UTF8) == true;
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("f48fbfbf"), UTF8) == true;
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("f0a9b8bde381c3ab61"), UTF8) == false;
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("c0af"), UTF8) == false; // overlong sequence
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("eda080"), UTF8) == false; // surrogate byte sequence
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("edbfbf"), UTF8) == false; // surrogate byte sequence
			
 
				+UnicodeString.validate(haxe.io.Bytes.ofHex("f4908080"), UTF8) == false; // U+110000
			
 
				+
			
 
				+#else
			
 
				+1 == 1;
			
 
				+#end
			
--- a/tests/unit/src/unitstd/haxe/Utf8.unit.hx
+++ b/tests/unit/src/unitstd/haxe/Utf8.unit.hx
@@ -1,56 +0,0 @@
 
				-#if false
			
 
				-// disabled tests with outside BMP chars (will be reenabled when we support them)
			
 
				-var str = "あ𠀀い";
			
 
				-haxe.Utf8.length(str) == 3;
			
 
				-haxe.Utf8.charCodeAt(str, 0) == 0x3042;
			
 
				-haxe.Utf8.charCodeAt(str, 1) == 0x20000;
			
 
				-haxe.Utf8.charCodeAt(str, 2) == 0x3044;
			
 
				-var buf = new haxe.Utf8();
			
 
				-buf.addChar(0x3042);
			
 
				-buf.addChar(0x20000);
			
 
				-buf.addChar(0x3044);
			
 
				-buf.toString() == str;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あ𠀀") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "𠀀い") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
			
 
				-#end
			
 
				-
			
 
				-
			
 
				-// same tests with BMP chars (actually UCS2 compliance only)
			
 
				-var str = "あéい";
			
 
				-haxe.Utf8.length(str) == 3;
			
 
				-haxe.Utf8.charCodeAt(str, 0) == 0x3042;
			
 
				-haxe.Utf8.charCodeAt(str, 1) == 0xE9;
			
 
				-haxe.Utf8.charCodeAt(str, 2) == 0x3044;
			
 
				-var big = new haxe.Utf8(10);
			
 
				-big.toString().length == 0;
			
 
				-var buf = new haxe.Utf8();
			
 
				-buf.addChar(0x3042);
			
 
				-buf.addChar(0xE9);
			
 
				-buf.addChar(0x3044);
			
 
				-buf.toString() == str;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あé") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "éい") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
			
 
				-haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
			
 
				-
			
 
				-// unspecify outside of range Utf8.sub
			
 
				-// haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
			
 
				-
			
 
				-// #if (neko || php || cpp || lua || macro)
			
 
				-// TODO neko, cpp, macro
			
 
				-#if php
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61").toString()) == true;
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("ed9fbf").toString()) == true;
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("ee8080").toString()) == true;
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f48fbfbf").toString()) == true;
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde381c3ab61").toString()) == false;
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("c0af").toString()) == false; // redundant sequence
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("eda080").toString()) == false; // surrogate byte sequence
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("edbfbf").toString()) == false; // surrogate byte sequence
			
 
				-haxe.Utf8.validate(haxe.io.Bytes.ofHex("f4908080").toString()) == false; // U+110000
			
 
				-#end