Browse Source

Unicode tests (#7009)

* started unicode unit tests

* more unicode tests and api changes

* turn off neko (will not be made unicode compatible)

* flash unicode support

* added polyfill for missing IE String.fromCodePoint

* make ucs2 with utf16 encoding the default for tests compliance

* [typer] allow the String module to have multiple module types

I think taking the closure of e.g. String's `fromCharCode` on JS causes a `_String_Impl` class to be created.

* [eval] disable unicode tests

* [java/cs] Update Bytes code to support the RawNative encoding

Also fix `String.fromCharCode` when the code point is a surrogate pair

* [unicode] move some code from HL to common

* [hxcpp] Some work to prepare for utf16 strings

* [hxcpp] Remove compiler warnings.

* [hxcpp] Allow hxcpp strings to be non-utf8

* [hxcpp] Use the same hash for utf8 and wide-char representations.  Remove assumptions about hxcpp string format from crypto.Sha

* [hxcpp] Do not assume utf8 on hxcpp.  Add optimization for Std.string in the case of passed String

* Use native utf8 encoding for hxcpp. Disable unicode tests for cpp without 'hxcpp_smart_strings' define

* Add hxcpp_smart_strings define to compile

* [cpp] remove unused variables

* Add some unicode indexOf tests

* [php] load the file with polyfills

* [php] Converted String to multibyte

* [php] fixed StringTools.fastCodeAt() for utf8

* [php] bytes io seems fixed

* [php] fixed xml.Parser for unicode

* [php] fixed haxe.JsonParser for unicode

* [php] php strings are binary-safe

* [php] fixed sha224 & sha256 tests

* [hxcpp] Export special cpp types as fsUnknown to cppia.  Change definition of cpp.Star to allow null setting.  Add cpp.Native class for some easier access to cpp.Star pointers.  Bump hxcpp_api_version to 400.

* [lua] refactor lib installation methods

* [lua] lib version adjustment

* update luarocks lib name

* [lua] Add Utf8 extern, use it for base String class

* [lua] use native string tools inside Bytes

* [lua] use fast byte decoding for utf8

* [lua] remove slice allocation inside of Bytes

* [lua] fix utf8 in xml

* [lua] fix stack overflow for byte decoding

* [lua] adjust offsets for byte encoding

* [lua] get rid of defunct Utf8 implementation

* [lua] remove hardcoded stack size limit

* [lua] set utf8 handling for related std string methods and tests

* [lua] Std.string checks for a userdata metatable and will use that if present

* [php] fixed JsonParser and xml Parser

* [lua] remove special utf8 handling logic from json/xml parser

* [lua] skip sha tests for now

* add more tests because I hate myself

* fix

* [eval] it passes!

* [eval] cleanup

* [eval] try to get substr/substring right and add some tests

* [eval] fix and test Reflect.compare

* [as3] make fromCodePoint public

* [eval] make (last)indexOf ECMA-compliant with regards to ""

see #5271

* [lua] use safe table method for decoding bytes to strings

* [lua] reformat NativeStringTools and remove non-existent charCodeAt function

* [lua] use NativeStringTools for byte management in Process

* [python] get tests to pass
Nicolas Cannasse 7 years ago
parent
commit
03659011fc
73 changed files with 1584 additions and 788 deletions
  1. 66 0
      src/context/common.ml
  2. 1 1
      src/generators/gencs.ml
  3. 2 2
      src/generators/genjava.ml
  4. 2 1
      src/generators/genlua.ml
  5. 5 3
      src/generators/genphp7.ml
  6. 1 1
      src/generators/hl2c.ml
  7. 0 21
      src/generators/hlcode.ml
  8. 7 7
      src/generators/hlinterp.ml
  9. 4 7
      src/macro/eval/evalArray.ml
  10. 62 0
      src/macro/eval/evalBytes.ml
  11. 2 7
      src/macro/eval/evalContext.ml
  12. 5 5
      src/macro/eval/evalDebugCLI.ml
  13. 5 5
      src/macro/eval/evalDebugSocket.ml
  14. 4 4
      src/macro/eval/evalDecode.ml
  15. 5 4
      src/macro/eval/evalEmitter.ml
  16. 8 4
      src/macro/eval/evalEncode.ml
  17. 2 2
      src/macro/eval/evalExceptions.ml
  18. 2 2
      src/macro/eval/evalField.ml
  19. 1 1
      src/macro/eval/evalJit.ml
  20. 6 6
      src/macro/eval/evalMain.ml
  21. 16 7
      src/macro/eval/evalMisc.ml
  22. 53 37
      src/macro/eval/evalPrinting.ml
  23. 266 186
      src/macro/eval/evalStdLib.ml
  24. 235 0
      src/macro/eval/evalString.ml
  25. 38 5
      src/macro/eval/evalValue.ml
  26. 4 3
      src/typing/typer.ml
  27. 1 1
      std/StringTools.hx
  28. 3 2
      std/cs/internal/StringExt.hx
  29. 3 2
      std/eval/_std/haxe/io/Bytes.hx
  30. 1 1
      std/eval/_std/haxe/io/BytesBuffer.hx
  31. 9 0
      std/flash/Boot.hx
  32. 43 0
      std/flash/_std/String.hx
  33. 2 2
      std/haxe/format/JsonParser.hx
  34. 56 17
      std/haxe/io/Bytes.hx
  35. 3 3
      std/haxe/io/BytesBuffer.hx
  36. 2 2
      std/haxe/io/BytesInput.hx
  37. 5 2
      std/haxe/io/BytesOutput.hx
  38. 33 0
      std/haxe/io/Encoding.hx
  39. 2 2
      std/haxe/io/Input.hx
  40. 2 2
      std/haxe/io/Output.hx
  41. 1 1
      std/haxe/xml/Parser.hx
  42. 12 6
      std/hl/_std/haxe/io/Bytes.hx
  43. 2 2
      std/hl/_std/haxe/io/BytesBuffer.hx
  44. 1 0
      std/java/_std/String.hx
  45. 2 5
      std/java/internal/StringExt.hx
  46. 8 1
      std/js/_std/String.hx
  47. 37 21
      std/js/_std/haxe/io/Bytes.hx
  48. 11 10
      std/js/_std/haxe/io/BytesBuffer.hx
  49. 11 1
      std/lua/Boot.hx
  50. 36 41
      std/lua/NativeStringTools.hx
  51. 14 17
      std/lua/_std/String.hx
  52. 0 250
      std/lua/_std/haxe/Utf8.hx
  53. 6 5
      std/lua/_std/sys/io/Process.hx
  54. 117 0
      std/lua/lib/luautf8/Utf8.hx
  55. 35 26
      std/php/Boot.hx
  56. 42 0
      std/php/Global.hx
  57. 43 0
      std/php/_polyfills.php
  58. 6 10
      std/php/_std/String.hx
  59. 1 1
      std/php/_std/StringTools.hx
  60. 4 3
      std/php/_std/haxe/io/Bytes.hx
  61. 5 5
      std/php/_std/haxe/io/BytesBuffer.hx
  62. 2 1
      std/python/_std/sys/io/FileInput.hx
  63. 2 1
      std/python/_std/sys/io/FileOutput.hx
  64. 2 1
      std/python/io/IInput.hx
  65. 2 1
      std/python/io/IOutput.hx
  66. 1 1
      tests/optimization/src/issues/Issue6015.hx
  67. 16 18
      tests/runci/targets/Lua.hx
  68. 1 0
      tests/unit/compile-cpp.hxml
  69. 1 0
      tests/unit/compile-cppia-host.hxml
  70. 1 1
      tests/unit/src/unit/TestResource.hx
  71. 193 0
      tests/unit/src/unitstd/Unicode.unit.hx
  72. 1 1
      tests/unit/src/unitstd/haxe/Utf8.unit.hx
  73. 1 1
      tests/unit/src/unitstd/haxe/crypto/Hmac.unit.hx

+ 66 - 0
src/context/common.ml

@@ -604,6 +604,72 @@ let url_encode_s s =
 	url_encode s (Buffer.add_char b);
 	url_encode s (Buffer.add_char b);
 	Buffer.contents b
 	Buffer.contents b
 
 
+(* UTF8 *)
+
+(*
+	Returns [str] as a UTF-8 string together with the number of UTF-16 code units
+	needed to represent it.
+
+	If [str] does not validate as UTF-8, each of its bytes is converted with
+	[UChar.of_char] (ISO to UTF-8) and the result is re-encoded. Surrogate code
+	points (0xD800..0xDFFF) and values >= 0x110000 are reported through [abort]
+	at position [p].
+*)
+let to_utf8 str p =
+	let u8 = try
+		UTF8.validate str;
+		str;
+	with
+		UTF8.Malformed_code ->
+			(* ISO to utf8 *)
+			let b = UTF8.Buf.create 0 in
+			String.iter (fun c -> UTF8.Buf.add_char b (UChar.of_char c)) str;
+			UTF8.Buf.contents b
+	in
+	let ccount = ref 0 in
+	UTF8.iter (fun c ->
+		let c = UChar.code c in
+		if (c >= 0xD800 && c <= 0xDFFF) || c >= 0x110000 then abort "Invalid unicode char" p;
+		incr ccount;
+		(* every code point from 0x10000 upwards (inclusive) takes a surrogate pair, i.e. two units *)
+		if c >= 0x10000 then incr ccount;
+	) u8;
+	u8, !ccount
+
+(*
+	Appends the code point [c] to [buf] as UTF-16 code units, low byte first.
+
+	Code points below 0x10000 are written as one unit, code points up to 0x10FFFF
+	as a high/low surrogate pair. Raises [Failure] for negative values, for
+	surrogate code points (0xD800..0xDFFF) and for values >= 0x110000; nothing
+	is written to [buf] in that case.
+*)
+let utf16_add buf c =
+	let add c =
+		Buffer.add_char buf (char_of_int (c land 0xFF));
+		Buffer.add_char buf (char_of_int (c lsr 8));
+	in
+	let invalid () = failwith ("Invalid unicode char " ^ string_of_int c) in
+	(* negative values must be rejected up front: they would otherwise reach the surrogate-pair branch *)
+	if c < 0 || c >= 0x110000 || (c >= 0xD800 && c <= 0xDFFF) then
+		invalid ()
+	else if c < 0x10000 then
+		add c
+	else begin
+		let c = c - 0x10000 in
+		add ((c lsr 10) + 0xD800);
+		add ((c land 1023) + 0xDC00);
+	end
+
+(*
+	Converts the UTF-8 string [str] to UTF-16 (low byte first, see [utf16_add]).
+	When [zt] is true a terminating 0 code unit is appended.
+
+	Conversion is best-effort: if iteration raises [Invalid_argument] or
+	[UChar.Out_of_range] the units converted so far are kept and the rest of
+	[str] is dropped.
+*)
+let utf8_to_utf16 str zt =
+	let out = Buffer.create (String.length str * 2) in
+	let push uc = utf16_add out (UChar.code uc) in
+	begin try
+		UTF8.iter push str
+	with Invalid_argument _ | UChar.Out_of_range ->
+		() (* if malformed *)
+	end;
+	if zt then utf16_add out 0;
+	Buffer.contents out
+
+(*
+	Converts [str], a sequence of UTF-16 code units stored low byte first (the
+	layout produced by [utf16_add]), to a UTF-8 string.
+
+	Surrogate pairs are combined into a single code point. An unpaired surrogate
+	is replaced by U+FFFD so that the result is always valid UTF-8. A trailing
+	odd byte (incomplete code unit) is ignored.
+*)
+let utf16_to_utf8 str =
+	let len = String.length str in
+	let b = Buffer.create (len / 2) in
+	let add c = Buffer.add_char b (char_of_int (c land 0xFF)) in
+	(* code unit starting at byte offset [i]; caller guarantees i + 1 < len *)
+	let unit_at i = int_of_char str.[i] lor (int_of_char str.[i + 1] lsl 8) in
+	let is_high c = c >= 0xD800 && c <= 0xDBFF in
+	let is_low c = c >= 0xDC00 && c <= 0xDFFF in
+	(* standard 1 to 4 byte UTF-8 encoding of the code point [c] *)
+	let encode c =
+		if c < 0x80 then
+			add c
+		else if c < 0x800 then begin
+			add (0xC0 lor (c lsr 6));
+			add (0x80 lor (c land 0x3F));
+		end else if c < 0x10000 then begin
+			add (0xE0 lor (c lsr 12));
+			add (0x80 lor ((c lsr 6) land 0x3F));
+			add (0x80 lor (c land 0x3F));
+		end else begin
+			add (0xF0 lor (c lsr 18));
+			add (0x80 lor ((c lsr 12) land 0x3F));
+			add (0x80 lor ((c lsr 6) land 0x3F));
+			add (0x80 lor (c land 0x3F));
+		end
+	in
+	let rec loop i =
+		if i + 1 < len then begin
+			let c = unit_at i in
+			if is_high c && i + 3 < len && is_low (unit_at (i + 2)) then begin
+				encode (0x10000 + ((c - 0xD800) lsl 10) + (unit_at (i + 2) - 0xDC00));
+				loop (i + 4)
+			end else begin
+				(* lone surrogates cannot be expressed in UTF-8 *)
+				encode (if is_high c || is_low c then 0xFFFD else c);
+				loop (i + 2)
+			end
+		end
+	in
+	loop 0;
+	Buffer.contents b
+
 let add_diagnostics_message com s p sev =
 let add_diagnostics_message com s p sev =
 	let di = com.shared.shared_display_information in
 	let di = com.shared.shared_display_information in
 	di.diagnostics_messages <- (s,p,sev) :: di.diagnostics_messages
 	di.diagnostics_messages <- (s,p,sev) :: di.diagnostics_messages

+ 1 - 1
src/generators/gencs.ml

@@ -382,7 +382,7 @@ struct
 					{ e with eexpr = TField(run ef, FDynamic "ToUpperInvariant") }
 					{ e with eexpr = TField(run ef, FDynamic "ToUpperInvariant") }
 
 
 				| TCall( { eexpr = TField(_, FStatic({ cl_path = [], "String" }, { cf_name = "fromCharCode" })) }, [cc] ) ->
 				| TCall( { eexpr = TField(_, FStatic({ cl_path = [], "String" }, { cf_name = "fromCharCode" })) }, [cc] ) ->
-					{ e with eexpr = TNew(get_cl_from_t basic.tstring, [], [mk_cast tchar (run cc); make_int gen.gcon.basic 1 cc.epos]) }
+					{ e with eexpr = TCall(mk_static_field_access_infer string_ext "fromCharCode" e.epos [], [run cc]) }
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("charAt" as field) })) }, args )
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("charAt" as field) })) }, args )
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("charCodeAt" as field) })) }, args )
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("charCodeAt" as field) })) }, args )
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("indexOf" as field) })) }, args )
 				| TCall( { eexpr = TField(ef, FInstance({ cl_path = [], "String" }, _, { cf_name = ("indexOf" as field) })) }, args )

+ 2 - 2
src/generators/genjava.ml

@@ -1548,9 +1548,9 @@ let generate con =
 					) 0 el);
 					) 0 el);
 					write w "}"
 					write w "}"
 				| TCall( ( { eexpr = TField(_, FStatic({ cl_path = ([], "String") }, { cf_name = "fromCharCode" })) } ), [cc] ) ->
 				| TCall( ( { eexpr = TField(_, FStatic({ cl_path = ([], "String") }, { cf_name = "fromCharCode" })) } ), [cc] ) ->
-						write w "Character.toString((char) ";
+						write w "new java.lang.String( java.lang.Character.toChars((int) ";
 						expr_s w cc;
 						expr_s w cc;
-						write w ")"
+						write w ") )"
 				| TCall ({ eexpr = TIdent "__is__" }, [ expr; { eexpr = TTypeExpr(md) } ] ) ->
 				| TCall ({ eexpr = TIdent "__is__" }, [ expr; { eexpr = TTypeExpr(md) } ] ) ->
 					write w "( ";
 					write w "( ";
 					expr_s w expr;
 					expr_s w expr;

+ 2 - 1
src/generators/genlua.ml

@@ -647,8 +647,9 @@ and gen_expr ?(local=true) ctx e = begin
         gen_value ctx x;
         gen_value ctx x;
         print ctx "[1]"
         print ctx "[1]"
     | TField (e, ef) when is_string_expr e && field_name ef = "length"->
     | TField (e, ef) when is_string_expr e && field_name ef = "length"->
-        spr ctx "#";
+        spr ctx "__lua_lib_luautf8_Utf8.len(";
         gen_value ctx e;
         gen_value ctx e;
+        spr ctx ")";
     | TField (e, ef) when is_possible_string_field e (field_name ef)  ->
     | TField (e, ef) when is_possible_string_field e (field_name ef)  ->
         add_feature ctx "use._hx_wrap_if_string_field";
         add_feature ctx "use._hx_wrap_if_string_field";
         add_feature ctx "use.string";
         add_feature ctx "use.string";

+ 5 - 3
src/generators/genphp7.ml

@@ -2201,9 +2201,9 @@ class code_writer (ctx:Common.context) hx_type_path php_name =
 		method write_expr_field expr access =
 		method write_expr_field expr access =
 			match access with
 			match access with
 				| FInstance ({ cl_path = [], "String"}, _, { cf_name = "length"; cf_kind = Var _ }) ->
 				| FInstance ({ cl_path = [], "String"}, _, { cf_name = "length"; cf_kind = Var _ }) ->
-					self#write "strlen(";
+					self#write "mb_strlen(";
 					self#write_expr expr;
 					self#write_expr expr;
-					self#write ")"
+					self#write ", 'UTF-8')"
 				| FInstance (_, _, field) -> self#write_expr_for_field_access expr "->" (field_name field)
 				| FInstance (_, _, field) -> self#write_expr_for_field_access expr "->" (field_name field)
 				| FStatic (_, ({ cf_kind = Var _ } as field)) ->
 				| FStatic (_, ({ cf_kind = Var _ } as field)) ->
 					(match (reveal_expr expr).eexpr with
 					(match (reveal_expr expr).eexpr with
@@ -2254,7 +2254,9 @@ class code_writer (ctx:Common.context) hx_type_path php_name =
 					self#write "()"
 					self#write "()"
 				end
 				end
 		(**
 		(**
-			Writes field access on Dynamic expression to output buffer
+			Writes field access on Dynamic expression to output buffer.
+			Returns `true` if the requested field most likely belongs to String (and field resolution will be handled at runtime).
+			Otherwise returns `false`
 		*)
 		*)
 		method write_expr_field_if_string expr field_name =
 		method write_expr_field_if_string expr field_name =
 			(* Special case for String fields *)
 			(* Special case for String fields *)

+ 1 - 1
src/generators/hl2c.ml

@@ -1215,7 +1215,7 @@ let write_c com file (code:code) =
 				sexpr "}";
 				sexpr "}";
 			end
 			end
 		end else if String.length str >= string_data_limit then
 		end else if String.length str >= string_data_limit then
-			let s = utf8_to_utf16 str in
+			let s = Common.utf8_to_utf16 str true in
 			sline "// %s..." (String.escaped (String.sub str 0 (string_data_limit-4)));
 			sline "// %s..." (String.escaped (String.sub str 0 (string_data_limit-4)));
 			output ctx (Printf.sprintf "vbyte string$%d[] = {" i);
 			output ctx (Printf.sprintf "vbyte string$%d[] = {" i);
 			output_bytes (output ctx) s;
 			output_bytes (output ctx) s;

+ 0 - 21
src/generators/hlcode.ml

@@ -335,27 +335,6 @@ let hl_hash b =
 	in
 	in
 	loop 0
 	loop 0
 
 
-let utf16_add buf c =
-	let add c =
-		Buffer.add_char buf (char_of_int (c land 0xFF));
-		Buffer.add_char buf (char_of_int (c lsr 8));
-	in
-	if c >= 0 && c < 0x10000 then begin
-		if c >= 0xD800 && c <= 0xDFFF then failwith ("Invalid unicode char " ^ string_of_int c);
-		add c;
-	end else if c < 0x110000 then begin
-		let c = c - 0x10000 in
-		add ((c asr 10) + 0xD800);
-		add ((c land 1023) + 0xDC00);
-	end else
-		failwith ("Invalid unicode char " ^ string_of_int c)
-
-let utf8_to_utf16 str =
-	let b = Buffer.create (String.length str * 2) in
-	(try UTF8.iter (fun c -> utf16_add b (UChar.code c)) str with Invalid_argument _ | UChar.Out_of_range -> ()); (* if malformed *)
-	utf16_add b 0;
-	Buffer.contents b
-
 let rec get_index name p =
 let rec get_index name p =
 	try
 	try
 		PMap.find name p.pindex
 		PMap.find name p.pindex

+ 7 - 7
src/generators/hlinterp.ml

@@ -286,7 +286,7 @@ let fstr = function
 	| FFun f -> "function@" ^ string_of_int f.findex
 	| FFun f -> "function@" ^ string_of_int f.findex
 	| FNativeFun (s,_,_) -> "native[" ^ s ^ "]"
 	| FNativeFun (s,_,_) -> "native[" ^ s ^ "]"
 
 
-let caml_to_hl str = utf8_to_utf16 str
+let caml_to_hl str = Common.utf8_to_utf16 str true
 
 
 let hash ctx str =
 let hash ctx str =
 	let h = hl_hash str in
 	let h = hl_hash str in
@@ -311,7 +311,7 @@ let utf16_iter f s =
 	loop 0
 	loop 0
 
 
 let utf16_char buf c =
 let utf16_char buf c =
-	utf16_add buf (int_of_char c)
+	Common.utf16_add buf (int_of_char c)
 
 
 let hl_to_caml str =
 let hl_to_caml str =
 	let utf16_eof s =
 	let utf16_eof s =
@@ -1763,9 +1763,9 @@ let load_native ctx lib name t =
 						if c >= int_of_char 'a' && c <= int_of_char 'z' then c + int_of_char 'A' - int_of_char 'a'
 						if c >= int_of_char 'a' && c <= int_of_char 'z' then c + int_of_char 'A' - int_of_char 'a'
 						else c
 						else c
 					in
 					in
-					utf16_add buf c
+					Common.utf16_add buf c
 				) (String.sub s (int pos) ((int len) lsl 1));
 				) (String.sub s (int pos) ((int len) lsl 1));
-				utf16_add buf 0;
+				Common.utf16_add buf 0;
 				VBytes (Buffer.contents buf)
 				VBytes (Buffer.contents buf)
 			| _ -> assert false)
 			| _ -> assert false)
 		| "ucs2_lower" ->
 		| "ucs2_lower" ->
@@ -1777,9 +1777,9 @@ let load_native ctx lib name t =
 						if c >= int_of_char 'A' && c <= int_of_char 'Z' then c + int_of_char 'a' - int_of_char 'A'
 						if c >= int_of_char 'A' && c <= int_of_char 'Z' then c + int_of_char 'a' - int_of_char 'A'
 						else c
 						else c
 					in
 					in
-					utf16_add buf c
+					Common.utf16_add buf c
 				) (String.sub s (int pos) ((int len) lsl 1));
 				) (String.sub s (int pos) ((int len) lsl 1));
-				utf16_add buf 0;
+				Common.utf16_add buf 0;
 				VBytes (Buffer.contents buf)
 				VBytes (Buffer.contents buf)
 			| _ -> assert false)
 			| _ -> assert false)
 		| "url_encode" ->
 		| "url_encode" ->
@@ -1788,7 +1788,7 @@ let load_native ctx lib name t =
 				let s = hl_to_caml s in
 				let s = hl_to_caml s in
 				let buf = Buffer.create 0 in
 				let buf = Buffer.create 0 in
 				Common.url_encode s (utf16_char buf);
 				Common.url_encode s (utf16_char buf);
-				utf16_add buf 0;
+				Common.utf16_add buf 0;
 				let str = Buffer.contents buf in
 				let str = Buffer.contents buf in
 				set_ref r (to_int (String.length str lsr 1 - 1));
 				set_ref r (to_int (String.length str lsr 1 - 1));
 				VBytes str
 				VBytes str

+ 4 - 7
src/macro/eval/evalArray.ml

@@ -19,6 +19,7 @@
 
 
 open Globals
 open Globals
 open EvalValue
 open EvalValue
+open EvalString
 
 
 let create values = {
 let create values = {
 	avalues = values;
 	avalues = values;
@@ -26,13 +27,9 @@ let create values = {
 }
 }
 
 
 let array_join a f sep =
 let array_join a f sep =
-	let buf = Rope.Buffer.create 0 in
-	let last = Array.length a - 1 in
-	Array.iteri (fun i v ->
-		Rope.Buffer.add_rope buf (f v);
-		if i <> last then Rope.Buffer.add_rope buf sep;
-	) a;
-	Rope.Buffer.contents buf
+	let l = Array.map f a in
+	let l = Array.to_list l in
+	join sep l
 
 
 let to_list a = Array.to_list (Array.sub a.avalues 0 a.alength)
 let to_list a = Array.to_list (Array.sub a.avalues 0 a.alength)
 
 

+ 62 - 0
src/macro/eval/evalBytes.ml

@@ -0,0 +1,62 @@
+(*
+	The Haxe Compiler
+	Copyright (C) 2005-2018  Haxe Foundation
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *)
+
+(*
+	Little-endian integer accessors for [Bytes.t] buffers.
+	All offsets are byte offsets; out-of-range accesses raise [Invalid_argument]
+	through [Bytes.get] / [Bytes.set].
+*)
+
+(* Reads the byte at offset [i] as an int in 0..255. *)
+let read_byte this i = int_of_char (Bytes.get this i)
+
+(* Reads an unsigned 16-bit value stored low byte first at offset [i]. *)
+let read_ui16 this i =
+	let ch1 = read_byte this i in
+	let ch2 = read_byte this (i + 1) in
+	ch1 lor (ch2 lsl 8)
+
+(* Reads a 32-bit value stored low byte first at offset [i]. *)
+let read_i32 this i =
+	let ch1 = read_byte this i in
+	let ch2 = read_byte this (i + 1) in
+	let ch3 = read_byte this (i + 2) in
+	(* the top byte is shifted in as Int32 so that bit 31 ends up as the sign bit *)
+	let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in
+	let big = Int32.shift_left (Int32.of_int (read_byte this (i + 3))) 24 in
+	Int32.logor base big
+
+(* Reads a 64-bit value stored low byte first at offset [i]. *)
+let read_i64 this i =
+	let ch1 = read_byte this i in
+	let ch2 = read_byte this (i + 1) in
+	let ch3 = read_byte this (i + 2) in
+	let ch4 = read_byte this (i + 3) in
+	(* the low 32 bits are assembled directly as Int64 to avoid sign extension *)
+	let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in
+	let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in
+	let big = Int64.of_int32 (read_i32 this (i + 4)) in
+	Int64.logor (Int64.shift_left big 32) small
+
+(* Stores the low 8 bits of [v] at offset [i]. *)
+let write_byte this i v =
+	(* mask first: Char.unsafe_chr performs no range check of its own *)
+	Bytes.set this i (Char.unsafe_chr (v land 0xFF))
+
+(* Stores the low 16 bits of [v] at offset [i], low byte first. *)
+let write_ui16 this i v =
+	write_byte this i v;
+	write_byte this (i + 1) (v lsr 8)
+
+(* Stores the 32-bit value [v] at offset [i], low byte first. *)
+let write_i32 this i v =
+	let base = Int32.to_int v in
+	(* the top byte is extracted on the Int32 side, before conversion to int *)
+	let big = Int32.to_int (Int32.shift_right_logical v 24) in
+	write_byte this i base;
+	write_byte this (i + 1) (base lsr 8);
+	write_byte this (i + 2) (base lsr 16);
+	write_byte this (i + 3) big
+
+(* Stores the 64-bit value [v] at offset [i], low byte first. *)
+let write_i64 this i v =
+	write_i32 this i (Int64.to_int32 v);
+	write_i32 this (i + 4) (Int64.to_int32 (Int64.shift_right_logical v 32))

+ 2 - 7
src/macro/eval/evalContext.ml

@@ -21,6 +21,7 @@ open Globals
 open Type
 open Type
 open EvalValue
 open EvalValue
 open EvalHash
 open EvalHash
+open EvalString
 
 
 type var_info = string
 type var_info = string
 
 
@@ -166,12 +167,6 @@ let rec kind_name ctx kind =
 	in
 	in
 	loop kind ctx.environment_offset
 	loop kind ctx.environment_offset
 
 
-let vstring s =
-	VString (s,lazy (Rope.to_string s))
-
-let vstring_direct (r,s) =
-	VString(r,s)
-
 let call_function f vl = f vl
 let call_function f vl = f vl
 
 
 let object_fields o =
 let object_fields o =
@@ -210,7 +205,7 @@ let throw v p =
 
 
 let exc v = throw v null_pos
 let exc v = throw v null_pos
 
 
-let exc_string str = exc (vstring (Rope.of_string str))
+let exc_string str = exc (vstring (EvalString.create_ascii str))
 
 
 let error_message = exc_string
 let error_message = exc_string
 
 

+ 5 - 5
src/macro/eval/evalDebugCLI.ml

@@ -43,13 +43,13 @@ let value_string value =
 		| VFalse -> "Bool","false"
 		| VFalse -> "Bool","false"
 		| VInt32 i -> "Int",Int32.to_string i
 		| VInt32 i -> "Int",Int32.to_string i
 		| VFloat f -> "Float",string_of_float f
 		| VFloat f -> "Float",string_of_float f
-		| VEnumValue ev -> rev_hash_s ev.epath,Rope.to_string (s_enum_value 0 ev)
+		| VEnumValue ev -> rev_hash_s ev.epath,EvalString.get (s_enum_value 0 ev)
 		| VObject o -> "Anonymous",fields_string (depth + 1) (object_fields o)
 		| VObject o -> "Anonymous",fields_string (depth + 1) (object_fields o)
-		| VString(_,s) -> "String","\"" ^ (Ast.s_escape (Lazy.force s)) ^ "\""
-		| VArray va -> "Array",Rope.to_string (s_array (depth + 1) va)
-		| VVector vv -> "Vector",Rope.to_string (s_vector (depth + 1) vv)
+		| VString s -> "String","\"" ^ (Ast.s_escape (Lazy.force s.sstring)) ^ "\""
+		| VArray va -> "Array",EvalString.get (s_array (depth + 1) va)
+		| VVector vv -> "Vector",EvalString.get (s_vector (depth + 1) vv)
 		| VInstance vi -> rev_hash_s vi.iproto.ppath,instance_fields (depth + 1) vi
 		| VInstance vi -> rev_hash_s vi.iproto.ppath,instance_fields (depth + 1) vi
-		| VPrototype proto -> "Anonymous",Rope.to_string (s_proto_kind proto)
+		| VPrototype proto -> "Anonymous",EvalString.get (s_proto_kind proto)
 		| VFunction _ | VFieldClosure _ -> "Function","fun"
 		| VFunction _ | VFieldClosure _ -> "Function","fun"
 		| VLazy f -> value_string depth (!f())
 		| VLazy f -> value_string depth (!f())
 	in
 	in

+ 5 - 5
src/macro/eval/evalDebugSocket.ml

@@ -32,10 +32,10 @@ let var_to_json name value access =
 				| vl -> name ^ "(...)"
 				| vl -> name ^ "(...)"
 			end
 			end
 		| VObject o -> "{...}"
 		| VObject o -> "{...}"
-		| VString(_,s) -> string_repr s
+		| VString s -> string_repr s.sstring
 		| VArray _ | VVector _ -> "[...]"
 		| VArray _ | VVector _ -> "[...]"
 		| VInstance vi -> (rev_hash_s vi.iproto.ppath) ^ " {...}"
 		| VInstance vi -> (rev_hash_s vi.iproto.ppath) ^ " {...}"
-		| VPrototype proto -> Rope.to_string (s_proto_kind proto)
+		| VPrototype proto -> EvalString.get (s_proto_kind proto)
 		| VFunction _ | VFieldClosure _ -> "<fun>"
 		| VFunction _ | VFieldClosure _ -> "<fun>"
 		| VLazy f -> level2_value_repr (!f())
 		| VLazy f -> level2_value_repr (!f())
 	in
 	in
@@ -65,13 +65,13 @@ let var_to_json name value access =
 			in
 			in
 			jv type_s value_s is_structured
 			jv type_s value_s is_structured
 		| VObject o -> jv "Anonymous" (fields_string (object_fields o)) true (* TODO: false for empty structures *)
 		| VObject o -> jv "Anonymous" (fields_string (object_fields o)) true (* TODO: false for empty structures *)
-		| VString(_,s) -> jv "String" (string_repr s) false
+		| VString s -> jv "String" (string_repr s.sstring) false
 		| VArray va -> jv "Array" (array_elems (EvalArray.to_list va)) true (* TODO: false for empty arrays *)
 		| VArray va -> jv "Array" (array_elems (EvalArray.to_list va)) true (* TODO: false for empty arrays *)
 		| VVector vv -> jv "Vector" (array_elems (Array.to_list vv)) true
 		| VVector vv -> jv "Vector" (array_elems (Array.to_list vv)) true
 		| VInstance vi ->
 		| VInstance vi ->
 			let class_name = rev_hash_s vi.iproto.ppath in
 			let class_name = rev_hash_s vi.iproto.ppath in
 			jv class_name (class_name ^ " " ^ (fields_string (instance_fields vi))) true
 			jv class_name (class_name ^ " " ^ (fields_string (instance_fields vi))) true
-		| VPrototype proto -> jv "Anonymous" (Rope.to_string (s_proto_kind proto)) false (* TODO: show statics *)
+		| VPrototype proto -> jv "Anonymous" (EvalString.get (s_proto_kind proto)) false (* TODO: show statics *)
 		| VFunction _ | VFieldClosure _ -> jv "Function" "<fun>" false
 		| VFunction _ | VFieldClosure _ -> jv "Function" "<fun>" false
 		| VLazy f -> value_string (!f())
 		| VLazy f -> value_string (!f())
 	in
 	in
@@ -176,7 +176,7 @@ let output_inner_vars v access =
 				let a = access ^ "." ^ n in
 				let a = access ^ "." ^ n in
 				n, v, a
 				n, v, a
 			) fields
 			) fields
-		| VString(_,s) -> []
+		| VString _ -> []
 		| VArray va ->
 		| VArray va ->
 			let l = EvalArray.to_list va in
 			let l = EvalArray.to_list va in
 			List.mapi (fun i v ->
 			List.mapi (fun i v ->

+ 4 - 4
src/macro/eval/evalDecode.ml

@@ -50,15 +50,15 @@ let decode_varray v = match v with
 	| _ -> unexpected_value v "array"
 	| _ -> unexpected_value v "array"
 
 
 let decode_string v = match v with
 let decode_string v = match v with
-	| VString(r,s) -> Lazy.force s
+	| VString s -> EvalString.get s
 	| _ -> unexpected_value v "string"
 	| _ -> unexpected_value v "string"
 
 
 let decode_rope v = match v with
 let decode_rope v = match v with
-	| VString(s,_) -> s
+	| VString s -> s.srope
 	| _ -> unexpected_value v "string"
 	| _ -> unexpected_value v "string"
 
 
-let decode_rope_string v = match v with
-	| VString(r,s) -> r,s
+let decode_vstring v = match v with
+	| VString s -> s
 	| _ -> unexpected_value v "string"
 	| _ -> unexpected_value v "string"
 
 
 let decode_bytes v = match v with
 let decode_bytes v = match v with

+ 5 - 4
src/macro/eval/evalEmitter.ml

@@ -321,7 +321,7 @@ let emit_proto_field_read proto i env =
 
 
 let emit_instance_field_read exec i env = match exec env with
 let emit_instance_field_read exec i env = match exec env with
 	| VInstance vi -> vi.ifields.(i)
 	| VInstance vi -> vi.ifields.(i)
-	| VString(_,s) -> vint (String.length (Lazy.force s))
+	| VString s -> vint (s.slength)
 	| v -> unexpected_value v "instance"
 	| v -> unexpected_value v "instance"
 
 
 let emit_field_closure exec name env =
 let emit_field_closure exec name env =
@@ -363,10 +363,11 @@ let emit_enum_parameter_read exec i env = match exec env with
 	| v1 -> unexpected_value v1 "enum value"
 	| v1 -> unexpected_value v1 "enum value"
 
 
 let emit_string_cca exec1 exec2 p env =
 let emit_string_cca exec1 exec2 p env =
-	let s = decode_string (exec1 env) in
+	let s = decode_vstring (exec1 env) in
 	let index = decode_int_p (exec2 env) p in
 	let index = decode_int_p (exec2 env) p in
-	if index >= String.length s then vnull
-	else vint (int_of_char s.[index])
+	if index < 0 || index >= s.slength then vnull
+	else if s.sascii then vint (int_of_char (String.get (Lazy.force s.sstring) index))
+	else vint (EvalString.read_char s (index lsl 1))
 
 
 (* Write *)
 (* Write *)
 
 

+ 8 - 4
src/macro/eval/evalEncode.ml

@@ -22,6 +22,7 @@ open EvalValue
 open EvalExceptions
 open EvalExceptions
 open EvalContext
 open EvalContext
 open EvalHash
 open EvalHash
+open EvalString
 
 
 (* Functions *)
 (* Functions *)
 
 
@@ -191,10 +192,13 @@ let encode_array l =
 	encode_array_instance (EvalArray.create (Array.of_list l))
 	encode_array_instance (EvalArray.create (Array.of_list l))
 
 
 let encode_string s =
 let encode_string s =
-	VString(Rope.of_string s,lazy s)
+	vstring (create_ascii s)
 
 
-let encode_rope s =
-	vstring s
+let encode_rope r =
+	vstring (create_ascii_of_rope r)
+
+let encode_rope_ucs2 r length =
+	vstring (create_ucs2_of_rope r length)
 
 
 let encode_bytes s =
 let encode_bytes s =
 	encode_instance key_haxe_io_Bytes ~kind:(IBytes s)
 	encode_instance key_haxe_io_Bytes ~kind:(IBytes s)
@@ -210,7 +214,7 @@ let encode_object_map_direct h =
 
 
 let encode_string_map convert m =
 let encode_string_map convert m =
 	let h = StringHashtbl.create 0 in
 	let h = StringHashtbl.create 0 in
-	PMap.iter (fun key value -> StringHashtbl.add h (Rope.of_string key,lazy key) (convert value)) m;
+	PMap.iter (fun key value -> StringHashtbl.add h (create_ascii key) (convert value)) m;
 	encode_string_map_direct h
 	encode_string_map_direct h
 
 
 let fake_proto path =
 let fake_proto path =

+ 2 - 2
src/macro/eval/evalExceptions.ml

@@ -127,8 +127,8 @@ let catch_exceptions ctx ?(final=(fun() -> ())) f p =
 			get_ctx_ref := prev;
 			get_ctx_ref := prev;
 			final();
 			final();
 			match v1,v2 with
 			match v1,v2 with
-				| VString(_,s),VInstance {ikind = IPos p} ->
-					raise (Error.Error (Error.Custom (Lazy.force s),p))
+				| VString s,VInstance {ikind = IPos p} ->
+					raise (Error.Error (Error.Custom (EvalString.get s),p))
 				| _ ->
 				| _ ->
 					Error.error "Something went wrong" null_pos
 					Error.error "Something went wrong" null_pos
 		end else begin
 		end else begin

+ 2 - 2
src/macro/eval/evalField.ml

@@ -55,8 +55,8 @@ let field_raise v f =
 	| VVector vv ->
 	| VVector vv ->
 		if f = key_length then vint (Array.length vv)
 		if f = key_length then vint (Array.length vv)
 		else proto_field_direct (get_ctx()).vector_prototype f
 		else proto_field_direct (get_ctx()).vector_prototype f
-	| VString (_,s) ->
-		if f = key_length then vint (String.length (Lazy.force s))
+	| VString s ->
+		if f = key_length then vint (s.slength)
 		else proto_field_direct (get_ctx()).string_prototype f
 		else proto_field_direct (get_ctx()).string_prototype f
 	| VInstance vi -> (try instance_field vi f with Not_found -> proto_field_raise vi.iproto f)
 	| VInstance vi -> (try instance_field vi f with Not_found -> proto_field_raise vi.iproto f)
 	| _ -> raise Not_found
 	| _ -> raise Not_found

+ 1 - 1
src/macro/eval/evalJit.ml

@@ -36,7 +36,7 @@ let rope_path t = match follow t with
 let eone = mk (TConst(TInt (Int32.one))) t_dynamic null_pos
 let eone = mk (TConst(TInt (Int32.one))) t_dynamic null_pos
 
 
 let eval_const = function
 let eval_const = function
-	| TString s -> vstring (Rope.of_string s)
+	| TString s -> EvalString.bytes_to_utf8 (Bytes.unsafe_of_string s)
 	| TInt i32 -> vint32 i32
 	| TInt i32 -> vint32 i32
 	| TFloat f -> vfloat (float_of_string f)
 	| TFloat f -> vfloat (float_of_string f)
 	| TBool b -> vbool b
 	| TBool b -> vbool b

+ 6 - 6
src/macro/eval/evalMain.ml

@@ -226,13 +226,13 @@ let value_signature v =
 		| VInstance {ikind = IDate f} ->
 		| VInstance {ikind = IDate f} ->
 			cache v (fun () ->
 			cache v (fun () ->
 				addc 'v';
 				addc 'v';
-				add (Rope.to_string (s_date f))
+				add (EvalString.get (s_date f))
 			)
 			)
 		| VInstance {ikind = IStringMap map} ->
 		| VInstance {ikind = IStringMap map} ->
 			cache v (fun() ->
 			cache v (fun() ->
 				addc 'b';
 				addc 'b';
-				StringHashtbl.iter (fun (_,s) value ->
-					adds (Lazy.force s);
+				StringHashtbl.iter (fun s value ->
+					adds (Lazy.force s.sstring);
 					loop value
 					loop value
 				) map;
 				) map;
 				addc 'h'
 				addc 'h'
@@ -278,8 +278,8 @@ let value_signature v =
 				loop_fields fields;
 				loop_fields fields;
 				addc 'g';
 				addc 'g';
 			)
 			)
-		| VString(_,s) ->
-			adds (Lazy.force s)
+		| VString s ->
+			adds (Lazy.force s.sstring)
 		| VArray {avalues = a} | VVector a ->
 		| VArray {avalues = a} | VVector a ->
 			cache v (fun () ->
 			cache v (fun () ->
 				addc 'a';
 				addc 'a';
@@ -399,7 +399,7 @@ let rec value_to_expr v p =
 	| VFalse -> (EConst (Ident "false"),p)
 	| VFalse -> (EConst (Ident "false"),p)
 	| VInt32 i -> (EConst (Int (Int32.to_string i)),p)
 	| VInt32 i -> (EConst (Int (Int32.to_string i)),p)
 	| VFloat f -> haxe_float f p
 	| VFloat f -> haxe_float f p
-	| VString(r,s) -> (EConst (String (Lazy.force s)),p)
+	| VString s -> (EConst (String (Lazy.force s.sstring)),p)
 	| VArray va -> (EArrayDecl (List.map (fun v -> value_to_expr v p) (EvalArray.to_list va)),p)
 	| VArray va -> (EArrayDecl (List.map (fun v -> value_to_expr v p) (EvalArray.to_list va)),p)
 	| VObject o -> (EObjectDecl (List.map (fun (k,v) ->
 	| VObject o -> (EObjectDecl (List.map (fun (k,v) ->
 			let n = rev_hash_s k in
 			let n = rev_hash_s k in

+ 16 - 7
src/macro/eval/evalMisc.ml

@@ -27,6 +27,7 @@ open EvalDecode
 open EvalExceptions
 open EvalExceptions
 open EvalPrinting
 open EvalPrinting
 open EvalHash
 open EvalHash
+open EvalString
 
 
 let throw_string s p =
 let throw_string s p =
 	throw (encode_string s) p
 	throw (encode_string s) p
@@ -96,8 +97,16 @@ let rec compare a b =
 	| VTrue,VTrue | VFalse,VFalse -> CEq
 	| VTrue,VTrue | VFalse,VFalse -> CEq
 	| VFalse,VTrue -> CInf
 	| VFalse,VTrue -> CInf
 	| VTrue,VFalse -> CSup
 	| VTrue,VFalse -> CSup
-	| VString(_,s1),VString(_,s2) ->
-		let r = String.compare (Lazy.force s1) (Lazy.force s2) in
+	| VString s1,VString s2 ->
+		let s1' = Lazy.force s1.sstring in
+		let s2' = Lazy.force s2.sstring in
+		let s1,s2 = match s1.sascii,s2.sascii with
+		| true,true
+		| false,false -> s1',s2'
+		| true,false -> extend_ascii s1',s2'
+		| false,true -> s1',extend_ascii s2'
+		in
+		let r = String.compare s1 s2 in
 		if r = 0 then CEq else if r < 0 then CInf else CSup
 		if r = 0 then CEq else if r < 0 then CInf else CSup
 	| VFunction(a,_), VFunction(b,_) -> if a == b then CEq else CUndef
 	| VFunction(a,_), VFunction(b,_) -> if a == b then CEq else CUndef
 	| VArray va1,VArray va2 -> if va1 == va2 then CEq else CUndef
 	| VArray va1,VArray va2 -> if va1 == va2 then CEq else CUndef
@@ -139,7 +148,7 @@ and equals_structurally a b =
 	| VFloat a,VFloat b -> a = b
 	| VFloat a,VFloat b -> a = b
 	| VFloat a,VInt32 b -> a = (Int32.to_float b)
 	| VFloat a,VInt32 b -> a = (Int32.to_float b)
 	| VInt32 a,VFloat b -> (Int32.to_float a) = b
 	| VInt32 a,VFloat b -> (Int32.to_float a) = b
-	| VString(_,s1),VString(_,s2) -> Lazy.force s1 = Lazy.force s2
+	| VString s1,VString s2 -> Lazy.force s1.sstring = Lazy.force s2.sstring (* STODO *)
 	| VArray a,VArray b -> a == b || arrays_equal equals_structurally a.avalues b.avalues
 	| VArray a,VArray b -> a == b || arrays_equal equals_structurally a.avalues b.avalues
 	| VVector a,VVector b -> a == b || arrays_equal equals_structurally a b
 	| VVector a,VVector b -> a == b || arrays_equal equals_structurally a b
 	| VObject a,VObject b -> a == b || arrays_equal equals_structurally a.ofields b.ofields && IntMap.equal equals_structurally a.oextra b.oextra
 	| VObject a,VObject b -> a == b || arrays_equal equals_structurally a.ofields b.ofields && IntMap.equal equals_structurally a.oextra b.oextra
@@ -157,10 +166,10 @@ let op_add v1 v2 = match v1,v2 with
 	| VInt32 i1,VInt32 i2 -> vint32 (Int32.add i1 i2)
 	| VInt32 i1,VInt32 i2 -> vint32 (Int32.add i1 i2)
 	| VFloat f1,VFloat f2 -> vfloat (f1 +. f2)
 	| VFloat f1,VFloat f2 -> vfloat (f1 +. f2)
 	| VInt32 i,VFloat f | VFloat f,VInt32 i -> vfloat ((Int32.to_float i) +. f)
 	| VInt32 i,VFloat f | VFloat f,VInt32 i -> vfloat ((Int32.to_float i) +. f)
-	| VString(s1,_),VString(s2,_) -> encode_rope (Rope.concat2 s1 s2)
-	| VString(s1,_),v2 -> encode_rope (Rope.concat2 s1 (s_value 0 v2))
-	| v1,VString(s2,_) -> encode_rope (Rope.concat2 (s_value 0 v1) s2)
-	| v1,v2 -> encode_rope (Rope.concat2 (s_value 0 v1) (s_value 0 v2))
+	| VString s1,VString s2 -> vstring (concat s1 s2)
+	| VString s1,v2 -> vstring (concat s1 (s_value 0 v2))
+	| v1,VString s2 -> vstring (concat (s_value 0 v1) s2)
+	| v1,v2 -> vstring (concat (s_value 0 v1) (s_value 0 v2))
 
 
 let op_mult p v1 v2 = match v1,v2 with
 let op_mult p v1 v2 = match v1,v2 with
 	| VInt32 i1,VInt32 i2 -> vint32 (Int32.mul i1 i2)
 	| VInt32 i1,VInt32 i2 -> vint32 (Int32.mul i1 i2)

+ 53 - 37
src/macro/eval/evalPrinting.ml

@@ -23,42 +23,56 @@ open EvalValue
 open EvalContext
 open EvalContext
 open EvalField
 open EvalField
 open EvalHash
 open EvalHash
-
 open Rope
 open Rope
-
-let rnull = of_string "null"
-let rcomma = of_char ','
-let rtrue = of_string "true"
-let rfalse = of_string "false"
-let rfun = of_string "#fun"
-let rclosure = of_string "#closure"
+open EvalString
+
+let rempty = create_ascii ""
+let rbropen = create_ascii "{"
+let rbrclose = create_ascii "}"
+let rbkopen = create_ascii "["
+let rbkclose = create_ascii "]"
+let rpopen = create_ascii "("
+let rpclose = create_ascii ")"
+let rcolon = create_ascii ":"
+let rgt = create_ascii ">"
+let rstop = create_ascii "<...>"
+let rnull = create_ascii "null"
+let rcomma = create_ascii ","
+let rtrue = create_ascii "true"
+let rfalse = create_ascii "false"
+let rfun = create_ascii "#fun"
+let rclosure = create_ascii "#closure"
 
 
 let s_date d =
 let s_date d =
 	let open Unix in
 	let open Unix in
 	let t = localtime d in
 	let t = localtime d in
-	of_string (Printf.sprintf "%.4d-%.2d-%.2d %.2d:%.2d:%.2d" (t.tm_year + 1900) (t.tm_mon + 1) t.tm_mday t.tm_hour t.tm_min t.tm_sec)
+	create_ascii (Printf.sprintf "%.4d-%.2d-%.2d %.2d:%.2d:%.2d" (t.tm_year + 1900) (t.tm_mon + 1) t.tm_mday t.tm_hour t.tm_min t.tm_sec)
+
+let s_hash key = create_ascii_of_rope (EvalHash.rev_hash key)
 
 
 let rec s_object depth o =
 let rec s_object depth o =
 	let fields = object_fields o in
 	let fields = object_fields o in
-	let fields = List.map (fun (key,value) -> (concat empty [EvalHash.rev_hash key; of_string ": "; s_value depth value])) fields in
-	concat empty [
-		of_char '{';
-		concat rcomma fields;
-		of_char '}'
-	]
+	let s,_ = List.fold_left (fun (s,sep) (key,value) ->
+		let s = concat s sep in
+		let s = concat s (s_hash key) in
+		let s = concat s rcolon in
+		let s = concat s (s_value depth value) in
+		(s,rcomma)
+	) (rempty,rbropen) fields in
+	concat s rbrclose
 
 
 and s_array depth va =
 and s_array depth va =
-	concat empty [
-		of_char '[';
+	join rempty [
+		rbkopen;
 		EvalArray.join va (s_value depth) rcomma;
 		EvalArray.join va (s_value depth) rcomma;
-		of_char ']';
+		rbkclose;
 	]
 	]
 
 
 and s_vector depth vv =
 and s_vector depth vv =
-	concat empty [
-		of_char '[';
+	join rempty [
+		rbkopen;
 		EvalArray.join (EvalArray.create vv) (s_value depth) rcomma;
 		EvalArray.join (EvalArray.create vv) (s_value depth) rcomma;
-		of_char ']';
+		rbkclose;
 	]
 	]
 
 
 and s_enum_ctor_name ve =
 and s_enum_ctor_name ve =
@@ -72,18 +86,18 @@ and s_enum_ctor_name ve =
 and s_enum_value depth ve =
 and s_enum_value depth ve =
 	let name = s_enum_ctor_name ve in
 	let name = s_enum_ctor_name ve in
 	match ve.eargs with
 	match ve.eargs with
-	| [||] -> of_string name
+	| [||] -> create_ascii name
 	| vl ->
 	| vl ->
-		concat empty [
-			of_string name;
-			of_char '(';
-			concat rcomma (Array.to_list (Array.map (s_value (depth + 1)) vl));
-			of_char ')'
+		join rempty [
+			create_ascii name;
+			rpopen;
+			join rcomma (Array.to_list (Array.map (s_value (depth + 1)) vl));
+			rpclose;
 		]
 		]
 
 
 and s_proto_kind proto = match proto.pkind with
 and s_proto_kind proto = match proto.pkind with
-	| PClass _ -> concat empty [of_string "Class<"; rev_hash proto.ppath; of_char '>']
-	| PEnum _ -> concat empty [of_string "Enum<"; rev_hash proto.ppath; of_char '>']
+	| PClass _ -> join rempty [create_ascii "Class<"; s_hash proto.ppath; rgt]
+	| PEnum _ -> join rempty [create_ascii "Enum<"; s_hash proto.ppath; rgt]
 	| PInstance | PObject -> assert false
 	| PInstance | PObject -> assert false
 
 
 and s_value depth v =
 and s_value depth v =
@@ -91,25 +105,25 @@ and s_value depth v =
 		let vf = field_raise v EvalHash.key_toString in
 		let vf = field_raise v EvalHash.key_toString in
 		s_value (depth + 1) (call_value_on v vf [])
 		s_value (depth + 1) (call_value_on v vf [])
 	in
 	in
-	if depth > 5 then of_string "<...>"
+	if depth > 5 then rstop
 	else match v with
 	else match v with
 	| VNull -> rnull
 	| VNull -> rnull
-	| VInt32 i32 -> of_string (Int32.to_string i32)
+	| VInt32 i32 -> create_ascii(Int32.to_string i32)
 	| VTrue -> rtrue
 	| VTrue -> rtrue
 	| VFalse -> rfalse
 	| VFalse -> rfalse
 	| VFloat f ->
 	| VFloat f ->
 		let s = Numeric.float_repres f in
 		let s = Numeric.float_repres f in
 		let len = String.length s in
 		let len = String.length s in
-		of_string (if String.unsafe_get s (len - 1) = '.' then String.sub s 0 (len - 1) else s)
-	| VFunction (f,_) -> concat2 rfun (Rope.of_string (""))
+		create_ascii (if String.unsafe_get s (len - 1) = '.' then String.sub s 0 (len - 1) else s)
+	| VFunction (f,_) -> rfun
 	| VFieldClosure _ -> rclosure
 	| VFieldClosure _ -> rclosure
 	| VEnumValue ve -> s_enum_value depth ve
 	| VEnumValue ve -> s_enum_value depth ve
-	| VString(s,_) -> s
+	| VString s -> s
 	| VArray va -> s_array (depth + 1) va
 	| VArray va -> s_array (depth + 1) va
 	| VVector vv -> s_vector (depth + 1) vv
 	| VVector vv -> s_vector (depth + 1) vv
 	| VInstance {ikind=IDate d} -> s_date d
 	| VInstance {ikind=IDate d} -> s_date d
-	| VInstance {ikind=IPos p} -> of_string ("#pos(" ^ Lexer.get_error_pos (Printf.sprintf "%s:%d:") p ^ ")")
-	| VInstance i -> (try call_to_string () with Not_found -> rev_hash i.iproto.ppath)
+	| VInstance {ikind=IPos p} -> create_ascii ("#pos(" ^ Lexer.get_error_pos (Printf.sprintf "%s:%d:") p ^ ")") (* STODO: not ascii? *)
+	| VInstance i -> (try call_to_string () with Not_found -> s_hash i.iproto.ppath)
 	| VObject o -> (try call_to_string () with Not_found -> s_object (depth + 1) o)
 	| VObject o -> (try call_to_string () with Not_found -> s_object (depth + 1) o)
 	| VLazy f -> s_value depth (!f())
 	| VLazy f -> s_value depth (!f())
 	| VPrototype proto ->
 	| VPrototype proto ->
@@ -126,4 +140,6 @@ and call_value_on vthis v vl =
 	| VFieldClosure(v1,f) -> call_function f (v1 :: vl)
 	| VFieldClosure(v1,f) -> call_function f (v1 :: vl)
 	| _ -> exc_string ("Cannot call " ^ (value_string v))
 	| _ -> exc_string ("Cannot call " ^ (value_string v))
 
 
-and value_string v = Rope.to_string (s_value 0 v)
+and value_string v =
+	let s = s_value 0 v in
+	EvalString.get s

+ 266 - 186
src/macro/eval/evalStdLib.ml

@@ -27,6 +27,7 @@ open EvalPrinting
 open EvalMisc
 open EvalMisc
 open EvalField
 open EvalField
 open EvalHash
 open EvalHash
+open EvalString
 
 
 let macro_lib = Hashtbl.create 0
 let macro_lib = Hashtbl.create 0
 
 
@@ -73,8 +74,8 @@ module StdEvalVector = struct
 
 
 	let join = vifun1 (fun vthis sep ->
 	let join = vifun1 (fun vthis sep ->
 		let this = this vthis in
 		let this = this vthis in
-		let sep = decode_rope sep in
-		encode_rope (EvalArray.array_join this (s_value 0) sep)
+		let sep = decode_vstring sep in
+		vstring ((EvalArray.array_join this (s_value 0) sep))
 	)
 	)
 
 
 	let map = vifun1 (fun vthis f ->
 	let map = vifun1 (fun vthis f ->
@@ -141,9 +142,9 @@ module StdArray = struct
 	)
 	)
 
 
 	let join = vifun1 (fun vthis sep ->
 	let join = vifun1 (fun vthis sep ->
-		let sep = decode_rope sep in
+		let sep = decode_vstring sep in
 		let s = EvalArray.join (this vthis) (s_value 0) sep in
 		let s = EvalArray.join (this vthis) (s_value 0) sep in
-		encode_rope s
+		vstring s
 	)
 	)
 
 
 	let lastIndexOf = vifun2 (fun vthis x fromIndex ->
 	let lastIndexOf = vifun2 (fun vthis x fromIndex ->
@@ -230,7 +231,7 @@ module StdArray = struct
 	)
 	)
 
 
 	let toString = vifun0 (fun vthis ->
 	let toString = vifun0 (fun vthis ->
-		encode_rope (s_array 0 (this vthis))
+		vstring (s_array 0 (this vthis))
 	)
 	)
 
 
 	let unshift = vifun1 (fun vthis v ->
 	let unshift = vifun1 (fun vthis v ->
@@ -252,54 +253,12 @@ let outside_bounds () =
 	exc (proto_field_direct haxe_io_Error key_OutsideBounds)
 	exc (proto_field_direct haxe_io_Error key_OutsideBounds)
 
 
 module StdBytes = struct
 module StdBytes = struct
+	open EvalBytes
+
 	let this vthis = match vthis with
 	let this vthis = match vthis with
 		| VInstance {ikind = IBytes o} -> o
 		| VInstance {ikind = IBytes o} -> o
 		| v -> unexpected_value v "bytes"
 		| v -> unexpected_value v "bytes"
 
 
-	let read_byte this i = int_of_char (Bytes.get this i)
-
-	let read_ui16 this i =
-		let ch1 = read_byte this i in
-		let ch2 = read_byte this (i + 1) in
-		ch1 lor (ch2 lsl 8)
-
-	let read_i32 this i =
-		let ch1 = read_byte this i in
-		let ch2 = read_byte this (i + 1) in
-		let ch3 = read_byte this (i + 2) in
-		let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in
-		let big = Int32.shift_left (Int32.of_int (read_byte this (i + 3))) 24 in
-		Int32.logor base big
-
-	let read_i64 this i =
-		let ch1 = read_byte this i in
-		let ch2 = read_byte this (i + 1) in
-		let ch3 = read_byte this (i + 2) in
-		let ch4 = read_byte this (i + 3) in
-		let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in
-		let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in
-		let big = Int64.of_int32 (read_i32 this (i + 4)) in
-		Int64.logor (Int64.shift_left big 32) small
-
-	let write_byte this i v =
-		Bytes.set this i (Char.unsafe_chr v)
-
-	let write_ui16 this i v =
-		write_byte this i v;
-		write_byte this (i + 1) (v lsr 8)
-
-	let write_i32 this i v =
-		let base = Int32.to_int v in
-		let big = Int32.to_int (Int32.shift_right_logical v 24) in
-		write_byte this i base;
-		write_byte this (i + 1) (base lsr 8);
-		write_byte this (i + 2) (base lsr 16);
-		write_byte this (i + 3) big
-
-	let write_i64 this i v =
-		write_i32 this i (Int64.to_int32 v);
-		write_i32 this (i + 4) (Int64.to_int32 (Int64.shift_right_logical v 32))
-
 	let alloc = vfun1 (fun length ->
 	let alloc = vfun1 (fun length ->
 		let length = decode_int length in
 		let length = decode_int length in
 		encode_bytes (Bytes.make length (Char.chr 0))
 		encode_bytes (Bytes.make length (Char.chr 0))
@@ -367,11 +326,12 @@ module StdBytes = struct
 			outside_bounds()
 			outside_bounds()
 	)
 	)
 
 
-	let getString = vifun2 (fun vthis pos len ->
+	let getString = vifun3 (fun vthis pos len encoding ->
 		let this = this vthis in
 		let this = this vthis in
 		let pos = decode_int pos in
 		let pos = decode_int pos in
 		let len = decode_int len in
 		let len = decode_int len in
-		encode_string (Bytes.unsafe_to_string ((try Bytes.sub this pos len with _ -> outside_bounds())));
+		let s = try Bytes.sub this pos len with _ -> outside_bounds() in
+		bytes_to_utf8 s
 	)
 	)
 
 
 	let getUInt16 = vifun1 (fun vthis pos ->
 	let getUInt16 = vifun1 (fun vthis pos ->
@@ -380,8 +340,14 @@ module StdBytes = struct
 
 
 	let ofData = vfun1 (fun v -> v)
 	let ofData = vfun1 (fun v -> v)
 
 
-	let ofString = vfun1 (fun v ->
-		encode_bytes (Bytes.of_string (decode_string v))
+	let ofString = vfun2 (fun v encoding ->
+		let s = decode_vstring v in
+		if s.sascii then
+			encode_bytes (Bytes.of_string (Lazy.force s.sstring))
+		else begin
+			let s = utf16_to_utf8 (Lazy.force s.sstring) in
+			encode_bytes (Bytes.of_string s)
+		end
 	)
 	)
 
 
 	let set = vifun2 (fun vthis pos v ->
 	let set = vifun2 (fun vthis pos v ->
@@ -452,7 +418,7 @@ module StdBytes = struct
 	)
 	)
 
 
 	let toString = vifun0 (fun vthis ->
 	let toString = vifun0 (fun vthis ->
-		encode_string (Bytes.to_string (this vthis))
+		bytes_to_utf8 (this vthis)
 	)
 	)
 end
 end
 
 
@@ -491,7 +457,7 @@ module StdBytesBuffer = struct
 		vnull
 		vnull
 	)
 	)
 
 
-	let addString = vifun1 (fun vthis src ->
+	let addString = vifun2 (fun vthis src encoding ->
 		let this = this vthis in
 		let this = this vthis in
 		let src = decode_string src in
 		let src = decode_string src in
 		Buffer.add_string this src;
 		Buffer.add_string this src;
@@ -746,7 +712,7 @@ module StdDate = struct
 	let getSeconds = vifun0 (fun vthis -> vint (localtime (this vthis)).tm_sec)
 	let getSeconds = vifun0 (fun vthis -> vint (localtime (this vthis)).tm_sec)
 	let getTime = vifun0 (fun vthis -> vfloat ((this vthis) *. 1000.))
 	let getTime = vifun0 (fun vthis -> vfloat ((this vthis) *. 1000.))
 	let now = vfun0 (fun () -> encode_date (time()))
 	let now = vfun0 (fun () -> encode_date (time()))
-	let toString = vifun0 (fun vthis -> encode_rope (s_date (this vthis)))
+	let toString = vifun0 (fun vthis -> vstring (s_date (this vthis)))
 end
 end
 
 
 module StdEReg = struct
 module StdEReg = struct
@@ -1118,9 +1084,9 @@ module StdFPHelper = struct
 		let low = decode_i32 low in
 		let low = decode_i32 low in
 		let high = decode_i32 high in
 		let high = decode_i32 high in
 		let b = Bytes.make 8 '0' in
 		let b = Bytes.make 8 '0' in
-		StdBytes.write_i32 b 0 low;
-		StdBytes.write_i32 b 4 high;
-		let i64 = StdBytes.read_i64 b 0 in
+		EvalBytes.write_i32 b 0 low;
+		EvalBytes.write_i32 b 4 high;
+		let i64 = EvalBytes.read_i64 b 0 in
 		vfloat (Int64.float_of_bits i64)
 		vfloat (Int64.float_of_bits i64)
 	)
 	)
 end
 end
@@ -1415,14 +1381,10 @@ module StdMap (Hashtbl : Hashtbl.S) = struct
 		);
 		);
 		"toString",vifun0 (fun vthis ->
 		"toString",vifun0 (fun vthis ->
 			let open Rope in
 			let open Rope in
-			let s = concat empty [
-				of_char '{';
-				concat rcomma
-					(Hashtbl.fold (fun key vvalue acc -> (concat empty [str key; of_string " => "; s_value 0 vvalue]) :: acc) (this vthis) [])
-				;
-				of_char '}'
-			] in
-			encode_rope s
+			let l = Hashtbl.fold (fun key vvalue acc -> (join rempty [str key; create_ascii " => "; s_value 0 vvalue]) :: acc) (this vthis) [] in
+			let s = join rcomma l in
+			let s = join rempty [rbropen;s;rbrclose] in
+			vstring s
 		);
 		);
 	]
 	]
 end
 end
@@ -1689,7 +1651,7 @@ module StdResource = struct
 	)
 	)
 
 
 	let getString = vfun1 (fun name ->
 	let getString = vfun1 (fun name ->
-		try encode_string (Hashtbl.find ((get_ctx()).curapi.MacroApi.get_com()).resources (decode_string name)) with Not_found -> vnull
+		try bytes_to_utf8 (Bytes.unsafe_of_string (Hashtbl.find ((get_ctx()).curapi.MacroApi.get_com()).resources (decode_string name))) with Not_found -> vnull
 	)
 	)
 
 
 	let getBytes = vfun1 (fun name ->
 	let getBytes = vfun1 (fun name ->
@@ -1874,7 +1836,7 @@ module StdStd = struct
 	)
 	)
 
 
 	let string = vfun1 (fun v ->
 	let string = vfun1 (fun v ->
-		encode_rope (s_value 0 v)
+		vstring (s_value 0 v)
 	)
 	)
 
 
 	let int = vfun1 (fun v ->
 	let int = vfun1 (fun v ->
@@ -1897,67 +1859,114 @@ end
 
 
 module StdString = struct
 module StdString = struct
 	let this vthis = match vthis with
 	let this vthis = match vthis with
-		| VString(r,_) -> r
-		| v -> unexpected_value v "string"
-
-	let this_pair vthis = match vthis with
-		| VString(r,s) -> r,Lazy.force s
-		| v -> unexpected_value v "string"
-
-	let this_string vthis = match vthis with
-		| VString(_,s) -> Lazy.force s
+		| VString s -> s
 		| v -> unexpected_value v "string"
 		| v -> unexpected_value v "string"
 
 
 	let charAt = vifun1 (fun vthis index ->
 	let charAt = vifun1 (fun vthis index ->
-		let this = this_string vthis in
+		let this = this vthis in
 		let i = decode_int index in
 		let i = decode_int index in
-		if i < 0 || i >= String.length this then encode_rope Rope.empty
-		else encode_rope (Rope.of_char (String.get this i))
+		if i < 0 || i >= this.slength then encode_rope Rope.empty
+		else begin
+			let s = Lazy.force this.sstring in
+			if this.sascii then encode_rope (Rope.of_char (String.get s i))
+			else begin
+				let b = Bytes.create 2 in
+				EvalBytes.write_ui16 b 0 (read_char this (i lsl 1));
+				let c = Bytes.unsafe_get b 0 in
+				let s = if (int_of_char c) < 0x80 then create_ascii (String.make 1 c)
+				else create_ucs2 (Bytes.unsafe_to_string b) 1 in
+				vstring s
+			end
+		end
 	)
 	)
 
 
 	let charCodeAt = vifun1 (fun vthis index ->
 	let charCodeAt = vifun1 (fun vthis index ->
-		let this = this_string vthis in
+		let this = this vthis in
 		let i = decode_int index in
 		let i = decode_int index in
-		if i < 0 || i >= String.length this then vnull
-		else vint (int_of_char (String.get this i))
+		if i < 0 || i >= this.slength then vnull
+		else if this.sascii then vint (int_of_char (String.get (Lazy.force this.sstring) i))
+		else vint (read_char this (i lsl 1))
 	)
 	)
 
 
 	let fromCharCode = vfun1 (fun i ->
 	let fromCharCode = vfun1 (fun i ->
 		let i = decode_int i in
 		let i = decode_int i in
-		if i < 0 || i > 0xFF then vnull
-		else encode_rope (Rope.of_char (char_of_int i))
+		try
+			vstring (from_char_code i)
+		with
+		| Not_found ->
+			vnull
+		| InvalidUnicodeChar ->
+			exc_string ("Invalid unicode char " ^ (string_of_int i))
 	)
 	)
 
 
 	let indexOf = vifun2 (fun vthis str startIndex ->
 	let indexOf = vifun2 (fun vthis str startIndex ->
+		let str = this str in
 		let this = this vthis in
 		let this = this vthis in
-		let str = decode_string str in
 		let i = default_int startIndex 0 in
 		let i = default_int startIndex 0 in
 		try
 		try
-			vint (Rope.search_forward_string str this i)
+			if Rope.length str.srope = 0 then
+				vint (max 0 (min i this.slength))
+			else if this.sascii then
+				vint (Rope.search_forward_string (Lazy.force str.sstring) this.srope i)
+			else begin
+				let pat = Str.regexp (maybe_extend_ascii str) in
+				let s = Lazy.force this.sstring in
+				vint ((Str.search_forward pat s (i lsl 1)) lsr 1);
+			end
 		with Not_found ->
 		with Not_found ->
 			vint (-1)
 			vint (-1)
 	)
 	)
 
 
 	let lastIndexOf = vifun2 (fun vthis str startIndex ->
 	let lastIndexOf = vifun2 (fun vthis str startIndex ->
-		let this = this_string vthis in
-		let str = decode_string str in
-		let i = default_int startIndex (String.length this - 1) in
+		let str = this str in
+		let this = this vthis in
 		try
 		try
-			if i >= String.length this || i < 0 then raise Not_found;
-			vint (Str.search_backward (Str.regexp_string str) this i)
+			if Rope.length str.srope = 0 then begin
+				let i = default_int startIndex this.slength in
+				vint (max 0 (min i this.slength))
+			end else begin
+				let i = default_int startIndex (this.slength - 1) in
+				if i >= this.slength || i < 0 then raise Not_found;
+				let s = Lazy.force this.sstring in
+				if this.sascii then
+					vint (Str.search_backward (Str.regexp_string (Lazy.force str.sstring)) s i)
+				else begin
+					let pat = Str.regexp (maybe_extend_ascii str) in
+					vint ((Str.search_backward pat s (i lsl 1)) lsr 1);
+				end
+			end
 		with Not_found ->
 		with Not_found ->
 			vint (-1)
 			vint (-1)
 	)
 	)
 
 
 	let split = vifun1 (fun vthis delimiter ->
 	let split = vifun1 (fun vthis delimiter ->
-		let this,s = this_pair vthis in
-		let delimiter = decode_string delimiter in
+		let this = this vthis in
+		let ascii = this.sascii in
+		let this,s = this.srope,Lazy.force this.sstring in
+		let delimiter = Lazy.force (decode_vstring delimiter).sstring in
 		let l_delimiter = String.length delimiter in
 		let l_delimiter = String.length delimiter in
 		let l_this = Rope.length this in
 		let l_this = Rope.length this in
-		if l_delimiter = 0 then
-			encode_array (List.map (fun chr -> encode_string (String.make 1 chr)) (ExtString.String.explode s))
-		else if l_delimiter > l_this then
-			encode_array [encode_rope this]
+		let encode_range pos length =
+			let s = Rope.sub this pos length in
+			if ascii then encode_rope s
+			else encode_rope_ucs2 s (length lsr 1)
+		in
+		if l_delimiter = 0 then begin
+			if ascii then
+				encode_array (List.map (fun chr -> encode_string (String.make 1 chr)) (ExtString.String.explode s))
+			else begin
+				let acc = DynArray.create () in
+				let bs = Bytes.unsafe_of_string s in
+				for i = 0 to (l_this - 1) lsr 1 do
+					let b = Bytes.create 2 in
+					Bytes.unsafe_set b 0 (Bytes.unsafe_get bs (i lsl 1));
+					Bytes.unsafe_set b 1 (Bytes.unsafe_get bs ((i lsl 1 + 1)));
+					DynArray.add acc (vstring (create_ucs2 (Bytes.unsafe_to_string b) 1));
+				done;
+				encode_array (DynArray.to_list acc)
+			end
+		end else if l_delimiter > l_this then
+			encode_array [encode_range 0 (Rope.length this)]
 		else begin
 		else begin
 			let chr = delimiter.[0] in
 			let chr = delimiter.[0] in
 			let acc = DynArray.create () in
 			let acc = DynArray.create () in
@@ -1973,20 +1982,20 @@ module StdString = struct
 					if not (loop2 1) then
 					if not (loop2 1) then
 						loop k (index + 1)
 						loop k (index + 1)
 					else begin
 					else begin
-						DynArray.add acc (encode_rope (Rope.sub this k (index - k)));
+						DynArray.add acc (encode_range k (index - k));
 						loop (index + l_delimiter) (index + l_delimiter)
 						loop (index + l_delimiter) (index + l_delimiter)
 					end
 					end
 				with Not_found ->
 				with Not_found ->
-					DynArray.add acc (encode_rope (Rope.sub this k (l_this - k)))
+					DynArray.add acc (encode_range k (l_this - k))
 			in
 			in
 			let rec loop1 i =
 			let rec loop1 i =
 				try
 				try
 					if i = l_this then raise Not_found;
 					if i = l_this then raise Not_found;
 					let index = String.index_from s i chr in
 					let index = String.index_from s i chr in
-					DynArray.add acc (encode_rope (Rope.sub this i (index - i)));
+					DynArray.add acc (encode_range i (index - i));
 					loop1 (index + l_delimiter)
 					loop1 (index + l_delimiter)
 				with Not_found ->
 				with Not_found ->
-					DynArray.add acc (encode_rope (Rope.sub this i (l_this - i)))
+					DynArray.add acc (encode_range i (l_this - i))
 			in
 			in
 			if l_delimiter = 1 then loop1 0 else loop 0 0;
 			if l_delimiter = 1 then loop1 0 else loop 0 0;
 			encode_array_instance (EvalArray.create (DynArray.to_array acc))
 			encode_array_instance (EvalArray.create (DynArray.to_array acc))
@@ -1996,52 +2005,75 @@ module StdString = struct
 	let substr = vifun2 (fun vthis pos len ->
 	let substr = vifun2 (fun vthis pos len ->
 		let this = this vthis in
 		let this = this vthis in
 		let pos = decode_int pos in
 		let pos = decode_int pos in
-		if pos >= Rope.length this then
+		let r = this.srope in
+		if pos >= this.slength then
 			encode_rope Rope.empty
 			encode_rope Rope.empty
 		else begin
 		else begin
 			let pos = if pos < 0 then begin
 			let pos = if pos < 0 then begin
-				let pos = Rope.length this + pos in
+				let pos = this.slength + pos in
 				if pos < 0 then 0 else pos
 				if pos < 0 then 0 else pos
 			end else pos in
 			end else pos in
-			let len = default_int len (Rope.length this - pos) in
-			let len = if len < 0 then Rope.length this + len - pos else len in
-			let s =
-				if len < 0 then Rope.empty
-				else if len + pos > Rope.length this then Rope.sub this pos (Rope.length this - pos)
-				else Rope.sub this pos len
-			in
-			encode_rope s
+			if this.sascii then begin
+				let len = default_int len (Rope.length r - pos) in
+				let len = if len < 0 then Rope.length r + len - pos else len in
+				let s =
+					if len < 0 then Rope.empty
+					else if len + pos > Rope.length r then Rope.sub r pos (Rope.length r - pos)
+					else Rope.sub r pos len
+				in
+				encode_rope s
+			end else begin
+				let pos = pos lsl 1 in
+				let len = match len with
+					| VNull -> (Rope.length r - pos)
+					| VInt32 i -> Int32.to_int i lsl 1
+					| _ -> unexpected_value len "int"
+				in
+				let len = if len < 0 then Rope.length r + len - pos else len in
+				let s =
+					if len < 0 then Rope.empty
+					else if len + pos > Rope.length r then Rope.sub r pos (Rope.length r - pos)
+					else Rope.sub r pos len
+				in
+				vstring (create_ucs2_of_rope s (len lsr 1))
+			end
 		end
 		end
 	)
 	)
 
 
 	let substring = vifun2 (fun vthis startIndex endIndex ->
 	let substring = vifun2 (fun vthis startIndex endIndex ->
 		let this = this vthis in
 		let this = this vthis in
 		let first = decode_int startIndex in
 		let first = decode_int startIndex in
-		let l = Rope.length this in
+		let l = this.slength in
 		let last = default_int endIndex l in
 		let last = default_int endIndex l in
 		let first = if first < 0 then 0 else first in
 		let first = if first < 0 then 0 else first in
 		let last = if last < 0 then 0 else last in
 		let last = if last < 0 then 0 else last in
 		let first,last = if first > last then last,first else first,last in
 		let first,last = if first > last then last,first else first,last in
 		let last = if last > l then l else last in
 		let last = if last > l then l else last in
-		let s = if first > l then
-			Rope.empty
-		else
-			Rope.sub this first (last - first)
-		in
-		encode_rope s
+		if first > l then
+			encode_rope Rope.empty
+		else begin
+			if this.sascii then
+				encode_rope (Rope.sub this.srope first (last - first))
+			else begin
+				let first = first lsl 1 in
+				let last = last lsl 1 in
+				let length = last - first in
+				let r = Rope.sub this.srope first length in
+				vstring (create_ucs2_of_rope r length)
+			end
+		end
 	)
 	)
 
 
-	let toLowerCase = vifun0 (fun vthis -> encode_rope (Rope.lowercase (this vthis)))
+	let toLowerCase = vifun0 (fun vthis -> encode_rope (Rope.lowercase (this vthis).srope))
 
 
 	let toString = vifun0 (fun vthis -> vthis)
 	let toString = vifun0 (fun vthis -> vthis)
 
 
-	let toUpperCase = vifun0 (fun vthis -> encode_rope (Rope.uppercase (this vthis)))
+	let toUpperCase = vifun0 (fun vthis -> encode_rope (Rope.uppercase (this vthis).srope))
 
 
-	let cca = vifun1 (fun vthis i ->
-		let this = this_string vthis in
-		let i = decode_int i in
-		if i < 0 || i >= String.length this then vnull
-		else vint (int_of_char (String.unsafe_get this i))
+	let cca = charCodeAt
+
+	let isAscii = vifun0 (fun vthis ->
+		vbool (this vthis).sascii
 	)
 	)
 end
 end
 
 
@@ -2049,46 +2081,75 @@ module StdStringBuf = struct
 	module Buffer = Rope.Buffer
 	module Buffer = Rope.Buffer
 
 
 	let this vthis = match vthis with
 	let this vthis = match vthis with
-		| VInstance {ikind = IBuffer buf} -> buf
+		| VInstance {ikind = IBuffer sb} -> sb
 		| v -> unexpected_value v "string"
 		| v -> unexpected_value v "string"
 
 
 	let add = vifun1 (fun vthis x ->
 	let add = vifun1 (fun vthis x ->
 		let this = this vthis in
 		let this = this vthis in
-		begin match x with
-			| VString(s,_) -> Buffer.add_rope this s
-			| _ -> Buffer.add_string this (value_string x)
-		end;
+		let s = match x with
+			| VString s -> s
+			| _ -> create_ascii (value_string x)
+		in
+		AwareBuffer.add_string this s;
 		vnull;
 		vnull;
 	)
 	)
 
 
 	let addChar = vifun1 (fun vthis c ->
 	let addChar = vifun1 (fun vthis c ->
 		let this = this vthis in
 		let this = this vthis in
-		let c = decode_int c in
-		let c = try char_of_int c with _ -> exc_string "char_of_int" in
-		Buffer.add_char this c;
+		let i = decode_int c in
+		let add i =
+			if this.bascii then AwareBuffer.promote_to_ucs this;
+			Buffer.add_char this.bbuffer (char_of_int (i land 0xFF));
+			Buffer.add_char this.bbuffer (char_of_int (i lsr 8));
+			this.blength <- this.blength + 1;
+		in
+		begin if i < 0 then
+			()
+		else if i < 128 then begin
+			if this.bascii then begin
+				Buffer.add_char this.bbuffer (char_of_int i);
+				this.blength <- this.blength + 1;
+			end else
+				add i
+		end else if i < 0x10000 then begin
+			if i >= 0xD800 && i <= 0xDFFF then exc_string ("Invalid unicode char " ^ (string_of_int i));
+			add i
+		end else if i < 0x110000 then begin
+			let i = i - 0x10000 in
+			add ((i lsr 10 + 0xD800));
+			add ((i land 1023) + 0xDC00);
+		end else
+			exc_string ("Invalid unicode char " ^ (string_of_int i))
+		end;
 		vnull
 		vnull
 	)
 	)
 
 
 	let addSub = vifun3 (fun vthis s pos len ->
 	let addSub = vifun3 (fun vthis s pos len ->
 		let this = this vthis in
 		let this = this vthis in
-		let s = decode_rope s in
+		let s = decode_vstring s in
 		let i = decode_int pos in
 		let i = decode_int pos in
+		let i = if s.sascii then i else i lsl 1 in
 		let len = match len with
 		let len = match len with
-			| VNull -> Rope.length s - i
-			| VInt32 i -> Int32.to_int i
+			| VNull -> Rope.length s.srope - i
+			| VInt32 i -> Int32.to_int i lsl (if s.sascii then 0 else 1)
 			| _ -> unexpected_value len "int"
 			| _ -> unexpected_value len "int"
 		in
 		in
-		Buffer.add_rope this (Rope.sub s i len);
+		let s' = Rope.sub s.srope i len in
+		let s' = if s.sascii then create_ascii_of_rope s'
+		else create_ucs2_of_rope s' len in
+		AwareBuffer.add_string this s';
 		vnull
 		vnull
 	)
 	)
 
 
 	let get_length = vifun0 (fun vthis ->
 	let get_length = vifun0 (fun vthis ->
 		let this = this vthis in
 		let this = this vthis in
-		vint (Buffer.length this)
+		vint this.blength
 	)
 	)
 
 
 	let toString = vifun0 (fun vthis ->
 	let toString = vifun0 (fun vthis ->
-		encode_rope (Buffer.contents (this vthis))
+		let this = this vthis in
+		let s = AwareBuffer.contents this in
+		vstring s
 	)
 	)
 end
 end
 
 
@@ -2098,12 +2159,7 @@ module StdStringTools = struct
 		Common.url_encode s (Rope.Buffer.add_char b);
 		Common.url_encode s (Rope.Buffer.add_char b);
 		Rope.Buffer.contents b
 		Rope.Buffer.contents b
 
 
-	let fastCodeAt = vfun2 (fun s index ->
-		let s = decode_string s in
-		let index = decode_int index in
-		if index >= String.length s then vnull
-		else vint (int_of_char s.[index])
-	)
+	let fastCodeAt = StdString.charCodeAt
 
 
 	let urlEncode = vfun1 (fun s ->
 	let urlEncode = vfun1 (fun s ->
 		let s = decode_string s in
 		let s = decode_string s in
@@ -2112,7 +2168,10 @@ module StdStringTools = struct
 
 
 	let urlDecode = vfun1 (fun s ->
 	let urlDecode = vfun1 (fun s ->
 		let s = decode_string s in
 		let s = decode_string s in
-		let b = Rope.Buffer.create 0 in
+		let b = AwareBuffer.create () in
+		let add s =
+			AwareBuffer.add_string b s
+		in
 		let len = String.length s in
 		let len = String.length s in
 		let decode c =
 		let decode c =
 			match c with
 			match c with
@@ -2121,28 +2180,62 @@ module StdStringTools = struct
 			| 'A'..'F' -> Some (int_of_char c - int_of_char 'A' + 10)
 			| 'A'..'F' -> Some (int_of_char c - int_of_char 'A' + 10)
 			| _ -> None
 			| _ -> None
 		in
 		in
+		let decode_hex i =
+			let p1 = (try decode (String.get s i) with _ -> None) in
+			let p2 = (try decode (String.get s (i + 1)) with _ -> None) in
+			match p1, p2 with
+			| Some c1, Some c2 ->
+				Some (((c1 lsl 4) lor c2))
+			| _ ->
+				None
+		in
+		let expect_hex i =
+			match String.unsafe_get s i with
+			| '%' ->
+				begin match decode_hex (i + 1) with
+				| None -> exc_string "Malformed"
+				| Some c -> c
+				end
+			| _ -> exc_string "Malformed"
+		in
 		let rec loop i =
 		let rec loop i =
 			if i = len then () else
 			if i = len then () else
 			let c = String.unsafe_get s i in
 			let c = String.unsafe_get s i in
 			match c with
 			match c with
 			| '%' ->
 			| '%' ->
-				let p1 = (try decode (String.get s (i + 1)) with _ -> None) in
-				let p2 = (try decode (String.get s (i + 2)) with _ -> None) in
-				(match p1, p2 with
-				| Some c1, Some c2 ->
-					Rope.Buffer.add_char b (char_of_int ((c1 lsl 4) lor c2));
-					loop (i + 3)
-				| _ ->
-					loop (i + 1));
+				begin match decode_hex (i + 1) with
+				| Some c ->
+					if c < 0x80 then begin
+						add (create_ascii (String.make 1 (char_of_int c)));
+						loop (i + 3)
+					end else if c < 0xE0 then begin
+						let c2 = expect_hex (i + 3) in
+						add (from_char_code (((c land 0x3F) lsl 6) lor (c2 land 0x7F)));
+						loop (i + 6)
+					end else if c < 0xF0 then begin
+						let c2 = expect_hex (i + 3) in
+						let c3 = expect_hex (i + 6) in
+						add (from_char_code (((c land 0x1F) lsl 12) lor ((c2 land 0x7F) lsl 6) lor (c3 land 0x7F)));
+						loop (i + 9)
+					end else
+						let c2 = expect_hex (i + 3) in
+						let c3 = expect_hex (i + 6) in
+						let c4 = expect_hex (i + 9) in
+						let k = ((c land 0x0F) lsl 18) lor ((c2 land 0x7F) lsl 12) lor ((c3 land 0x7F) lsl 6) lor (c4 land 0x7F) in
+						add (from_char_code k);
+						loop (i + 12)
+				| None ->
+					loop (i + 1)
+				end;
 			| '+' ->
 			| '+' ->
-				Rope.Buffer.add_char b ' ';
+				add (create_ascii (String.make 1 ' '));
 				loop (i + 1)
 				loop (i + 1)
 			| c ->
 			| c ->
-				Rope.Buffer.add_char b c;
+				add (create_ascii (String.make 1 c));
 				loop (i + 1)
 				loop (i + 1)
 		in
 		in
 		loop 0;
 		loop 0;
-		encode_rope (Rope.Buffer.contents b)
+		vstring (AwareBuffer.contents b)
 	)
 	)
 end
 end
 
 
@@ -2166,7 +2259,7 @@ module StdSys = struct
 		let h = StringHashtbl.create 0 in
 		let h = StringHashtbl.create 0 in
 		Array.iter(fun s ->
 		Array.iter(fun s ->
 			let k, v = ExtString.String.split s "=" in
 			let k, v = ExtString.String.split s "=" in
-			StringHashtbl.replace h (Rope.of_string k,lazy k) (encode_string v)
+			StringHashtbl.replace h (create_ascii k) (encode_string v)
 		) env;
 		) env;
 		encode_string_map_direct h
 		encode_string_map_direct h
 	)
 	)
@@ -2543,17 +2636,12 @@ module StdUtf8 = struct
 		vnull
 		vnull
 	)
 	)
 
 
-	let charCodeAt = vfun2 (fun s index ->
-		let s = decode_string s in
-		let i = decode_int index in
-		let c = try UTF8.get s i with exc -> exc_string (Printexc.to_string exc) in
-		vint (UChar.int_of_uchar c)
-	)
+	let charCodeAt = StdString.charCodeAt
 
 
 	let compare = vfun2 (fun a b ->
 	let compare = vfun2 (fun a b ->
 		let a = decode_string a in
 		let a = decode_string a in
 		let b = decode_string b in
 		let b = decode_string b in
-		vint (UTF8.compare a b)
+		vint (Pervasives.compare a b)
 	)
 	)
 
 
 	let decode = vfun1 (fun s ->
 	let decode = vfun1 (fun s ->
@@ -2564,7 +2652,8 @@ module StdUtf8 = struct
 			Bytes.unsafe_set buf !i (UChar.char_of uc);
 			Bytes.unsafe_set buf !i (UChar.char_of uc);
 			incr i
 			incr i
 		) s;
 		) s;
-		encode_string (Bytes.unsafe_to_string buf)
+		let s = Bytes.unsafe_to_string buf in
+		encode_string s
 	)
 	)
 
 
 	let encode = vfun1 (fun s ->
 	let encode = vfun1 (fun s ->
@@ -2579,25 +2668,15 @@ module StdUtf8 = struct
 	)
 	)
 
 
 	let length = vfun1 (fun s ->
 	let length = vfun1 (fun s ->
-		let s = decode_string s in
-		vint (UTF8.length s)
+		let s = decode_vstring s in
+		vint (s.slength)
 	)
 	)
 
 
-	let sub = vfun3 (fun s pos len ->
-		let s = decode_string s in
-		let pos = decode_int pos in
-		let len = decode_int len in
-		let buf = UTF8.Buf.create 0 in
-		let i = ref (-1) in
-		UTF8.iter (fun c ->
-			incr i;
-			if !i >= pos && !i < pos + len then UTF8.Buf.add_char buf c;
-		) s;
-		encode_string (UTF8.Buf.contents buf)
-	)
+	let sub = StdString.substr
 
 
 	let toString = vifun0 (fun vthis ->
 	let toString = vifun0 (fun vthis ->
-		encode_string (UTF8.Buf.contents (this vthis))
+		let this = this vthis in
+		bytes_to_utf8 (Bytes.unsafe_of_string (UTF8.Buf.contents this))
 	)
 	)
 
 
 	let validate = vfun1 (fun s ->
 	let validate = vfun1 (fun s ->
@@ -2621,12 +2700,12 @@ let init_maps builtins =
 		| VInstance {ikind = IIntMap h} -> h
 		| VInstance {ikind = IIntMap h} -> h
 		| v -> unexpected_value v "int map"
 		| v -> unexpected_value v "int map"
 	in
 	in
-	init_fields builtins (["haxe";"ds"],"IntMap") [] (StdIntMap.map_fields vint decode_int (fun i -> Rope.of_string (string_of_int i)) encode_int_map_direct this);
+	init_fields builtins (["haxe";"ds"],"IntMap") [] (StdIntMap.map_fields vint decode_int (fun i -> create_ascii (string_of_int i)) encode_int_map_direct this);
 	let this vthis = match vthis with
 	let this vthis = match vthis with
 		| VInstance {ikind = IStringMap h} -> h
 		| VInstance {ikind = IStringMap h} -> h
 		| v -> unexpected_value v "string map"
 		| v -> unexpected_value v "string map"
 	in
 	in
-	init_fields builtins (["haxe";"ds"],"StringMap") [] (StdStringMap.map_fields vstring_direct decode_rope_string (fun (r,_) -> r) encode_string_map_direct this);
+	init_fields builtins (["haxe";"ds"],"StringMap") [] (StdStringMap.map_fields vstring decode_vstring (fun s -> s) encode_string_map_direct this);
 	let this vthis = match vthis with
 	let this vthis = match vthis with
 		| VInstance {ikind = IObjectMap h} -> Obj.magic h
 		| VInstance {ikind = IObjectMap h} -> Obj.magic h
 		| v -> unexpected_value v "object map"
 		| v -> unexpected_value v "object map"
@@ -2664,7 +2743,7 @@ let init_constructors builtins =
 			| [s] -> s
 			| [s] -> s
 			| _ -> assert false
 			| _ -> assert false
 		);
 		);
-	add key_StringBuf (fun _ -> encode_instance key_StringBuf ~kind:(IBuffer (Rope.Buffer.create 0)));
+	add key_StringBuf (fun _ -> encode_instance key_StringBuf ~kind:(IBuffer (AwareBuffer.create())));
 	add key_haxe_Utf8
 	add key_haxe_Utf8
 		(fun vl -> match vl with
 		(fun vl -> match vl with
 			| [size] -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create (default_int size 0)))
 			| [size] -> encode_instance key_haxe_Utf8 ~kind:(IUtf8 (UTF8.Buf.create (default_int size 0)))
@@ -3033,6 +3112,7 @@ let init_standard_library builtins =
 		"toString",StdString.toString;
 		"toString",StdString.toString;
 		"toUpperCase",StdString.toUpperCase;
 		"toUpperCase",StdString.toUpperCase;
 		"cca",StdString.cca;
 		"cca",StdString.cca;
+		"isAscii",StdString.isAscii;
 	];
 	];
 	init_fields builtins ([],"StringBuf") [] [
 	init_fields builtins ([],"StringBuf") [] [
 		"add",StdStringBuf.add;
 		"add",StdStringBuf.add;

+ 235 - 0
src/macro/eval/evalString.ml

@@ -0,0 +1,235 @@
+(*
+	The Haxe Compiler
+	Copyright (C) 2005-2018  Haxe Foundation
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *)
+
+open Globals
+open EvalValue
+open EvalBytes
+
+(* An ASCII vstring backed by the OCaml string [s]; its length is the byte count of [s]. *)
+let create_ascii s =
+	let rope = Rope.of_string s in
+	{ srope = rope; sstring = lazy s; slength = String.length s; sascii = true }
+
+(* An ASCII vstring backed by the rope [r]; the flat string is only built on demand. *)
+let create_ascii_of_rope r =
+	let flat = lazy (Rope.to_string r) in
+	{ srope = r; sstring = flat; slength = Rope.length r; sascii = true }
+
+(* A non-ASCII vstring backed by [s]; [length] is supplied by the caller and not derived from [s]. *)
+let create_ucs2 s length =
+	let rope = Rope.of_string s in
+	{ srope = rope; sstring = lazy s; slength = length; sascii = false }
+
+(* A non-ASCII vstring backed by the rope [r], with a caller-supplied [length]. *)
+let create_ucs2_of_rope r length =
+	let flat = lazy (Rope.to_string r) in
+	{ srope = r; sstring = flat; slength = length; sascii = false }
+
+(* Boxes a vstring as a runtime value. *)
+let vstring s = VString s
+
+(* A string buffer that tracks whether its content is still in the
+   one-byte-per-character form and how many characters it holds. *)
+module AwareBuffer = struct
+	type t = vstring_buffer
+
+	(* A fresh, empty buffer; it starts out in the ASCII representation. *)
+	let create () = {
+		bbuffer = Rope.Buffer.create 0;
+		blength = 0;
+		bascii = true;
+	}
+
+	(* Rewrites the current content in the two-byte form and flips the flag. *)
+	let promote_to_ucs this =
+		let widened = extend_ascii (Rope.to_string (Rope.Buffer.contents this.bbuffer)) in
+		Rope.Buffer.clear this.bbuffer;
+		this.bascii <- false;
+		Rope.Buffer.add_string this.bbuffer widened
+
+	(* Appends [s], converting whichever side is narrower so that both agree. *)
+	let add_string this s =
+		if s.sascii = this.bascii then
+			(* same representation on both sides: append the rope as it is *)
+			Rope.Buffer.add_rope this.bbuffer s.srope
+		else if s.sascii then
+			(* wide buffer, narrow string: widen the string *)
+			Rope.Buffer.add_string this.bbuffer (extend_ascii (Lazy.force s.sstring))
+		else begin
+			(* narrow buffer, wide string: widen the buffer first *)
+			promote_to_ucs this;
+			Rope.Buffer.add_rope this.bbuffer s.srope
+		end;
+		this.blength <- this.blength + s.slength
+
+	(* Snapshot of the buffer as a vstring in the buffer's current representation. *)
+	let contents this =
+		let rope = Rope.Buffer.contents this.bbuffer in
+		if this.bascii then create_ascii_of_rope rope
+		else create_ucs2_of_rope rope this.blength
+end
+
+(* Partially applies read_ui16 to the flat bytes of [s]; the remaining
+   argument(s) are whatever read_ui16 expects. *)
+let read_char s =
+	let raw = Bytes.unsafe_of_string (Lazy.force s.sstring) in
+	read_ui16 raw
+
+(*
+	Converts the UTF-8 bytes of [s] to little-endian 16-bit units.
+
+	Returns [(bytes, only_ascii, units)] where [bytes] is the converted string,
+	[only_ascii] is true if every input byte was below 0x80, and [units] is the
+	number of 16-bit units written (a 4-byte sequence produces a surrogate pair
+	and therefore counts as two).
+
+	The input is not validated. If a multi-byte sequence is cut off by the end
+	of the string, the missing bytes are read as 0 instead of reading past the
+	end of the string.
+*)
+let utf8_to_utf16 s =
+	let only_ascii = ref true in
+	let buf = Buffer.create 0 in
+	let l = ref 0 in
+	let add i =
+		incr l;
+		(* Mask each byte so Char.unsafe_chr is never given a value above 255. *)
+		Buffer.add_char buf (Char.unsafe_chr (i land 0xFF));
+		Buffer.add_char buf (Char.unsafe_chr ((i lsr 8) land 0xFF));
+	in
+	let length = String.length s in
+	let i = ref 0 in
+	let get () =
+		(* Bounds-checked: unsafe_get is only reached for a valid index. *)
+		let i' = if !i < length then int_of_char (String.unsafe_get s !i) else 0 in
+		incr i;
+		i'
+	in
+	while !i < length do
+		let c = get() in
+		if c < 0x80 then
+			add c
+		else if c < 0xE0 then begin
+			only_ascii := false;
+			add (((c land 0x3F) lsl 6) lor ((get ()) land 0x7F))
+		end else if c < 0xF0 then begin
+			only_ascii := false;
+			let c2 = get () in
+			add (((c land 0x1F) lsl 12) lor ((c2 land 0x7F) lsl 6) lor ((get ()) land 0x7F));
+		end else begin
+			only_ascii := false;
+			let c2 = get () in
+			let c3 = get () in
+			let c = (((c land 0x0F) lsl 18) lor ((c2 land 0x7F) lsl 12) lor ((c3 land 0x7F) lsl 6) lor ((get ()) land 0x7F)) in
+			(* 0xD7C0 = 0xD800 - (0x10000 lsr 10): high surrogate without a separate subtraction *)
+			add ((c lsr 10) + 0xD7C0);
+			add ((c land 0x3FF) lor 0xDC00);
+		end
+	done;
+	Buffer.contents buf,!only_ascii,!l
+
+(* Converts a string of little-endian 16-bit units to UTF-8. A unit in the
+   0xD800..0xDBFF range is combined with the unit that follows it. *)
+let utf16_to_utf8 s =
+	let out = Buffer.create 0 in
+	let emit byte = Buffer.add_char out (Char.unsafe_chr byte) in
+	(* continuation byte carrying the six bits of [v] that start at bit [shift] *)
+	let tail v shift = emit (0x80 lor ((v lsr shift) land 63)) in
+	let raw = Bytes.unsafe_of_string s in
+	let total = String.length s in
+	let pos = ref 0 in
+	let next_unit () =
+		let lo = read_byte raw !pos in
+		let hi = read_byte raw (!pos + 1) in
+		pos := !pos + 2;
+		lo lor (hi lsl 8)
+	in
+	while !pos < total do
+		let u = next_unit () in
+		let cp =
+			if u >= 0xD800 && u <= 0xDBFF then
+				((u - 0xD7C0) lsl 10) lor ((next_unit ()) land 0x3FF)
+			else
+				u
+		in
+		if cp <= 0x7F then
+			emit cp
+		else if cp <= 0x7FF then begin
+			emit (0xC0 lor (cp lsr 6));
+			tail cp 0;
+		end else if cp <= 0xFFFF then begin
+			emit (0xE0 lor (cp lsr 12));
+			tail cp 6;
+			tail cp 0;
+		end else begin
+			emit (0xF0 lor (cp lsr 18));
+			tail cp 12;
+			tail cp 6;
+			tail cp 0;
+		end
+	done;
+	Buffer.contents out
+
+(* The flat bytes of [s] in the two-byte form: widened if [s] is ASCII,
+   returned unchanged otherwise. *)
+let maybe_extend_ascii s =
+	match Lazy.force s.sstring with
+	| raw when s.sascii -> extend_ascii raw
+	| raw -> raw
+
+(* Concatenates two vstrings. Matching representations are joined as ropes;
+   mixed ones are flattened, with the ASCII side widened first. *)
+let concat s1 s2 =
+	let total = s1.slength + s2.slength in
+	if s1.sascii = s2.sascii then begin
+		let joined = Rope.concat2 s1.srope s2.srope in
+		if s1.sascii then create_ascii_of_rope joined
+		else create_ucs2_of_rope joined total
+	end else begin
+		let widen s =
+			if s.sascii then extend_ascii (Lazy.force s.sstring)
+			else Lazy.force s.sstring
+		in
+		create_ucs2 (widen s1 ^ widen s2) total
+	end
+
+(* Joins the strings in [sl], inserting [sep] between consecutive elements. *)
+let join sep sl =
+	let buf = AwareBuffer.create () in
+	List.iteri (fun idx s ->
+		(* the separator goes in front of every element except the first *)
+		if idx > 0 then AwareBuffer.add_string buf sep;
+		AwareBuffer.add_string buf s
+	) sl;
+	AwareBuffer.contents buf
+
+(* Builds a string value from UTF-8 bytes: pure-ASCII input keeps its
+   one-byte form, anything else is stored in the converted two-byte form. *)
+let bytes_to_utf8 s =
+	let raw = Bytes.unsafe_to_string s in
+	match utf8_to_utf16 raw with
+	| _,true,_ -> vstring (create_ascii raw)
+	| wide,false,count -> vstring (create_ucs2 wide count)
+
+exception InvalidUnicodeChar
+
+(* Builds the one-character string for code point [i].
+   Raises Not_found for negative input and InvalidUnicodeChar for values in
+   the surrogate range 0xD800..0xDFFF or at/above 0x110000. *)
+let from_char_code i =
+	(* packs the given 16-bit units into a non-ASCII vstring *)
+	let of_units units =
+		let n = List.length units in
+		let b = Bytes.create (n * 2) in
+		List.iteri (fun k u -> write_ui16 b (k * 2) u) units;
+		create_ucs2 (Bytes.unsafe_to_string b) n
+	in
+	if i < 0 then
+		raise Not_found
+	else if i < 128 then
+		create_ascii (String.make 1 (char_of_int i))
+	else if i >= 0x110000 || (i >= 0xD800 && i <= 0xDFFF) then
+		raise InvalidUnicodeChar
+	else if i < 0x10000 then
+		of_units [i]
+	else begin
+		(* above the 16-bit range: split into a surrogate pair *)
+		let v = i - 0x10000 in
+		of_units [(v lsr 10) + 0xD800; (v land 1023) + 0xDC00]
+	end
+
+(* The content of [s] as a plain OCaml string: ASCII strings are returned
+   as they are, the two-byte form is converted with utf16_to_utf8. *)
+let get s =
+	match Lazy.force s.sstring with
+	| raw when s.sascii -> raw
+	| raw -> utf16_to_utf8 raw

+ 38 - 5
src/macro/eval/evalValue.ml

@@ -26,12 +26,45 @@ type cmp =
 	| CInf
 	| CInf
 	| CUndef
 	| CUndef
 
 
-type vstring = Rope.t * string Lazy.t
+type vstring = {
+	(* The rope representation of the string. This is what we mainly use. *)
+	srope   : Rope.t;
+	(* The flat byte representation of the string. It is lazy and only forced
+	   when we need it for something like random access. *)
+	sstring : string Lazy.t;
+	(* The length of the string in characters, not in bytes: for a non-ASCII
+	   string this is the number of two-byte units. *)
+	slength : int;
+	(* If true, the string is one-byte-per-character ASCII. Otherwise, it is
+	   encoded as UCS2 (two bytes per unit). *)
+	sascii  : bool;
+}
+
+type vstring_buffer = { (* Backing state of a StringBuf instance (see IBuffer). *)
+	        bbuffer : Rope.Buffer.t; (* accumulated content, in the representation selected by bascii *)
+	mutable blength : int; (* running sum of the slength of everything appended *)
+	mutable bascii  : bool; (* true while the content is still one byte per character *)
+}
+
+(* Widens a one-byte-per-character string to two bytes per character by
+   following every byte with a zero byte. *)
+let extend_ascii s =
+	let widened = Bytes.init (String.length s lsl 1) (fun k ->
+		(* even offsets carry the original bytes, odd offsets stay zero *)
+		if k land 1 = 0 then String.unsafe_get s (k lsr 1) else '\000'
+	) in
+	Bytes.unsafe_to_string widened
+
+(*
+	Content equality on vstrings, independent of representation.
+
+	When both strings use the same representation, physical equality of the
+	ropes is tried first and the flat bytes are compared otherwise. When the
+	representations differ, the ASCII side is widened to two bytes per
+	character before the comparison, so the result does not depend on the
+	argument order.
+*)
+let vstring_equal s1 s2 =
+	if s1.sascii = s2.sascii then
+		s1.srope == s2.srope || Lazy.force s1.sstring = Lazy.force s2.sstring
+	else if not s2.sascii then
+		(* s1 is ASCII, s2 is two-byte: widen s1 *)
+		extend_ascii (Lazy.force s1.sstring) = Lazy.force s2.sstring
+	else
+		(* s1 is two-byte, s2 is ASCII: widen s2 *)
+		Lazy.force s1.sstring = extend_ascii (Lazy.force s2.sstring)
 
 
 module StringHashtbl = Hashtbl.Make(struct
 module StringHashtbl = Hashtbl.Make(struct
 	type t = vstring
 	type t = vstring
-	let equal (r1,s1) (r2,s2) = r1 == r2 || Lazy.force s1 = Lazy.force s2
-	let hash (_,s) = Hashtbl.hash (Lazy.force s)
+	let equal = vstring_equal
+	let hash s = Hashtbl.hash (Lazy.force s.sstring)
 end)
 end)
 
 
 module IntHashtbl = Hashtbl.Make(struct type t = int let equal = (=) let hash = Hashtbl.hash end)
 module IntHashtbl = Hashtbl.Make(struct type t = int let equal = (=) let hash = Hashtbl.hash end)
@@ -114,7 +147,7 @@ and vinstance_kind =
 	| IIntMap of value IntHashtbl.t
 	| IIntMap of value IntHashtbl.t
 	| IObjectMap of (value,value) Hashtbl.t
 	| IObjectMap of (value,value) Hashtbl.t
 	| IOutput of Buffer.t (* BytesBuffer *)
 	| IOutput of Buffer.t (* BytesBuffer *)
-	| IBuffer of Rope.Buffer.t (* StringBuf *)
+	| IBuffer of vstring_buffer(* StringBuf *)
 	| IPos of pos
 	| IPos of pos
 	| IUtf8 of UTF8.Buf.buf
 	| IUtf8 of UTF8.Buf.buf
 	| IProcess of Process.process
 	| IProcess of Process.process
@@ -165,7 +198,7 @@ let rec equals a b = match a,b with
 	| VEnumValue a,VEnumValue b -> a == b || a.eindex = b.eindex && Array.length a.eargs = 0 && Array.length b.eargs = 0 && a.epath = b.epath
 	| VEnumValue a,VEnumValue b -> a == b || a.eindex = b.eindex && Array.length a.eargs = 0 && Array.length b.eargs = 0 && a.epath = b.epath
 	| VObject vo1,VObject vo2 -> vo1 == vo2
 	| VObject vo1,VObject vo2 -> vo1 == vo2
 	| VInstance vi1,VInstance vi2 -> vi1 == vi2
 	| VInstance vi1,VInstance vi2 -> vi1 == vi2
-	| VString(r1,s1),VString(r2,s2) -> r1 == r2 || Lazy.force s1 = Lazy.force s2
+	| VString s1,VString s2 -> vstring_equal s1 s2
 	| VArray va1,VArray va2 -> va1 == va2
 	| VArray va1,VArray va2 -> va1 == va2
 	| VVector vv1,VVector vv2 -> vv1 == vv2
 	| VVector vv1,VVector vv2 -> vv1 == vv2
 	| VFunction(vf1,_),VFunction(vf2,_) -> vf1 == vf2
 	| VFunction(vf1,_),VFunction(vf2,_) -> vf1 == vf2

+ 4 - 3
src/typing/typer.ml

@@ -2563,9 +2563,10 @@ let rec create com =
 			()
 			()
 	) ctx.g.std.m_types;
 	) ctx.g.std.m_types;
 	let m = TypeloadModule.load_module ctx ([],"String") null_pos in
 	let m = TypeloadModule.load_module ctx ([],"String") null_pos in
-	(match m.m_types with
-	| [TClassDecl c] -> ctx.t.tstring <- TInst (c,[])
-	| _ -> assert false);
+	List.iter (fun mt -> match mt with
+		| TClassDecl c -> ctx.t.tstring <- TInst (c,[])
+		| _ -> ()
+	) m.m_types;
 	let m = TypeloadModule.load_module ctx ([],"Array") null_pos in
 	let m = TypeloadModule.load_module ctx ([],"Array") null_pos in
 	(try
 	(try
 		List.iter (fun t -> (
 		List.iter (fun t -> (

+ 1 - 1
std/StringTools.hx

@@ -457,7 +457,7 @@ class StringTools {
 		#elseif hl
 		#elseif hl
 		return @:privateAccess s.bytes.getUI16(index << 1);
 		return @:privateAccess s.bytes.getUI16(index << 1);
 		#elseif lua
 		#elseif lua
-		return lua.NativeStringTools.byte(s,index+1);
+		return lua.lib.luautf8.Utf8.byte(s,index+1);
 		#else
 		#else
 		return untyped s.cca(index);
 		return untyped s.cca(index);
 		#end
 		#end

+ 3 - 2
std/cs/internal/StringExt.hx

@@ -180,9 +180,10 @@ private typedef NativeString = cs.system.String;
 		return me;
 		return me;
 	}
 	}
 
 
-	public static function fromCharCode(code:Int):NativeString
+	public static function fromCharCode(code:Int):String
 	{
 	{
-		return new NativeString( cast(code,cs.StdTypes.Char16), 1 );
+		return cs.system.Char.ConvertFromUtf32(code);
+		// return new NativeString( cast(code,cs.StdTypes.Char16), 1 );
 	}
 	}
 }
 }
 
 

+ 3 - 2
std/eval/_std/haxe/io/Bytes.hx

@@ -21,6 +21,7 @@
  */
  */
 package haxe.io;
 package haxe.io;
 
 
+//@:coreApi
 extern class Bytes {
 extern class Bytes {
 	function new(length:Int,b:BytesData):Void;
 	function new(length:Int,b:BytesData):Void;
 	public var length(default,null):Int;
 	public var length(default,null):Int;
@@ -40,13 +41,13 @@ extern class Bytes {
 	public function getInt64( pos : Int ) : haxe.Int64;
 	public function getInt64( pos : Int ) : haxe.Int64;
 	public function setInt32( pos : Int, v : Int ) : Void;
 	public function setInt32( pos : Int, v : Int ) : Void;
 	public function setInt64( pos : Int, v : haxe.Int64 ) : Void;
 	public function setInt64( pos : Int, v : haxe.Int64 ) : Void;
-	public function getString( pos : Int, len : Int ) : String;
+	public function getString( pos : Int, len : Int, ?encoding : Encoding ) : String;
 	public function toString() : String;
 	public function toString() : String;
 	public function toHex() : String;
 	public function toHex() : String;
 	public function getData() : BytesData;
 	public function getData() : BytesData;
 	public static function alloc( length : Int ) : Bytes;
 	public static function alloc( length : Int ) : Bytes;
 	@:pure
 	@:pure
-	public static function ofString( s : String ) : Bytes;
+	public static function ofString( s : String, ?encoding : Encoding ) : Bytes;
 	public static function ofData( b : BytesData ) : Bytes;
 	public static function ofData( b : BytesData ) : Bytes;
 	public static function fastGet( b : BytesData, pos : Int ) : Int;
 	public static function fastGet( b : BytesData, pos : Int ) : Int;
 	static function __init__():Void {
 	static function __init__():Void {

+ 1 - 1
std/eval/_std/haxe/io/BytesBuffer.hx

@@ -28,7 +28,7 @@ extern class BytesBuffer {
 	function get_length():Int;
 	function get_length():Int;
 	public function addByte(byte:Int):Void;
 	public function addByte(byte:Int):Void;
 	public function add(src:Bytes):Void;
 	public function add(src:Bytes):Void;
-	public function addString(v:String):Void;
+	public function addString(v:String,?encoding:Encoding):Void;
 	public function addInt32(v:Int):Void;
 	public function addInt32(v:Int):Void;
 	public function addInt64(v:haxe.Int64):Void;
 	public function addInt64(v:haxe.Int64):Void;
 	public function addFloat(v:Float):Void;
 	public function addFloat(v:Float):Void;

+ 9 - 0
std/flash/Boot.hx

@@ -212,6 +212,15 @@ class Boot extends flash.display.MovieClip {
 		return new String(v);
 		return new String(v);
 	}
 	}
 
 
+	/**
+		Builds a string from `code` by writing a UTF-16 surrogate pair into a
+		little-endian byte array and reading the four bytes back as "unicode".
+	**/
+	static public function fromCodePoint( code : Int ) {
+		var pair = new flash.utils.ByteArray();
+		pair.endian = LITTLE_ENDIAN;
+		var high = (code>>10) + 0xD7C0;
+		var low = (code&0x3FF) + 0xDC00;
+		pair.writeShort(high);
+		pair.writeShort(low);
+		pair.position = 0;
+		return pair.readMultiByte(4,"unicode");
+	}
+
 	static function __unprotect__( s : String ) {
 	static function __unprotect__( s : String ) {
 		return s;
 		return s;
 	}
 	}

+ 43 - 0
std/flash/_std/String.hx

@@ -0,0 +1,43 @@
+/*
+ * Copyright (C)2005-2018 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+@:coreApi
+extern class String {
+
+	var length(default,null) : Int;
+	function new(string:String) : Void;
+	function toUpperCase() : String;
+	function toLowerCase() : String;
+	function charAt(index : Int) : String;
+	function charCodeAt( index : Int) : Null<Int>;
+	function indexOf( str : String, ?startIndex : Int ) : Int;
+	function lastIndexOf( str : String, ?startIndex : Int ) : Int;
+	function split( delimiter : String ) : Array<String>;
+	function substr( pos : Int, ?len : Int ) : String;
+	function substring( startIndex : Int, ?endIndex : Int ) : String;
+	function toString() : String;
+
+	@:pure static inline function fromCharCode( code : Int ) : String untyped { // the only member with a body; the rest are extern declarations
+		return code < 0x10000 ? String["fromCharCode"](code) : flash.Boot.fromCodePoint(code); // codes below 0x10000 use the native call, the rest go through flash.Boot.fromCodePoint
+	}
+	
+}

+ 2 - 2
std/haxe/format/JsonParser.hx

@@ -172,7 +172,7 @@ class JsonParser {
 				case 'u'.code:
 				case 'u'.code:
 					var uc = Std.parseInt("0x" + str.substr(pos, 4));
 					var uc = Std.parseInt("0x" + str.substr(pos, 4));
 					pos += 4;
 					pos += 4;
-					#if (neko || php || (cpp&&!hxcpp_smart_strings) || lua || eval)
+					#if (neko || (cpp&&!hxcpp_smart_strings))
 					if( uc <= 0x7F )
 					if( uc <= 0x7F )
 						buf.addChar(uc);
 						buf.addChar(uc);
 					else if( uc <= 0x7FF ) {
 					else if( uc <= 0x7FF ) {
@@ -196,7 +196,7 @@ class JsonParser {
 				}
 				}
 				start = pos;
 				start = pos;
 			}
 			}
-			#if (neko || php || (cpp&&!hxcpp_smart_strings) )
+			#if (neko || (cpp&&!hxcpp_smart_strings) )
 			// ensure utf8 chars are not cut
 			// ensure utf8 chars are not cut
 			else if( c >= 0x80 ) {
 			else if( c >= 0x80 ) {
 				pos++;
 				pos++;

+ 56 - 17
std/haxe/io/Bytes.hx

@@ -346,7 +346,8 @@ class Bytes {
 		setInt32(pos + 4, v.high);
 		setInt32(pos + 4, v.high);
 	}
 	}
 
 
-	public function getString( pos : Int, len : Int ) : String {
+	public function getString( pos : Int, len : Int, ?encoding : Encoding ) : String {
+		if( encoding == null ) encoding == UTF8;
 		#if !neko
 		#if !neko
 		if( pos < 0 || len < 0 || pos + len > length ) throw Error.OutsideBounds;
 		if( pos < 0 || len < 0 || pos + len > length ) throw Error.OutsideBounds;
 		#end
 		#end
@@ -354,23 +355,43 @@ class Bytes {
 		return try new String(untyped __dollar__ssub(b,pos,len)) catch( e : Dynamic ) throw Error.OutsideBounds;
 		return try new String(untyped __dollar__ssub(b,pos,len)) catch( e : Dynamic ) throw Error.OutsideBounds;
 		#elseif flash
 		#elseif flash
 		b.position = pos;
 		b.position = pos;
-		return b.readUTFBytes(len);
+		return encoding == RawNative ? b.readMultiByte(len, "unicode") : b.readUTFBytes(len);
 		#elseif cpp
 		#elseif cpp
 		var result:String="";
 		var result:String="";
 		untyped __global__.__hxcpp_string_of_bytes(b,result,pos,len);
 		untyped __global__.__hxcpp_string_of_bytes(b,result,pos,len);
 		return result;
 		return result;
 		#elseif cs
 		#elseif cs
-		return cs.system.text.Encoding.UTF8.GetString(b, pos, len);
+		switch (encoding) {
+			case UTF8 | null:
+				return cs.system.text.Encoding.UTF8.GetString(b, pos, len);
+			case RawNative:
+				return cs.system.text.Encoding.Unicode.GetString(b, pos, len);
+		}
 		#elseif java
 		#elseif java
-		try
-			return new String(b, pos, len, "UTF-8")
-		catch (e:Dynamic) throw e;
+		try {
+			switch (encoding) {
+				case UTF8 | null:
+					return new String(b, pos, len, "UTF-8");
+				case RawNative:
+					return new String(b, pos, len, "UTF-16LE");
+			}
+		} catch (e:Dynamic) {
+			throw e;
+		}
 		#elseif python
 		#elseif python
 		return python.Syntax.code("self.b[{0}:{0}+{1}].decode('UTF-8','replace')", pos, len);
 		return python.Syntax.code("self.b[{0}:{0}+{1}].decode('UTF-8','replace')", pos, len);
 		#elseif lua
 		#elseif lua
-		var begin = cast(Math.min(pos,b.length),Int);
-		var end = cast(Math.min(pos+len,b.length),Int);
-		return [for (i in begin...end) String.fromCharCode(b[i])].join("");
+
+		if (b.length - pos <= lua.Boot.MAXSTACKSIZE){
+			var end : Int = cast Math.min(b.length, pos+len) - 1;
+			return lua.NativeStringTools.char(lua.TableTools.unpack(untyped b, pos, end));
+		} else {
+			var tbl : lua.Table<Int,String> = lua.Table.create();
+			for (idx in pos...pos+len){
+				lua.Table.insert(tbl, lua.NativeStringTools.char(b[idx]));
+			}
+			return lua.Table.concat(tbl, '');
+		}
 		#else
 		#else
 		var s = "";
 		var s = "";
 		var b = b;
 		var b = b;
@@ -407,6 +428,9 @@ class Bytes {
 		return getString(pos, len);
 		return getString(pos, len);
 	}
 	}
 
 
+	/**
+		Returns string representation of the bytes as UTF8
+	**/
 	public function toString() : String {
 	public function toString() : String {
 		#if neko
 		#if neko
 		return new String(untyped __dollar__ssub(b,0,length));
 		return new String(untyped __dollar__ssub(b,0,length));
@@ -469,35 +493,50 @@ class Bytes {
 		#end
 		#end
 	}
 	}
 
 
+	/**
+		Returns bytes representation of the given String, using specific encoding (UTF-8 by default)
+	**/
 	@:pure
 	@:pure
-	public static function ofString( s : String ) : Bytes {
+	public static function ofString( s : String, ?encoding : Encoding ) : Bytes {
 		#if neko
 		#if neko
 		return new Bytes(s.length,untyped __dollar__ssub(s.__s,0,s.length));
 		return new Bytes(s.length,untyped __dollar__ssub(s.__s,0,s.length));
 		#elseif flash
 		#elseif flash
 		var b = new flash.utils.ByteArray();
 		var b = new flash.utils.ByteArray();
-		b.writeUTFBytes(s);
+		if( encoding == RawNative ) b.writeMultiByte(s,"unicode") else b.writeUTFBytes(s);
 		return new Bytes(b.length,b);
 		return new Bytes(b.length,b);
 		#elseif cpp
 		#elseif cpp
 		var a = new BytesData();
 		var a = new BytesData();
 		untyped __global__.__hxcpp_bytes_of_string(a,s);
 		untyped __global__.__hxcpp_bytes_of_string(a,s);
 		return new Bytes(a.length, a);
 		return new Bytes(a.length, a);
 		#elseif cs
 		#elseif cs
-		var b = cs.system.text.Encoding.UTF8.GetBytes(s);
+		var b = switch (encoding) {
+			case UTF8 | null:
+				cs.system.text.Encoding.UTF8.GetBytes(s);
+			case RawNative:
+				cs.system.text.Encoding.Unicode.GetBytes(s);
+		};
 		return new Bytes(b.Length, b);
 		return new Bytes(b.Length, b);
 		#elseif java
 		#elseif java
-		try
-		{
-			var b:BytesData = untyped s.getBytes("UTF-8");
+		try {
+			var b:BytesData = switch (encoding) {
+				case UTF8 | null:
+					@:privateAccess s.getBytes("UTF-8");
+				case RawNative:
+					@:privateAccess s.getBytes("UTF-16LE");
+			};
 			return new Bytes(b.length, b);
 			return new Bytes(b.length, b);
+		} catch (e:Dynamic) {
+			throw e;
 		}
 		}
-		catch (e:Dynamic) throw e;
 
 
 		#elseif python
 		#elseif python
 			var b:BytesData = new python.Bytearray(s, "UTF-8");
 			var b:BytesData = new python.Bytearray(s, "UTF-8");
 			return new Bytes(b.length, b);
 			return new Bytes(b.length, b);
 
 
 		#elseif lua
 		#elseif lua
-			var bytes = [for (c in 0...s.length) StringTools.fastCodeAt(s,c)];
+			var bytes = [for (i in 0...lua.NativeStringTools.len(s)) {
+					lua.NativeStringTools.byte(s,i+1);
+			}];
 			return new Bytes(bytes.length, bytes);
 			return new Bytes(bytes.length, bytes);
 		#else
 		#else
 		var a = new Array();
 		var a = new Array();

+ 3 - 3
std/haxe/io/BytesBuffer.hx

@@ -107,13 +107,13 @@ class BytesBuffer {
 		#end
 		#end
 	}
 	}
 
 
-	public inline function addString( v : String ) {
+	public inline function addString( v : String, ?encoding : Encoding ) {
 		#if neko
 		#if neko
 		untyped StringBuf.__add(b, v.__s);
 		untyped StringBuf.__add(b, v.__s);
 		#elseif flash
 		#elseif flash
-		b.writeUTFBytes(v);
+		if( encoding == RawNative ) b.writeMultiByte(v, "unicode") else b.writeUTFBytes(v);
 		#else
 		#else
-		add(Bytes.ofString(v));
+		add(Bytes.ofString(v,encoding));
 		#end
 		#end
 	}
 	}
 
 

+ 2 - 2
std/haxe/io/BytesInput.hx

@@ -191,8 +191,8 @@ class BytesInput extends Input {
 	}
 	}
 
 
 	@:dox(hide)
 	@:dox(hide)
-	override function readString( len : Int ) {
-		return try b.readUTFBytes(len) catch( e : Dynamic ) throw new Eof();
+	override function readString( len : Int, ?encoding : Encoding ) {
+		return try encoding == RawNative ? b.readMultiByte(len,"unicode") : b.readUTFBytes(len) catch( e : Dynamic ) throw new Eof();
 	}
 	}
 
 
 	#end
 	#end

+ 5 - 2
std/haxe/io/BytesOutput.hx

@@ -116,8 +116,11 @@ class BytesOutput extends Output {
 	}
 	}
 
 
 	@:dox(hide)
 	@:dox(hide)
-	override function writeString( s : String ) {
-		b.writeUTFBytes(s);
+	override function writeString( s : String, ?encoding : Encoding ) {
+		if( encoding == RawNative )
+			b.writeMultiByte(s, "unicode");
+		else
+			b.writeUTFBytes(s);
 	}
 	}
 
 
 	#end
 	#end

+ 33 - 0
std/haxe/io/Encoding.hx

@@ -0,0 +1,33 @@
+/*
+ * Copyright (C)2005-2018 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package haxe.io;
+
+/**
+	String binary encoding supported by Haxe I/O
+**/
+enum Encoding {
+	UTF8;
+	/**
+		Output the string the way the platform represents it in memory. This is the most efficient option, but it is platform-specific.
+	**/
+	RawNative;
+}

+ 2 - 2
std/haxe/io/Input.hx

@@ -300,13 +300,13 @@ class Input {
 	/**
 	/**
 		Read `len` bytes as a string.
 		Read `len` bytes as a string.
 	**/
 	**/
-	public function readString( len : Int ) : String {
+	public function readString( len : Int, ?encoding : Encoding ) : String {
 		var b = Bytes.alloc(len);
 		var b = Bytes.alloc(len);
 		readFullBytes(b,0,len);
 		readFullBytes(b,0,len);
 		#if neko
 		#if neko
 		return neko.Lib.stringReference(b);
 		return neko.Lib.stringReference(b);
 		#else
 		#else
-		return b.toString();
+		return b.getString(0, len, encoding);
 		#end
 		#end
 	}
 	}
 
 

+ 2 - 2
std/haxe/io/Output.hx

@@ -275,11 +275,11 @@ class Output {
 	/**
 	/**
 		Write `s` string.
 		Write `s` string.
 	**/
 	**/
-	public function writeString( s : String ) {
+	public function writeString( s : String, ?encoding : Encoding ) {
 		#if neko
 		#if neko
 		var b = untyped new Bytes(s.length,s.__s);
 		var b = untyped new Bytes(s.length,s.__s);
 		#else
 		#else
-		var b = Bytes.ofString(s);
+		var b = Bytes.ofString(s, encoding);
 		#end
 		#end
 		writeFullBytes(b,0,b.length);
 		writeFullBytes(b,0,b.length);
 	}
 	}

+ 1 - 1
std/haxe/xml/Parser.hx

@@ -378,7 +378,7 @@ class Parser
 							var c = s.fastCodeAt(1) == 'x'.code
 							var c = s.fastCodeAt(1) == 'x'.code
 								? Std.parseInt("0" +s.substr(1, s.length - 1))
 								? Std.parseInt("0" +s.substr(1, s.length - 1))
 								: Std.parseInt(s.substr(1, s.length - 1));
 								: Std.parseInt(s.substr(1, s.length - 1));
-							#if (neko || (cpp && !hxcpp_smart_strings) || php || lua || eval)
+							#if (neko || (cpp && !hxcpp_smart_strings))
 							if( c >= 128 ) {
 							if( c >= 128 ) {
 								// UTF8-encode it
 								// UTF8-encode it
 								if( c <= 0x7FF ) {
 								if( c <= 0x7FF ) {

+ 12 - 6
std/hl/_std/haxe/io/Bytes.hx

@@ -119,13 +119,13 @@ class Bytes {
 		setInt32(pos, v.low);
 		setInt32(pos, v.low);
 	}
 	}
 
 
-	public function getString( pos : Int, len : Int ) : String {
+	public function getString( pos : Int, len : Int, ?encoding : Encoding ) : String {
 		if( outRange(pos,len) ) throw Error.OutsideBounds;
 		if( outRange(pos,len) ) throw Error.OutsideBounds;
 
 
 		var b = new hl.Bytes(len + 1);
 		var b = new hl.Bytes(len + 1);
 		b.blit(0, this.b, pos, len);
 		b.blit(0, this.b, pos, len);
 		b[len] = 0;
 		b[len] = 0;
-		return @:privateAccess String.fromUTF8(b);
+		return @:privateAccess (encoding == RawNative ? String.fromUCS2(b) : String.fromUTF8(b));
 	}
 	}
 
 
 	@:deprecated("readString is deprecated, use getString instead")
 	@:deprecated("readString is deprecated, use getString instead")
@@ -162,10 +162,16 @@ class Bytes {
 		return new Bytes(b,length);
 		return new Bytes(b,length);
 	}
 	}
 
 
-	public static function ofString( s : String ) : Bytes @:privateAccess {
-		var size = 0;
-		var b = s.bytes.utf16ToUtf8(0, size);
-		return new Bytes(b,size);
+	public static function ofString( s : String, ?encoding : Encoding ) : Bytes @:privateAccess {
+		if( encoding == null ) encoding = UTF8;
+		return switch( encoding ) {
+		case RawNative:
+			return new Bytes(s.bytes.sub(0,s.length << 1), s.length << 1);
+		case UTF8:
+			var size = 0;
+			var b = s.bytes.utf16ToUtf8(0, size);
+			return new Bytes(b,size);
+		}
 	}
 	}
 
 
 	public static function ofData( b : BytesData ) : Bytes {
 	public static function ofData( b : BytesData ) : Bytes {

+ 2 - 2
std/hl/_std/haxe/io/BytesBuffer.hx

@@ -64,9 +64,9 @@ class BytesBuffer {
 		__add(@:privateAccess src.b, 0, src.length);
 		__add(@:privateAccess src.b, 0, src.length);
 	}
 	}
 
 
-	public inline function addString( v : String ) : Void {
+	public inline function addString( v : String, ?encoding : Encoding ) : Void {
 		var len = 0;
 		var len = 0;
-		@:privateAccess __add(v.bytes.utf16ToUtf8(0, len), 0, len);
+		@:privateAccess (encoding == RawNative ? __add(v.bytes,0,v.length<<1) : __add(v.bytes.utf16ToUtf8(0, len), 0, len));
 	}
 	}
 
 
 	public inline function addInt32( v : Int ) : Void {
 	public inline function addInt32( v : Int ) : Void {

+ 1 - 0
std/java/_std/String.hx

@@ -26,6 +26,7 @@
 
 
 	@:overload(function(b:haxe.io.BytesData, offset:Int, length:Int, charsetName:String):Void { })
 	@:overload(function(b:haxe.io.BytesData, offset:Int, length:Int, charsetName:String):Void { })
 	@:overload(function(b:haxe.io.BytesData, offset:Int, length:Int):Void { })
 	@:overload(function(b:haxe.io.BytesData, offset:Int, length:Int):Void { })
+	@:overload(function(b:java.NativeArray<java.StdTypes.Char16>):Void { })
 	function new(string:String) : Void;
 	function new(string:String) : Void;
 
 
 	function toUpperCase() : String;
 	function toUpperCase() : String;

+ 2 - 5
std/java/internal/StringExt.hx

@@ -195,12 +195,9 @@ private typedef NativeString = String;
 		return me;
 		return me;
 	}
 	}
 
 
-	@:functionCode('
-		return java.lang.Character.toString( (char) code );
-	')
-	public static function fromCharCode(code:Int):NativeString
+	public static function fromCharCode(code:Int):String
 	{
 	{
-		return null;
+		return new String(java.lang.Character.toChars(code));
 	}
 	}
 }
 }
 
 

+ 8 - 1
std/js/_std/String.hx

@@ -40,5 +40,12 @@
 		return @:privateAccess HxOverrides.substr(this, pos, len);
 		return @:privateAccess HxOverrides.substr(this, pos, len);
 	}
 	}
 
 
-	@:pure static function fromCharCode( code : Int ) : String;
+	@:pure static inline function fromCharCode( code : Int ) : String {
+		return js.Syntax.code("String.fromCodePoint({0})",code); 
+	}
+	
+	static function __init__() : Void {
+		js.Syntax.code("if( String.fromCodePoint == null ) String.fromCodePoint = function(c) { return c < 0x10000 ? String.fromCharCode(c) : String.fromCharCode((c>>10)+0xD7C0)+String.fromCharCode((c&0x3FF)+0xDC00); }");
+	}
+	
 }
 }

+ 37 - 21
std/js/_std/haxe/io/Bytes.hx

@@ -132,31 +132,38 @@ class Bytes {
 		setInt32(pos + 4, v.high);
 		setInt32(pos + 4, v.high);
 	}
 	}
 
 
-	public function getString( pos : Int, len : Int ) : String {
+	public function getString( pos : Int, len : Int, ?encoding : Encoding ) : String {
 		if( pos < 0 || len < 0 || pos + len > length ) throw Error.OutsideBounds;
 		if( pos < 0 || len < 0 || pos + len > length ) throw Error.OutsideBounds;
+		if( encoding == null ) encoding = UTF8;
 		var s = "";
 		var s = "";
 		var b = b;
 		var b = b;
-		var fcc = String.fromCharCode;
 		var i = pos;
 		var i = pos;
 		var max = pos+len;
 		var max = pos+len;
-		// utf8-decode and utf16-encode
-		while( i < max ) {
-			var c = b[i++];
-			if( c < 0x80 ) {
-				if( c == 0 ) break;
-				s += fcc(c);
-			} else if( c < 0xE0 )
-				s += fcc( ((c & 0x3F) << 6) | (b[i++] & 0x7F) );
-			else if( c < 0xF0 ) {
-				var c2 = b[i++];
-				s += fcc( ((c & 0x1F) << 12) | ((c2 & 0x7F) << 6) | (b[i++] & 0x7F) );
-			} else {
-				var c2 = b[i++];
-				var c3 = b[i++];
-				var u = ((c & 0x0F) << 18) | ((c2 & 0x7F) << 12) | ((c3 & 0x7F) << 6) | (b[i++] & 0x7F);
-				// surrogate pair
-				s += fcc( (u >> 10) + 0xD7C0 );
-				s += fcc( (u & 0x3FF) | 0xDC00 );
+		switch( encoding ) {
+		case UTF8:
+			var debug = pos > 0;
+			// utf8-decode and utf16-encode
+			while( i < max ) {
+				var c = b[i++];
+				if( c < 0x80 ) {
+					if( c == 0 ) break;
+					s += String.fromCharCode(c);
+				} else if( c < 0xE0 )
+					s += String.fromCharCode( ((c & 0x3F) << 6) | (b[i++] & 0x7F) );
+				else if( c < 0xF0 ) {
+					var c2 = b[i++];
+					s += String.fromCharCode( ((c & 0x1F) << 12) | ((c2 & 0x7F) << 6) | (b[i++] & 0x7F) );
+				} else {
+					var c2 = b[i++];
+					var c3 = b[i++];
+					var u = ((c & 0x0F) << 18) | ((c2 & 0x7F) << 12) | ((c3 & 0x7F) << 6) | (b[i++] & 0x7F);
+					s += String.fromCharCode(u);
+				}
+			}
+		case RawNative:
+			while( i < max ) {
+				var c = b[i++] | (b[i++] << 8);
+				s += String.fromCharCode(c);
 			}
 			}
 		}
 		}
 		return s;
 		return s;
@@ -194,7 +201,16 @@ class Bytes {
 		return new Bytes(new BytesData(length));
 		return new Bytes(new BytesData(length));
 	}
 	}
 
 
-	public static function ofString( s : String ) : Bytes {
+	public static function ofString( s : String, ?encoding : Encoding ) : Bytes {
+		if( encoding == RawNative ) {
+			var buf = new js.html.Uint8Array(s.length << 1);
+			for( i in 0...s.length ) {
+				var c : Int = StringTools.fastCodeAt(s,i);
+				buf[i << 1] = c & 0xFF;
+				buf[(i << 1)|1] = c >> 8;
+			}
+			return new Bytes(buf.buffer);
+		}
 		var a = new Array();
 		var a = new Array();
 		// utf16-decode and utf8-encode
 		// utf16-decode and utf8-encode
 		var i = 0;
 		var i = 0;

+ 11 - 10
std/js/_std/haxe/io/BytesBuffer.hx

@@ -21,6 +21,7 @@
  */
  */
 package haxe.io;
 package haxe.io;
 
 
+@:coreApi
 class BytesBuffer {
 class BytesBuffer {
 
 
 	var buffer : js.html.ArrayBuffer;
 	var buffer : js.html.ArrayBuffer;
@@ -40,12 +41,12 @@ class BytesBuffer {
 		return pos;
 		return pos;
 	}
 	}
 
 
-	public function addByte( byte : Int ) {
+	public function addByte( byte : Int ) : Void {
 		if( pos == size ) grow(1);
 		if( pos == size ) grow(1);
 		view.setUint8(pos++, byte);
 		view.setUint8(pos++, byte);
 	}
 	}
 
 
-	public function add( src : Bytes ) {
+	public function add( src : Bytes ) : Void {
 		if( pos + src.length > size ) grow(src.length);
 		if( pos + src.length > size ) grow(src.length);
 		if( size == 0 ) return;
 		if( size == 0 ) return;
 		var sub = new js.html.Uint8Array(@:privateAccess src.b.buffer, @:privateAccess src.b.byteOffset, src.length);
 		var sub = new js.html.Uint8Array(@:privateAccess src.b.buffer, @:privateAccess src.b.byteOffset, src.length);
@@ -53,36 +54,36 @@ class BytesBuffer {
 		pos += src.length;
 		pos += src.length;
 	}
 	}
 
 
-	public function addString( v : String ) {
-		add(Bytes.ofString(v));
+	public function addString( v : String, ?encoding : Encoding ) : Void {
+		add(Bytes.ofString(v,encoding));
 	}
 	}
 
 
-	public function addInt32( v : Int ) {
+	public function addInt32( v : Int ) : Void {
 		if( pos + 4 > size ) grow(4);
 		if( pos + 4 > size ) grow(4);
 		view.setInt32(pos, v, true);
 		view.setInt32(pos, v, true);
 		pos += 4;
 		pos += 4;
 	}
 	}
 
 
-	public function addInt64( v : haxe.Int64 ) {
+	public function addInt64( v : haxe.Int64 ) : Void {
 		if( pos + 8 > size ) grow(8);
 		if( pos + 8 > size ) grow(8);
 		view.setInt32(pos, v.low, true);
 		view.setInt32(pos, v.low, true);
 		view.setInt32(pos + 4, v.high, true);
 		view.setInt32(pos + 4, v.high, true);
 		pos += 8;
 		pos += 8;
 	}
 	}
 
 
-	public function addFloat( v : Float ) {
+	public function addFloat( v : Float ) : Void {
 		if( pos + 4 > size ) grow(4);
 		if( pos + 4 > size ) grow(4);
 		view.setFloat32(pos, v, true);
 		view.setFloat32(pos, v, true);
 		pos += 4;
 		pos += 4;
 	}
 	}
 
 
-	public function addDouble( v : Float ) {
+	public function addDouble( v : Float ) : Void {
 		if( pos + 8 > size ) grow(8);
 		if( pos + 8 > size ) grow(8);
 		view.setFloat64(pos, v, true);
 		view.setFloat64(pos, v, true);
 		pos += 8;
 		pos += 8;
 	}
 	}
 
 
-	public function addBytes( src : Bytes, pos : Int, len : Int ) {
+	public function addBytes( src : Bytes, pos : Int, len : Int ) : Void {
 		if( pos < 0 || len < 0 || pos + len > src.length ) throw Error.OutsideBounds;
 		if( pos < 0 || len < 0 || pos + len > src.length ) throw Error.OutsideBounds;
 		if( this.pos + len > size ) grow(len);
 		if( this.pos + len > size ) grow(len);
 		if( size == 0 ) return;
 		if( size == 0 ) return;
@@ -91,7 +92,7 @@ class BytesBuffer {
 		this.pos += len;
 		this.pos += len;
 	}
 	}
 
 
-	function grow( delta : Int ) {
+	function grow( delta : Int ) : Void {
 		var req = pos + delta;
 		var req = pos + delta;
 		var nsize = size == 0 ? 16 : size;
 		var nsize = size == 0 ? 16 : size;
 		while( nsize < req )
 		while( nsize < req )

+ 11 - 1
std/lua/Boot.hx

@@ -32,6 +32,9 @@ class Boot {
 	static var _;
 	static var _;
 	static var _fid = 0;
 	static var _fid = 0;
 
 
+	// A max stack size to respect for unpack operations
+	public static var MAXSTACKSIZE (default, null) = 1000;
+
 	public static var platformBigEndian = NativeStringTools.byte(NativeStringTools.dump(function(){}),7) > 0;
 	public static var platformBigEndian = NativeStringTools.byte(NativeStringTools.dump(function(){}),7) > 0;
 
 
 	static var hiddenFields : Table<String,Bool> = untyped __lua__("{__id__=true, hx__closures=true, super=true, prototype=true, __fields__=true, __ifields__=true, __class__=true, __properties__=true}");
 	static var hiddenFields : Table<String,Bool> = untyped __lua__("{__id__=true, hx__closures=true, super=true, prototype=true, __fields__=true, __ifields__=true, __class__=true, __properties__=true}");
@@ -188,7 +191,14 @@ class Boot {
 			}
 			}
 			case "boolean" : untyped tostring(o);
 			case "boolean" : untyped tostring(o);
 			case "string"  : o;
 			case "string"  : o;
-			case "userdata": "<userdata>";
+			case "userdata": {
+				var mt = lua.Lua.getmetatable(o);
+				if (mt != null && mt.__tostring != null){
+					lua.Lua.tostring(o);
+				} else {
+					"<userdata>";
+				}
+			}
 			case "function": "<function>";
 			case "function": "<function>";
 			case "thread"  : "<thread>";
 			case "thread"  : "<thread>";
 			case "table": {
 			case "table": {

+ 36 - 41
std/lua/NativeStringTools.hx

@@ -1,6 +1,6 @@
 package lua;
 package lua;
 /**
 /**
-	These are all externs for the base Lua "string" class, which functions 
+	These are all externs for the base Lua "string" class, which functions
 	as an additional set of string tools.
 	as an additional set of string tools.
 
 
 	Note that all relevant indexes are "1" based.
 	Note that all relevant indexes are "1" based.
@@ -8,14 +8,14 @@ package lua;
 @:native("_G.string")
 @:native("_G.string")
 extern class NativeStringTools {
 extern class NativeStringTools {
 	/**
 	/**
-		Receives a string and returns its length. The empty string `""` has 
+		Receives a string and returns its length. The empty string `""` has
 		length `0`. Embedded zeros are counted, so `"a\000bc\000"` has length `5`.
 		length `0`. Embedded zeros are counted, so `"a\000bc\000"` has length `5`.
 	**/
 	**/
 	public static function len(str : String): Int;
 	public static function len(str : String): Int;
 
 
 	/**
 	/**
-		Receives zero or more integers. Returns a string with length equal to the 
-		number of arguments, in which each character has the internal numerical 
+		Receives zero or more integers. Returns a string with length equal to the
+		number of arguments, in which each character has the internal numerical
 		code equal to its corresponding argument.
 		code equal to its corresponding argument.
 		Note that numerical codes are not necessarily portable across platforms.
 		Note that numerical codes are not necessarily portable across platforms.
 	**/
 	**/
@@ -24,31 +24,26 @@ extern class NativeStringTools {
 
 
 	// TODO: make a note about handling matched groups with multireturn
 	// TODO: make a note about handling matched groups with multireturn
 	/**
 	/**
-		Returns the substring of `str` that starts at `start` and continues until `end`; 
-		`start` and `end` can be negative. If `end` is absent, then it is assumed to be 
-		equal to `-1` (which is the same as the string length). 
-		In particular, the call `sub(str,1,end)` returns a prefix of `str` 
-		with length `end`, and `sub(str, -end)` returns a suffix of `str` with 
+		Returns the substring of `str` that starts at `start` and continues until `end`;
+		`start` and `end` can be negative. If `end` is absent, then it is assumed to be
+		equal to `-1` (which is the same as the string length).
+		In particular, the call `sub(str,1,end)` returns a prefix of `str`
+		with length `end`, and `sub(str, -start)` returns a suffix of `str` with
 		length `start`.
 		length `start`.
 	**/
 	**/
 	public static function sub(str : String, start : Int, ?end : Int): StringSub;
 	public static function sub(str : String, start : Int, ?end : Int): StringSub;
 
 
 	/**
 	/**
-		Returns the character code at position `index` of `str`.
-	**/
-	public static function charCodeAt(str : String, index : Int): Int;
-
-	/**
-		Looks for the first match of pattern in the string `str`. 
-		If it finds a match, then `find` returns the indices of `str` where this 
+		Looks for the first match of pattern in the string `str`.
+		If it finds a match, then `find` returns the indices of `str` where this
 		occurrence starts and ends.
 		occurrence starts and ends.
-		
-		@param target If the target has captures, then in a successful match the 
+
+		@param target If the target has captures, then in a successful match the
 		       captured values are also returned, after the two indices.
 		       captured values are also returned, after the two indices.
 		@param start specifies where to start the search; its default value is `1`
 		@param start specifies where to start the search; its default value is `1`
-		       and can be negative. 
-		@param plain turns off the pattern matching facilities, so the function does 
-		       a plain "find substring" operation, with no characters in pattern 
+		       and can be negative.
+		@param plain turns off the pattern matching facilities, so the function does
+		       a plain "find substring" operation, with no characters in pattern
 		       being considered "magic". Note that if plain is given, then `start` must be given as well.
 		       being considered "magic". Note that if plain is given, then `start` must be given as well.
 	**/
 	**/
 	public static function find(str : String, target : String, ?start : Int, ?plain : Bool): StringFind;
 	public static function find(str : String, target : String, ?start : Int, ?plain : Bool): StringFind;
@@ -60,63 +55,63 @@ extern class NativeStringTools {
 	public static function byte(str : String, ?index : Int) : Int;
 	public static function byte(str : String, ?index : Int) : Int;
 
 
 	/**
 	/**
-		Returns a formatted version of its variable number of arguments following 
-		the description given in its first argument (which must be a string). 
-		The format string follows the same rules as the printf family of standard C 
-		functions. The only differences are that the options/modifiers 
-		`*`, `l`, `L`, `n`, `p`, and `h` are not supported and that there is an 
+		Returns a formatted version of its variable number of arguments following
+		the description given in its first argument (which must be a string).
+		The format string follows the same rules as the printf family of standard C
+		functions. The only differences are that the options/modifiers
+		`*`, `l`, `L`, `n`, `p`, and `h` are not supported and that there is an
 		extra option, `q`. The `q` option formats a string in a form suitable to be
 		extra option, `q`. The `q` option formats a string in a form suitable to be
-		safely read back by the Lua interpreter: the string is written between 
-		double quotes, and all double quotes, newlines, embedded zeros, 
+		safely read back by the Lua interpreter: the string is written between
+		double quotes, and all double quotes, newlines, embedded zeros,
 		and backslashes in the string are correctly escaped when written.
 		and backslashes in the string are correctly escaped when written.
 		For instance, the call
 		For instance, the call
    `string.format('%q', 'a string with "quotes" and \n new line')`
    `string.format('%q', 'a string with "quotes" and \n new line')`
 		will produce the string:
 		will produce the string:
 		`"a string with \"quotes\" and \
 		`"a string with \"quotes\" and \
       new line"`
       new line"`
-		
-		The options `c`, `d` `E`, `e`, `f`, `g`, `G`, `i`, `o`, `u, `X-, and `x` all 
+
+		The options `c`, `d`, `E`, `e`, `f`, `g`, `G`, `i`, `o`, `u`, `X`, and `x` all
 		expect a number as argument, whereas `q` and `s` expect a string.
 		expect a number as argument, whereas `q` and `s` expect a string.
-		
-		This function does not accept string values containing embedded zeros, 
+
+		This function does not accept string values containing embedded zeros,
 		except as arguments to the `q` option.
 		except as arguments to the `q` option.
 	**/
 	**/
 	public static function format(str : String, ?e1 : Dynamic, ?e2 : Dynamic, ?e3 : Dynamic, ?e4 : Dynamic): String;
 	public static function format(str : String, ?e1 : Dynamic, ?e2 : Dynamic, ?e3 : Dynamic, ?e4 : Dynamic): String;
 
 
 	/**
 	/**
-		
+
 	**/
 	**/
 	@:overload(   function     (str : String, pattern : String, replace : String->Void,   ?n : Int): String {})
 	@:overload(   function     (str : String, pattern : String, replace : String->Void,   ?n : Int): String {})
 	@:overload(   function     (str : String, pattern : String, replace : String->String, ?n : Int): String {})
 	@:overload(   function     (str : String, pattern : String, replace : String->String, ?n : Int): String {})
 	public static function gsub(str : String, pattern : String, replace : String,		  ?n : Int): String;
 	public static function gsub(str : String, pattern : String, replace : String,		  ?n : Int): String;
 
 
 	/**
 	/**
-		Returns an iterator function that, each time it is called, returns the next 
-		captures from pattern over string `str`. If `pattern` specifies no captures, 
+		Returns an iterator function that, each time it is called, returns the next
+		captures from pattern over string `str`. If `pattern` specifies no captures,
 		then the whole match is produced in each call.
 		then the whole match is produced in each call.
 	**/
 	**/
 	@:overload(   function     (str : String, pattern : String, match : Void->String,   ?n : Int): String->Void {})
 	@:overload(   function     (str : String, pattern : String, match : Void->String,   ?n : Int): String->Void {})
 	public static function gmatch(str : String, pattern : String): Void->String;
 	public static function gmatch(str : String, pattern : String): Void->String;
 
 
 	/**
 	/**
-		Looks for the first match of pattern in the string s. If it finds one, 
+		Looks for the first match of pattern in the string s. If it finds one,
 		then match returns the captures from the pattern; otherwise it returns `null`.
 		then match returns the captures from the pattern; otherwise it returns `null`.
 		If pattern specifies no captures, then the whole match is returned.
 		If pattern specifies no captures, then the whole match is returned.
-		The optional argument `n` specifies where to start the search; 
+		The optional argument `n` specifies where to start the search;
 		its default value is `1` and can be negative.
 		its default value is `1` and can be negative.
 	**/
 	**/
 	public static function match(str : String, pattern : String, ?n : Int): String;
 	public static function match(str : String, pattern : String, ?n : Int): String;
 
 
 	/**
 	/**
-		Receives a string and returns a copy of this string with all lowercase 
-		letters changed to uppercase. All other characters are left unchanged. 
+		Receives a string and returns a copy of this string with all lowercase
+		letters changed to uppercase. All other characters are left unchanged.
 		The definition of what a lowercase letter is depends on the current locale.
 		The definition of what a lowercase letter is depends on the current locale.
 	**/
 	**/
 	public static function upper(str:String) : String;
 	public static function upper(str:String) : String;
 
 
 	/**
 	/**
-		Receives a string and returns a copy of this string with all uppercase 
-		letters changed to lowercase. All other characters are left unchanged. 
+		Receives a string and returns a copy of this string with all uppercase
+		letters changed to lowercase. All other characters are left unchanged.
 		The definition of what an uppercase letter is depends on the current locale.
 		The definition of what an uppercase letter is depends on the current locale.
 	**/
 	**/
 	public static function lower(str:String) : String;
 	public static function lower(str:String) : String;

+ 14 - 17
std/lua/_std/String.hx

@@ -23,7 +23,7 @@
 import lua.Lua;
 import lua.Lua;
 import lua.Table;
 import lua.Table;
 import lua.Boot;
 import lua.Boot;
-import lua.NativeStringTools;
+import lua.lib.luautf8.Utf8;
 
 
 @:coreApi
 @:coreApi
 @:extern
 @:extern
@@ -35,7 +35,7 @@ class String {
 
 
 	@:keep
 	@:keep
 	static function __index(s:Dynamic, k:Dynamic) : Dynamic {
 	static function __index(s:Dynamic, k:Dynamic) : Dynamic {
-		if (k == "length") return NativeStringTools.len(s);
+		if (k == "length") return Utf8.len(s);
 		else if (Reflect.hasField(untyped String.prototype, k)) return untyped String.prototype[k];
 		else if (Reflect.hasField(untyped String.prototype, k)) return untyped String.prototype[k];
 		else if (__oldindex != null) {
 		else if (__oldindex != null) {
 			if (Lua.type(__oldindex) == "function"){
 			if (Lua.type(__oldindex) == "function"){
@@ -48,12 +48,12 @@ class String {
 		else return null;
 		else return null;
 	}
 	}
 
 
-	public inline function toUpperCase() : String return NativeStringTools.upper(this);
-	public inline function toLowerCase() : String return NativeStringTools.lower(this);
+	public inline function toUpperCase() : String return Utf8.upper(this);
+	public inline function toLowerCase() : String return Utf8.lower(this);
 	public inline function indexOf( str : String, ?startIndex : Int ) : Int {
 	public inline function indexOf( str : String, ?startIndex : Int ) : Int {
 		if (startIndex == null) startIndex = 1;
 		if (startIndex == null) startIndex = 1;
 		else startIndex += 1;
 		else startIndex += 1;
-		var r = NativeStringTools.find(this, str, startIndex, true).begin;
+		var r = Utf8.find(this, str, startIndex, true).begin;
 		if (r != null && r > 0) return r-1;
 		if (r != null && r > 0) return r-1;
 		else return -1;
 		else return -1;
 	}
 	}
@@ -77,7 +77,7 @@ class String {
 		while (idx != null){
 		while (idx != null){
 			var newidx = 0;
 			var newidx = 0;
 			if (delimiter.length > 0){
 			if (delimiter.length > 0){
-				newidx = NativeStringTools.find(this, delimiter, idx, true).begin;
+				newidx = Utf8.find(this, delimiter, idx, true).begin;
 			} else if (idx >= this.length){
 			} else if (idx >= this.length){
 				newidx = null;
 				newidx = null;
 			} else {
 			} else {
@@ -85,11 +85,11 @@ class String {
 			}
 			}
 
 
 			if (newidx != null){
 			if (newidx != null){
-				var match = NativeStringTools.sub(this, idx, newidx-1).match;
+				var match = Utf8.sub(this, idx, newidx-1).match;
 				ret.push(match);
 				ret.push(match);
 				idx = newidx + delimiter.length;
 				idx = newidx + delimiter.length;
 			} else {
 			} else {
-				ret.push(NativeStringTools.sub(this,idx,this.length).match);
+				ret.push(Utf8.sub(this,idx,this.length).match);
 				idx = null;
 				idx = null;
 			}
 			}
 		}
 		}
@@ -105,20 +105,17 @@ class String {
 		if (startIndex < 0) startIndex = 0;
 		if (startIndex < 0) startIndex = 0;
 		if (endIndex < startIndex) {
 		if (endIndex < startIndex) {
 			// swap the index positions
 			// swap the index positions
-			return NativeStringTools.sub(this, endIndex+1, startIndex).match;
+			return Utf8.sub(this, endIndex+1, startIndex).match;
 		} else {
 		} else {
-			return NativeStringTools.sub(this, startIndex+1, endIndex).match;
+			return Utf8.sub(this, startIndex+1, endIndex).match;
 		}
 		}
 	}
 	}
 
 
-	function get_length() : Int {
-		return NativeStringTools.len(this);
-	}
 	public inline function charAt( index : Int) : String {
 	public inline function charAt( index : Int) : String {
-		return NativeStringTools.sub(this,index+1, index+1).match;
+		return Utf8.sub(this,index+1, index+1).match;
 	}
 	}
 	public inline function charCodeAt( index : Int) : Null<Int> {
 	public inline function charCodeAt( index : Int) : Null<Int> {
-		return NativeStringTools.byte(this,index+1);
+		return Utf8.byte(this,index+1);
 	}
 	}
 
 
 	public inline function substr( pos : Int, ?len : Int ) : String {
 	public inline function substr( pos : Int, ?len : Int ) : String {
@@ -126,11 +123,11 @@ class String {
 		else if (len < 0) len = length + len;
 		else if (len < 0) len = length + len;
 		if (pos < 0) pos = length + pos;
 		if (pos < 0) pos = length + pos;
 		if (pos < 0) pos = 0;
 		if (pos < 0) pos = 0;
-		return NativeStringTools.sub(this, pos + 1, pos+len).match;
+		return Utf8.sub(this, pos + 1, pos+len).match;
 	}
 	}
 
 
 	public inline static function fromCharCode( code : Int ) : String {
 	public inline static function fromCharCode( code : Int ) : String {
-		return NativeStringTools.char(code);
+		return Utf8.char(code);
 	}
 	}
 
 
 }
 }

+ 0 - 250
std/lua/_std/haxe/Utf8.hx

@@ -1,250 +0,0 @@
-/*
- * Copyright (C)2005-2018 Haxe Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-package haxe;
-
-import lua.NativeStringTools;
-
-class Utf8 {
-
-    var __b : String;
-
-    public function new( ?size : Int ) {
-		__b = "";
-    }
-
-    public inline function addChar( c : Int ) : Void {
-		__b += char(c);
-    }
-
-    public inline function toString() : String {
-		return __b;
-    }
-
-    static inline function decodeChar( s : String, pos : Int, code : Int, width : Int ) {
-        return
-            if (width == 1)
-                code;
-            else if (width == 2)
-                ((code & 0x3F) << 6) |
-                (s.charCodeAt(pos+1) & 0x7F);
-            else if (width == 3)
-                ((code & 0x1F) << 12) |
-                ((s.charCodeAt(pos+1) & 0x7F) << 6) |
-                (s.charCodeAt(pos+2) & 0x7F);
-            else
-                ((code & 0x0F) << 18) |
-                ((s.charCodeAt(pos+1) & 0x7F) << 12) |
-                ((s.charCodeAt(pos+2) & 0x7F) << 6) |
-                (s.charCodeAt(pos+3) & 0x7F);
-    }
-
-    public static function iter( s : String, chars : Int -> Void ) {
-		var cur = 0;
-		while (cur < s.length){
-			var code = s.charCodeAt(cur);
-			var width = charWidth(code);
-			chars( decodeChar( s, cur, code, width ) );
-			cur += width;
-		}
-    }
-
-    public static function encode( s : String ) : String {
-		// ported from : http://phpjs.org/functions/utf8_encode/
-		if (s == null ) {
-			return '';
-		}
-		var string = (s + ''); // .replace(/\r\n/g, "\n").replace(/\r/g, "\n");
-		var utftext = '';
-		var start = 0;
-		var end = 0;
-		var n = 0;
-		while (n < s.length) {
-			var c1 = string.charCodeAt(n);
-			var enc = null;
-
-			if (c1 < 128) {
-			end++;
-			} else if (c1 > 127 && c1 < 2048) {
-			enc = String.fromCharCode( (c1 >> 6) | 192)
-				+ String.fromCharCode( (c1 & 63) | 128);
-			} else if ((c1 & 0xF800) != 0xD800) {
-			enc = String.fromCharCode( (c1 >> 12) | 224)
-				+ String.fromCharCode( ((c1 >> 6) & 63) | 128)
-				+ String.fromCharCode( (c1 & 63) | 128);
-			} else { // surrogate pairs
-			if ((c1 & 0xFC00) != 0xD800) {
-				throw 'Unmatched trail surrogate at ' + n;
-			}
-			var c2 = string.charCodeAt(++n);
-			if ((c2 & 0xFC00) != 0xDC00) {
-				throw 'Unmatched lead surrogate at ' + (n - 1);
-			}
-			c1 = ((c1 & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000;
-			enc = String.fromCharCode( (c1 >> 18) | 240)
-				+ String.fromCharCode( ((c1 >> 12) & 63) | 128)
-				+ String.fromCharCode(((c1 >> 6) & 63) | 128)
-				+ String.fromCharCode((c1 & 63) | 128);
-			}
-			if (enc != null) {
-			if (end > start) {
-				utftext += string.substring(start, end);
-			}
-			utftext += enc;
-			start = end = n + 1;
-			}
-			n++;
-		}
-
-		if (end > start) {
-			utftext += string.substring(start, s.length);
-		}
-
-		return utftext;
-
-    }
-
-    public static function decode( s : String ) : String {
-		var ret = new StringBuf();
-		iter(s, function(c){
-			if( c == 8364 ) // euro symbol
-			c = 164;
-			else if( c > 255 ){
-			// throw new RangeError('Utf8 decode invalid character ($c)');
-			throw 'Utf8::decode invalid character ($c)';
-			}
-
-			if (c != 0xFEFF) // BOM
-			ret.add(String.fromCharCode(c));
-		});
-		return ret.toString();
-    }
-
-    public static inline function charCodeAt( s : String, index : Int ) : Int {
-		var cur_idx = 0;
-		var pos = 0;
-		for (i in 0...index){
-			pos += charWidth(s.charCodeAt(pos));
-		}
-		var ret = 0;
-		var code = s.charCodeAt(pos);
-		var bytes = charWidth(code);
-		return decodeChar( s, pos, code, bytes );
-    }
-
-    public static function validate( s : String ) : Bool {
-		if (s == null) return false;
-		var cur = 0;
-		while (cur < s.length){
-			var c1 = s.charCodeAt(cur++);
-			if (c1 < 0x80) continue;
-			if (c1 < 0xC0) return false;
-			if (s.length <= cur) return false;
-			var c2 = s.charCodeAt(cur++);
-			if (c1 < 0xE0) {
-				if ((c1 & 0x1E != 0) && (c2 & 0xC0 == 0x80)) continue;
-				return false;
-			}
-			if (s.length <= cur) return false;
-			var c3 = s.charCodeAt(cur++);
-			if (c1 < 0xF0) {
-				if (((c1 & 0x0F != 0) || (c2 & 0x20 != 0)) && (c2 & 0xC0 == 0x80) && (c3 & 0xC0 == 0x80)
-						&& !(c1 == 0xED && 0xA0 <= c2 && c2 <= 0xBF))
-					continue;
-				return false;
-			}
-			if (s.length <= cur) return false;
-			var c4 = s.charCodeAt(cur++);
-			if (c1 < 0xF8) {
-				if (((c1 & 0x07 != 0) || (c2 & 0x30 != 0)) && (c2 & 0xC0 == 0x80) && (c3 & 0xC0 == 0x80) && (c4 & 0xC0 == 0x80)
-						&& !((c1 == 0xF4 && c2 > 0x8F) || c1 > 0xF4))
-					continue;
-				return false;
-			}
-			return false;
-		}
-		return true;
-    }
-
-    public static inline function length( s : String ) : Int {
-		var pos = 0;
-		var len = 0;
-		while (pos < s.length){
-			pos += charWidth(s.charCodeAt(pos));
-			len++;
-		}
-		return len;
-    }
-
-    public static function compare( a : String, b : String ) : Int {
-		return a > b ? 1 : (a == b ? 0 : -1);
-    }
-
-    public static inline function sub( s : String, pos : Int, len : Int ) : String {
-		var startpos = 0;
-		var ret = new StringBuf();
-		for (i in 0...pos){
-			startpos += charWidth(s.charCodeAt(startpos));
-		}
-		var endpos = startpos;
-		for (i in 0...len){
-			endpos += charWidth(s.charCodeAt(endpos));
-		}
-		return s.substring(startpos, endpos);
-    }
-
-    static function charWidth(c:Int) : Int {
-		return   if (c >  0   && c <= 127) 1;
-			else if (c >= 194 && c <= 223) 2;
-			else if (c >= 224 && c <= 239) 3;
-			else if (c >= 240 && c <= 244) 4;
-			else null;
-    }
-
-    public static function char( unicode : Int ) : String {
-		if (unicode <= 0x7F) {
-			return String.fromCharCode(unicode);
-		} else if (unicode <= 0x7FF) {
-			var b0 = 0xC0 + Math.floor(unicode / 0x40);
-			var b1 = 0x80 + (unicode % 0x40);
-			return NativeStringTools.char(b0, b1);
-		} else if (unicode <= 0xFFFF) {
-			var b0 = 0xE0 +  Math.floor(unicode / 0x1000);
-			var b1 = 0x80 + (Math.floor(unicode / 0x40) % 0x40);
-			var b2 = 0x80 + (unicode % 0x40);
-			return NativeStringTools.char(b0, b1, b2);
-		} else if (unicode <= 0x10FFFF) {
-			var code = unicode;
-			var b3   = 0x80 + (code % 0x40);
-			code     = Math.floor(code / 0x40);
-			var b2   = 0x80 + (code % 0x40);
-			code     = Math.floor(code / 0x40);
-			var b1   = 0x80 + (code % 0x40);
-			code     = Math.floor(code / 0x40);
-			var b0   = 0xF0 + code;
-
-			return NativeStringTools.char(b0, b1, b2, b3);
-		} else {
-			throw 'Unicode greater than U+10FFFF';
-		}
-    }
-}
-

+ 6 - 5
std/lua/_std/sys/io/Process.hx

@@ -27,6 +27,7 @@ import lua.lib.luv.Signal;
 import lua.lib.luv.Loop;
 import lua.lib.luv.Loop;
 import lua.Boot;
 import lua.Boot;
 import lua.Table;
 import lua.Table;
+import lua.NativeStringTools;
 
 
 import haxe.io.Bytes;
 import haxe.io.Bytes;
 import haxe.io.Error;
 import haxe.io.Error;
@@ -74,9 +75,9 @@ class Process {
 
 
 
 
 	public function new( cmd : String, ?args : Array<String>, ?detached : Bool){
 	public function new( cmd : String, ?args : Array<String>, ?detached : Bool){
-	
+
 		if( detached ) throw "Detached process is not supported on this platform";
 		if( detached ) throw "Detached process is not supported on this platform";
-	
+
 		var _stdout = new Pipe(false);
 		var _stdout = new Pipe(false);
 		var _stderr = new Pipe(false);
 		var _stderr = new Pipe(false);
 		var _stdin  = new Pipe(false);
 		var _stdin  = new Pipe(false);
@@ -133,7 +134,7 @@ private class ProcessInput extends haxe.io.Input {
 
 
 	override public function readByte() {
 	override public function readByte() {
 		var err_str = null;
 		var err_str = null;
-		if (buf == null || idx >= buf.length){
+		if (buf == null || idx >= NativeStringTools.len(buf)){
 			buf = null;
 			buf = null;
 			idx = 0;
 			idx = 0;
 			var pending = true;
 			var pending = true;
@@ -147,7 +148,7 @@ private class ProcessInput extends haxe.io.Input {
 		}
 		}
 		if (buf == null) throw new haxe.io.Eof();
 		if (buf == null) throw new haxe.io.Eof();
 		if (err_str != null) throw err_str;
 		if (err_str != null) throw err_str;
-		var code : Int =  cast buf.charCodeAt(idx++);
+		var code = NativeStringTools.byte(buf, ++idx);
 		return code;
 		return code;
 	}
 	}
 
 
@@ -182,7 +183,7 @@ private class ProcessOutput extends haxe.io.Output {
 	}
 	}
 
 
 	override public function writeByte(c : Int ) : Void {
 	override public function writeByte(c : Int ) : Void {
-		b.write(String.fromCharCode(c));
+		b.write(NativeStringTools.char(c));
 	}
 	}
 
 
 	override public function close(){
 	override public function close(){

+ 117 - 0
std/lua/lib/luautf8/Utf8.hx

@@ -0,0 +1,117 @@
+package lua.lib.luautf8;
+/**
+	These are all externs for the lua-utf8 library, which functions
+	as an additional set of string tools.
+
+	Note that all relevant indexes are "1" based.
+**/
+@:luaRequire('lua-utf8')
+extern class Utf8 {
+	/**
+		Receives a string and returns its length. The empty string `""` has
+		length `0`. Embedded zeros are counted, so `"a\000bc\000"` has length `5`.
+	**/
+	public static function len(str : String): Int;
+
+	/**
+		Receives zero or more integers. Returns a string with length equal to the
+		number of arguments, in which each character has the internal numerical
+		code equal to its corresponding argument.
+		Note that numerical codes are not necessarily portable across platforms.
+	**/
+	public static function char(codes: haxe.extern.Rest<Int>): String;
+
+
+	/**
+		Returns the substring of `str` that starts at `start` and continues until `end`;
+		`start` and `end` can be negative. If `end` is absent, then it is assumed to be
+		equal to `-1` (which is the same as the string length).
+		In particular, the call `sub(str,1,end)` returns a prefix of `str`
+		with length `end`, and `sub(str, -start)` returns a suffix of `str` with
+		length `start`.
+	**/
+	public static function sub(str : String, start : Int, ?end : Int): StringSub;
+
+	/**
+		Returns the character code at position `index` of `str`.
+	**/
+	public static function charCodeAt(str : String, index : Int): Int;
+
+	/**
+		Looks for the first match of pattern in the string `str`.
+		If it finds a match, then `find` returns the indices of `str` where this
+		occurrence starts and ends.
+
+		@param target If the target has captures, then in a successful match the
+		       captured values are also returned, after the two indices.
+		@param start specifies where to start the search; its default value is `1`
+		       and can be negative.
+		@param plain turns off the pattern matching facilities, so the function does
+		       a plain "find substring" operation, with no characters in pattern
+		       being considered "magic". Note that if plain is given, then `start` must be given as well.
+	**/
+	public static function find(str : String, target : String, ?start : Int, ?plain : Bool): StringFind;
+
+	/**
+		Returns the internal numerical codes of the characters `str[index]`.
+		Note that numerical codes are not necessarily portable across platforms.
+	**/
+	public static function byte(str : String, ?index : Int) : Int;
+
+	/**
+		Presumably mirrors Lua's `string.gsub`: returns a copy of `str` with matches of `pattern` (at most `n`, if given) substituted via `replace` - TODO confirm against lua-utf8.
+	**/
+	@:overload(   function     (str : String, pattern : String, replace : String->Void,   ?n : Int): String {})
+	@:overload(   function     (str : String, pattern : String, replace : String->String, ?n : Int): String {})
+	public static function gsub(str : String, pattern : String, replace : String,		  ?n : Int): String;
+
+	/**
+		Returns an iterator function that, each time it is called, returns the next
+		captures from pattern over string `str`. If `pattern` specifies no captures,
+		then the whole match is produced in each call.
+	**/
+	@:overload(   function     (str : String, pattern : String, match : Void->String,   ?n : Int): String->Void {})
+	public static function gmatch(str : String, pattern : String): Void->String;
+
+	/**
+		Looks for the first match of pattern in the string s. If it finds one,
+		then match returns the captures from the pattern; otherwise it returns `null`.
+		If pattern specifies no captures, then the whole match is returned.
+		The optional argument `n` specifies where to start the search;
+		its default value is `1` and can be negative.
+	**/
+	public static function match(str : String, pattern : String, ?n : Int): String;
+
+	/**
+		Receives a string and returns a copy of this string with all lowercase
+		letters changed to uppercase. All other characters are left unchanged.
+		The definition of what a lowercase letter is depends on the current locale.
+	**/
+	public static function upper(str:String) : String;
+
+	/**
+		Receives a string and returns a copy of this string with all uppercase
+		letters changed to lowercase. All other characters are left unchanged.
+		The definition of what an uppercase letter is depends on the current locale.
+	**/
+	public static function lower(str:String) : String;
+
+
+	public static function codes(str : String) : Void->StringCodePoint;
+
+}
+
+@:multiReturn extern class StringFind {
+	var begin : Int;
+	var end : Int;
+}
+
+@:multiReturn extern class StringSub {
+	var match : String;
+	var count : Int;
+}
+
+@:multiReturn extern class StringCodePoint {
+	var position : Int;
+	var codepoint : Int;
+}

+ 35 - 26
std/php/Boot.hx

@@ -525,6 +525,15 @@ class Boot {
 	public static function dynamicString( str:String ) : HxDynamicStr {
 	public static function dynamicString( str:String ) : HxDynamicStr {
 		return @:privateAccess new HxDynamicStr(str);
 		return @:privateAccess new HxDynamicStr(str);
 	}
 	}
+
+	static public function utf8CharAt(str:String, index:Int):Null<String> {
+		if (index < 0 || index >= str.length) {
+			return null;
+		}
+		//preg_split() is faster than mb_substr(); note that the whole string is split into characters on every call
+		var chars = Global.preg_split('//u', str, -1, Const.PREG_SPLIT_NO_EMPTY);
+		return chars == false ? null : (chars:NativeArray)[index];
+	}
 }
 }
 
 
 
 
@@ -626,27 +635,21 @@ private class HxEnum {
 private class HxString {
 private class HxString {
 
 
 	public static function toUpperCase( str:String ) : String {
 	public static function toUpperCase( str:String ) : String {
-		return Global.strtoupper(str);
+		return Global.mb_strtoupper(str, 'UTF-8');
 	}
 	}
 
 
 	public static function toLowerCase( str:String ) : String {
 	public static function toLowerCase( str:String ) : String {
-		return Global.strtolower(str);
+		return Global.mb_strtolower(str, 'UTF-8');
 	}
 	}
 
 
 	public static function charAt( str:String, index:Int) : String {
 	public static function charAt( str:String, index:Int) : String {
-		if (index < 0 || index >= str.length) {
-			return '';
-		} else {
-			return (str:NativeString)[index];
-		}
+		return Syntax.coalesce(Boot.utf8CharAt(str, index), '');
 	}
 	}
 
 
 	public static function charCodeAt( str:String, index:Int) : Null<Int> {
 	public static function charCodeAt( str:String, index:Int) : Null<Int> {
-		if (index < 0 || index >= str.length) {
-			return null;
-		} else {
-			return Global.ord((str:NativeString)[index]);
-		}
+		var char = Boot.utf8CharAt(str, index);
+		if(char == null) return null;
+		return Global.mb_ord(char, 'UTF-8');
 	}
 	}
 
 
 	public static function indexOf( str:String, search:String, startIndex:Int = null ) : Int {
 	public static function indexOf( str:String, search:String, startIndex:Int = null ) : Int {
@@ -655,12 +658,17 @@ private class HxString {
 		} else if (startIndex < 0) {
 		} else if (startIndex < 0) {
 			startIndex += str.length;
 			startIndex += str.length;
 		}
 		}
-		var index = Global.strpos(str, search, startIndex);
+		var index = Global.mb_strpos(str, search, startIndex, 'UTF-8');
 		return (index == false ? -1 : index);
 		return (index == false ? -1 : index);
 	}
 	}
 
 
 	public static function lastIndexOf( str:String, search:String, startIndex:Int = null ) : Int {
 	public static function lastIndexOf( str:String, search:String, startIndex:Int = null ) : Int {
-		var index = Global.strrpos(str, search, (startIndex == null ? 0 : startIndex - str.length));
+		if(startIndex == null) {
+			startIndex = 0;
+		} else {
+			startIndex = startIndex - str.length;
+		}
+		var index = Global.mb_strrpos(str, search, startIndex, 'UTF-8');
 		if (index == false) {
 		if (index == false) {
 			return -1;
 			return -1;
 		} else {
 		} else {
@@ -670,9 +678,14 @@ private class HxString {
 
 
 	public static function split( str:String, delimiter:String ) : Array<String> {
 	public static function split( str:String, delimiter:String ) : Array<String> {
 		if (delimiter == '') {
 		if (delimiter == '') {
-			return @:privateAccess Array.wrap(Global.str_split(str));
+			var arr:NativeArray = Global.preg_split('//u', str, -1, Const.PREG_SPLIT_NO_EMPTY);
+			return @:privateAccess Array.wrap(arr);
 		} else {
 		} else {
-			return @:privateAccess Array.wrap(Global.explode(delimiter, str));
+			var prev = Global.mb_regex_encoding(); //remember the user-defined encoding so it can be put back
+			Global.mb_regex_encoding('UTF-8');
+			var parts = Global.mb_split(Global.preg_quote(delimiter), str);
+			Global.mb_regex_encoding(prev); //restore before returning: a statement placed after `return` never runs
+			return @:privateAccess Array.wrap(parts);
 		}
 		}
 	}
 	}
 
 
@@ -682,12 +695,7 @@ private class HxString {
 		} else if (pos >= str.length) {
 		} else if (pos >= str.length) {
 			return '';
 			return '';
 		}
 		}
-		if (len == null) {
-			return Global.substr(str, pos);
-		} else {
-			var result = Global.substr(str, pos, len);
-			return (result == false ? '' : result);
-		}
+		return Global.mb_substr(str, pos, len, 'UTF-8');
 	}
 	}
 
 
 	public static function substring( str:String, startIndex:Int, ?endIndex:Int ) : String {
 	public static function substring( str:String, startIndex:Int, ?endIndex:Int ) : String {
@@ -696,14 +704,15 @@ private class HxString {
 		} else if (endIndex < 0) {
 		} else if (endIndex < 0) {
 			endIndex = 0;
 			endIndex = 0;
 		}
 		}
-		if (startIndex < 0) startIndex = 0;
+		if (startIndex < 0) {
+			startIndex = 0;
+		}
 		if (startIndex > endIndex) {
 		if (startIndex > endIndex) {
 			var tmp = endIndex;
 			var tmp = endIndex;
 			endIndex = startIndex;
 			endIndex = startIndex;
 			startIndex = tmp;
 			startIndex = tmp;
 		}
 		}
-		var result = Global.substr(str, startIndex, endIndex - startIndex);
-		return (result == false ? '' : result);
+		return Global.mb_substr(str, startIndex, endIndex - startIndex, 'UTF-8');
 	}
 	}
 
 
 	public static function toString( str:String ) : String {
 	public static function toString( str:String ) : String {
@@ -711,7 +720,7 @@ private class HxString {
 	}
 	}
 
 
 	public static function fromCharCode( code:Int ) : String {
 	public static function fromCharCode( code:Int ) : String {
-		return Global.chr(code);
+		return Global.mb_chr(code, 'UTF-8');
 	}
 	}
 }
 }
 
 

+ 42 - 0
std/php/Global.hx

@@ -957,6 +957,11 @@ extern class Global {
 	**/
 	**/
 	static function mb_check_encoding( str:String = null, ?encoding:String ) : Bool;
 	static function mb_check_encoding( str:String = null, ?encoding:String ) : Bool;
 
 
+	/**
+		@see http://php.net/manual/en/function.mb-split.php
+	**/
+	static function mb_split( pattern:String, str:String, ?limit:Int ) : NativeIndexedArray<String>;
+
 	/**
 	/**
 		@see http://php.net/manual/en/function.mb-strlen.php
 		@see http://php.net/manual/en/function.mb-strlen.php
 	**/
 	**/
@@ -967,6 +972,43 @@ extern class Global {
 	**/
 	**/
 	static function mb_substr( str:String, start:Int, length:Int = null, ?encoding:String ) : String;
 	static function mb_substr( str:String, start:Int, length:Int = null, ?encoding:String ) : String;
 
 
+	/**
+		@see http://php.net/manual/en/function.mb-chr.php
+		(Polyfilled for php 7.0)
+	**/
+	static function mb_chr( cp:Int, ?encoding:String ) : String;
+
+	/**
+		@see http://php.net/manual/en/function.mb-ord.php
+		(Polyfilled for php 7.0)
+	**/
+	static function mb_ord( str:String, ?encoding:String ) : Int;
+
+	/**
+		@see http://php.net/manual/en/function.mb-regex-encoding.php
+	**/
+	static function mb_regex_encoding( ?encoding:String ) : EitherType<Bool,String>;
+
+	/**
+		@see http://php.net/manual/en/function.mb-strtoupper.php
+	**/
+	static function mb_strtoupper( str:String, ?encoding:String ) : String;
+
+	/**
+		@see http://php.net/manual/en/function.mb-strpos.php
+	**/
+	static function mb_strpos( haystack:String, needle:String, ?offset:Int, ?encoding:String ) : EitherType<Int,Bool>;
+
+	/**
+		@see http://php.net/manual/en/function.mb-strrpos.php
+	**/
+	static function mb_strrpos( haystack:String, needle:String, ?offset:Int, ?encoding:String ) : EitherType<Int,Bool>;
+
+	/**
+		@see http://php.net/manual/en/function.mb-strtolower.php
+	**/
+	static function mb_strtolower( str:String, ?encoding:String ) : String;
+
 	/**
 	/**
 		@see http://php.net/manual/en/function.proc-open.php
 		@see http://php.net/manual/en/function.proc-open.php
 	**/
 	**/

+ 43 - 0
std/php/_polyfills.php

@@ -7,4 +7,47 @@
  */
  */
 namespace { //Namespace declaration is required because this file is included under non-root namespace.
 namespace { //Namespace declaration is required because this file is included under non-root namespace.
 
 
+	/**
+	 * Fallback mb_chr(), defined only if missing: accepts 'UTF-8' only (throws otherwise) and encodes $code modulo 0x200000 as 1-4 bytes. @see http://php.net/manual/en/function.mb-chr.php
+	 */
+	if(!function_exists('mb_chr')) {
+		function mb_chr($code, $encoding = null) {
+			if($encoding !== 'UTF-8') {
+				throw new Exception("$encoding is not supported in mb_chr() polyfill.");
+			}
+			if (0x80 > $code %= 0x200000) {
+				$s = chr($code);
+			} elseif (0x800 > $code) {
+				$s = chr(0xC0 | $code >> 6) . chr(0x80 | $code & 0x3F);
+			} elseif (0x10000 > $code) {
+				$s = chr(0xE0 | $code >> 12) . chr(0x80 | $code >> 6 & 0x3F) . chr(0x80 | $code & 0x3F);
+			} else {
+				$s = chr(0xF0 | $code >> 18) . chr(0x80 | $code >> 12 & 0x3F) . chr(0x80 | $code >> 6 & 0x3F) . chr(0x80 | $code & 0x3F);
+			}
+			return $s;
+		}
+	}
+
+	/**
+	 * Fallback mb_ord(), defined only if missing: accepts 'UTF-8' only (throws otherwise), decodes the first character of $s by its lead byte without validating continuation bytes, and yields 0 when unpack() returns nothing. @see http://php.net/manual/en/function.mb-ord.php
+	 */
+	if(!function_exists('mb_ord')) {
+		function mb_ord($s, $encoding = null) {
+			if($encoding !== 'UTF-8') {
+				throw new Exception("$encoding is not supported in mb_ord() polyfill.");
+			}
+			$code = ($s = unpack('C*', substr($s, 0, 4))) ? $s[1] : 0;
+			if (0xF0 <= $code) {
+				return (($code - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80;
+			}
+			if (0xE0 <= $code) {
+				return (($code - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80;
+			}
+			if (0xC0 <= $code) {
+				return (($code - 0xC0) << 6) + $s[2] - 0x80;
+			}
+			return $code;
+		}
+	}
+
 }
 }

+ 6 - 10
std/php/_std/String.hx

@@ -29,28 +29,24 @@ import php.*;
 	@:pure function new(string:String) : Void;
 	@:pure function new(string:String) : Void;
 
 
 	@:pure @:runtime inline function toUpperCase() : String {
 	@:pure @:runtime inline function toUpperCase() : String {
-		return Global.strtoupper(this);
+		return Global.mb_strtoupper(this, 'UTF-8');
 	}
 	}
 
 
 	@:pure @:runtime inline function toLowerCase() : String {
 	@:pure @:runtime inline function toLowerCase() : String {
-		return Global.strtolower(this);
+		return Global.mb_strtolower(this, 'UTF-8');
 	}
 	}
 
 
 	@:pure @:runtime inline function charAt(index : Int) : String {
 	@:pure @:runtime inline function charAt(index : Int) : String {
-		return (index < 0 || index >= this.length ? '' : (this:NativeString)[index]);
+		return Syntax.coalesce(Boot.utf8CharAt(this, index), '');
 	}
 	}
 
 
-	@:pure @:runtime inline function charCodeAt( index : Int) : Null<Int> {
-		return (index < 0 || index >= this.length ? null : Global.ord((this:NativeString)[index]));
-	}
+	@:pure function charCodeAt( index : Int) : Null<Int>;
 
 
 	@:pure function indexOf( str : String, ?startIndex : Int ) : Int;
 	@:pure function indexOf( str : String, ?startIndex : Int ) : Int;
 
 
 	@:pure function lastIndexOf( str : String, ?startIndex : Int ) : Int;
 	@:pure function lastIndexOf( str : String, ?startIndex : Int ) : Int;
 
 
-	@:pure @:runtime inline function split( delimiter : String ) : Array<String> {
-		return @:privateAccess Array.wrap(delimiter == '' ? Global.str_split(this) : Global.explode(delimiter, this));
-	}
+	@:pure function split( delimiter : String ) : Array<String>;
 
 
 	@:pure function substr( pos : Int, ?len : Int ) : String;
 	@:pure function substr( pos : Int, ?len : Int ) : String;
 
 
@@ -61,6 +57,6 @@ import php.*;
 	}
 	}
 
 
 	@:pure @:runtime static inline function fromCharCode( code : Int ) : String {
 	@:pure @:runtime static inline function fromCharCode( code : Int ) : String {
-		return Global.chr(code);
+		return Global.mb_chr(code, 'UTF-8');
 	}
 	}
 }
 }

+ 1 - 1
std/php/_std/StringTools.hx

@@ -95,7 +95,7 @@ import php.*;
 	}
 	}
 
 
 	public static inline function fastCodeAt( s : String, index : Int ) : Int {
 	public static inline function fastCodeAt( s : String, index : Int ) : Int {
-		return (s.length == index ? 0 : Global.ord((s:NativeString)[index]));
+		return (s.length == index ? 0 : Global.mb_ord(Boot.utf8CharAt(s, index), 'UTF-8'));
 	}
 	}
 
 
 	public static inline function isEof( c : Int ) : Bool {
 	public static inline function isEof( c : Int ) : Bool {

+ 4 - 3
std/php/_std/haxe/io/Bytes.hx

@@ -147,10 +147,11 @@ class Bytes {
 		setInt32(pos + 4, v.high);
 		setInt32(pos + 4, v.high);
 	}
 	}
 
 
-	public inline function getString( pos : Int, len : Int ) : String {
+	public inline function getString( pos : Int, len : Int, ?encoding : Encoding ) : String {
 		if( pos < 0 || len < 0 || pos + len > length ) {
 		if( pos < 0 || len < 0 || pos + len > length ) {
 			throw Error.OutsideBounds;
 			throw Error.OutsideBounds;
 		} else {
 		} else {
+			//no need to handle encoding, because PHP strings are binary safe.
 			return b.getString(pos, len);
 			return b.getString(pos, len);
 		}
 		}
 	}
 	}
@@ -177,8 +178,8 @@ class Bytes {
 		return new Bytes(length, BytesData.alloc(length));
 		return new Bytes(length, BytesData.alloc(length));
 	}
 	}
 
 
-	public static inline function ofString( s : String ) : Bytes {
-		return new Bytes(s.length, s);
+	public static inline function ofString( s : String, ?encoding : Encoding ) : Bytes {
+		return new Bytes(php.Global.strlen(s), s);
 	}
 	}
 
 
 	public static inline function ofData( b : BytesData ) : Bytes {
 	public static inline function ofData( b : BytesData ) : Bytes {

+ 5 - 5
std/php/_std/haxe/io/BytesBuffer.hx

@@ -24,7 +24,7 @@ package haxe.io;
 import php.*;
 import php.*;
 
 
 class BytesBuffer {
 class BytesBuffer {
-	var b : String;
+	var b : NativeString;
 
 
 	/** The length of the buffer in bytes. **/
 	/** The length of the buffer in bytes. **/
 	public var length(get,never) : Int;
 	public var length(get,never) : Int;
@@ -41,7 +41,7 @@ class BytesBuffer {
 		b = Syntax.concat(b, src.getData().toNativeString());
 		b = Syntax.concat(b, src.getData().toNativeString());
 	}
 	}
 
 
-	public inline function addString( v : String ) {
+	public inline function addString( v : String, ?encoding : Encoding ) {
 		b = Syntax.concat(b, v);
 		b = Syntax.concat(b, v);
 	}
 	}
 
 
@@ -77,13 +77,13 @@ class BytesBuffer {
 		Returns either a copy or a reference of the current bytes.
 		Returns either a copy or a reference of the current bytes.
 		Once called, the buffer can no longer be used.
 		Once called, the buffer can no longer be used.
 	**/
 	**/
-	public function getBytes() : Bytes untyped {
-		var bytes = new Bytes(b.length, b);
+	public function getBytes() : Bytes {
+		var bytes = @:privateAccess new Bytes(length, b);
 		b = null;
 		b = null;
 		return bytes;
 		return bytes;
 	}
 	}
 
 
 	inline function get_length() : Int {
 	inline function get_length() : Int {
-		return b.length;
+		return Global.strlen(b);
 	}
 	}
 }
 }

+ 2 - 1
std/python/_std/sys/io/FileInput.hx

@@ -22,6 +22,7 @@
 package sys.io;
 package sys.io;
 
 
 import haxe.io.Bytes;
 import haxe.io.Bytes;
+import haxe.io.Encoding;
 import haxe.io.Input;
 import haxe.io.Input;
 import python.io.IFileInput;
 import python.io.IFileInput;
 
 
@@ -115,7 +116,7 @@ class FileInput extends Input
 		return impl.readInt32();
 		return impl.readInt32();
 	}
 	}
 
 
-	override public function readString( len : Int ) : String {
+	override public function readString( len : Int, ?encoding : Encoding ) : String {
 		return impl.readString(len);
 		return impl.readString(len);
 	}
 	}
 
 

+ 2 - 1
std/python/_std/sys/io/FileOutput.hx

@@ -22,6 +22,7 @@
 package sys.io;
 package sys.io;
 
 
 import haxe.io.Bytes;
 import haxe.io.Bytes;
+import haxe.io.Encoding;
 import haxe.io.Input;
 import haxe.io.Input;
 import haxe.io.Output;
 import haxe.io.Output;
 import python.io.IFileOutput;
 import python.io.IFileOutput;
@@ -110,7 +111,7 @@ class FileOutput extends Output {
 		impl.writeInput(i,bufsize);
 		impl.writeInput(i,bufsize);
 	}
 	}
 
 
-	override public function writeString( s : String ):Void {
+	override public function writeString( s : String, ?encoding : Encoding ):Void {
 		impl.writeString(s);
 		impl.writeString(s);
 	}
 	}
 }
 }

+ 2 - 1
std/python/io/IInput.hx

@@ -22,6 +22,7 @@
 package python.io;
 package python.io;
 
 
 import haxe.io.Bytes;
 import haxe.io.Bytes;
+import haxe.io.Encoding;
 
 
 interface IInput
 interface IInput
 {
 {
@@ -59,5 +60,5 @@ interface IInput
 
 
 	public function readInt32():Int;
 	public function readInt32():Int;
 
 
-	public function readString( len : Int ) : String;
+	public function readString( len : Int, ?encoding : Encoding ) : String;
 }
 }

+ 2 - 1
std/python/io/IOutput.hx

@@ -22,6 +22,7 @@
 package python.io;
 package python.io;
 
 
 import haxe.io.Bytes;
 import haxe.io.Bytes;
+import haxe.io.Encoding;
 import haxe.io.Input;
 import haxe.io.Input;
 
 
 interface IOutput {
 interface IOutput {
@@ -60,5 +61,5 @@ interface IOutput {
 
 
 	public function writeInput( i : Input, ?bufsize : Int ):Void;
 	public function writeInput( i : Input, ?bufsize : Int ):Void;
 
 
-	public function writeString( s : String ):Void;
+	public function writeString( s : String, ?encoding : Encoding ):Void;
 }
 }

+ 1 - 1
tests/optimization/src/issues/Issue6015.hx

@@ -4,7 +4,7 @@ class Issue6015 {
 	@:js('
 	@:js('
 		var a = null;
 		var a = null;
 		var tmp = a.a();
 		var tmp = a.a();
-		String.fromCharCode.apply(null,tmp);
+		_$String_String_$Impl_$.fromCharCode.apply(null,tmp);
 	')
 	')
 	static public function main() {
 	static public function main() {
 		var a:Dynamic = null;
 		var a:Dynamic = null;

+ 16 - 18
tests/runci/targets/Lua.hx

@@ -23,26 +23,24 @@ class Lua {
 		}
 		}
 	}
 	}
 
 
-	static public function installLuaVersionDependencies(lv:String){
-		if (lv == "-l5.1"){
-			if (!commandSucceed("luarocks", ["show", "luabit"])) {
-				runCommand("luarocks", ["install", "luabitop", "1.0.2-3", "--server=https://luarocks.org/dev"]);
-			}
-		}
-		if (!commandSucceed("luarocks", ["show", "lrexlib-pcre"])) {
-			runCommand("luarocks", ["install", "lrexlib-pcre", "2.8.0-1", "--server=https://luarocks.org/dev"]);
-		}
-		if (!commandSucceed("luarocks", ["show", "luv"])) {
-			runCommand("luarocks", ["install", "luv", "1.9.1-0", "--server=https://luarocks.org/dev"]);
-		}
-		if (!commandSucceed("luarocks", ["show", "luasocket"])) {
-			runCommand("luarocks", ["install", "luasocket", "3.0rc1-2", "--server=https://luarocks.org/dev"]);
-		}
-		if (!commandSucceed("luarocks", ["show", "environ"])) {
-			runCommand("luarocks", ["install", "environ", "0.1.0-1", "--server=https://luarocks.org/dev"]);
+	static function installLib(lib : String, version : String, server = "https://luarocks.org/dev"){
+		var server_arg = '--server=$server';
+		if (!commandSucceed("luarocks", ["show", lib])) {
+			runCommand("luarocks", ["install",lib, version, server_arg]);
 		}
 		}
 	}
 	}
 
 
+	static public function installLuaVersionDependencies(lv:String){
+		if (lv == "-l5.1") installLib("luabitop", "1.0.2-3");
+
+		installLib("lrexlib-pcre" , "2.8.0-1");
+		installLib("luv"          , "1.9.1-0");
+		installLib("luasocket"    , "3.0rc1-2");
+		installLib("environ"      , "0.1.0-1");
+		installLib("luautf8"      , "0.1.1-1");
+
+	}
+
 	static public function run(args:Array<String>) {
 	static public function run(args:Array<String>) {
 		getLuaDependencies();
 		getLuaDependencies();
 		var envpath = Sys.getEnv("HOME") + '/lua_env';
 		var envpath = Sys.getEnv("HOME") + '/lua_env';
@@ -70,4 +68,4 @@ class Lua {
 			runCommand("haxe", ["compile.hxml"]);
 			runCommand("haxe", ["compile.hxml"]);
 		}
 		}
 	}
 	}
-}
+}

+ 1 - 0
tests/unit/compile-cpp.hxml

@@ -1,4 +1,5 @@
 compile-each.hxml
 compile-each.hxml
 --main unit.TestMain
 --main unit.TestMain
+-D hxcpp_smart_strings
 -cpp bin/cpp
 -cpp bin/cpp
 -D HXCPP_NO_DEBUG_LINK
 -D HXCPP_NO_DEBUG_LINK

+ 1 - 0
tests/unit/compile-cppia-host.hxml

@@ -1,6 +1,7 @@
 --main cpp.cppia.Host
 --main cpp.cppia.Host
 -D source-header=''
 -D source-header=''
 -D scriptable
 -D scriptable
+-D hxcpp_smart_strings
 -D dll_export=bin/cppia.classes
 -D dll_export=bin/cppia.classes
 --debug
 --debug
 --dce no
 --dce no

+ 1 - 1
tests/unit/src/unit/TestResource.hx

@@ -16,7 +16,7 @@ class TestResource extends Test {
 			eq( names[1], "re/s?!%[]))(\"'1.txt" );
 			eq( names[1], "re/s?!%[]))(\"'1.txt" );
 		}
 		}
 		eq( haxe.Resource.getString("re/s?!%[]))(\"'1.txt"), STR );
 		eq( haxe.Resource.getString("re/s?!%[]))(\"'1.txt"), STR );
-		#if (neko || php)
+		#if (neko || php ||  eval)
 		// allow binary strings
 		// allow binary strings
 		eq( haxe.Resource.getBytes("re/s?!%[]))(\"'1.bin").sub(0,9).toString(), "MZ\x90\x00\x03\x00\x00\x00\x04" );
 		eq( haxe.Resource.getBytes("re/s?!%[]))(\"'1.bin").sub(0,9).toString(), "MZ\x90\x00\x03\x00\x00\x00\x04" );
 		#else
 		#else

+ 193 - 0
tests/unit/src/unitstd/Unicode.unit.hx

@@ -0,0 +1,193 @@
+#if !(neko || (cpp && !cppia && !hxcpp_smart_strings)) // these platforms will not be made unicode-compatible
+
+
+var s = String.fromCharCode(0xE9);
+s == "é";
+s.length == 1;
+s.charCodeAt(0) == 0xE9;
+
+var s = String.fromCharCode("あ".code);
+s == "あ";
+s.length == 1;
+s.charCodeAt(0) == "あ".code;
+
+var s = "aa😂éé";
+s.indexOf(String.fromCharCode(0x80))<0;
+s.indexOf("é")==s.length-2;
+s.indexOf("aa")==0;
+s.indexOf("a")==0;
+s.lastIndexOf("a")==1;
+s.indexOf("😂")>0;
+s.lastIndexOf("😂")>0;
+s.lastIndexOf("é")==s.length-1;
+var s = "abc";
+s.indexOf("éé")<0;
+s.lastIndexOf("éé")<0;
+
+var s = String.fromCharCode(0x1f602);
+s == "😂";
+
+
+#if (php || lua || python)
+// strings are indexed by Unicode code point: an astral character counts as one char
+s.length == 1;
+s.charCodeAt(0) == "😂".code;
+#else
+// UTF-16 surrogate pairs encoding
+s.length == 2;
+s.charCodeAt(0) == 55357;
+s.charCodeAt(1) == 56834;
+#end
+
+var s = "é" + "あ";
+s == "éあ";
+s.length == 2;
+s.charCodeAt(1) == "あ".code;
+
+var s = "é" + "😂" + "あ";
+s == "é😂あ";
+var a = s.split('😂');
+a.length == 2;
+a[0] == "é";
+a[1] == "あ";
+a.join('😂') == s;
+
+var a = s.split('');
+#if ( php || lua || python )
+// strings are indexed by Unicode code point: split('') yields one element per code point
+a.length == 3;
+a[0] == "é";
+a[1] == "😂";
+a[2] == "あ";
+#else
+a.length == 4;
+a[0] == "é";
+a[3] == "あ";
+#end
+
+var buf = new StringBuf();
+buf.addChar(0xE9);
+buf.addChar("あ".code);
+buf.add("é");
+buf.add("あ");
+var str = buf.toString();
+str.length == 4;
+str == "éあéあ";
+str.charCodeAt(3) == "あ".code;
+
+var str = StringTools.urlEncode("éあ😂");
+str == "%C3%A9%E3%81%82%F0%9F%98%82";
+str = StringTools.urlDecode(str);
+str == "éあ😂";
+
+var str = haxe.Serializer.run("éあ");
+str == "y15:%C3%A9%E3%81%82";
+str = haxe.Unserializer.run(str);
+str == "éあ";
+
+var str = haxe.Serializer.run("😂");
+str == "y12:%F0%9F%98%82";
+str = haxe.Unserializer.run(str);
+str == "😂";
+
+var str = haxe.io.Bytes.ofString("éあ😂");
+str.toHex() == "c3a9e38182f09f9882";
+
+["é", "e"].join("é") == "éée";
+["é", "e"].join("e") == "éee";
+
+var bytes = haxe.io.Bytes.ofString("éあ😂",RawNative);
+
+#if (cpp || php || lua || eval || python )
+bytes.toHex() == "c3a9e38182f09f9882"; // UTF-8 native
+#else
+bytes.toHex() == "e90042303dd802de"; // UTF-16 native
+#end
+
+bytes.getString(0,bytes.length,RawNative) == "éあ😂";
+
+haxe.crypto.Md5.encode("éあ😂") == "d30b209e81e40d03dd474b26b77a8a18";
+haxe.crypto.Sha1.encode("éあ😂") == "ec79856a75c98572210430aeb7fe6300b6c4e20c";
+#if php //utf-8
+haxe.crypto.Sha224.encode("éあ😂") == "d7967c5f27bd6868e276647583c55ab09d5f45b40610a3d9c6d91b90";
+haxe.crypto.Sha256.encode("éあ😂") == "d0230b8d8ac2d6d0dbcee11ad0e0eaa68a6565347261871dc241571cab591676";
+#elseif (lua || python)
+null; // skip these until str2blk is updated
+#else //utf-16
+haxe.crypto.Sha224.encode("éあ😂") == "5132a98e08a503350384c765388a1a3b8b0b532f038eca94c881537e";
+haxe.crypto.Sha256.encode("éあ😂") == "e662834bdc1a099b9f7b8d97975a1b1d9b6730c991268bba0e7fe7427e68be74";
+#end
+haxe.crypto.BaseCode.encode("éあ😂","0123456789abcdef") == "c3a9e38182f09f9882";
+
+var buf = new haxe.io.BytesBuffer();
+buf.addString("éあ😂");
+buf.addString("éあ😂",RawNative);
+var bytes = buf.getBytes();
+bytes.getString(0,9) == "éあ😂";
+bytes.getString(2,3) == "あ";
+bytes.getString(5,4) == "😂";
+bytes.getString(2,7) == "あ😂";
+bytes.getString(9,bytes.length - 9,RawNative) == "éあ😂";
+
+var o = new haxe.io.BytesOutput();
+o.writeString("éあ😂");
+o.writeString("éあ😂",RawNative);
+var bytes2 = o.getBytes();
+bytes2.toHex() == bytes.toHex();
+
+var input = new haxe.io.BytesInput(bytes2);
+input.readString(2) == "é";
+input.readString(7) == "あ😂";
+input.readString(bytes.length - 9,RawNative) == "éあ😂";
+
+// Mixed encoding tests... mostly relevant for Eval which has both ASCII and UCS2 at run-time
+
+var s = "ée";
+var s1 = s.charAt(1);
+s1 == "e";
+#if eval
+(untyped s1.isAscii()) == true;
+(untyped s.charAt(0).isAscii()) == false;
+#end
+
+var s1 = s.substr(1, 1);
+var s2 = s.substr(1);
+var s3 = s.substr(-1);
+var s4 = s.substr(-1, 1);
+s1 == "e";
+s2 == "e";
+s3 == "e";
+#if !python
+s4 == "e";
+#end
+#if eval
+// We currently don't asciify anything we extract from UCS2 strings... not sure if this would
+// be worth it or not.
+(untyped s1.isAscii()) == false;
+(untyped s2.isAscii()) == false;
+(untyped s3.isAscii()) == false;
+(untyped s4.isAscii()) == false;
+#end
+
+var s1 = s.substring(1, 2);
+var s2 = s.substring(1);
+var s3 = s.substring(2, 1);
+var s4 = s.substring(1, 20);
+s1 == "e";
+s2 == "e";
+s3 == "e";
+s4 == "e";
+#if eval
+(untyped s1.isAscii()) == false;
+(untyped s2.isAscii()) == false;
+(untyped s3.isAscii()) == false;
+(untyped s4.isAscii()) == false;
+#end
+
+Reflect.compare("ed", "éee".substr(1)) < 0;
+Reflect.compare("éed".substr(1), "éee".substr(1)) < 0;
+Reflect.compare("éed".substr(1), "ee") < 0;
+Reflect.compare("ee", "éed".substr(1)) > 0;
+Reflect.compare("éee".substr(1), "éed".substr(1)) > 0;
+Reflect.compare("éee".substr(1), "ed") > 0;
+#end

+ 1 - 1
tests/unit/src/unitstd/haxe/Utf8.unit.hx

@@ -48,7 +48,7 @@ haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
 
 
 // #if (neko || php || cpp || lua || macro)
 // #if (neko || php || cpp || lua || macro)
 // TODO neko, cpp, macro
 // TODO neko, cpp, macro
-#if (php || lua)
+#if php
 haxe.Utf8.validate("\xf0\xa9\xb8\xbd\xe3\x81\x82\xc3\xab\x61") == true;
 haxe.Utf8.validate("\xf0\xa9\xb8\xbd\xe3\x81\x82\xc3\xab\x61") == true;
 haxe.Utf8.validate("\xed\x9f\xbf") == true;
 haxe.Utf8.validate("\xed\x9f\xbf") == true;
 haxe.Utf8.validate("\xee\x80\x80") == true;
 haxe.Utf8.validate("\xee\x80\x80") == true;

+ 1 - 1
tests/unit/src/unitstd/haxe/crypto/Hmac.unit.hx

@@ -8,4 +8,4 @@ hmacSha256.make(haxe.io.Bytes.ofString(""), haxe.io.Bytes.ofString("")).toHex()
 
 
 hmacMd5.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "80070713463e7749b90c2dc24911e275";
 hmacMd5.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "80070713463e7749b90c2dc24911e275";
 hmacSha1.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "de7c9b85b8b78aa6bc8a7a36f70a90701c9db4d9";
 hmacSha1.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "de7c9b85b8b78aa6bc8a7a36f70a90701c9db4d9";
-hmacSha256.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8";
+hmacSha256.make(haxe.io.Bytes.ofString("key"), haxe.io.Bytes.ofString("The quick brown fox jumps over the lazy dog")).toHex() == "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8";