Browse Source

ucs2 ok, TestXml ok, continuing unit tests

Nicolas Cannasse 9 years ago
parent
commit
dd706f39d2

+ 131 - 44
genhl.ml

@@ -387,8 +387,8 @@ let rec safe_cast t1 t2 =
 	| _ ->
 		tsame t1 t2
 
-let to_utf8 str =
-	try
+let to_utf8 str p =
+	let u8 = try
 		UTF8.validate str;
 		str;
 	with
@@ -397,6 +397,15 @@ let to_utf8 str =
 			let b = UTF8.Buf.create 0 in
 			String.iter (fun c -> UTF8.Buf.add_char b (UChar.of_char c)) str;
 			UTF8.Buf.contents b
+	in
+	let ccount = ref 0 in
+	UTF8.iter (fun c ->
+		let c = UChar.code c in
+		if (c >= 0xD800 && c <= 0xDFFF) || c >= 0x110000 then error "Invalid unicode char" p;
+		incr ccount;
+		if c > 0x10000 then incr ccount;
+	) u8;
+	u8, !ccount
 
 let type_size_bits = function
 	| HI8 | HBool -> 0
@@ -554,7 +563,9 @@ let rec to_type ctx t =
 		to_type ctx (!f())
 	| TFun (args, ret) ->
 		HFun (List.map (fun (_,o,t) -> to_type ctx (if o then ctx.com.basic.tnull t else t)) args, to_type ctx ret)
-	| TAnon a when (match !(a.a_status) with Statics _ | EnumStatics _  -> true | _ -> false) ->
+	| TAnon a when (match !(a.a_status) with Statics c -> true | _ -> false) ->
+		class_type ctx (match !(a.a_status) with Statics c -> c | _ -> assert false) [] true
+	| TAnon a when (match !(a.a_status) with EnumStatics _ -> true | _ -> false) ->
 		HType
 	| TAnon a ->
 		(try
@@ -943,6 +954,12 @@ and cast_to ctx (r:reg) (t:ttype) p =
 		let tmp = alloc_tmp ctx t in
 		op ctx (OToFloat (tmp, r));
 		tmp
+	| (HI8 | HI16 | HI32), HNull ((HF32 | HF64) as t) ->
+		let tmp = alloc_tmp ctx t in
+		op ctx (OToFloat (tmp, r));
+		let r = alloc_tmp ctx (HNull t) in
+		op ctx (OToDyn (r,tmp));
+		r
 	| (HI8 | HI16 | HI32), HObj { pname = "String" } ->
 		let out = alloc_tmp ctx t in
 		let len = alloc_tmp ctx HI32 in
@@ -1076,7 +1093,11 @@ and get_access ctx e =
 			let i = eval_to ctx i HI32 in
 			AArray (a,to_type ctx t,i)
 		| _ ->
-			error ("Invalid array access on " ^ s_type (print_context()) a.etype) e.epos)
+			let a = eval_to ctx a (class_type ctx ctx.array_impl.adyn [] false) in
+			op ctx (ONullCheck a);
+			let i = eval_to ctx i HI32 in
+			AArray (a,HDyn,i)
+		)
 	| _ ->
 		ANone
 
@@ -1197,13 +1218,13 @@ and eval_expr ctx e =
 			op ctx (OBool (r,b));
 			r
 		| TString s ->
-			let str = to_utf8 s in
+			let str, len = to_utf8 s e.epos in
 			let r = alloc_tmp ctx HBytes in
 			let s = alloc_tmp ctx (to_type ctx e.etype) in
 			op ctx (ONew s);
 			op ctx (OString (r,alloc_string ctx str));
 			op ctx (OSetField (s,0,r));
-			op ctx (OSetField (s,1,reg_int ctx (UTF8.length str)));
+			op ctx (OSetField (s,1,reg_int ctx len));
 			s
 		| TThis ->
 			0 (* first reg *)
@@ -1917,7 +1938,9 @@ and eval_expr ctx e =
 		ctx.m.mcontinues <- oldc;
 		alloc_tmp ctx HVoid
 	| TCast (v,None) ->
-		eval_to ctx v (to_type ctx e.etype)
+		let t = to_type ctx e.etype in
+		let v = eval_expr ctx v in
+		unsafe_cast_to ctx v t e.epos
 	| TArrayDecl el ->
 		let r = alloc_tmp ctx (to_type ctx e.etype) in
 		let et = (match follow e.etype with TInst (_,[t]) -> to_type ctx t | _ -> assert false) in
@@ -2300,7 +2323,13 @@ and make_fun ?gen_content ctx fidx f cthis cparent =
 				op ctx (OBool (tmp, b));
 				op ctx (OToDyn (r, tmp));
 			| TString s ->
-				assert false);
+				let str, len = to_utf8 s f.tf_expr.epos in
+				let rb = alloc_tmp ctx HBytes in
+				op ctx (ONew r);
+				op ctx (OString (rb,alloc_string ctx str));
+				op ctx (OSetField (r,0,rb));
+				op ctx (OSetField (r,1,reg_int ctx len));
+			);
 			(* if optional but not null, turn into a not nullable here *)
 			let vt = to_type ctx v.v_type in
 			if not (is_nullable vt) then begin
@@ -2996,47 +3025,59 @@ let interp code =
 			proto
 	in
 
-	let caml_to_hl str =
-		let b = Buffer.create (String.length str * 2) in
-		let add c =
-			Buffer.add_char b (char_of_int (c land 0xFF));
-			Buffer.add_char b (char_of_int (c lsr 8));
-		in
-		UTF8.iter (fun c ->
-			let c = UChar.code c in
-			if c >= 0 && c < 0x10000 then begin
-				if c >= 0xD800 && c <= 0xDFFF then failwith ("Invalid unicode char " ^ string_of_int c);
-				add c;
-			end else if c < 0x110000 then begin
-				let c = c - 0x10000 in
-				add ((c asr 10) + 0xD800);
-				add ((c land 1023) + 0xDC00);
-			end else
-				failwith ("Invalid unicode char " ^ string_of_int c);
-		) str;
-		add 0;
-		Buffer.contents b
-	in
-
-	let hl_to_caml str =
-		let b = UTF8.Buf.create (String.length str / 2) in
-		let add c =
-			UTF8.Buf.add_char b (UChar.chr c);
-		in
-		let get v = int_of_char str.[v] in
+	let utf16_iter f s =
+		let get v = int_of_char s.[v] in
 		let rec loop p =
+			if p = String.length s then () else
 			let c = (get p) lor ((get (p+1)) lsl 8) in
-			if c = 0 then () else if c >= 0xD800 && c <= 0xDFFF then begin
+			if c >= 0xD800 && c <= 0xDFFF then begin
 				let c = c - 0xD800 in
 				let c2 = ((get (p+2)) lor ((get(p+3)) lsl 8)) - 0xDC00 in
-				add ((c2 lor (c lsl 10)) + 0x10000);
+				f ((c2 lor (c lsl 10)) + 0x10000);
 				loop (p + 4);
 			end else begin
-				add c;
+				f c;
 				loop (p + 2);
 			end;
 		in
-		loop 0;
+		loop 0
+	in
+
+	let utf16_eof s =
+		let get v = int_of_char s.[v] in
+		let rec loop p =
+			let c = (get p) lor ((get (p+1)) lsl 8) in
+			if c = 0 then String.sub s 0 p else loop (p + 2);
+		in
+		loop 0
+	in
+
+	let utf16_add buf c =
+		let add c =
+			Buffer.add_char buf (char_of_int (c land 0xFF));
+			Buffer.add_char buf (char_of_int (c lsr 8));
+		in
+		if c >= 0 && c < 0x10000 then begin
+			if c >= 0xD800 && c <= 0xDFFF then failwith ("Invalid unicode char " ^ string_of_int c);
+			add c;
+		end else if c < 0x110000 then begin
+			let c = c - 0x10000 in
+			add ((c asr 10) + 0xD800);
+			add ((c land 1023) + 0xDC00);
+		end else
+			failwith ("Invalid unicode char " ^ string_of_int c);
+	in
+
+	let caml_to_hl str =
+		let b = Buffer.create (String.length str * 2) in
+		UTF8.iter (fun c -> utf16_add b (UChar.code c)) str;
+		utf16_add b 0;
+		Buffer.contents b
+	in
+
+	let hl_to_caml str =
+		let b = UTF8.Buf.create (String.length str / 2) in
+		utf16_iter (fun c -> UTF8.Buf.add_char b (UChar.chr c)) (utf16_eof str);
 		UTF8.Buf.contents b
 	in
 
@@ -3053,7 +3094,7 @@ let interp code =
 	let hash b =
 		let h = ref Int32.zero in
 		let rec loop i =
-			let c = int_of_char b.[i] in
+			let c = if i = String.length b then 0 else int_of_char b.[i] in
 			if c <> 0 then begin
 				h := Int32.add (Int32.mul !h 223l) (Int32.of_int c);
 				loop (i + 1)
@@ -3100,7 +3141,7 @@ let interp code =
 			(match get_method o.oproto.pclass "__string" with
 			| None -> p
 			| Some f -> p ^ ":" ^ vstr_d (fcall (func f) [v]))
-		| VBytes b -> "bytes(" ^ String.escaped b ^ ")"
+		| VBytes b -> "bytes(" ^ String.escaped (hl_to_caml b) ^ ")"
 		| VClosure (f,o) ->
 			(match o with
 			| None -> fstr f
@@ -3163,6 +3204,8 @@ let interp code =
 				raise (InterpThrow v)
 			| Failure msg ->
 				throw_msg msg
+			| e ->
+				throw_msg (Printexc.to_string e)
 
 	and dyn_set_field obj field v vt =
 		let v, vt = (match vt with
@@ -3316,7 +3359,7 @@ let interp code =
 				| [], [] -> []
 				| _, [] -> throw_msg (Printf.sprintf "Too many arguments (%s) != (%s)" (String.concat "," (List.map (fun (v,_) -> vstr_d v) full_args)) (String.concat "," (List.map tstr full_fargs)))
 				| (v,t) :: args, ft :: fargs -> dyn_cast v t ft :: loop args fargs
-				| [], _ :: _ -> default ft :: loop args fargs
+				| [], _ :: fargs -> default ft :: loop args fargs
 			in
 			let vargs = loop args full_fargs in
 			let v = fcall f (match a with None -> vargs | Some a -> a :: vargs) in
@@ -3989,7 +4032,7 @@ let interp code =
 				| _ -> assert false)
 			| "hash" ->
 				(function
-				| [VBytes str] -> VInt (hash str)
+				| [VBytes str] -> VInt (hash (hl_to_caml str))
 				| _ -> assert false)
 			| "type_check" ->
 				(function
@@ -4049,6 +4092,50 @@ let interp code =
 					in
 					to_int (loop 0)
 				| _ -> assert false)
+			| "utf8_to_utf16" ->
+				(function
+				| [VBytes s; VInt pos; VRef (regs,idx,HI32)] ->
+					let s = String.sub s (int pos) (String.length s - (int pos)) in
+					let u16 = caml_to_hl (try String.sub s 0 (String.index s '\000') with Not_found -> assert false) in
+					regs.(idx) <- to_int (String.length u16 - 2);
+					VBytes u16
+				| _ -> assert false)
+			| "utf16_to_utf8" ->
+				(function
+				| [VBytes s; VInt pos; VRef (regs,idx,HI32)] ->
+					let s = String.sub s (int pos) (String.length s - (int pos)) in
+					let u8 = hl_to_caml s in
+					regs.(idx) <- to_int (String.length u8);
+					VBytes (u8 ^ "\x00")
+				| _ -> assert false)
+			| "ucs2_upper" ->
+				(function
+				| [VBytes s; VInt pos; VInt len] ->
+					let buf = Buffer.create 0 in
+					utf16_iter (fun c ->
+						let c =
+							if c >= int_of_char 'a' && c <= int_of_char 'z' then c + int_of_char 'A' - int_of_char 'a'
+							else c
+						in
+						utf16_add buf c
+					) (String.sub s (int pos) (int len));
+					utf16_add buf 0;
+					VBytes (Buffer.contents buf)
+				| _ -> assert false)
+			| "ucs2_lower" ->
+				(function
+				| [VBytes s; VInt pos; VInt len] ->
+					let buf = Buffer.create 0 in
+					utf16_iter (fun c ->
+						let c =
+							if c >= int_of_char 'A' && c <= int_of_char 'Z' then c + int_of_char 'a' - int_of_char 'A'
+							else c
+						in
+						utf16_add buf c
+					) (String.sub s (int pos) (int len));
+					utf16_add buf 0;
+					VBytes (Buffer.contents buf)
+				| _ -> assert false)
 			| "call_method" ->
 				(function
 				| [f;VArray (args,HDyn)] -> dyn_call f (List.map (fun v -> v,HDyn) (Array.to_list args)) HDyn

+ 2 - 2
std/StringTools.hx

@@ -389,7 +389,7 @@ class StringTools {
 		#elseif python
 		return if (index >= s.length) -1 else python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
 		#elseif hl
-		return @:privateAccess s.bytes[index];
+		return @:privateAccess s.bytes.getUI16(index<<1);
 		#else
 		return untyped s.cca(index);
 		#end
@@ -457,7 +457,7 @@ class StringTools {
 	public static function quoteWinArg(argument:String, escapeMetaCharacters:Bool):String {
 		// If there is no space, tab, back-slash, or double-quotes, and it is not an empty string.
 		if (!~/^[^ \t\\"]+$/.match(argument)) {
-			
+
 			// Based on cpython's subprocess.list2cmdline().
 			// https://hg.python.org/cpython/file/50741316dd3a/Lib/subprocess.py#l620
 

+ 2 - 2
std/hl/_std/EReg.hx

@@ -126,7 +126,7 @@ private typedef ERegValue = hl.types.NativeAbstract<"ereg">;
 			var i = 1;
 			while( i < a.length ) {
 				var k = a[i];
-				var c = k.charCodeAt(0);
+				var c = StringTools.fastCodeAt(k, 0);
 				// 1...9
 				if( c >= 49 && c <= 57 ) {
 					var plen = 0;
@@ -138,7 +138,7 @@ private typedef ERegValue = hl.types.NativeAbstract<"ereg">;
 						if( p >= 0 ) b.addSub(s,p,plen);
 						b.addSub(k,1,k.length - 1);
 					}
-				} else if( c == null ) {
+				} else if( c == 0 ) {
 					b.add("$");
 					i++;
 					var k2 = a[i];

+ 8 - 8
std/hl/_std/String.hx

@@ -11,18 +11,16 @@ class String {
 	}
 
 	public function toUpperCase() : String {
-		throw "TODO";
-		return null;
+		return __alloc__(@:privateAccess bytes.ucs2Upper(0,length<<1), length);
 	}
 
 	public function toLowerCase() : String {
-		throw "TODO";
-		return null;
+		return __alloc__(@:privateAccess bytes.ucs2Lower(0,length<<1), length);
 	}
 
 	public function charAt(index : Int) : String {
-		throw "TODO";
-		return null;
+		if( (index:UInt) >= (length:UInt) ) return "";
+		return __alloc__(bytes.sub(index<<1,2),1);
 	}
 
 	public function charCodeAt( index : Int) : Null<Int> {
@@ -39,7 +37,9 @@ class String {
 				return -1;
 			startByte = startIndex << 1;
 		}
-		return bytes.find(startByte,(length << 1) - startByte,str.bytes,0,str.length << 1);
+		var p = bytes.find(startByte, (length << 1) - startByte, str.bytes, 0, str.length << 1);
+		if( p > 0 ) p >>= 1;
+		return p;
 	}
 
 	public function lastIndexOf( str : String, ?startIndex : Int ) : Int {
@@ -55,7 +55,7 @@ class String {
 		while( true ) {
 			var p = bytes.find(pos, size - pos, str.bytes, 0, str.length << 1);
 			if( p < 0 || p >= lastByte ) break;
-			last = p;
+			last = p >> 1;
 			pos = p + 1;
 		}
 		return last;

+ 2 - 2
std/hl/_std/StringBuf.hx

@@ -86,9 +86,9 @@
 	}
 
 	public function toString() : String {
-		if( pos == size ) __expand(0);
+		if( pos+2 > size ) __expand(0);
 		b.setUI16(pos,0);
-		return @:privateAccess String.__alloc__(b, pos);
+		return @:privateAccess String.__alloc__(b, pos>>1);
 	}
 
 }

+ 9 - 3
std/hl/_std/haxe/io/Bytes.hx

@@ -117,10 +117,14 @@ class Bytes {
 
 	public function getString( pos : Int, len : Int ) : String {
 		if( pos < 0 || len < 0 || pos + len > length ) throw Error.OutsideBounds;
+
 		var b = new hl.types.Bytes(len + 1);
 		b.blit(0, this.b, pos, len);
 		b[len] = 0;
-		return @:privateAccess String.__alloc__(b, len, b.utf8Length(0, len));
+
+		var outLen = 0;
+		var b2 = @:privateAccess b.utf8ToUtf16(0, outLen);
+		return @:privateAccess String.__alloc__(b2, outLen>>1);
 	}
 
 	@:deprecated("readString is deprecated, use getString instead")
@@ -157,8 +161,10 @@ class Bytes {
 		return new Bytes(length,b);
 	}
 
-	public static function ofString( s : String ) : Bytes {
-		return @:privateAccess new Bytes(s.size,s.bytes.sub(0,s.size));
+	public static function ofString( s : String ) : Bytes @:privateAccess {
+		var size = 0;
+		var b = s.bytes.utf16ToUtf8(0,size);
+		return new Bytes(size,b);
 	}
 
 	public static function ofData( b : BytesData ) : Bytes {

+ 1 - 1
std/hl/_std/haxe/io/BytesBuffer.hx

@@ -65,7 +65,7 @@ class BytesBuffer {
 	}
 
 	public inline function addString( v : String ) : Void {
-		@:privateAccess __add(v.bytes, 0, v.size);
+		@:privateAccess __add(v.bytes, 0, v.length << 1);
 	}
 
 	public inline function addInt32( v : Int ) : Void {

+ 3 - 2
std/hl/types/ArrayObj.hx

@@ -136,8 +136,9 @@ class ArrayObj<T> extends ArrayBase {
 	}
 
 	public function iterator() : Iterator<T> {
-		throw "TODO";
-		return null;
+		var n = new NativeArray.NativeArrayIterator<Dynamic>(array);
+		@:privateAccess n.length = length;
+		return cast n;
 	}
 
 	public function map<S>( f : T -> S ) : ArrayObj<S> {

+ 16 - 29
std/hl/types/Bytes.hx

@@ -82,47 +82,34 @@ package hl.types;
 		return b;
 	}
 
-
-	/**
-		Count the number of UTF8 chars into the given Bytes data.
-	**/
-	@:hlNative("std","utf8length")
-	public function _utf8Length( pos : Int, size : Int ) : Int {
-		return 0;
-	}
-
 	@:hlNative("std", "ucs2length")
 	function ucs2Length( bytePos : Int ) : Int {
 		return 0;
 	}
 
-	/**
-		Count the number of bytes until we reach \0
-	**/
-	@:hlNative("std","byteslength")
-	function _bytesLength( pos : Int ) : Int {
-		return 0;
-	}
-
 	@:hlNative("std","hash")
 	function hash() : Int {
 		return 0;
 	}
 
-	/**
-		Decode the utf8 char at the given position
-	**/
-	@:hlNative("std","utf8char")
-	public function _utf8Char( pos : Int, charPos : Int ) : Int {
-		return 0;
+	@:hlNative("std","utf8_to_utf16")
+	function utf8ToUtf16( bytePos : Int, outSize : Ref<Int> ) : Bytes {
+		return null;
 	}
 
-	/**
-		Gives the byte position for the utf8 char starting at pos.
-	**/
-	@:hlNative("std","utf8pos")
-	public function _utf8Pos( pos : Int, charPos : Int ) : Int {
-		return 0;
+	@:hlNative("std","utf16_to_utf8")
+	function utf16ToUtf8( bytePos : Int, outSize : Ref<Int> ) : Bytes {
+		return null;
+	}
+
+	@:hlNative("std", "ucs2_upper")
+	function ucs2Upper( bytePos : Int, size : Int ) : Bytes {
+		return null;
+	}
+
+	@:hlNative("std", "ucs2_lower")
+	function ucs2Lower( bytePos : Int, size : Int ) : Bytes {
+		return null;
 	}
 
 	@:hlNative("std","value_to_string")

+ 1 - 1
tests/unit/unit_hl.hxproj

@@ -43,7 +43,7 @@
     <hidden path="obj" />
   </hiddenPaths>
   <!-- Executed before build -->
-  <preBuildCommand>haxe compile-hl.hxml -D interp -D fail_eager</preBuildCommand>
+  <preBuildCommand>haxe compile-hl.hxml -D interp -D fail_eager -D dump</preBuildCommand>
   <!-- Executed after build -->
   <postBuildCommand alwaysRun="False" />
   <!-- Other project options -->