Browse Source

added ONop, first try at bytecode optimize

Nicolas Cannasse 8 years ago
parent
commit
82ca416602
5 changed files with 430 additions and 45 deletions
  1. 1 1
      src/generators/genhl.ml
  2. 2 0
      src/generators/hl2c.ml
  3. 2 0
      src/generators/hlcode.ml
  4. 5 1
      src/generators/hlinterp.ml
  5. 420 43
      src/generators/hlopt.ml

+ 1 - 1
src/generators/genhl.ml

@@ -3175,7 +3175,7 @@ let write_code ch code debug =
 		let oid = Obj.tag o in
 
 		match op with
-		| OLabel _ ->
+		| OLabel _ | ONop _ ->
 			byte oid
 		| OCall2 (r,g,a,b) ->
 			byte oid;

+ 2 - 0
src/generators/hl2c.ml

@@ -1148,6 +1148,8 @@ let write_c version file (code:code) =
 				if b then decr trap_depth;
 			| ODump r ->
 				todo()
+			| ONop _ ->
+				()
 		) f.code;
 		unblock();
 		line "}";

+ 2 - 0
src/generators/hlcode.ml

@@ -196,6 +196,7 @@ type opcode =
 	| OSetEnumField of reg * int * reg
 	(* misc *)
 	| ODump of reg
+	| ONop of unused
 
 type fundecl = {
 	fpath : string * string;
@@ -563,6 +564,7 @@ let ostr fstr o =
 	| OTrap (r,i) -> Printf.sprintf "trap %d, %d" r i
 	| OEndTrap b -> Printf.sprintf "endtrap %b" b
 	| ODump r -> Printf.sprintf "dump %d" r
+	| ONop _ -> "nop"
 
 let fundecl_name f = if snd f.fpath = "" then "fun$" ^ (string_of_int f.findex) else (fst f.fpath) ^ "." ^ (snd f.fpath)
 

+ 5 - 1
src/generators/hlinterp.ml

@@ -1050,6 +1050,8 @@ let interp code =
 				traps := List.tl !traps
 			| ODump r ->
 				print_endline (vstr_d (get r));
+			| ONop _ ->
+				()
 			);
 			loop()
 		in
@@ -2215,6 +2217,8 @@ let check code =
 				()
 			| ODump r ->
 				ignore(rtype r);
+			| ONop _ ->
+				()
 		) f.code
 		(* TODO : check that all path correctly initialize NULL values and reach a return *)
 	in
@@ -2607,7 +2611,7 @@ let make_spec (code:code) (f:fundecl) =
 			| OEnumIndex (d,r) -> args.(d) <- SConv ("index",args.(r))
 			| OEnumField (d,r,fid,cid) -> args.(d) <- SEnumField (args.(r),fid,cid)
 			| OSetEnumField (e,fid,r) -> semit (SSetEnumField (args.(e),fid,args.(r)))
-			| ODump _ -> ()
+			| ODump _ | ONop _ -> ()
 		done;
 		Hashtbl.add block_args b.bstart args
 	in

+ 420 - 43
src/generators/hlopt.ml

@@ -21,12 +21,23 @@
  *)
 open Hlcode
 
+type cur_value =
+	| VUndef
+	| VReg of int
+
+type reg_state = {
+	mutable rindex : int;
+	mutable ralias : reg_state;
+	mutable rbind : reg_state list;
+}
+
 type block = {
 	bstart : int;
 	mutable bend : int;
 	mutable bnext : block list;
 	mutable bprev : block list;
 	mutable bloop : bool;
+	mutable bstate : reg_state array option;
 }
 
 type control =
@@ -57,11 +68,11 @@ let control = function
 		CTry d
 	| _ ->
 		CNo
-		
-let opcode_fx frw op = 
+
+let opcode_fx frw op =
 	let read r = frw r true and write r = frw r false in
 	match op with
-	| OMov (d,a) | ONeg (d,a) | ONot (d,a) -> 
+	| OMov (d,a) | ONeg (d,a) | ONot (d,a) ->
 		read a; write d
 	| OInt (d,_) | OFloat (d,_) | OBool (d,_) | OBytes (d,_) | OString (d,_) | ONull d ->
 		write d
@@ -78,7 +89,7 @@ let opcode_fx frw op =
 	| OCall3 (d,_,a,b,c) ->
 		read a; read b; read c; write d
 	| OCall4 (d,_,a,b,c,k) ->
-		read a; read b; read c; read k; write d	
+		read a; read b; read c; read k; write d
 	| OCallN (d,_,rl) | OCallMethod (d,_,rl) | OCallThis (d,_,rl) ->
 		List.iter read rl; write d
 	| OCallClosure (d,f,rl) ->
@@ -133,6 +144,280 @@ let opcode_fx frw op =
 		read a; read b
 	| ODump r ->
 		read r
+	| ONop _ ->
+		()
+
+let opcode_eq a b =
+	match a, b with
+	| OType (r1,t1), OType (r2,t2) ->
+		r1 = r2 && t1 == t2
+	| _ ->
+		a = b
+
+let opcode_map read write op =
+	match op with
+	| OMov (d,a) ->
+		let a = read a in
+		OMov (write d, a)
+	| ONeg (d,a) ->
+		let a = read a in
+		ONeg (write d, a)
+	| ONot (d,a) ->
+		let a = read a in
+		ONot (write d, a)
+	| OInt (d,idx) ->
+		OInt (write d, idx)
+	| OFloat (d,idx) ->
+		OFloat (write d, idx)
+	| OBool (d,idx) ->
+		OBool (write d, idx)
+	| OBytes (d,idx) ->
+		OBytes (write d, idx)
+	| OString (d,idx) ->
+		OString (write d, idx)
+	| ONull d ->
+		ONull (write d)
+	| OAdd (d,a,b) ->
+		let a = read a and b = read b in
+		OAdd (write d, a, b)
+	| OSub (d,a,b) ->
+		let a = read a and b = read b in
+		OSub (write d, a, b)
+	| OMul (d,a,b) ->
+		let a = read a and b = read b in
+		OMul (write d, a, b)
+	| OSDiv (d,a,b) ->
+		let a = read a and b = read b in
+		OSDiv (write d, a, b)
+	| OUDiv (d,a,b) ->
+		let a = read a and b = read b in
+		OUDiv (write d, a, b)
+	| OSMod (d,a,b) ->
+		let a = read a and b = read b in
+		OSMod (write d, a, b)
+	| OUMod (d,a,b) ->
+		let a = read a and b = read b in
+		OUMod (write d, a, b)
+	| OShl (d,a,b) ->
+		let a = read a and b = read b in
+		OShl (write d, a, b)
+	| OSShr (d,a,b) ->
+		let a = read a and b = read b in
+		OSShr (write d, a, b)
+	| OUShr (d,a,b) ->
+		let a = read a and b = read b in
+		OUShr (write d, a, b)
+	| OAnd (d,a,b) ->
+		let a = read a and b = read b in
+		OAnd (write d, a, b)
+	| OOr (d,a,b) ->
+		let a = read a and b = read b in
+		OOr (write d, a, b)
+	| OXor (d,a,b) ->
+		let a = read a and b = read b in
+		OXor (write d, a, b)
+	| OIncr a ->
+		let a = read a in
+		OIncr (write a)
+	| ODecr a ->
+		let a = read a in
+		ODecr (write a)
+	| OCall0 (d,f) ->
+		OCall0 (write d, f)
+	| OCall1 (d,f,a) ->
+		let a = read a in
+		OCall1 (write d, f, a)
+	| OCall2 (d,f,a,b) ->
+		let a = read a in
+		let b = read b in
+		OCall2 (write d, f, a, b)
+	| OCall3 (d,f,a,b,c) ->
+		let a = read a in
+		let b = read b in
+		let c = read c in
+		OCall3 (write d, f, a, b, c)
+	| OCall4 (w,f,a,b,c,d) ->
+		let a = read a in
+		let b = read b in
+		let c = read c in
+		let d = read d in
+		OCall4 (write w, f, a, b, c, d)
+	| OCallN (d,f,rl) ->
+		let rl = List.map read rl in
+		OCallN (write d, f, rl)
+	| OCallMethod (d,f,rl) ->
+		let rl = List.map read rl in
+		OCallMethod (write d, f, rl)
+	| OCallThis (d,f,rl) ->
+		let rl = List.map read rl in
+		OCallThis (write d, f, rl)
+	| OCallClosure (d,f,rl) ->
+		let rl = List.map read rl in
+		OCallClosure (write d, f, rl)
+	| OStaticClosure (d,f) ->
+		OStaticClosure (write d, f)
+	| OInstanceClosure (d, f, a) ->
+		let a = read a in
+		OInstanceClosure (write d, f, a)
+	| OVirtualClosure (d,a,f) ->
+		let a = read a in
+		OVirtualClosure (write d, a, f)
+	| OGetGlobal (d,g) ->
+		OGetGlobal (write d, g)
+	| OSetGlobal (g,r) ->
+		OSetGlobal (g, read r)
+	| OSetMethod (o,f,m) ->
+		OSetMethod (read o, f, m)
+	| OField (d,a,f) ->
+		let a = read a in
+		OField (write d, a, f)
+	| ODynGet (d,a,f) ->
+		let a = read a in
+		ODynGet (write d, a, f)
+	| OSetField (a,f,b) ->
+		OSetField (read a, f, read b)
+	| ODynSet (a,f,b) ->
+		ODynSet (read a, f, read b)
+	| OGetThis (d,f) ->
+		OGetThis (write d, f)
+	| OSetThis (f,a) ->
+		OSetThis (f, read a)
+	| OJTrue (r,d) ->
+		OJTrue (read r, d)
+	| OJFalse (r,d) ->
+		OJFalse (read r, d)
+	| OJNull (r,d) ->
+		OJNull (read r, d)
+	| OJNotNull (r,d) ->
+		OJNotNull (read r, d)
+	| OJSLt (a,b,d) ->
+		OJSLt (read a, read b, d)
+	| OJSGte (a,b,d) ->
+		OJSGte (read a, read b, d)
+	| OJSGt (a,b,d) ->
+		OJSGt (read a, read b, d)
+	| OJSLte (a,b,d) ->
+		OJSLte (read a, read b, d)
+	| OJULt (a,b,d) ->
+		OJULt (read a, read b, d)
+	| OJUGte (a,b,d) ->
+		OJUGte (read a, read b, d)
+	| OJEq (a,b,d) ->
+		OJEq (read a, read b, d)
+	| OJNotEq (a,b,d) ->
+		OJNotEq (read a, read b, d)
+	| OJAlways _ | OLabel _ ->
+		op
+	| OToDyn (d, a) ->
+		let a = read a in
+		OToDyn (write d, a)
+	| OToSFloat (d,a) ->
+		let a = read a in
+		OToSFloat (write d, a)
+	| OToUFloat (d,a) ->
+		let a = read a in
+		OToUFloat (write d, a)
+	| OToInt (d,a) ->
+		let a = read a in
+		OToInt (write d, a)
+	| OSafeCast (d,a) ->
+		let a = read a in
+		OSafeCast (write d, a)
+	| OUnsafeCast (d,a) ->
+		let a = read a in
+		OUnsafeCast (write d, a)
+	| OToVirtual (d,a) ->
+		let a = read a in
+		OToVirtual (write d, a)
+	| ORet r ->
+		ORet (read r)
+	| OThrow r ->
+		OThrow (read r)
+	| ORethrow r ->
+		ORethrow (read r)
+	| OSwitch (r,cases,def) ->
+		OSwitch (read r, cases, def)
+	| ONullCheck r ->
+		ONullCheck (read r)
+	| OTrap _ ->
+		op
+	| OEndTrap _ ->
+		op (* ??? *)
+	| OGetUI8 (d,a,b) ->
+		let a = read a and b = read b in
+		OGetUI8 (write d, a, b)
+	| OGetUI16 (d,a,b) ->
+		let a = read a and b = read b in
+		OGetUI16 (write d, a, b)
+	| OGetI32 (d,a,b) ->
+		let a = read a and b = read b in
+		OGetI32 (write d, a, b)
+	| OGetF32 (d,a,b) ->
+		let a = read a and b = read b in
+		OGetF32 (write d, a, b)
+	| OGetF64 (d,a,b) ->
+		let a = read a and b = read b in
+		OGetF64 (write d, a, b)
+	| OGetArray (d,a,b) ->
+		let a = read a and b = read b in
+		OGetArray (write d, a, b)
+	| OSetUI8 (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetUI8 (a, b, c)
+	| OSetUI16 (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetUI16 (a, b, c)
+	| OSetI32 (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetI32 (a, b, c)
+	| OSetF32 (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetF32 (a, b, c)
+	| OSetF64 (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetF64 (a, b, c)
+	| OSetArray (a,b,c) ->
+		let a = read a and b = read b and c = read c in
+		OSetArray (a, b, c)
+	| ONew d ->
+		ONew (write d)
+	| OArraySize (d, a) ->
+		let a = read a in
+		OArraySize (write d, a)
+	| OGetType (d,a) ->
+		let a = read a in
+		OGetType (write d, a)
+	| OGetTID (d,a) ->
+		let a = read a in
+		OGetTID (write d, a)
+	| ORef (d, a) ->
+		let a = read a in
+		ORef (write d, a)
+	| OUnref (d,a) ->
+		let a = read a in
+		OUnref (write d, a)
+	| OSetref (d, a) ->
+		let a = read a in
+		OSetref (write d, a)
+	| OEnumIndex (d, a) ->
+		let a = read a in
+		OEnumIndex (write d, a)
+	| OEnumField (d,a,cs,idx) ->
+		let a = read a in
+		OEnumField (write d, a, cs, idx)
+	| OType (d,t) ->
+		OType (write d, t)
+	| OEnumAlloc (d,e) ->
+		OEnumAlloc (write d, e)
+	| OMakeEnum (d,e,rl) ->
+		let rl = List.map read rl in
+		OMakeEnum (write d, e, rl)
+	| OSetEnumField (a,f,b) ->
+		OSetEnumField (read a, f, read b)
+	| ODump r ->
+		ODump (read r)
+	| ONop _ ->
+		op
 
 (* build code graph *)
 
@@ -155,6 +440,7 @@ let code_graph (f:fundecl) =
 				bnext = [];
 				bprev = [];
 				bloop = false;
+				bstate = None;
 			} in
 			Hashtbl.add blocks_pos pos b;
 			let rec loop i =
@@ -190,65 +476,157 @@ let code_graph (f:fundecl) =
 	blocks_pos, make_block 0
 
 let optimize dump (f:fundecl) =
+	let nregs = Array.length f.regs in
+	let old_code = match dump with None -> f.code | Some _ -> Array.copy f.code in
 	let op index = f.code.(index) in
+	let set_op index op = f.code.(index) <- op in
 	let write str = match dump with None -> () | Some ch -> IO.nwrite ch (str ^ "\n") in
 
 	let blocks_pos, root = code_graph f in
-	
-	(* build registers liveness *)
-(*	
-	let rec liveness (b:block) regs =
-		let regs = ref regs in
+
+	let read_counts = Array.make nregs 0 in
+	let read_count r = read_counts.(r) <- read_counts.(r) + 1 in
+	let empty_state() = Array.init nregs (fun i ->
+		let r = { rindex = i; ralias = Obj.magic 0; rbind = [] } in
+		r.ralias <- r;
+		r
+	) in
+
+	let print_state i s =
+		let state_str s =
+			if s.ralias == s && s.rbind == [] then "" else
+			Printf.sprintf "%d%s[%s]" s.rindex (if s.ralias == s then "" else "=" ^ string_of_int s.ralias.rindex) (String.concat "," (List.map (fun s -> string_of_int s.rindex) s.rbind))
+		in
+		write (Printf.sprintf "@%X %s" i (String.concat " " (Array.to_list (Array.map state_str s))))
+	in
+
+	let dstate = false in
+
+	let rec propagate b =
+		let state = if b.bloop then
+			empty_state()
+		else match b.bprev with
+		| [] -> empty_state()
+		| b2 :: l ->
+			let s = get_state b2 in
+			let s = (match b2.bnext with
+			| [] -> assert false
+			| [_] -> s (* reuse *)
+			| _ :: l ->
+				let s2 = empty_state() in
+				for i = 0 to nregs - 1 do
+					let sold = s.(i) and snew = s2.(i) in
+					snew.ralias <- s2.(sold.ralias.rindex);
+					snew.rbind <- List.map (fun b -> s2.(b.rindex)) sold.rbind;
+				done;
+				s2
+			) in
+			List.iter (fun b2 ->
+				let s2 = get_state b2 in
+				for i = 0 to nregs - 1 do
+					let s1 = s.(i) and s2 = s2.(i) in
+					if s1.ralias.rindex <> s2.ralias.rindex then s1.ralias <- s1
+				done;
+				for i = 0 to nregs - 1 do
+					let s1 = s.(i) and s2 = s2.(i) in
+					s1.rbind <- List.filter (fun s -> s.ralias == s1) s1.rbind;
+					(match s2.rbind with
+					| [] -> ()
+					| l -> s1.rbind <- List.fold_left (fun acc sb2 -> let s = s.(sb2.rindex) in if s.ralias == s1 && not (List.memq s s1.rbind) then s :: acc else acc) s1.rbind s2.rbind)
+				done;
+			) l;
+			s
+		in
+		let undef r =
+			r.ralias.rbind <- List.filter (fun r2 -> r2 != r) r.ralias.rbind;
+			r.ralias <- r
+		in
 		let rec loop i =
-			if i > b.bend then ()
-			else begin
+			let do_write r =
+				let s = state.(r) in
+				List.iter (fun s2 -> s2.ralias <- s2) s.rbind;
+				s.rbind <- [];
+				undef s
+			in
+			if i > b.bend then () else
+			let op = op i in
+			if dstate then print_state i state;
+			(match op with
+			| OIncr r | ODecr r | ORef (_,r) ->  do_write r
+			| _ -> ());
+			let op = opcode_map (fun r ->
+				let s = state.(r) in
+				s.ralias.rindex
+			) (fun w ->	w) op in
+			set_op i op;
+			(match op with
+			| OMov (d, v) when d = v ->
+				set_op i (ONop 0)
+			| OMov (d, v) ->
+				let sv = state.(v) in
+				let sd = state.(d) in
+				do_write d;
+				sd.ralias <- sv;
+				if not (List.memq sd sv.rbind) then sv.rbind <- sd :: sv.rbind;
+				read_count v
+			| _ ->
 				opcode_fx (fun r read ->
-					let ranges = (try PMap.find r !regs with Not_found -> []) in
-					let ranges = if read then 
-						(match ranges with [] -> [(i,b.bstart)] | (_,write) :: l -> (i,write) :: l) 
-					else
-						(-1,i) :: ranges
-					in
-					regs := PMap.add r ranges !regs;
-				) (op i);
-				loop (i + 1)
-			end;
+					if read then read_count r else do_write r
+				) op
+			);
+			loop (i + 1)
 		in
 		loop b.bstart;
-		let start = !regs in
-		if b.bloop then begin
-			start;
-		end else
-			List.fold_left (fun regs b2 ->
-				let regs2 = liveness b2 start in
-				(* todo : regs union regs2 *)
-				regs2
-			) start b.bnext 
+		b.bstate <- Some state;
+		List.iter (fun b2 -> if b2.bstart > b.bstart then ignore (get_state b2)) b.bnext
+
+	and get_state b =
+		match b.bstate with
+		| None ->
+			propagate b;
+			get_state b
+		| Some state ->
+			state
 	in
-	let rec loop i args map =
-		match args with
-		| [] -> map
-		| _ :: args ->
-			loop (i + 1) args (PMap.add i [(-1,-1)] map) 
+	propagate root;
+
+	(* nop *)
+
+	let todo = ref true in
+	let rec loop i =
+		if i < 0 then () else begin
+		(match op i with
+		| OMov (d,r) when read_counts.(d) = 0 ->
+			let n = read_counts.(r) in
+			if n = 0 then todo := true;
+			read_counts.(r) <- n;
+			set_op i (ONop 0)
+		| _ -> ());
+		loop (i - 1)
+		end
 	in
-	let live = liveness root (loop 0 (match f.ftype with HFun (args,_) -> args | _ -> assert false) PMap.empty) in
-	*)
+	while !todo do
+		todo := false;
+		loop (Array.length f.code - 1);
+	done;
+
 	(* done *)
-	
 	if dump <> None then begin
 		let rec loop i block =
 			if i = Array.length f.code then () else
-			let block = try 
+			let block = try
 				let nblock = Hashtbl.find blocks_pos i in
-				write (Printf.sprintf "\t----- [%s] (%d)"
-					(String.concat "," (List.map (fun b -> string_of_int b.bstart) nblock.bnext))
+				write (Printf.sprintf "\t----- [%s] (%X)"
+					(String.concat "," (List.map (fun b -> Printf.sprintf "%X" b.bstart) nblock.bnext))
 					nblock.bend
 				);
 				nblock
 			with Not_found ->
 				block
 			in
-			write (Printf.sprintf "\t@%d %s" i (ostr string_of_int (op i)));
+			let old = old_code.(i) in
+			let op = op i in
+			write (Printf.sprintf "\t@%-3X %-20s %s" i (ostr string_of_int old) (if opcode_eq old op then "" else ostr string_of_int op));
 			loop (i + 1) block
 		in
 		write (fundecl_name f);
@@ -256,5 +634,4 @@ let optimize dump (f:fundecl) =
 		write "";
 		write "";
 	end;
-
 	f