Sfoglia il codice sorgente

hlopt : unreachable code, redundant null checks, regs remap

Nicolas Cannasse 8 anni fa
parent
commit
31aa6d65b7
4 ha cambiato i file con 77 aggiunte e 21 eliminazioni
  1. 1 1
      src/generators/genhl.ml
  2. 14 5
      src/generators/hl2c.ml
  3. 3 3
      src/generators/hlinterp.ml
  4. 59 12
      src/generators/hlopt.ml

+ 1 - 1
src/generators/genhl.ml

@@ -2840,7 +2840,7 @@ and make_fun ?gen_content ctx name fidx f cthis cparent =
 	} in
 	ctx.m <- old;
 	Hashtbl.add ctx.defined_funs fidx ();
-	(*let f = if ctx.optimize then Hlopt.optimize ctx.dump_out f else f in*)
+	let f = if ctx.optimize then Hlopt.optimize ctx.dump_out f else f in
 	DynArray.add ctx.cfunctions f;
 	capt
 

+ 14 - 5
src/generators/hl2c.ml

@@ -757,7 +757,7 @@ let write_c version file (code:code) =
 			let il = List.rev_map (fun s -> prefix ^ s) il in
 			sexpr "%s %s" s (String.concat ", " il)
 		) var_map;
-		let output_options = Array.make (Array.length f.code) [] in
+		let output_options = Array.make (Array.length f.code + 1) [] in
 		let output_at i oo = output_options.(i) <- oo :: output_options.(i) in
 		let output_at2 i ool = List.iter (output_at i) ool in
 		let has_label i = List.exists (function OOLabel -> true | _ -> false) output_options.(i) in
@@ -778,8 +778,8 @@ let write_c version file (code:code) =
 			sexpr "hl_trap_ctx trap$%d" i;
 		done;
 
-		Array.iteri (fun i op ->
-			(match output_options.(i) with
+		let flush_options i =
+			match output_options.(i) with
 			| [] -> ()
 			| opts ->
 				(* put label after } *)
@@ -793,7 +793,11 @@ let write_c version file (code:code) =
 					| OODecreaseIndent -> unblock()
 					| OOBeginBlock ->  line "{"
 					| OOEndBlock -> line "}"
-				) opts);
+				) opts
+		in
+
+		Array.iteri (fun i op ->
+			flush_options i;
 			let label delta =
 				let addr = delta + i + 1 in
 				let label = label addr in
@@ -1137,7 +1141,11 @@ let write_c version file (code:code) =
 				block();
 				output_at2 (i + 1) [OODefault;OOIncreaseIndent];
 				Array.iteri (fun k delta -> output_at2 (delta + i + 1) [OODecreaseIndent;OOCase k;OOIncreaseIndent]) idx;
-				output_at2 (i + 1 + eend) [OODecreaseIndent;OODecreaseIndent;OOEndBlock];
+				let pend = i+1+eend in
+				(* insert at end if we have another switch case here *)
+				let old = output_options.(pend) in
+				output_options.(pend) <- [];
+				output_at2 pend ([OODecreaseIndent;OODecreaseIndent;OOEndBlock] @ List.rev old);
 			| ONullCheck r ->
 				sexpr "if( %s == NULL ) hl_null_access()" (reg r)
 			| OTrap (r,d) ->
@@ -1151,6 +1159,7 @@ let write_c version file (code:code) =
 			| ONop _ ->
 				()
 		) f.code;
+		flush_options (Array.length f.code);
 		unblock();
 		line "}";
 		line "";

+ 3 - 3
src/generators/hlinterp.ml

@@ -1894,7 +1894,7 @@ let check code =
 			failwith (Printf.sprintf "\n%s:%d: Check failure at %d@x%x - %s" code.debugfiles.(dfile) dline f.findex (!pos) msg)
 		in
 		let targs, tret = (match f.ftype with HFun (args,ret) -> args, ret | _ -> assert false) in
-		let rtype i = f.regs.(i) in
+		let rtype i = try f.regs.(i) with _ -> HObj { null_proto with pname = "OUT_OF_BOUNDS:" ^ string_of_int i } in
 		let check t1 t2 =
 			if not (safe_cast t1 t2) then error (tstr t1 ^ " should be " ^ tstr t2)
 		in
@@ -2166,7 +2166,7 @@ let check code =
 				| HVirtual _ -> ()
 				| _ -> reg r (HVirtual {vfields=[||];vindex=PMap.empty;}));
 				(match rtype v with
-				| HObj _ | HDynObj | HDyn -> ()
+				| HObj _ | HDynObj | HDyn | HVirtual _ -> ()
 				| _ -> reg v HDynObj)
 			| ODynGet (v,r,f) | ODynSet (r,f,v) ->
 				ignore(code.strings.(f));
@@ -2206,7 +2206,7 @@ let check code =
 			| OSwitch (r,idx,eend) ->
 				reg r HI32;
 				Array.iter can_jump idx;
-				can_jump eend
+				if eend + 1 + i <> Array.length f.code then can_jump eend
 			| ONullCheck r ->
 				ignore(rtype r)
 			| OTrap (r, idx) ->

+ 59 - 12
src/generators/hlopt.ml

@@ -123,8 +123,8 @@ let opcode_fx frw op =
 		read a; write d
 	| ORet r | OThrow r  | ORethrow r | OSwitch (r,_,_) | ONullCheck r ->
 		read r
-	| OTrap _ ->
-		()
+	| OTrap (r,_) ->
+		write r
 	| OEndTrap _ ->
 		() (* ??? *)
 	| OGetUI8 (d,a,b) | OGetUI16 (d,a,b) | OGetI32 (d,a,b) | OGetF32 (d,a,b) | OGetF64 (d,a,b) | OGetArray (d,a,b) ->
@@ -218,10 +218,8 @@ let opcode_map read write op =
 		let a = read a and b = read b in
 		OXor (write d, a, b)
 	| OIncr a ->
-		let a = read a in
 		OIncr (write a)
 	| ODecr a ->
-		let a = read a in
 		ODecr (write a)
 	| OCall0 (d,f) ->
 		OCall0 (write d, f)
@@ -253,6 +251,7 @@ let opcode_map read write op =
 		let rl = List.map read rl in
 		OCallThis (write d, f, rl)
 	| OCallClosure (d,f,rl) ->
+		let f = read f in
 		let rl = List.map read rl in
 		OCallClosure (write d, f, rl)
 	| OStaticClosure (d,f) ->
@@ -340,8 +339,8 @@ let opcode_map read write op =
 		OSwitch (read r, cases, def)
 	| ONullCheck r ->
 		ONullCheck (read r)
-	| OTrap _ ->
-		op
+	| OTrap (r,d) ->
+		OTrap (write r, d)
 	| OEndTrap _ ->
 		op (* ??? *)
 	| OGetUI8 (d,a,b) ->
@@ -488,7 +487,10 @@ let optimize dump (f:fundecl) =
 	let blocks_pos, root = code_graph f in
 
 	let read_counts = Array.make nregs 0 in
+	let write_counts = Array.make nregs 0 in
 	let read_count r = read_counts.(r) <- read_counts.(r) + 1 in
+	let write_count r = write_counts.(r) <- write_counts.(r) + 1 in
+
 	let empty_state() = Array.init nregs (fun i ->
 		let r = { rindex = i; ralias = Obj.magic 0; rbind = []; rnullcheck = false } in
 		r.ralias <- r;
@@ -555,6 +557,7 @@ let optimize dump (f:fundecl) =
 				List.iter (fun s2 -> s2.ralias <- s2) s.rbind;
 				s.rbind <- [];
 				s.rnullcheck <- false;
+				write_count r;
 				undef s
 			in
 			if i > b.bend then () else
@@ -603,6 +606,21 @@ let optimize dump (f:fundecl) =
 	in
 	propagate root;
 
+	(* unreachable code *)
+
+	let rec loop i =
+		if i = Array.length f.code then () else
+		try
+			let b = Hashtbl.find blocks_pos i in
+			loop (b.bend + 1)
+		with Not_found ->
+			(match op i with
+			| OEndTrap true -> ()
+			| _ -> set_nop i "unreach");
+			loop (i + 1)
+	in
+	loop 0;
+
 	(* nop *)
 
 	let todo = ref true in
@@ -610,9 +628,10 @@ let optimize dump (f:fundecl) =
 		if i < 0 then () else begin
 		(match op i with
 		| OMov (d,r) when read_counts.(d) = 0 ->
-			let n = read_counts.(r) in
+			let n = read_counts.(r) - 1 in
 			if n = 0 then todo := true;
 			read_counts.(r) <- n;
+			write_counts.(d) <- write_counts.(d) - 1;
 			set_nop i "unused"
 		| _ -> ());
 		loop (i - 1)
@@ -623,6 +642,20 @@ let optimize dump (f:fundecl) =
 		loop (Array.length f.code - 1);
 	done;
 
+	(* reg map *)
+
+	let used_regs = ref 0 in
+	let reg_map = read_counts in
+	let nargs = (match f.ftype with HFun (args,_) -> List.length args | _ -> assert false) in
+	for i=0 to nregs-1 do
+		if read_counts.(i) > 0 || write_counts.(i) > 0 || i < nargs then begin
+			reg_map.(i) <- !used_regs;
+			incr used_regs;
+		end else
+			reg_map.(i) <- -1;
+	done;
+	let reg_remap = !used_regs <> nregs in
+
 	(* done *)
 	if dump <> None then begin
 		let rec loop i block =
@@ -642,7 +675,12 @@ let optimize dump (f:fundecl) =
 			write (Printf.sprintf "\t@%-3X %-20s %s" i (ostr string_of_int old) (if opcode_eq old op then "" else ostr string_of_int op));
 			loop (i + 1) block
 		in
-		write (fundecl_name f);
+		write (Printf.sprintf "%s@%d" (fundecl_name f) f.findex);
+		if reg_remap then begin
+			for i=0 to nregs-1 do
+				write (Printf.sprintf "\tr%-2d %-10s%s" i (tstr f.regs.(i)) (if reg_map.(i) < 0 then " unused" else if reg_map.(i) = i then "" else Printf.sprintf " r%-2d" reg_map.(i)))
+			done;
+		end;
 		loop 0 root;
 		write "";
 		write "";
@@ -654,7 +692,7 @@ let optimize dump (f:fundecl) =
 	let regs = ref f.regs in
 	let debug = ref f.debug in
 
-	if !nop_count > 0 then begin
+	if !nop_count > 0 || reg_remap then begin
 		let new_pos = Array.make (Array.length f.code) 0 in
 		let jumps = ref [] in
 		let out_pos = ref 0 in
@@ -669,6 +707,7 @@ let optimize dump (f:fundecl) =
 				| OJTrue _ | OJFalse _ | OJNull _ | OJNotNull _  | OJSLt _ | OJSGte _ | OJSGt _ | OJSLte _ | OJULt _ | OJUGte _ | OJEq _ | OJNotEq _ | OJAlways _ | OSwitch _  | OTrap _ ->
 					jumps := i :: !jumps
 				| _ -> ());
+				let op = if reg_remap then opcode_map (fun r -> reg_map.(r)) (fun r -> reg_map.(r)) op else op in
 				out_code.(!out_pos) <- op;
 				new_debug.(!out_pos) <- f.debug.(i);
 				incr out_pos
@@ -677,7 +716,8 @@ let optimize dump (f:fundecl) =
 			let pos d =
 				new_pos.(j + 1 + d) - new_pos.(j + 1)
 			in
-			let op = (match f.code.(j) with
+			let p = new_pos.(j) in
+			out_code.(p) <- (match out_code.(p) with
 			| OJTrue (r,d) -> OJTrue (r,pos d)
 			| OJFalse (r,d) -> OJFalse (r,pos d)
 			| OJNull (r,d) -> OJNull (r, pos d)
@@ -693,11 +733,18 @@ let optimize dump (f:fundecl) =
 			| OJAlways d -> OJAlways (pos d)
 			| OSwitch (r,cases,send) -> OSwitch (r, Array.map pos cases, pos send)
 			| OTrap (r,d) -> OTrap (r,pos d)
-			| _ -> assert false) in
-			out_code.(new_pos.(j)) <- op
+			| _ -> assert false)
 		) !jumps;
 		code := out_code;
 		debug := new_debug;
+		if reg_remap then begin
+			let new_regs = Array.make !used_regs HVoid in
+			for i=0 to nregs-1 do
+				let p = reg_map.(i) in
+				if p >= 0 then new_regs.(p) <- f.regs.(i)
+			done;
+			regs := new_regs;
+		end;
 	end;
 
 	{ f with code = !code; regs = !regs; debug = !debug }