Browse Source

computer registers liveness + remove unused mov

Nicolas Cannasse 8 years ago
parent
commit
3cb2bf1d4c
1 changed files with 106 additions and 20 deletions
  1. 106 20
      src/generators/hlopt.ml

+ 106 - 20
src/generators/hlopt.ml

@@ -21,6 +21,11 @@
  *)
 open Hlcode
 
+module ISet = Set.Make(struct
+	let compare = Pervasives.compare
+	type t = int
+end)
+
 type cur_value =
 	| VUndef
 	| VReg of int
@@ -39,6 +44,9 @@ type block = {
 	mutable bprev : block list;
 	mutable bloop : bool;
 	mutable bstate : reg_state array option;
+	mutable bneed : ISet.t;
+	mutable bneed_all : ISet.t option;
+	mutable bwrite : (int, int) PMap.t;
 }
 
 type control =
@@ -441,6 +449,9 @@ let code_graph (f:fundecl) =
 				bprev = [];
 				bloop = false;
 				bstate = None;
+				bneed = ISet.empty;
+				bwrite = PMap.empty;
+				bneed_all = None;
 			} in
 			Hashtbl.add blocks_pos pos b;
 			let rec loop i =
@@ -488,6 +499,27 @@ let optimize dump (f:fundecl) =
 
 	let read_counts = Array.make nregs 0 in
 	let write_counts = Array.make nregs 0 in
+	let last_write = Array.make nregs (-1) in
+
+	let bit_regs = 30 in
+	let stride = (nregs + bit_regs - 1) / bit_regs in
+	let live_bits = Array.make (Array.length f.code * stride) 0 in
+
+	let set_live r min max =
+		let offset = r / bit_regs in
+		let mask = 1 lsl (r - offset * bit_regs) in
+		if min < 0 || max >= Array.length f.code then assert false;
+		for i=min to max do
+			let p = i * stride + offset in
+			Array.unsafe_set live_bits p ((Array.unsafe_get live_bits p) lor mask);
+		done;
+	in
+	let is_live r i =
+		let offset = r / bit_regs in
+		let mask = 1 lsl (r - offset * bit_regs) in
+		live_bits.(i * stride + offset) land mask <> 0
+	in
+
 	let read_count r = read_counts.(r) <- read_counts.(r) + 1 in
 	let write_count r = write_counts.(r) <- write_counts.(r) + 1 in
 
@@ -544,12 +576,19 @@ let optimize dump (f:fundecl) =
 			) l;
 			s
 		in
-		let undef r =
+		let unalias r =
 			r.ralias.rbind <- List.filter (fun r2 -> r2 != r) r.ralias.rbind;
 			r.ralias <- r
 		in
 		let rec loop i =
 			let do_read r =
+				let w = last_write.(r) in
+				if w < b.bstart || w > i then begin
+					last_write.(r) <- i;
+					set_live r b.bstart i;
+					b.bneed <- ISet.add r b.bneed;
+				end else
+					set_live r (w + 1) i;
 				read_count r
 			in
 			let do_write r =
@@ -557,15 +596,17 @@ let optimize dump (f:fundecl) =
 				List.iter (fun s2 -> s2.ralias <- s2) s.rbind;
 				s.rbind <- [];
 				s.rnullcheck <- false;
+				last_write.(r) <- i;
+				b.bwrite <- PMap.add r i b.bwrite;
 				write_count r;
-				undef s
+				unalias s
 			in
 			if i > b.bend then () else
 			let op = op i in
 			if dstate then print_state i state;
 			(match op with
-			| OIncr r | ODecr r | ORef (_,r) ->  do_write r
-			| OCallClosure (_,r,_) when f.regs.(r) = HDyn && (match f.regs.(state.(r).ralias.rindex) with HFun (_,rt) -> not (is_dynamic rt) | HDyn -> false | _ -> true) -> do_write r (* Issue3218.hx *)
+			| OIncr r | ODecr r | ORef (_,r) -> unalias state.(r)
+			| OCallClosure (_,r,_) when f.regs.(r) = HDyn && (match f.regs.(state.(r).ralias.rindex) with HFun (_,rt) -> not (is_dynamic rt) | HDyn -> false | _ -> true) -> unalias state.(r) (* Issue3218.hx *)
 			| _ -> ());
 			let op = opcode_map (fun r ->
 				let s = state.(r) in
@@ -583,6 +624,9 @@ let optimize dump (f:fundecl) =
 				sd.ralias <- sv;
 				sd.rnullcheck <- sv.rnullcheck;
 				if not (List.memq sd sv.rbind) then sv.rbind <- sd :: sv.rbind;
+			| OIncr r | ODecr r ->
+				do_read r;
+				do_write r;
 			| ONullCheck r ->
 				let s = state.(r) in
 				if s.rnullcheck then set_nop i "nullcheck" else begin do_read r; s.rnullcheck <- true; end;
@@ -622,25 +666,58 @@ let optimize dump (f:fundecl) =
 	in
 	loop 0;
 
+	(* liveness *)
+
+	let rec live b =
+		match b.bneed_all with
+		| Some a -> a
+		| None ->
+			let need_sub = List.fold_left (fun acc b2 ->
+				(* loop : first pass does not recurse, second pass uses cache *)
+				if b2.bloop && b2.bstart < b.bstart then (match b2.bneed_all with None -> acc | Some s -> ISet.union acc s) else
+				ISet.union acc (live b2)
+			) ISet.empty b.bnext in
+			let need_sub = ISet.filter (fun r ->
+				try
+					let w = PMap.find r b.bwrite in
+					set_live r (w + 1) b.bend;
+					false
+				with Not_found ->
+					set_live r b.bstart b.bend;
+					true
+			) need_sub in
+			let need = ISet.union b.bneed need_sub in
+			b.bneed_all <- Some need;
+			if b.bloop then begin
+				(*
+					if we are a loop, we need a second pass to perform fixed point
+					first clear the cache within the loop from backward
+					then rebuild the cache to make sure liveness ranges are correctly set
+				*)
+				let rec clear b2 =
+					match b2.bneed_all with
+					| Some _ when b2.bstart > b.bstart ->
+						b2.bneed_all <- None;
+						List.iter clear b2.bprev
+					| _ -> ()
+				in
+				List.iter (fun b2 -> if b2.bstart > b.bstart then clear b2) b.bprev;
+				List.iter (fun b -> ignore(live b)) b.bnext;
+			end;
+			need
+	in
+	ignore(live root);
+
 	(* nop *)
 
-	let todo = ref true in
-	let rec loop i =
-		if i < 0 then () else begin
+	for i=0 to Array.length f.code - 1 do
 		(match op i with
-		| OMov (d,r) when read_counts.(d) = 0 ->
+		| OMov (d,r) when not (is_live d (i + 1)) ->
 			let n = read_counts.(r) - 1 in
-			if n = 0 then todo := true;
 			read_counts.(r) <- n;
 			write_counts.(d) <- write_counts.(d) - 1;
 			set_nop i "unused"
 		| _ -> ());
-		loop (i - 1)
-		end
-	in
-	while !todo do
-		todo := false;
-		loop (Array.length f.code - 1);
 	done;
 
 	(* reg map *)
@@ -662,18 +739,26 @@ let optimize dump (f:fundecl) =
 		let rec loop i block =
 			if i = Array.length f.code then () else
 			let block = try
-				let nblock = Hashtbl.find blocks_pos i in
+				let b = Hashtbl.find blocks_pos i in
 				write (Printf.sprintf "\t----- [%s] (%X)"
-					(String.concat "," (List.map (fun b -> Printf.sprintf "%X" b.bstart) nblock.bnext))
-					nblock.bend
+					(String.concat "," (List.map (fun b -> Printf.sprintf "%X" b.bstart) b.bnext))
+					b.bend
 				);
-				nblock
+				let need = String.concat "," (List.map string_of_int (ISet.elements b.bneed)) in
+				let wr = String.concat " " (List.rev (PMap.foldi (fun r p acc -> Printf.sprintf "r%d:%X" r p :: acc) b.bwrite [])) in
+				write ("\tNEED=" ^ need ^ "\tWRITE=" ^ wr);
+				b
 			with Not_found ->
 				block
 			in
 			let old = old_code.(i) in
 			let op = op i in
-			write (Printf.sprintf "\t@%-3X %-20s %s" i (ostr string_of_int old) (if opcode_eq old op then "" else ostr string_of_int op));
+			let rec live_loop r l =
+				if r = nregs then List.rev l else
+				live_loop (r + 1) (if is_live r i then r :: l else l)
+			in
+			let live = "LIVE=" ^ String.concat "," (List.map string_of_int (live_loop 0 [])) in
+			write (Printf.sprintf "\t@%-3X %-20s %-20s%s" i (ostr string_of_int old) (if opcode_eq old op then "" else ostr string_of_int op) live);
 			loop (i + 1) block
 		in
 		write (Printf.sprintf "%s@%d" (fundecl_name f) f.findex);
@@ -685,6 +770,7 @@ let optimize dump (f:fundecl) =
 		loop 0 root;
 		write "";
 		write "";
+		(match dump with None -> () | Some ch -> IO.flush ch);
 	end;
 
 	(* remap *)