浏览代码

[hxb] Implement shared string pools (#11511)

* add StringDynArray

* allow passing string pool to the writer

* also allow passing it to the reader

* activate

* [hxb] basic string pool impl for cache

* [hxb] configurable target vs macro string pool

* [hxb] simplify string pool handling/config

---------

Co-authored-by: Rudy Ges <[email protected]>
Simon Krajewski 1 年之前
父节点
当前提交
6b3d9e991c

+ 4 - 1
src/compiler/compilationCache.ml

@@ -35,6 +35,7 @@ class context_cache (index : int) (sign : Digest.t) = object(self)
 	val files : (Path.UniqueKey.t,cached_file) Hashtbl.t = Hashtbl.create 0
 	val modules : (path,module_def) Hashtbl.t = Hashtbl.create 0
 	val binary_cache : (path,HxbData.module_cache) Hashtbl.t = Hashtbl.create 0
+	val string_pool  = StringPool.create ()
 	val removed_files = Hashtbl.create 0
 	val mutable json = JNull
 	val mutable initialized = false
@@ -74,7 +75,7 @@ class context_cache (index : int) (sign : Digest.t) = object(self)
 		| MImport ->
 			Hashtbl.add modules m.m_path m
 		| _ ->
-			let writer = HxbWriter.create config warn anon_identification in
+			let writer = HxbWriter.create config (Some string_pool) warn anon_identification in
 			HxbWriter.write_module writer m;
 			let chunks = HxbWriter.get_chunks writer in
 			Hashtbl.replace binary_cache path {
@@ -98,6 +99,8 @@ class context_cache (index : int) (sign : Digest.t) = object(self)
 	method get_modules = modules
 
 	method get_hxb = binary_cache
+	method get_string_pool = string_pool
+	method get_string_pool_arr = string_pool.items.arr
 	method get_hxb_module path = Hashtbl.find binary_cache path
 
 	(* TODO handle hxb cache there too *)

+ 49 - 21
src/compiler/generate.ml

@@ -21,7 +21,17 @@ let check_auxiliary_output com actx =
 			Genjson.generate com.types file
 	end
 
-let export_hxb com config cc platform zip m =
+let create_writer com config string_pool =
+	let anon_identification = new tanon_identification in
+	let warn w s p = com.Common.warning w com.warning_options s p in
+	let writer = HxbWriter.create config string_pool warn anon_identification in
+	writer,(fun () ->
+		let out = IO.output_string () in
+		HxbWriter.export writer out;
+		IO.close_out out
+	)
+
+let export_hxb from_cache com config string_pool cc platform zip m =
 	let open HxbData in
 	match m.m_extra.m_kind with
 		| MCode | MMacro | MFake | MExtern -> begin
@@ -29,8 +39,8 @@ let export_hxb com config cc platform zip m =
 			let l = platform :: (fst m.m_path @ [snd m.m_path]) in
 			let path = (String.concat "/" l) ^ ".hxb" in
 
-			try
-				let hxb_cache = cc#get_hxb_module m.m_path in
+			if from_cache then begin
+				let hxb_cache = try cc#get_hxb_module m.m_path with Not_found -> raise Abort in
 				let out = IO.output_string () in
 				write_header out;
 				List.iter (fun (kind,data) ->
@@ -39,14 +49,12 @@ let export_hxb com config cc platform zip m =
 				) hxb_cache.mc_chunks;
 				let data = IO.close_out out in
 				zip#add_entry data path;
-			with Not_found ->
-				let anon_identification = new tanon_identification in
-				let warn w s p = com.Common.warning w com.warning_options s p in
-				let writer = HxbWriter.create config warn anon_identification in
+			end else begin
+				let writer,close = create_writer com config string_pool in
 				HxbWriter.write_module writer m;
-				let out = IO.output_string () in
-				HxbWriter.export writer out;
-				zip#add_entry (IO.close_out out) path;
+				let bytes = close () in
+				zip#add_entry bytes path;
+			end
 		end
 	| _ ->
 		()
@@ -54,41 +62,61 @@ let export_hxb com config cc platform zip m =
 let check_hxb_output ctx config =
 	let open HxbWriterConfig in
 	let com = ctx.com in
+	let write_string_pool config zip name pool =
+		let writer,close = create_writer com config (Some pool) in
+		let a = StringPool.finalize writer.cp in
+		HxbWriter.HxbWriter.write_string_pool writer STR a;
+		let bytes = close () in
+		zip#add_entry bytes name;
+	in
 	let match_path_list l sl_path =
 		List.exists (fun sl -> Ast.match_path true sl_path sl) l
 	in
-	let try_write () =
+	let try_write from_cache =
 		let path = config.HxbWriterConfig.archive_path in
 		let path = Str.global_replace (Str.regexp "\\$target") (platform_name ctx.com.platform) path in
 		let t = Timer.timer ["generate";"hxb"] in
 		Path.mkdir_from_path path;
 		let zip = new Zip_output.zip_output path 6 in
-		let export com config =
+		let export com config string_pool =
 			let cc = CommonCache.get_cache com in
 			let target = Common.platform_name_macro com in
+
 			List.iter (fun m ->
 				let t = Timer.timer ["generate";"hxb";s_type_path m.m_path] in
 				let sl_path = fst m.m_path @ [snd m.m_path] in
 				if not (match_path_list config.exclude sl_path) || match_path_list config.include' sl_path then
-					Std.finally t (export_hxb com config cc target zip) m
+					Std.finally t (export_hxb from_cache com config string_pool cc target zip) m
 			) com.modules;
 		in
 		Std.finally (fun () ->
 			zip#close;
 			t()
 		) (fun () ->
-			if  config.target_config.generate then
-				export com config.target_config;
-			begin match com.get_macros() with
-				| Some mcom when config.macro_config.generate ->
-					export mcom config.macro_config
-				| _ ->
-					()
+			let string_pool = if config.share_string_pool then Some (StringPool.create ()) else None in
+			if config.target_config.generate then begin
+				export com config.target_config string_pool;
+			end;
+
+			if config.macro_config.generate then begin
+				match com.get_macros() with
+					| Some mcom ->
+						let use_separate_pool = config.share_string_pool && from_cache in
+						let string_pool = if use_separate_pool then Some (StringPool.create ()) else string_pool in
+						export mcom config.macro_config string_pool;
+						if use_separate_pool then write_string_pool config.macro_config zip "StringPool.macro.hxb" (Option.get string_pool)
+					| _ ->
+						()
 			end;
+
+			if config.share_string_pool then
+				write_string_pool config.target_config zip "StringPool.hxb" (Option.get string_pool);
 		) ()
 	in
 	try
-		try_write ()
+		(* This Abort case shouldn't happen, unless some modules are not stored in hxb cache (which should not be the case currently) *)
+		if ctx.comm.is_server then try try_write true with Abort -> try_write false
+		else try_write false
 	with Sys_error s ->
 		CompilationContext.error ctx (Printf.sprintf "Could not write to %s: %s" config.archive_path s) null_pos
 

+ 14 - 0
src/compiler/hxb/hxbLib.ml

@@ -10,12 +10,23 @@ class hxb_library file_path = object(self)
 	val modules = Hashtbl.create 0
 	val mutable closed = false
 	val mutable loaded = false
+	val mutable string_pool : string array option = None
+	val mutable macro_string_pool : string array option = None
 
 	method load =
 		if not loaded then begin
 			loaded <- true;
 			let close = Timer.timer ["hxblib";"read"] in
 			List.iter (function
+				| ({ Zip.filename = "StringPool.hxb" | "StringPool.macro.hxb" as filename} as entry) ->
+					let reader = new HxbReader.hxb_reader (["hxb";"internal"],"StringPool") (HxbReader.create_hxb_reader_stats()) None in
+					let zip = Lazy.force zip in
+					let data = Bytes.unsafe_of_string (Zip.read_entry zip entry) in
+					ignore(reader#read (new HxbReaderApi.hxb_reader_api_null) data STR);
+					if filename = "StringPool.hxb" then
+						string_pool <- reader#get_string_pool
+					else
+						macro_string_pool <- reader#get_string_pool
 				| ({ Zip.is_directory = false; Zip.filename = filename } as entry) when String.ends_with filename ".hxb" ->
 					let pack = String.nsplit filename "/" in
 					begin match List.rev pack with
@@ -49,6 +60,9 @@ class hxb_library file_path = object(self)
 		end
 
 	method get_file_path = file_path
+	method get_string_pool target =
+		if target = "macro" && Option.is_some macro_string_pool then macro_string_pool
+		else string_pool
 end
 
 

+ 11 - 1
src/compiler/hxb/hxbReader.ml

@@ -148,12 +148,14 @@ let dump_stats name stats =
 class hxb_reader
 	(mpath : path)
 	(stats : hxb_reader_stats)
+	(string_pool : string array option)
 = object(self)
 	val mutable api = Obj.magic ""
 	val mutable current_module = null_module
 
 	val mutable ch = BytesWithPosition.create (Bytes.create 0)
-	val mutable string_pool = Array.make 0 ""
+	val mutable has_string_pool = (string_pool <> None)
+	val mutable string_pool = (match string_pool with None -> Array.make 0 "" | Some pool -> pool)
 	val mutable doc_pool = Array.make 0 ""
 
 	val mutable classes = Array.make 0 null_class
@@ -180,6 +182,12 @@ class hxb_reader
 			dump_backtrace();
 			error (Printf.sprintf "[HXB] [%s] Cannot resolve type %s" (s_type_path current_module.m_path) (s_type_path ((pack @ [mname]),tname)))
 
+	method get_string_pool =
+		if has_string_pool then
+			Some (string_pool)
+		else
+			None
+
 	(* Primitives *)
 
 	method read_i32 =
@@ -1924,9 +1932,11 @@ class hxb_reader
 		match kind with
 		| STR ->
 			string_pool <- self#read_string_pool;
+			has_string_pool <- true;
 		| DOC ->
 			doc_pool <- self#read_string_pool;
 		| MDF ->
+			assert(has_string_pool);
 			current_module <- self#read_mdf;
 		| MTF ->
 			current_module.m_types <- self#read_mtf;

+ 12 - 0
src/compiler/hxb/hxbReaderApi.ml

@@ -10,3 +10,15 @@ class virtual hxb_reader_api = object(self)
 	method virtual get_var_id : int -> int
 	method virtual read_expression_eagerly : tclass_field -> bool
 end
+
+class hxb_reader_api_null = object(self)
+	inherit hxb_reader_api
+
+	method make_module _ = assert false
+	method add_module _ = assert false
+	method resolve_type _ _ _ = assert false
+	method resolve_module _ = assert false
+	method basic_types = assert false
+	method get_var_id _ = assert false
+	method read_expression_eagerly _ = assert false
+end

+ 27 - 55
src/compiler/hxb/hxbWriter.ml

@@ -56,41 +56,6 @@ module StringHashtbl = Hashtbl.Make(struct
 		Hashtbl.hash s
 end)
 
-module StringPool = struct
-	type t = {
-		lut : int StringHashtbl.t;
-		items : string DynArray.t;
-		mutable closed : bool;
-	}
-
-	let create () = {
-		lut = StringHashtbl.create 16;
-		items = DynArray.create ();
-		closed = false;
-	}
-
-	let add sp s =
-		assert (not sp.closed);
-		let index = DynArray.length sp.items in
-		StringHashtbl.add sp.lut s index;
-		DynArray.add sp.items s;
-		index
-
-	let get sp s =
-		StringHashtbl.find sp.lut s
-
-	let get_or_add sp s =
-		try
-			get sp s
-		with Not_found ->
-			add sp s
-
-	let finalize sp =
-		assert (not sp.closed);
-		sp.closed <- true;
-		DynArray.to_list sp.items,DynArray.length sp.items
-end
-
 module Pool = struct
 	type ('key,'value) t = {
 		lut : ('key,int) Hashtbl.t;
@@ -445,6 +410,7 @@ type hxb_writer = {
 	anon_id : Type.t Tanon_identification.tanon_identification;
 	mutable current_module : module_def;
 	chunks : Chunk.t DynArray.t;
+	has_own_string_pool : bool;
 	cp : StringPool.t;
 	docs : StringPool.t;
 	mutable chunk : Chunk.t;
@@ -1780,11 +1746,11 @@ module HxbWriter = struct
 					write_type_parameters writer ltp
 				end;
 				Chunk.write_option writer.chunk fctx.texpr_this (fun e -> write_type_instance writer e.etype);
-				let items,length = StringPool.finalize fctx.t_pool in
-				Chunk.write_uleb128 writer.chunk length;
-				List.iter (fun bytes ->
+				let a = StringPool.finalize fctx.t_pool in
+				Chunk.write_uleb128 writer.chunk a.length;
+				StringDynArray.iter a (fun bytes ->
 					Chunk.write_bytes writer.chunk (Bytes.unsafe_of_string bytes)
-				) items;
+				);
 				Chunk.write_uleb128 writer.chunk (DynArray.length fctx.vars);
 				DynArray.iter (fun (v,v_id) ->
 					v.v_id <- v_id;
@@ -2050,6 +2016,14 @@ module HxbWriter = struct
 		| TTypeDecl t ->
 			()
 
+	let write_string_pool writer kind a =
+		start_chunk writer kind;
+		Chunk.write_uleb128 writer.chunk a.StringDynArray.length;
+		StringDynArray.iter a (fun s ->
+			let b = Bytes.unsafe_of_string s in
+			Chunk.write_bytes_length_prefixed writer.chunk b;
+		)
+
 	let write_module writer (m : module_def) =
 		writer.current_module <- m;
 
@@ -2270,22 +2244,14 @@ module HxbWriter = struct
 		start_chunk writer EOF;
 		start_chunk writer EOM;
 
-		let finalize_string_pool kind items length =
-			start_chunk writer kind;
-			Chunk.write_uleb128 writer.chunk length;
-			List.iter (fun s ->
-				let b = Bytes.unsafe_of_string s in
-				Chunk.write_bytes_length_prefixed writer.chunk b;
-			) items
-		in
-		begin
-			let items,length = StringPool.finalize writer.cp in
-			finalize_string_pool STR items length
+		if writer.has_own_string_pool then begin
+			let a = StringPool.finalize writer.cp in
+			write_string_pool writer STR a
 		end;
 		begin
-			let items,length = StringPool.finalize writer.docs in
-			if length > 0 then
-				finalize_string_pool DOC items length
+			let a = StringPool.finalize writer.docs in
+			if a.length > 0 then
+				write_string_pool writer DOC a
 		end
 
 	let get_sorted_chunks writer =
@@ -2296,8 +2262,13 @@ module HxbWriter = struct
 		l
 end
 
-let create config warn anon_id =
-	let cp = StringPool.create () in
+let create config string_pool warn anon_id =
+	let cp,has_own_string_pool = match string_pool with
+		| None ->
+			StringPool.create(),true
+		| Some pool ->
+			pool,false
+	in
 	{
 		config;
 		warn;
@@ -2305,6 +2276,7 @@ let create config warn anon_id =
 		current_module = null_module;
 		chunks = DynArray.create ();
 		cp = cp;
+		has_own_string_pool;
 		docs = StringPool.create ();
 		chunk = Obj.magic ();
 		classes = Pool.create ();

+ 3 - 1
src/compiler/hxb/hxbWriterConfig.ml

@@ -11,6 +11,7 @@ type writer_target_config = {
 
 type t = {
 	mutable archive_path : string;
+	mutable share_string_pool : bool;
 	target_config : writer_target_config;
 	macro_config : writer_target_config;
 }
@@ -25,6 +26,7 @@ let create_target_config () = {
 
 let create () = {
 	archive_path = "";
+	share_string_pool = true; (* Do we want this as default? *)
 	target_config = create_target_config ();
 	macro_config = create_target_config ()
 }
@@ -115,4 +117,4 @@ let process_argument file =
 		| _ ->
 			config.archive_path <- file;
 	end;
-	Some config
+	Some config

+ 2 - 2
src/compiler/server.ml

@@ -417,7 +417,7 @@ class hxb_reader_api_server
 		| GoodModule m ->
 			m
 		| BinaryModule mc ->
-			let reader = new HxbReader.hxb_reader path com.hxb_reader_stats in
+			let reader = new HxbReader.hxb_reader path com.hxb_reader_stats (Some cc#get_string_pool_arr) in
 			let f_next chunks until =
 				let t_hxb = Timer.timer ["server";"module cache";"hxb read"] in
 				let r = reader#read_chunks_until (self :> HxbReaderApi.hxb_reader_api) chunks until in
@@ -567,7 +567,7 @@ and type_module sctx com delay mpath p =
 			   checking dependencies. This means that the actual decoding never has any reason to fail. *)
 			begin match check_module sctx mpath mc.mc_extra p with
 				| None ->
-					let reader = new HxbReader.hxb_reader mpath com.hxb_reader_stats in
+					let reader = new HxbReader.hxb_reader mpath com.hxb_reader_stats (Some cc#get_string_pool_arr) in
 					let api = match com.hxb_reader_api with
 						| Some api ->
 							api

+ 1 - 0
src/context/common.ml

@@ -338,6 +338,7 @@ class virtual abstract_hxb_lib = object(self)
 	method virtual get_bytes : string -> path -> bytes option
 	method virtual close : unit
 	method virtual get_file_path : string
+	method virtual get_string_pool : string -> string array option
 end
 
 type context_main = {

+ 1 - 1
src/context/display/displayJson.ml

@@ -139,7 +139,7 @@ class hxb_reader_api_com
 			cc#find_module m_path
 		with Not_found ->
 			let mc = cc#get_hxb_module m_path in
-			let reader = new HxbReader.hxb_reader mc.mc_path com.hxb_reader_stats in
+			let reader = new HxbReader.hxb_reader mc.mc_path com.hxb_reader_stats (Some cc#get_string_pool_arr) in
 			fst (reader#read_chunks_until (self :> HxbReaderApi.hxb_reader_api) mc.mc_chunks (if headers_only then MTF else EOM))
 
 	method basic_types =

+ 1 - 1
src/context/display/displayTexpr.ml

@@ -177,7 +177,7 @@ let check_display_file ctx cs =
 				| NoModule | BadModule _ -> raise Not_found
 				| BinaryModule mc ->
 					let api = (new TypeloadModule.hxb_reader_api_typeload ctx.com ctx.g TypeloadModule.load_module' p :> HxbReaderApi.hxb_reader_api) in
-					let reader = new HxbReader.hxb_reader path ctx.com.hxb_reader_stats in
+					let reader = new HxbReader.hxb_reader path ctx.com.hxb_reader_stats (Some cc#get_string_pool_arr) in
 					let m = reader#read_chunks api mc.mc_chunks in
 					m
 				| GoodModule m ->

+ 27 - 0
src/core/ds/stringDynArray.ml

@@ -0,0 +1,27 @@
+type t = {
+	mutable arr : string array;
+	mutable length : int;
+}
+
+let create length = {
+	arr = Array.make length "";
+	length = 0;
+}
+
+let length d =
+	d.length
+
+let add d s =
+	let length = Array.length d.arr in
+	if d.length = length then begin
+		let new_arr = Array.make (length * 2) "" in
+		Array.blit d.arr 0 new_arr 0 length;
+		d.arr <- new_arr;
+	end;
+	d.arr.(d.length) <- s;
+	d.length <- d.length + 1
+
+let iter d f =
+	for i = 0 to d.length - 1 do
+		f (Array.unsafe_get d.arr i)
+	done

+ 43 - 0
src/core/ds/stringPool.ml

@@ -0,0 +1,43 @@
+module StringHashtbl = Hashtbl.Make(struct
+	type t = string
+
+	let equal =
+		String.equal
+
+	let hash s =
+		(* What's the best here? *)
+		Hashtbl.hash s
+end)
+
+type t = {
+	lut : int StringHashtbl.t;
+	items : StringDynArray.t;
+	mutable closed : bool;
+}
+
+let create () = {
+	lut = StringHashtbl.create 16;
+	items = StringDynArray.create 16;
+	closed = false;
+}
+
+let add sp s =
+	assert (not sp.closed);
+	let index = StringDynArray.length sp.items in
+	StringHashtbl.add sp.lut s index;
+	StringDynArray.add sp.items s;
+	index
+
+let get sp s =
+	StringHashtbl.find sp.lut s
+
+let get_or_add sp s =
+	try
+		get sp s
+	with Not_found ->
+		add sp s
+
+let finalize sp =
+	assert (not sp.closed);
+	sp.closed <- true;
+	sp.items

+ 3 - 3
src/typing/typeloadModule.ml

@@ -776,10 +776,10 @@ class hxb_reader_api_typeload
 end
 
 let rec load_hxb_module com g path p =
-	let read file bytes =
+	let read file bytes string_pool =
 		try
 			let api = (new hxb_reader_api_typeload com g load_module' p :> HxbReaderApi.hxb_reader_api) in
-			let reader = new HxbReader.hxb_reader path com.hxb_reader_stats in
+			let reader = new HxbReader.hxb_reader path com.hxb_reader_stats string_pool in
 			let read = reader#read api bytes in
 			let m = read EOT in
 			delay g PConnectField (fun () ->
@@ -797,7 +797,7 @@ let rec load_hxb_module com g path p =
 		| hxb_lib :: l ->
 			begin match hxb_lib#get_bytes target path with
 				| Some bytes ->
-					read hxb_lib#get_file_path bytes
+					read hxb_lib#get_file_path bytes (hxb_lib#get_string_pool target)
 				| None ->
 					loop l
 			end