| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214 |
- open Cgen
- open Match
- let mgen ~verbose ~fuzz path lofs input oc =
- let info ?(level = 1) fmt =
- if level <= verbose then
- Printf.eprintf fmt
- else
- Printf.ifprintf stdout fmt
- in
- let rules =
- match Sexp.(run_parser ppats) input with
- | `Error (ps, err, loc) ->
- Printf.eprintf "%s:%d:%d %s\n"
- path (lofs + ps.Sexp.line) ps.Sexp.coln err;
- Printf.eprintf "%s" loc;
- exit 1
- | `Ok rules -> rules
- in
- info "adding ac variants...%!";
- let nparsed =
- List.fold_left
- (fun npats (_, _, ps) ->
- npats + List.length ps)
- 0 rules
- in
- let varsmap = Hashtbl.create 10 in
- let rules =
- List.concat_map (fun (name, vars, patterns) ->
- (try assert (Hashtbl.find varsmap name = vars)
- with Not_found -> ());
- Hashtbl.replace varsmap name vars;
- List.map
- (fun pattern -> {name; vars; pattern})
- (List.concat_map ac_equiv patterns)
- ) rules
- in
- info " %d -> %d patterns\n"
- nparsed (List.length rules);
- let rnames =
- setify (List.map (fun r -> r.name) rules) in
- info "generating match tables...%!";
- let sa, am, sm = generate_table rules in
- let numbr = make_numberer sa am sm in
- info " %d states, %d rules\n"
- (Array.length sa) (StateMap.cardinal sm);
- if verbose >= 2 then begin
- info "-------------\nstates:\n";
- Array.iteri (fun i s ->
- info " state %d: %s\n"
- i (show_pattern s.seen)) sa;
- info "-------------\nstatemap:\n";
- Test.print_sm stderr sm;
- info "-------------\n";
- end;
- info "generating matchers...\n";
- let matchers =
- List.map (fun rname ->
- info "+ %s...%!" rname;
- let m = lr_matcher sm sa rules rname in
- let vars = Hashtbl.find varsmap rname in
- info " %d nodes\n" (Action.size m);
- info ~level:2 " -------------\n";
- info ~level:2 " automaton:\n";
- info ~level:2 "%s\n"
- (Format.asprintf " @[%a@]" Action.pp m);
- info ~level:2 " ----------\n";
- (vars, rname, m)
- ) rnames
- in
- if fuzz then begin
- info ~level:0 "fuzzing statemap...\n";
- let tp = Fuzz.fuzz_numberer rules numbr in
- info ~level:0 "testing %d patterns...\n"
- (List.length rules);
- Fuzz.test_matchers tp numbr rules
- end;
- info "emitting C...\n";
- flush stderr;
- let cgopts =
- { pfx = ""; static = true; oc = oc } in
- emit_c cgopts numbr;
- emit_matchers cgopts matchers;
- ()
- let read_all ic =
- let bufsz = 4096 in
- let buf = Bytes.create bufsz in
- let data = Buffer.create bufsz in
- let read = ref 0 in
- while
- read := input ic buf 0 bufsz;
- !read <> 0
- do
- Buffer.add_subbytes data buf 0 !read
- done;
- Buffer.contents data
- let split_c src =
- let begin_re, eoc_re, end_re =
- let re = Str.regexp in
- ( re "mgen generated code"
- , re "\\*/"
- , re "end of generated code" )
- in
- let str_match regexp str =
- try
- let _: int =
- Str.search_forward regexp str 0
- in true
- with Not_found -> false
- in
- let rec go st lofs pfx rules lines =
- let line, lines =
- match lines with
- | [] ->
- failwith (
- match st with
- | `Prefix -> "could not find mgen section"
- | `Rules -> "mgen rules not terminated"
- | `Skip -> "mgen section not terminated"
- )
- | l :: ls -> (l, ls)
- in
- match st with
- | `Prefix ->
- let pfx = line :: pfx in
- if str_match begin_re line
- then
- let lofs = List.length pfx in
- go `Rules lofs pfx rules lines
- else go `Prefix 0 pfx rules lines
- | `Rules ->
- let pfx = line :: pfx in
- if str_match eoc_re line
- then go `Skip lofs pfx rules lines
- else go `Rules lofs pfx (line :: rules) lines
- | `Skip ->
- if str_match end_re line then
- let join = String.concat "\n" in
- let pfx = join (List.rev pfx) ^ "\n\n"
- and rules = join (List.rev rules)
- and sfx = join (line :: lines)
- in (lofs, pfx, rules, sfx)
- else go `Skip lofs pfx rules lines
- in
- let lines = String.split_on_char '\n' src in
- go `Prefix 0 [] [] lines
- let () =
- let usage_msg =
- "mgen [--fuzz] [--verbose <N>] <file>" in
- let fuzz_arg = ref false in
- let verbose_arg = ref 0 in
- let input_paths = ref [] in
- let anon_fun filename =
- input_paths := filename :: !input_paths in
- let speclist =
- [ ( "--fuzz", Arg.Set fuzz_arg
- , " Fuzz tables and matchers" )
- ; ( "--verbose", Arg.Set_int verbose_arg
- , "<N> Set verbosity level" )
- ; ( "--", Arg.Rest_all (List.iter anon_fun)
- , " Stop argument parsing" ) ]
- in
- Arg.parse speclist anon_fun usage_msg;
- let input_paths = !input_paths in
- let verbose = !verbose_arg in
- let fuzz = !fuzz_arg in
- let input_path, input =
- match input_paths with
- | ["-"] -> ("-", read_all stdin)
- | [path] -> (path, read_all (open_in path))
- | _ ->
- Printf.eprintf
- "%s: single input file expected\n"
- Sys.argv.(0);
- Arg.usage speclist usage_msg; exit 1
- in
- let mgen = mgen ~verbose ~fuzz in
- if Str.last_chars input_path 2 <> ".c"
- then mgen input_path 0 input stdout
- else
- let tmp_path = input_path ^ ".tmp" in
- Fun.protect
- ~finally:(fun () ->
- try Sys.remove tmp_path with _ -> ())
- (fun () ->
- let lofs, pfx, rules, sfx = split_c input in
- let oc = open_out tmp_path in
- output_string oc pfx;
- mgen input_path lofs rules oc;
- output_string oc sfx;
- close_out oc;
- Sys.rename tmp_path input_path;
- ());
- ()
|