fuzz.ml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. (* fuzz the tables and matchers generated *)
  2. open Match
  3. module Buffer: sig
  4. type 'a t
  5. val create: ?capacity:int -> unit -> 'a t
  6. val reset: 'a t -> unit
  7. val size: 'a t -> int
  8. val get: 'a t -> int -> 'a
  9. val set: 'a t -> int -> 'a -> unit
  10. val push: 'a t -> 'a -> unit
  11. end = struct
  12. type 'a t =
  13. { mutable size: int
  14. ; mutable data: 'a array }
  15. let mk_array n = Array.make n (Obj.magic 0)
  16. let create ?(capacity = 10) () =
  17. if capacity < 0 then invalid_arg "Buffer.make";
  18. {size = 0; data = mk_array capacity}
  19. let reset b = b.size <- 0
  20. let size b = b.size
  21. let get b n =
  22. if n >= size b then invalid_arg "Buffer.get";
  23. b.data.(n)
  24. let set b n x =
  25. if n >= size b then invalid_arg "Buffer.set";
  26. b.data.(n) <- x
  27. let push b x =
  28. let cap = Array.length b.data in
  29. if size b = cap then begin
  30. let data = mk_array (2 * cap + 1) in
  31. Array.blit b.data 0 data 0 cap;
  32. b.data <- data
  33. end;
  34. let sz = size b in
  35. b.size <- sz + 1;
  36. set b sz x
  37. end
  38. let binop_state n op s1 s2 =
  39. let key = K (op, s1, s2) in
  40. try StateMap.find key n.statemap
  41. with Not_found -> atom_state n Tmp
  42. type id = int
  43. type term_data =
  44. | Binop of op * id * id
  45. | Leaf of atomic_pattern
  46. type term =
  47. { id: id
  48. ; data: term_data
  49. ; state: p state }
  50. let pp_term fmt (ta, id) =
  51. let fpf x = Format.fprintf fmt x in
  52. let rec pp _fmt id =
  53. match ta.(id).data with
  54. | Leaf (Con c) -> fpf "%Ld" c
  55. | Leaf AnyCon -> fpf "$%d" id
  56. | Leaf Tmp -> fpf "%%%d" id
  57. | Binop (op, id1, id2) ->
  58. fpf "@[(%s@%d:%d @[<hov>%a@ %a@])@]"
  59. (show_op op) id ta.(id).state.id
  60. pp id1 pp id2
  61. in pp fmt id
  62. (* A term pool is a deduplicated set of term
  63. * that maintains nodes numbering using the
  64. * statemap passed at creation time *)
  65. module TermPool = struct
  66. type t =
  67. { terms: term Buffer.t
  68. ; hcons: (term_data, id) Hashtbl.t
  69. ; numbr: numberer }
  70. let create numbr =
  71. { terms = Buffer.create ()
  72. ; hcons = Hashtbl.create 100
  73. ; numbr }
  74. let reset tp =
  75. Buffer.reset tp.terms;
  76. Hashtbl.clear tp.hcons
  77. let size tp = Buffer.size tp.terms
  78. let term tp id = Buffer.get tp.terms id
  79. let mk_leaf tp atm =
  80. let data = Leaf atm in
  81. match Hashtbl.find tp.hcons data with
  82. | id -> term tp id
  83. | exception Not_found ->
  84. let id = Buffer.size tp.terms in
  85. let state = atom_state tp.numbr atm in
  86. Buffer.push tp.terms {id; data; state};
  87. Hashtbl.add tp.hcons data id;
  88. term tp id
  89. let mk_binop tp op t1 t2 =
  90. let data = Binop (op, t1.id, t2.id) in
  91. match Hashtbl.find tp.hcons data with
  92. | id -> term tp id
  93. | exception Not_found ->
  94. let id = Buffer.size tp.terms in
  95. let state =
  96. binop_state tp.numbr op t1.state t2.state
  97. in
  98. Buffer.push tp.terms {id; data; state};
  99. Hashtbl.add tp.hcons data id;
  100. term tp id
  101. let rec add_pattern tp = function
  102. | Bnr (op, p1, p2) ->
  103. let t1 = add_pattern tp p1 in
  104. let t2 = add_pattern tp p2 in
  105. mk_binop tp op t1 t2
  106. | Atm atm -> mk_leaf tp atm
  107. | Var (_, atm) -> add_pattern tp (Atm atm)
  108. let explode_term tp id =
  109. let rec aux tms n id =
  110. let t = term tp id in
  111. match t.data with
  112. | Leaf _ -> (n, {t with id = n} :: tms)
  113. | Binop (op, id1, id2) ->
  114. let n1, tms = aux tms n id1 in
  115. let n = n1 + 1 in
  116. let n2, tms = aux tms n id2 in
  117. let n = n2 + 1 in
  118. (n, { t with data = Binop (op, n1, n2)
  119. ; id = n } :: tms)
  120. in
  121. let n, tms = aux [] 0 id in
  122. Array.of_list (List.rev tms), n
  123. end
  124. module R = Random
  125. (* uniform pick in a list *)
  126. let list_pick l =
  127. let rec aux n l x =
  128. match l with
  129. | [] -> x
  130. | y :: l ->
  131. if R.int (n + 1) = 0 then
  132. aux (n + 1) l y
  133. else
  134. aux (n + 1) l x
  135. in
  136. match l with
  137. | [] -> invalid_arg "list_pick"
  138. | x :: l -> aux 1 l x
  139. let term_pick ~numbr =
  140. let ops =
  141. if numbr.ops = [] then
  142. numbr.ops <-
  143. (StateMap.fold (fun k _ ops ->
  144. match k with
  145. | K (op, _, _) -> op :: ops)
  146. numbr.statemap [] |> setify);
  147. numbr.ops
  148. in
  149. let rec gen depth =
  150. (* exponential probability for leaves to
  151. * avoid skewing towards shallow terms *)
  152. let atm_prob = 0.75 ** float_of_int depth in
  153. if R.float 1.0 <= atm_prob || ops = [] then
  154. let atom, st = list_pick numbr.atoms in
  155. (st, Atm atom)
  156. else
  157. let op = list_pick ops in
  158. let s1, t1 = gen (depth - 1) in
  159. let s2, t2 = gen (depth - 1) in
  160. ( binop_state numbr op s1 s2
  161. , Bnr (op, t1, t2) )
  162. in fun ~depth -> gen depth
  163. exception FuzzError
  164. let rec pattern_depth = function
  165. | Bnr (_, p1, p2) ->
  166. 1 + max (pattern_depth p1) (pattern_depth p2)
  167. | Atm _ -> 0
  168. | Var (_, atm) -> pattern_depth (Atm atm)
  169. let ( %% ) a b =
  170. 1e2 *. float_of_int a /. float_of_int b
  171. let progress ?(width = 50) msg pct =
  172. Format.eprintf "\x1b[2K\r%!";
  173. let progress_bar fmt =
  174. let n =
  175. let fwidth = float_of_int width in
  176. 1 + int_of_float (pct *. fwidth /. 1e2)
  177. in
  178. Format.fprintf fmt " %s%s %.0f%%@?"
  179. (String.concat "" (List.init n (fun _ -> "▒")))
  180. (String.make (max 0 (width - n)) '-')
  181. pct
  182. in
  183. Format.kfprintf progress_bar
  184. Format.err_formatter msg
  185. let fuzz_numberer rules numbr =
  186. (* pick twice the max pattern depth so we
  187. * have a chance to find non-trivial numbers
  188. * for the atomic patterns in the rules *)
  189. let depth =
  190. List.fold_left (fun depth r ->
  191. max depth (pattern_depth r.pattern))
  192. 0 rules * 2
  193. in
  194. (* fuzz until the term pool we are constructing
  195. * is no longer growing fast enough; or we just
  196. * went through sufficiently many iterations *)
  197. let max_iter = 1_000_000 in
  198. let low_insert_rate = 1e-2 in
  199. let tp = TermPool.create numbr in
  200. let rec loop new_stats i =
  201. let (_, _, insert_rate) = new_stats in
  202. if insert_rate <= low_insert_rate then () else
  203. if i >= max_iter then () else
  204. (* periodically update stats *)
  205. let new_stats =
  206. let (num, cnt, rate) = new_stats in
  207. if num land 1023 = 0 then
  208. let rate =
  209. 0.5 *. (rate +. float_of_int cnt /. 1023.)
  210. in
  211. progress " insert_rate=%.1f%%"
  212. (i %% max_iter) (rate *. 1e2);
  213. (num + 1, 0, rate)
  214. else new_stats
  215. in
  216. (* create a term and check that its number is
  217. * accurate wrt the rules *)
  218. let st, term = term_pick ~numbr ~depth in
  219. let state_matched =
  220. List.filter_map (fun cu ->
  221. match cu with
  222. | Top ("$" | "%") -> None
  223. | Top name -> Some name
  224. | _ -> None)
  225. st.point |> setify
  226. in
  227. let rule_matched =
  228. List.filter_map (fun r ->
  229. if pattern_match r.pattern term then
  230. Some r.name
  231. else None)
  232. rules |> setify
  233. in
  234. if state_matched <> rule_matched then begin
  235. let open Format in
  236. let pp_str_list =
  237. let pp_sep fmt () = fprintf fmt ",@ " in
  238. pp_print_list ~pp_sep pp_print_string
  239. in
  240. eprintf "@.@[<v2>fuzz error for %s"
  241. (show_pattern term);
  242. eprintf "@ @[state matched: %a@]"
  243. pp_str_list state_matched;
  244. eprintf "@ @[rule matched: %a@]"
  245. pp_str_list rule_matched;
  246. eprintf "@]@.";
  247. raise FuzzError;
  248. end;
  249. if state_matched = [] then
  250. loop new_stats (i + 1)
  251. else
  252. (* add to the term pool *)
  253. let old_size = TermPool.size tp in
  254. let _ = TermPool.add_pattern tp term in
  255. let new_stats =
  256. let (num, cnt, rate) = new_stats in
  257. if TermPool.size tp <> old_size then
  258. (num + 1, cnt + 1, rate)
  259. else
  260. (num + 1, cnt, rate)
  261. in
  262. loop new_stats (i + 1)
  263. in
  264. loop (1, 0, 1.0) 0;
  265. Format.eprintf
  266. "@.@[ generated %.3fMiB of test terms@]@."
  267. (float_of_int (Obj.reachable_words (Obj.repr tp))
  268. /. 128. /. 1024.);
  269. tp
  270. let rec run_matcher stk m (ta, id as t) =
  271. let state id = ta.(id).state.id in
  272. match m.Action.node with
  273. | Action.Switch cases ->
  274. let m =
  275. try List.assoc (state id) cases
  276. with Not_found -> failwith "no switch case"
  277. in
  278. run_matcher stk m t
  279. | Action.Push (sym, m) ->
  280. let l, r =
  281. match ta.(id).data with
  282. | Leaf _ -> failwith "push on leaf"
  283. | Binop (_, l, r) -> (l, r)
  284. in
  285. if sym && state l > state r
  286. then run_matcher (l :: stk) m (ta, r)
  287. else run_matcher (r :: stk) m (ta, l)
  288. | Action.Pop m -> begin
  289. match stk with
  290. | id :: stk -> run_matcher stk m (ta, id)
  291. | [] -> failwith "pop on empty stack"
  292. end
  293. | Action.Set (v, m) ->
  294. (v, id) :: run_matcher stk m t
  295. | Action.Stop -> []
  296. let rec term_match p (ta, id) =
  297. let (|>>) x f =
  298. match x with None -> None | Some x -> f x
  299. in
  300. let atom_match a =
  301. match ta.(id).data with
  302. | Leaf a' -> pattern_match (Atm a) (Atm a')
  303. | Binop _ -> pattern_match (Atm a) (Atm Tmp)
  304. in
  305. match p with
  306. | Var (v, a) when atom_match a ->
  307. Some [(v, id)]
  308. | Atm a when atom_match a -> Some []
  309. | (Atm _ | Var _) -> None
  310. | Bnr (op, pl, pr) -> begin
  311. match ta.(id).data with
  312. | Binop (op', idl, idr) when op' = op ->
  313. term_match pl (ta, idl) |>> fun l1 ->
  314. term_match pr (ta, idr) |>> fun l2 ->
  315. Some (l1 @ l2)
  316. | _ -> None
  317. end
  318. let test_matchers tp numbr rules =
  319. let {statemap = sm; states = sa; _} = numbr in
  320. let total = ref 0 in
  321. let matchers =
  322. let htbl = Hashtbl.create (Array.length sa) in
  323. List.map (fun r -> (r.name, r.pattern)) rules |>
  324. group_by_fst |>
  325. List.iter (fun (r, ps) ->
  326. total := !total + List.length ps;
  327. let pm = (ps, lr_matcher sm sa rules r) in
  328. sa |> Array.iter (fun s ->
  329. if List.mem (Top r) s.point then
  330. Hashtbl.add htbl s.id pm));
  331. htbl
  332. in
  333. let seen = Hashtbl.create !total in
  334. for id = 0 to TermPool.size tp - 1 do
  335. if id land 1023 = 0 ||
  336. id = TermPool.size tp - 1 then begin
  337. progress
  338. " coverage=%.1f%%"
  339. (id %% TermPool.size tp)
  340. (Hashtbl.length seen %% !total)
  341. end;
  342. let t = TermPool.explode_term tp id in
  343. Hashtbl.find_all matchers
  344. (TermPool.term tp id).state.id |>
  345. List.iter (fun (ps, m) ->
  346. let norm = List.fast_sort compare in
  347. let ok =
  348. match norm (run_matcher [] m t) with
  349. | asn -> `Match (List.exists (fun p ->
  350. match term_match p t with
  351. | None -> false
  352. | Some asn' ->
  353. if asn = norm asn' then begin
  354. Hashtbl.replace seen p ();
  355. true
  356. end else false) ps)
  357. | exception e -> `RunFailure e
  358. in
  359. if ok <> `Match true then begin
  360. let open Format in
  361. let pp_asn fmt asn =
  362. fprintf fmt "@[<h>";
  363. pp_print_list
  364. ~pp_sep:(fun fmt () -> fprintf fmt ";@ ")
  365. (fun fmt (v, d) ->
  366. fprintf fmt "@[%s←%d@]" v d)
  367. fmt asn;
  368. fprintf fmt "@]"
  369. in
  370. eprintf "@.@[<v2>matcher error for";
  371. eprintf "@ @[%a@]" pp_term t;
  372. begin match ok with
  373. | `RunFailure e ->
  374. eprintf "@ @[exception: %s@]"
  375. (Printexc.to_string e)
  376. | `Match (* false *) _ ->
  377. let asn = run_matcher [] m t in
  378. eprintf "@ @[assignment: %a@]"
  379. pp_asn asn;
  380. eprintf "@ @[<v2>could not match";
  381. List.iter (fun p ->
  382. eprintf "@ + @[%s@]"
  383. (show_pattern p)) ps;
  384. eprintf "@]"
  385. end;
  386. eprintf "@]@.";
  387. raise FuzzError
  388. end)
  389. done;
  390. Format.eprintf "@."