sexp.ml 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. type pstate =
  2. { data: string
  3. ; line: int
  4. ; coln: int
  5. ; indx: int }
  6. type perror =
  7. { error: string
  8. ; ps: pstate }
  9. exception ParseError of perror
  10. type 'a parser =
  11. { fn: 'r. pstate -> ('a -> pstate -> 'r) -> 'r }
  12. let update_pos ps beg fin =
  13. let l, c = (ref ps.line, ref ps.coln) in
  14. for i = beg to fin - 1 do
  15. if ps.data.[i] = '\n' then
  16. (incr l; c := 0)
  17. else
  18. incr c
  19. done;
  20. { ps with line = !l; coln = !c }
  21. let pret (type a) (x: a): a parser =
  22. let fn ps k = k x ps in { fn }
  23. let pfail error: 'a parser =
  24. let fn ps _ = raise (ParseError {error; ps})
  25. in { fn }
  26. let por: 'a parser -> 'a parser -> 'a parser =
  27. fun p1 p2 ->
  28. let fn ps k =
  29. try p1.fn ps k with ParseError e1 ->
  30. try p2.fn ps k with ParseError e2 ->
  31. if e1.ps.indx > e2.ps.indx then
  32. raise (ParseError e1)
  33. else
  34. raise (ParseError e2)
  35. in { fn }
  36. let pbind: 'a parser -> ('a -> 'b parser) -> 'b parser =
  37. fun p1 p2 ->
  38. let fn ps k =
  39. p1.fn ps (fun x ps -> (p2 x).fn ps k)
  40. in { fn }
  41. (* handy for recursive rules *)
  42. let papp p x = pbind (pret x) p
  43. let psnd: 'a parser -> 'b parser -> 'b parser =
  44. fun p1 p2 -> pbind p1 (fun _x -> p2)
  45. let pfst: 'a parser -> 'b parser -> 'a parser =
  46. fun p1 p2 -> pbind p1 (fun x -> psnd p2 (pret x))
  47. module Infix = struct
  48. let ( let* ) = pbind
  49. let ( ||| ) = por
  50. let ( |<< ) = pfst
  51. let ( |>> ) = psnd
  52. end
  53. open Infix
  54. let pre: ?what:string -> string -> string parser =
  55. fun ?what re ->
  56. let what =
  57. match what with
  58. | None -> Printf.sprintf "%S" re
  59. | Some what -> what
  60. and re = Str.regexp re in
  61. let fn ps k =
  62. if not (Str.string_match re ps.data ps.indx) then
  63. (let error =
  64. Printf.sprintf "expected to match %s" what in
  65. raise (ParseError {error; ps}));
  66. let ps =
  67. let indx = Str.match_end () in
  68. { (update_pos ps ps.indx indx) with indx }
  69. in
  70. k (Str.matched_string ps.data) ps
  71. in { fn }
  72. let peoi: unit parser =
  73. let fn ps k =
  74. if ps.indx <> String.length ps.data then
  75. raise (ParseError
  76. { error = "expected end of input"; ps });
  77. k () ps
  78. in { fn }
  79. let pws = pre "[ \r\n\t*]*"
  80. let pws1 = pre "[ \r\n\t*]+"
  81. let pthen p1 p2 =
  82. let* x1 = p1 in
  83. let* x2 = p2 in
  84. pret (x1, x2)
  85. let rec plist_tail: 'a parser -> ('a list) parser =
  86. fun pitem ->
  87. (pws |>> pre ")" |>> pret []) |||
  88. (let* itm = pitem in
  89. let* itms = plist_tail pitem in
  90. pret (itm :: itms))
  91. let plist pitem =
  92. pws |>> pre ~what:"a list" "("
  93. |>> plist_tail pitem
  94. let plist1p p1 pitem =
  95. pws |>> pre ~what:"a list" "("
  96. |>> pthen p1 (plist_tail pitem)
  97. let ppair p1 p2 =
  98. pws |>> pre ~what:"a pair" "("
  99. |>> pthen p1 p2 |<< pws |<< pre ")"
  100. let run_parser p s =
  101. let ps =
  102. {data = s; line = 1; coln = 0; indx = 0} in
  103. try `Ok (p.fn ps (fun res _ps -> res))
  104. with ParseError e ->
  105. let rec bol i =
  106. if i = 0 then i else
  107. if i < String.length s && s.[i] = '\n'
  108. then i+1 (* XXX BUG *)
  109. else bol (i-1)
  110. in
  111. let rec eol i =
  112. if i = String.length s then i else
  113. if s.[i] = '\n' then i else
  114. eol (i+1)
  115. in
  116. let bol = bol e.ps.indx in
  117. let eol = eol e.ps.indx in
  118. (*
  119. Printf.eprintf "bol:%d eol:%d indx:%d len:%d\n"
  120. bol eol e.ps.indx (String.length s); (* XXX debug *)
  121. *)
  122. let lines =
  123. String.split_on_char '\n'
  124. (String.sub s bol (eol - bol))
  125. in
  126. let nl = List.length lines in
  127. let caret = ref (e.ps.indx - bol) in
  128. let msg = ref [] in
  129. let pfx = " > " in
  130. lines |> List.iteri (fun ln l ->
  131. if ln <> nl - 1 || l <> "" then begin
  132. let ll = String.length l + 1 in
  133. msg := (pfx ^ l ^ "\n") :: !msg;
  134. if !caret <= ll then begin
  135. let pad = String.make !caret ' ' in
  136. msg := (pfx ^ pad ^ "^\n") :: !msg;
  137. end;
  138. caret := !caret - ll;
  139. end;
  140. );
  141. `Error
  142. ( e.ps, e.error
  143. , String.concat "" (List.rev !msg) )
  144. (* ---------------------------------------- *)
  145. (* pattern parsing *)
  146. (* ---------------------------------------- *)
  147. (* Example syntax:
  148. (with-vars (a b c d)
  149. (patterns
  150. (ob (add (tmp a) (con d)))
  151. (bsm (add (tmp b) (mul (tmp m) (con 2 4 8)))) ))
  152. *)
  153. open Match
  154. let pint64 =
  155. let* s = pre "[-]?[0-9_]+" in
  156. pret (Int64.of_string s)
  157. let pid =
  158. pre ~what:"an identifer"
  159. "[a-zA-Z][a-zA-Z0-9_]*"
  160. let pop_base =
  161. let sob, obs = show_op_base, op_bases in
  162. let* s = pre ~what:"an operator"
  163. (String.concat "\\|" (List.map sob obs))
  164. in pret (List.find (fun o -> s = sob o) obs)
  165. let pop = let* ob = pop_base in pret (Kl, ob)
  166. let rec ppat vs =
  167. let pcons_tail =
  168. let* cs = plist_tail (pws1 |>> pint64) in
  169. match cs with
  170. | [] -> pret [AnyCon]
  171. | _ -> pret (List.map (fun c -> Con c) cs)
  172. in
  173. let pvar =
  174. let* id = pid in
  175. if not (List.mem id vs) then
  176. pfail ("unbound variable: " ^ id)
  177. else
  178. pret id
  179. in
  180. pws |>> (
  181. ( let* c = pint64 in pret [Atm (Con c)] )
  182. |||
  183. ( pre "(con)" |>> pret [Atm AnyCon] ) |||
  184. ( let* cs = pre "(con" |>> pcons_tail in
  185. pret (List.map (fun c -> Atm c) cs) ) |||
  186. ( let* v = pre "(con" |>> pws1 |>> pvar in
  187. let* cs = pcons_tail in
  188. pret (List.map (fun c -> Var (v, c)) cs) )
  189. |||
  190. ( pre "(tmp)" |>> pret [Atm Tmp] ) |||
  191. ( let* v = pre "(tmp" |>> pws1 |>> pvar in
  192. pws |>> pre ")" |>> pret [Var (v, Tmp)] )
  193. |||
  194. ( let* (op, rands) =
  195. plist1p (pws |>> pop) (papp ppat vs) in
  196. let nrands = List.length rands in
  197. if nrands < 2 then
  198. pfail ( "binary op requires at least"
  199. ^ " two arguments" )
  200. else
  201. let mk x y = Bnr (op, x, y) in
  202. pret
  203. (products rands []
  204. (fun rands pats ->
  205. (* construct a left-heavy tree *)
  206. let r0 = List.hd rands in
  207. let rs = List.tl rands in
  208. List.fold_left mk r0 rs :: pats)) )
  209. )
  210. let pwith_vars ?(vs = []) p =
  211. ( let* vs =
  212. pws |>> pre "(with-vars" |>> pws |>>
  213. plist (pws |>> pid)
  214. in pws |>> p vs |<< pws |<< pre ")" )
  215. ||| p vs
  216. let ppats =
  217. pwith_vars @@ fun vs ->
  218. pre "(patterns" |>> plist_tail
  219. (pwith_vars ~vs @@ fun vs ->
  220. let* n, ps = ppair pid (ppat vs) in
  221. pret (n, vs, ps))
  222. (* ---------------------------------------- *)
  223. (* tests *)
  224. (* ---------------------------------------- *)
  225. let () =
  226. if false then
  227. let show_patterns ps =
  228. "[" ^ String.concat "; "
  229. (List.map show_pattern ps) ^ "]"
  230. in
  231. let pat s =
  232. Printf.printf "parse %s = " s;
  233. let vars =
  234. [ "foobar"; "a"; "b"; "d"
  235. ; "m"; "s"; "x" ]
  236. in
  237. match run_parser (ppat vars) s with
  238. | `Ok p ->
  239. Printf.printf "%s\n" (show_patterns p)
  240. | `Error (_, e, _) ->
  241. Printf.printf "ERROR: %s\n" e
  242. in
  243. pat "42";
  244. pat "(tmp)";
  245. pat "(tmp foobar)";
  246. pat "(con)";
  247. pat "(con 1 2 3)";
  248. pat "(con x 1 2 3)";
  249. pat "(add 1 2)";
  250. pat "(add 1 2 3 4)";
  251. pat "(sub 1 2)";
  252. pat "(sub 1 2 3)";
  253. pat "(tmp unbound_var)";
  254. pat "(add 0)";
  255. pat "(add 1 (add 2 3))";
  256. pat "(add (tmp a) (con d))";
  257. pat "(add (tmp b) (mul (tmp m) (con s 2 4 8)))";
  258. pat "(add (con 1 2) (con 3 4))";
  259. ()