2
0

lexer.mll 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. (*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. *)
  22. {
  23. open Lexing
  24. open Ast
  25. type error_msg =
  26. | Invalid_character of char
  27. | Unterminated_string
  28. | Unterminated_regexp
  29. | Unclosed_comment
  30. | Invalid_escape
  31. | Invalid_option
  32. exception Error of error_msg * pos
  33. let error_msg = function
  34. | Invalid_character c when int_of_char c > 32 && int_of_char c < 128 -> Printf.sprintf "Invalid character '%c'" c
  35. | Invalid_character c -> Printf.sprintf "Invalid character 0x%.2X" (int_of_char c)
  36. | Unterminated_string -> "Unterminated string"
  37. | Unterminated_regexp -> "Unterminated regular expression"
  38. | Unclosed_comment -> "Unclosed comment"
  39. | Invalid_escape -> "Invalid escape sequence"
  40. | Invalid_option -> "Invalid regular expression option"
  41. type lexer_file = {
  42. lfile : string;
  43. mutable lline : int;
  44. mutable lmaxline : int;
  45. mutable llines : (int * int) list;
  46. mutable lalines : (int * int) array;
  47. mutable lstrings : int list;
  48. }
  49. let make_file file =
  50. {
  51. lfile = file;
  52. lline = 1;
  53. lmaxline = 1;
  54. llines = [0,1];
  55. lalines = [|0,1|];
  56. lstrings = [];
  57. }
  58. let cur = ref (make_file "")
  59. let all_files = Hashtbl.create 0
  60. let buf = Buffer.create 100
  61. let error e pos =
  62. raise (Error (e,{ pmin = pos; pmax = pos; pfile = !cur.lfile }))
  63. let keywords =
  64. let h = Hashtbl.create 3 in
  65. List.iter (fun k -> Hashtbl.add h (s_keyword k) k)
  66. [Function;Class;Static;Var;If;Else;While;Do;For;
  67. Break;Return;Continue;Extends;Implements;Import;
  68. Switch;Case;Default;Public;Private;Try;Untyped;
  69. Catch;New;This;Throw;Extern;Enum;In;Interface;
  70. Cast;Override;Dynamic;Typedef;Package;
  71. Inline;Using;Null;True;False;Abstract;Macro];
  72. h
  73. let init file =
  74. let f = make_file file in
  75. cur := f;
  76. Hashtbl.replace all_files file f
  77. let save() =
  78. !cur
  79. let restore c =
  80. cur := c
  81. let newline lexbuf =
  82. let cur = !cur in
  83. cur.lline <- cur.lline + 1;
  84. cur.llines <- (lexeme_end lexbuf,cur.lline) :: cur.llines
  85. let fmt_pos p =
  86. p.pmin + (p.pmax - p.pmin) * 1000000
  87. let add_fmt_string p =
  88. let file = (try
  89. Hashtbl.find all_files p.pfile
  90. with Not_found ->
  91. let f = make_file p.pfile in
  92. Hashtbl.replace all_files p.pfile f;
  93. f
  94. ) in
  95. file.lstrings <- (fmt_pos p) :: file.lstrings
  96. let fast_add_fmt_string p =
  97. let cur = !cur in
  98. cur.lstrings <- (fmt_pos p) :: cur.lstrings
  99. let is_fmt_string p =
  100. try
  101. let file = Hashtbl.find all_files p.pfile in
  102. List.mem (fmt_pos p) file.lstrings
  103. with Not_found ->
  104. false
  105. let remove_fmt_string p =
  106. try
  107. let file = Hashtbl.find all_files p.pfile in
  108. file.lstrings <- List.filter ((<>) (fmt_pos p)) file.lstrings
  109. with Not_found ->
  110. ()
  111. let find_line p f =
  112. (* rebuild cache if we have a new line *)
  113. if f.lmaxline <> f.lline then begin
  114. f.lmaxline <- f.lline;
  115. f.lalines <- Array.of_list (List.rev f.llines);
  116. end;
  117. let rec loop min max =
  118. let med = (min + max) lsr 1 in
  119. let lp, line = Array.unsafe_get f.lalines med in
  120. if med = min then
  121. line, p - lp
  122. else if lp > p then
  123. loop min med
  124. else
  125. loop med max
  126. in
  127. loop 0 (Array.length f.lalines)
  128. let find_pos p =
  129. let file = (try Hashtbl.find all_files p.pfile with Not_found -> make_file p.pfile) in
  130. find_line p.pmin file
  131. let get_error_line p =
  132. let l, _ = find_pos p in
  133. l
  134. let get_error_pos printer p =
  135. if p.pmin = -1 then
  136. "(unknown)"
  137. else
  138. let file = (try Hashtbl.find all_files p.pfile with Not_found -> make_file p.pfile) in
  139. let l1, p1 = find_line p.pmin file in
  140. let l2, p2 = find_line p.pmax file in
  141. if l1 = l2 then begin
  142. let s = (if p1 = p2 then Printf.sprintf " %d" p1 else Printf.sprintf "s %d-%d" p1 p2) in
  143. Printf.sprintf "%s character%s" (printer p.pfile l1) s
  144. end else
  145. Printf.sprintf "%s lines %d-%d" (printer p.pfile l1) l1 l2
  146. let reset() = Buffer.reset buf
  147. let contents() = Buffer.contents buf
  148. let store lexbuf = Buffer.add_string buf (lexeme lexbuf)
  149. let add c = Buffer.add_string buf c
  150. let mk_tok t pmin pmax =
  151. t , { pfile = !cur.lfile; pmin = pmin; pmax = pmax }
  152. let mk lexbuf t =
  153. mk_tok t (lexeme_start lexbuf) (lexeme_end lexbuf)
  154. let mk_ident lexbuf =
  155. let s = lexeme lexbuf in
  156. mk lexbuf (try Kwd (Hashtbl.find keywords s) with Not_found -> Const (Ident s))
  157. let invalid_char lexbuf =
  158. error (Invalid_character (lexeme_char lexbuf 0)) (lexeme_start lexbuf)
  159. }
  160. let ident = ('_'* ['a'-'z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* | '_'+ | '_'+ ['0'-'9'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* )
  161. let idtype = '_'* ['A'-'Z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
  162. rule skip_header = parse
  163. | "\239\187\191" { skip_header lexbuf }
  164. | "#!" [^'\n' '\r']* { skip_header lexbuf }
  165. | "" | eof { }
  166. and token = parse
  167. | eof { mk lexbuf Eof }
  168. | [' ' '\t']+ { token lexbuf }
  169. | "\r\n" { newline lexbuf; token lexbuf }
  170. | '\n' | '\r' { newline lexbuf; token lexbuf }
  171. | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { mk lexbuf (Const (Int (lexeme lexbuf))) }
  172. | ['0'-'9']+ { mk lexbuf (Const (Int (lexeme lexbuf))) }
  173. | ['0'-'9']+ '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  174. | '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  175. | ['0'-'9']+ ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  176. | ['0'-'9']+ '.' ['0'-'9']* ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  177. | ['0'-'9']+ "..." {
  178. let s = lexeme lexbuf in
  179. mk lexbuf (IntInterval (String.sub s 0 (String.length s - 3)))
  180. }
  181. | "//" [^'\n' '\r']* {
  182. let s = lexeme lexbuf in
  183. mk lexbuf (CommentLine (String.sub s 2 ((String.length s)-2)))
  184. }
  185. | "++" { mk lexbuf (Unop Increment) }
  186. | "--" { mk lexbuf (Unop Decrement) }
  187. | "~" { mk lexbuf (Unop NegBits) }
  188. | "%=" { mk lexbuf (Binop (OpAssignOp OpMod)) }
  189. | "&=" { mk lexbuf (Binop (OpAssignOp OpAnd)) }
  190. | "|=" { mk lexbuf (Binop (OpAssignOp OpOr)) }
  191. | "^=" { mk lexbuf (Binop (OpAssignOp OpXor)) }
  192. | "+=" { mk lexbuf (Binop (OpAssignOp OpAdd)) }
  193. | "-=" { mk lexbuf (Binop (OpAssignOp OpSub)) }
  194. | "*=" { mk lexbuf (Binop (OpAssignOp OpMult)) }
  195. | "/=" { mk lexbuf (Binop (OpAssignOp OpDiv)) }
  196. | "<<=" { mk lexbuf (Binop (OpAssignOp OpShl)) }
  197. (*//| ">>=" { mk lexbuf (Binop (OpAssignOp OpShr)) } *)
  198. (*//| ">>>=" { mk lexbuf (Binop (OpAssignOp OpUShr)) } *)
  199. | "==" { mk lexbuf (Binop OpEq) }
  200. | "!=" { mk lexbuf (Binop OpNotEq) }
  201. | "<=" { mk lexbuf (Binop OpLte) }
  202. (*//| ">=" { mk lexbuf (Binop OpGte) }*)
  203. | "&&" { mk lexbuf (Binop OpBoolAnd) }
  204. | "||" { mk lexbuf (Binop OpBoolOr) }
  205. | "<<" { mk lexbuf (Binop OpShl) }
  206. | "->" { mk lexbuf Arrow }
  207. | "..." { mk lexbuf (Binop OpInterval) }
  208. | "=>" { mk lexbuf (Binop OpArrow)}
  209. | "!" { mk lexbuf (Unop Not) }
  210. | "<" { mk lexbuf (Binop OpLt) }
  211. | ">" { mk lexbuf (Binop OpGt) }
  212. | ";" { mk lexbuf Semicolon }
  213. | ":" { mk lexbuf DblDot }
  214. | "," { mk lexbuf Comma }
  215. | "." { mk lexbuf Dot }
  216. | "%" { mk lexbuf (Binop OpMod) }
  217. | "&" { mk lexbuf (Binop OpAnd) }
  218. | "|" { mk lexbuf (Binop OpOr) }
  219. | "^" { mk lexbuf (Binop OpXor) }
  220. | "+" { mk lexbuf (Binop OpAdd) }
  221. | "*" { mk lexbuf (Binop OpMult) }
  222. | "/" { mk lexbuf (Binop OpDiv) }
  223. | "-" { mk lexbuf (Binop OpSub) }
  224. | "=" { mk lexbuf (Binop OpAssign) }
  225. | "[" { mk lexbuf BkOpen }
  226. | "]" { mk lexbuf BkClose }
  227. | "{" { mk lexbuf BrOpen }
  228. | "}" { mk lexbuf BrClose }
  229. | "(" { mk lexbuf POpen }
  230. | ")" { mk lexbuf PClose }
  231. | "?" { mk lexbuf Question }
  232. | "@" { mk lexbuf At }
  233. | "/*" {
  234. reset();
  235. let pmin = lexeme_start lexbuf in
  236. let pmax = (try comment lexbuf with Exit -> error Unclosed_comment pmin) in
  237. mk_tok (Comment (contents())) pmin pmax;
  238. }
  239. | '"' {
  240. reset();
  241. let pmin = lexeme_start lexbuf in
  242. let pmax = (try string lexbuf with Exit -> error Unterminated_string pmin) in
  243. let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
  244. mk_tok (Const (String str)) pmin pmax;
  245. }
  246. | "'" {
  247. reset();
  248. let pmin = lexeme_start lexbuf in
  249. let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
  250. let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
  251. let t = mk_tok (Const (String str)) pmin pmax in
  252. fast_add_fmt_string (snd t);
  253. t
  254. }
  255. | "~/" {
  256. reset();
  257. let pmin = lexeme_start lexbuf in
  258. let options, pmax = (try regexp lexbuf with Exit -> error Unterminated_regexp pmin) in
  259. let str = contents() in
  260. mk_tok (Const (Regexp (str,options))) pmin pmax;
  261. }
  262. | '#' ident {
  263. let v = lexeme lexbuf in
  264. let v = String.sub v 1 (String.length v - 1) in
  265. mk lexbuf (Sharp v)
  266. }
  267. | '$' ['_' 'a'-'z' 'A'-'Z' '0'-'9']* {
  268. let v = lexeme lexbuf in
  269. let v = String.sub v 1 (String.length v - 1) in
  270. mk lexbuf (Dollar v)
  271. }
  272. | ident { mk_ident lexbuf }
  273. | idtype { mk lexbuf (Const (Ident (lexeme lexbuf))) }
  274. | _ { invalid_char lexbuf }
  275. and comment = parse
  276. | eof { raise Exit }
  277. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; comment lexbuf }
  278. | "*/" { lexeme_end lexbuf }
  279. | '*' { store lexbuf; comment lexbuf }
  280. | [^'*' '\n' '\r']+ { store lexbuf; comment lexbuf }
  281. and string = parse
  282. | eof { raise Exit }
  283. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string lexbuf }
  284. | "\\\"" { store lexbuf; string lexbuf }
  285. | "\\\\" { store lexbuf; string lexbuf }
  286. | '\\' { store lexbuf; string lexbuf }
  287. | '"' { lexeme_end lexbuf }
  288. | [^'"' '\\' '\r' '\n']+ { store lexbuf; string lexbuf }
  289. and string2 = parse
  290. | eof { raise Exit }
  291. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string2 lexbuf }
  292. | '\\' { store lexbuf; string2 lexbuf }
  293. | "\\\\" { store lexbuf; string2 lexbuf }
  294. | "\\'" { store lexbuf; string2 lexbuf }
  295. | "'" { lexeme_end lexbuf }
  296. | [^'\'' '\\' '\r' '\n']+ { store lexbuf; string2 lexbuf }
  297. and regexp = parse
  298. | eof | '\n' | '\r' { raise Exit }
  299. | '\\' '/' { add "/"; regexp lexbuf }
  300. | '\\' 'r' { add "\r"; regexp lexbuf }
  301. | '\\' 'n' { add "\n"; regexp lexbuf }
  302. | '\\' 't' { add "\t"; regexp lexbuf }
  303. | '\\' ['\\' '$' '.' '*' '+' '^' '|' '{' '}' '[' ']' '(' ')' '?' '-' '0'-'9'] { add (lexeme lexbuf); regexp lexbuf }
  304. | '\\' ['w' 'W' 'b' 'B' 's' 'S' 'd' 'D' 'x'] { add (lexeme lexbuf); regexp lexbuf }
  305. | '\\' ['u' 'U'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] { add (lexeme lexbuf); regexp lexbuf }
  306. | '\\' [^ '\\'] { error (Invalid_character (lexeme lexbuf).[1]) (lexeme_end lexbuf - 1) }
  307. | '/' { regexp_options lexbuf, lexeme_end lexbuf }
  308. | [^ '\\' '/' '\r' '\n']+ { store lexbuf; regexp lexbuf }
  309. and regexp_options = parse
  310. | 'g' | 'i' | 'm' | 's' | 'u' {
  311. let l = lexeme lexbuf in
  312. l ^ regexp_options lexbuf
  313. }
  314. | ['a' - 'z'] { error Invalid_option (lexeme_start lexbuf) }
  315. | "" { "" }