lexer.mll 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. (*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. *)
  22. {
  23. open Lexing
  24. open Ast
  25. type error_msg =
  26. | Invalid_character of char
  27. | Unterminated_string
  28. | Unterminated_regexp
  29. | Unclosed_comment
  30. | Unclosed_code
  31. | Invalid_escape
  32. | Invalid_option
  33. exception Error of error_msg * pos
  34. let error_msg = function
  35. | Invalid_character c when int_of_char c > 32 && int_of_char c < 128 -> Printf.sprintf "Invalid character '%c'" c
  36. | Invalid_character c -> Printf.sprintf "Invalid character 0x%.2X" (int_of_char c)
  37. | Unterminated_string -> "Unterminated string"
  38. | Unterminated_regexp -> "Unterminated regular expression"
  39. | Unclosed_comment -> "Unclosed comment"
  40. | Unclosed_code -> "Unclosed code string"
  41. | Invalid_escape -> "Invalid escape sequence"
  42. | Invalid_option -> "Invalid regular expression option"
  43. type lexer_file = {
  44. lfile : string;
  45. mutable lline : int;
  46. mutable lmaxline : int;
  47. mutable llines : (int * int) list;
  48. mutable lalines : (int * int) array;
  49. mutable lstrings : int list;
  50. }
  51. let make_file file =
  52. {
  53. lfile = file;
  54. lline = 1;
  55. lmaxline = 1;
  56. llines = [0,1];
  57. lalines = [|0,1|];
  58. lstrings = [];
  59. }
  60. let cur = ref (make_file "")
  61. let all_files = Hashtbl.create 0
  62. let buf = Buffer.create 100
  63. let error e pos =
  64. raise (Error (e,{ pmin = pos; pmax = pos; pfile = !cur.lfile }))
  65. let keywords =
  66. let h = Hashtbl.create 3 in
  67. List.iter (fun k -> Hashtbl.add h (s_keyword k) k)
  68. [Function;Class;Static;Var;If;Else;While;Do;For;
  69. Break;Return;Continue;Extends;Implements;Import;
  70. Switch;Case;Default;Public;Private;Try;Untyped;
  71. Catch;New;This;Throw;Extern;Enum;In;Interface;
  72. Cast;Override;Dynamic;Typedef;Package;
  73. Inline;Using;Null;True;False;Abstract;Macro];
  74. h
  75. let init file do_add =
  76. let f = make_file file in
  77. cur := f;
  78. if do_add then Hashtbl.replace all_files file f
  79. let save() =
  80. !cur
  81. let restore c =
  82. cur := c
  83. let newline lexbuf =
  84. let cur = !cur in
  85. cur.lline <- cur.lline + 1;
  86. cur.llines <- (lexeme_end lexbuf,cur.lline) :: cur.llines
  87. let fmt_pos p =
  88. p.pmin + (p.pmax - p.pmin) * 1000000
  89. let add_fmt_string p =
  90. let file = (try
  91. Hashtbl.find all_files p.pfile
  92. with Not_found ->
  93. let f = make_file p.pfile in
  94. Hashtbl.replace all_files p.pfile f;
  95. f
  96. ) in
  97. file.lstrings <- (fmt_pos p) :: file.lstrings
  98. let fast_add_fmt_string p =
  99. let cur = !cur in
  100. cur.lstrings <- (fmt_pos p) :: cur.lstrings
  101. let is_fmt_string p =
  102. try
  103. let file = Hashtbl.find all_files p.pfile in
  104. List.mem (fmt_pos p) file.lstrings
  105. with Not_found ->
  106. false
  107. let remove_fmt_string p =
  108. try
  109. let file = Hashtbl.find all_files p.pfile in
  110. file.lstrings <- List.filter ((<>) (fmt_pos p)) file.lstrings
  111. with Not_found ->
  112. ()
  113. let find_line p f =
  114. (* rebuild cache if we have a new line *)
  115. if f.lmaxline <> f.lline then begin
  116. f.lmaxline <- f.lline;
  117. f.lalines <- Array.of_list (List.rev f.llines);
  118. end;
  119. let rec loop min max =
  120. let med = (min + max) lsr 1 in
  121. let lp, line = Array.unsafe_get f.lalines med in
  122. if med = min then
  123. line, p - lp
  124. else if lp > p then
  125. loop min med
  126. else
  127. loop med max
  128. in
  129. loop 0 (Array.length f.lalines)
  130. (* resolve a position within a non-haxe file by counting newlines *)
  131. let resolve_pos file =
  132. let ch = open_in_bin file in
  133. let f = make_file file in
  134. let rec loop p =
  135. let inc i () =
  136. f.lline <- f.lline + 1;
  137. f.llines <- (p + i,f.lline) :: f.llines;
  138. i
  139. in
  140. let i = match input_char ch with
  141. | '\n' -> inc 1
  142. | '\r' ->
  143. ignore(input_char ch);
  144. inc 2
  145. | _ -> fun () -> 1
  146. in
  147. loop (p + i())
  148. in
  149. try
  150. loop 0
  151. with End_of_file ->
  152. close_in ch;
  153. f
  154. let find_file file =
  155. try Hashtbl.find all_files file with Not_found -> try resolve_pos file with Sys_error _ -> make_file file
  156. let find_pos p =
  157. find_line p.pmin (find_file p.pfile)
  158. let get_error_line p =
  159. let l, _ = find_pos p in
  160. l
  161. let get_error_pos printer p =
  162. if p.pmin = -1 then
  163. "(unknown)"
  164. else
  165. let file = find_file p.pfile in
  166. let l1, p1 = find_line p.pmin file in
  167. let l2, p2 = find_line p.pmax file in
  168. if l1 = l2 then begin
  169. let s = (if p1 = p2 then Printf.sprintf " %d" p1 else Printf.sprintf "s %d-%d" p1 p2) in
  170. Printf.sprintf "%s character%s" (printer p.pfile l1) s
  171. end else
  172. Printf.sprintf "%s lines %d-%d" (printer p.pfile l1) l1 l2
  173. let reset() = Buffer.reset buf
  174. let contents() = Buffer.contents buf
  175. let store lexbuf = Buffer.add_string buf (lexeme lexbuf)
  176. let add c = Buffer.add_string buf c
  177. let mk_tok t pmin pmax =
  178. t , { pfile = !cur.lfile; pmin = pmin; pmax = pmax }
  179. let mk lexbuf t =
  180. mk_tok t (lexeme_start lexbuf) (lexeme_end lexbuf)
  181. let mk_ident lexbuf =
  182. let s = lexeme lexbuf in
  183. mk lexbuf (try Kwd (Hashtbl.find keywords s) with Not_found -> Const (Ident s))
  184. let invalid_char lexbuf =
  185. error (Invalid_character (lexeme_char lexbuf 0)) (lexeme_start lexbuf)
  186. }
  187. let ident = ('_'* ['a'-'z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* | '_'+ | '_'+ ['0'-'9'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* )
  188. let idtype = '_'* ['A'-'Z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
  189. let integer = ['1'-'9'] ['0'-'9']* | '0'
  190. rule skip_header = parse
  191. | "\239\187\191" { skip_header lexbuf }
  192. | "#!" [^'\n' '\r']* { skip_header lexbuf }
  193. | "" | eof { }
  194. and token = parse
  195. | eof { mk lexbuf Eof }
  196. | [' ' '\t']+ { token lexbuf }
  197. | "\r\n" { newline lexbuf; token lexbuf }
  198. | '\n' | '\r' { newline lexbuf; token lexbuf }
  199. | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { mk lexbuf (Const (Int (lexeme lexbuf))) }
  200. | integer { mk lexbuf (Const (Int (lexeme lexbuf))) }
  201. | integer '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  202. | '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  203. | integer ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  204. | integer '.' ['0'-'9']* ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
  205. | integer "..." {
  206. let s = lexeme lexbuf in
  207. mk lexbuf (IntInterval (String.sub s 0 (String.length s - 3)))
  208. }
  209. | "//" [^'\n' '\r']* {
  210. let s = lexeme lexbuf in
  211. mk lexbuf (CommentLine (String.sub s 2 ((String.length s)-2)))
  212. }
  213. | "++" { mk lexbuf (Unop Increment) }
  214. | "--" { mk lexbuf (Unop Decrement) }
  215. | "~" { mk lexbuf (Unop NegBits) }
  216. | "%=" { mk lexbuf (Binop (OpAssignOp OpMod)) }
  217. | "&=" { mk lexbuf (Binop (OpAssignOp OpAnd)) }
  218. | "|=" { mk lexbuf (Binop (OpAssignOp OpOr)) }
  219. | "^=" { mk lexbuf (Binop (OpAssignOp OpXor)) }
  220. | "+=" { mk lexbuf (Binop (OpAssignOp OpAdd)) }
  221. | "-=" { mk lexbuf (Binop (OpAssignOp OpSub)) }
  222. | "*=" { mk lexbuf (Binop (OpAssignOp OpMult)) }
  223. | "/=" { mk lexbuf (Binop (OpAssignOp OpDiv)) }
  224. | "<<=" { mk lexbuf (Binop (OpAssignOp OpShl)) }
  225. (*//| ">>=" { mk lexbuf (Binop (OpAssignOp OpShr)) } *)
  226. (*//| ">>>=" { mk lexbuf (Binop (OpAssignOp OpUShr)) } *)
  227. | "==" { mk lexbuf (Binop OpEq) }
  228. | "!=" { mk lexbuf (Binop OpNotEq) }
  229. | "<=" { mk lexbuf (Binop OpLte) }
  230. (*//| ">=" { mk lexbuf (Binop OpGte) }*)
  231. | "&&" { mk lexbuf (Binop OpBoolAnd) }
  232. | "||" { mk lexbuf (Binop OpBoolOr) }
  233. | "<<" { mk lexbuf (Binop OpShl) }
  234. | "->" { mk lexbuf Arrow }
  235. | "..." { mk lexbuf (Binop OpInterval) }
  236. | "=>" { mk lexbuf (Binop OpArrow)}
  237. | "!" { mk lexbuf (Unop Not) }
  238. | "<" { mk lexbuf (Binop OpLt) }
  239. | ">" { mk lexbuf (Binop OpGt) }
  240. | ";" { mk lexbuf Semicolon }
  241. | ":" { mk lexbuf DblDot }
  242. | "," { mk lexbuf Comma }
  243. | "." { mk lexbuf Dot }
  244. | "%" { mk lexbuf (Binop OpMod) }
  245. | "&" { mk lexbuf (Binop OpAnd) }
  246. | "|" { mk lexbuf (Binop OpOr) }
  247. | "^" { mk lexbuf (Binop OpXor) }
  248. | "+" { mk lexbuf (Binop OpAdd) }
  249. | "*" { mk lexbuf (Binop OpMult) }
  250. | "/" { mk lexbuf (Binop OpDiv) }
  251. | "-" { mk lexbuf (Binop OpSub) }
  252. | "=" { mk lexbuf (Binop OpAssign) }
  253. | "[" { mk lexbuf BkOpen }
  254. | "]" { mk lexbuf BkClose }
  255. | "{" { mk lexbuf BrOpen }
  256. | "}" { mk lexbuf BrClose }
  257. | "(" { mk lexbuf POpen }
  258. | ")" { mk lexbuf PClose }
  259. | "?" { mk lexbuf Question }
  260. | "@" { mk lexbuf At }
  261. | "/*" {
  262. reset();
  263. let pmin = lexeme_start lexbuf in
  264. let pmax = (try comment lexbuf with Exit -> error Unclosed_comment pmin) in
  265. mk_tok (Comment (contents())) pmin pmax;
  266. }
  267. | '"' {
  268. reset();
  269. let pmin = lexeme_start lexbuf in
  270. let pmax = (try string lexbuf with Exit -> error Unterminated_string pmin) in
  271. let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
  272. mk_tok (Const (String str)) pmin pmax;
  273. }
  274. | "'" {
  275. reset();
  276. let pmin = lexeme_start lexbuf in
  277. let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
  278. let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
  279. let t = mk_tok (Const (String str)) pmin pmax in
  280. fast_add_fmt_string (snd t);
  281. t
  282. }
  283. | "~/" {
  284. reset();
  285. let pmin = lexeme_start lexbuf in
  286. let options, pmax = (try regexp lexbuf with Exit -> error Unterminated_regexp pmin) in
  287. let str = contents() in
  288. mk_tok (Const (Regexp (str,options))) pmin pmax;
  289. }
  290. | '#' ident {
  291. let v = lexeme lexbuf in
  292. let v = String.sub v 1 (String.length v - 1) in
  293. mk lexbuf (Sharp v)
  294. }
  295. | '$' ['_' 'a'-'z' 'A'-'Z' '0'-'9']* {
  296. let v = lexeme lexbuf in
  297. let v = String.sub v 1 (String.length v - 1) in
  298. mk lexbuf (Dollar v)
  299. }
  300. | ident { mk_ident lexbuf }
  301. | idtype { mk lexbuf (Const (Ident (lexeme lexbuf))) }
  302. | _ { invalid_char lexbuf }
  303. and comment = parse
  304. | eof { raise Exit }
  305. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; comment lexbuf }
  306. | "*/" { lexeme_end lexbuf }
  307. | '*' { store lexbuf; comment lexbuf }
  308. | [^'*' '\n' '\r']+ { store lexbuf; comment lexbuf }
  309. and string = parse
  310. | eof { raise Exit }
  311. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string lexbuf }
  312. | "\\\"" { store lexbuf; string lexbuf }
  313. | "\\\\" { store lexbuf; string lexbuf }
  314. | '\\' { store lexbuf; string lexbuf }
  315. | '"' { lexeme_end lexbuf }
  316. | [^'"' '\\' '\r' '\n']+ { store lexbuf; string lexbuf }
  317. and string2 = parse
  318. | eof { raise Exit }
  319. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string2 lexbuf }
  320. | '\\' { store lexbuf; string2 lexbuf }
  321. | "\\\\" { store lexbuf; string2 lexbuf }
  322. | "\\'" { store lexbuf; string2 lexbuf }
  323. | "'" { lexeme_end lexbuf }
  324. | "$$" | "\\$" | '$' { store lexbuf; string2 lexbuf }
  325. | "${" {
  326. let pmin = lexeme_start lexbuf in
  327. store lexbuf;
  328. (try code_string lexbuf with Exit -> error Unclosed_code pmin);
  329. string2 lexbuf;
  330. }
  331. | [^'\'' '\\' '\r' '\n' '$']+ { store lexbuf; string2 lexbuf }
  332. and code_string = parse
  333. | eof { raise Exit }
  334. | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; code_string lexbuf }
  335. | '{' | '/' { store lexbuf; code_string lexbuf }
  336. | '}' { store lexbuf; (* stop *) }
  337. | '"' {
  338. add "\"";
  339. let pmin = lexeme_start lexbuf in
  340. (try ignore(string lexbuf) with Exit -> error Unterminated_string pmin);
  341. add "\"";
  342. code_string lexbuf;
  343. }
  344. | "'" {
  345. add "'";
  346. let pmin = lexeme_start lexbuf in
  347. let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
  348. add "'";
  349. fast_add_fmt_string { pfile = !cur.lfile; pmin = pmin; pmax = pmax };
  350. code_string lexbuf;
  351. }
  352. | "/*" {
  353. let pmin = lexeme_start lexbuf in
  354. (try ignore(comment lexbuf) with Exit -> error Unclosed_comment pmin);
  355. code_string lexbuf;
  356. }
  357. | "//" [^'\n' '\r']* { store lexbuf; code_string lexbuf; }
  358. | [^'/' '"' '\'' '{' '}' '\n' '\r']+ { store lexbuf; code_string lexbuf; }
  359. and regexp = parse
  360. | eof | '\n' | '\r' { raise Exit }
  361. | '\\' '/' { add "/"; regexp lexbuf }
  362. | '\\' 'r' { add "\r"; regexp lexbuf }
  363. | '\\' 'n' { add "\n"; regexp lexbuf }
  364. | '\\' 't' { add "\t"; regexp lexbuf }
  365. | '\\' ['\\' '$' '.' '*' '+' '^' '|' '{' '}' '[' ']' '(' ')' '?' '-' '0'-'9'] { add (lexeme lexbuf); regexp lexbuf }
  366. | '\\' ['w' 'W' 'b' 'B' 's' 'S' 'd' 'D' 'x'] { add (lexeme lexbuf); regexp lexbuf }
  367. | '\\' ['u' 'U'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] { add (lexeme lexbuf); regexp lexbuf }
  368. | '\\' [^ '\\'] { error (Invalid_character (lexeme lexbuf).[1]) (lexeme_end lexbuf - 1) }
  369. | '/' { regexp_options lexbuf, lexeme_end lexbuf }
  370. | [^ '\\' '/' '\r' '\n']+ { store lexbuf; regexp lexbuf }
  371. and regexp_options = parse
  372. | 'g' | 'i' | 'm' | 's' | 'u' {
  373. let l = lexeme lexbuf in
  374. l ^ regexp_options lexbuf
  375. }
  376. | ['a' - 'z'] { error Invalid_option (lexeme_start lexbuf) }
  377. | "" { "" }