lexer.mll 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  (*
      The Haxe Compiler
      Copyright (C) 2005-2016  Haxe Foundation

      This program is free software; you can redistribute it and/or
      modify it under the terms of the GNU General Public License
      as published by the Free Software Foundation; either version 2
      of the License, or (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *)
  16. {
  17. open Lexing
  18. open Ast
  (** Reasons for which the lexer rejects its input. *)
  type error_msg =
    | Invalid_character of char  (** carries the offending character *)
    | Unterminated_string
    | Unterminated_regexp
    | Unclosed_comment
    | Unclosed_code
    | Invalid_escape
    | Invalid_option

  (** Lexing failure: the reason together with the position it applies to. *)
  exception Error of error_msg * pos
  (** [error_msg e] renders a lexer error as a human-readable message.

      For [Invalid_character], printable non-blank ASCII characters (codes 33
      to 126) are shown literally; every other byte is shown as a two-digit
      hexadecimal code so that control characters never reach the output raw. *)
  let error_msg = function
    | Invalid_character c ->
      let code = int_of_char c in
      (* 127 is DEL, a control character, so it takes the hexadecimal form *)
      if code > 32 && code < 127 then Printf.sprintf "Invalid character '%c'" c
      else Printf.sprintf "Invalid character 0x%.2X" code
    | Unterminated_string -> "Unterminated string"
    | Unterminated_regexp -> "Unterminated regular expression"
    | Unclosed_comment -> "Unclosed comment"
    | Unclosed_code -> "Unclosed code string"
    | Invalid_escape -> "Invalid escape sequence"
    | Invalid_option -> "Invalid regular expression option"
  (** Per-file bookkeeping kept by the lexer. *)
  type lexer_file = {
    lfile : string;
        (** name the record was created for *)
    mutable lline : int;
        (** current line number; starts at 1 and grows by one per line break *)
    mutable lmaxline : int;
        (** value of [lline] when [lalines] was last rebuilt *)
    mutable llines : (int * int) list;
        (** (start offset, line number) pairs, most recent line first *)
    mutable lalines : (int * int) array;
        (** array copy of [llines] in ascending order, used for lookups *)
    mutable lstrings : int list;
        (** encoded positions of the strings registered as format strings *)
  }
  (** [make_file file] creates the bookkeeping record for [file]: the lexer is
      on line 1, which starts at offset 0, and no format string is known. *)
  let make_file file =
    let first_line = (0, 1) in
    {
      lfile = file;
      lline = 1;
      lmaxline = 1;
      llines = [ first_line ];
      lalines = [| first_line |];
      lstrings = [];
    }
  (** Record of the file currently being lexed. *)
  let cur = ref (make_file "")

  (** Known files, indexed by file name. *)
  let all_files = Hashtbl.create 0

  (** Scratch buffer shared by the rules that accumulate text
      (comments, strings, regular expressions). *)
  let buf = Buffer.create 100
  (** [error e pos] raises [Error] for reason [e] at the zero-width position
      [pos] of the current file. *)
  let error e pos =
    let where = { pfile = !cur.lfile; pmin = pos; pmax = pos } in
    raise (Error (e, where))
  (** Table mapping the textual form of each keyword (as given by
      [s_keyword]) to the keyword itself. *)
  let keywords =
    let table = Hashtbl.create 3 in
    let register kwd = Hashtbl.add table (s_keyword kwd) kwd in
    List.iter register
      [
        Function; Class; Static; Var; If; Else; While; Do; For;
        Break; Return; Continue; Extends; Implements; Import;
        Switch; Case; Default; Public; Private; Try; Untyped;
        Catch; New; This; Throw; Extern; Enum; In; Interface;
        Cast; Override; Dynamic; Typedef; Package;
        Inline; Using; Null; True; False; Abstract; Macro;
      ];
    table
  (** [init file do_add] makes a fresh record for [file] the current one.
      When [do_add] is true the record is also stored in [all_files] under
      [file], replacing any previous entry. *)
  let init file do_add =
    let fresh = make_file file in
    cur := fresh;
    if do_add then Hashtbl.replace all_files file fresh
  (** [save ()] returns the current file record so that it can later be
      handed back to [restore]. *)
  let save () = cur.contents
  (** [restore c] makes [c] the current file record again. *)
  let restore c = cur.contents <- c
  (** [newline lexbuf] records a line break in the current file: the line
      counter is incremented and the new line is noted as starting at the end
      of the current lexeme. *)
  let newline lexbuf =
    let file = !cur in
    let line = file.lline + 1 in
    file.lline <- line;
    file.llines <- (lexeme_end lexbuf, line) :: file.llines
  (** [fmt_pos p] packs a position into one integer: the start offset plus
      the length multiplied by 1000000. *)
  let fmt_pos p =
    let length = p.pmax - p.pmin in
    p.pmin + (length * 1000000)
  (** [add_fmt_string p] registers the string at position [p] as a format
      string of the file [p.pfile], creating and storing a record for that
      file in [all_files] when none exists yet. *)
  let add_fmt_string p =
    let file =
      if Hashtbl.mem all_files p.pfile then Hashtbl.find all_files p.pfile
      else begin
        let created = make_file p.pfile in
        Hashtbl.replace all_files p.pfile created;
        created
      end
    in
    file.lstrings <- fmt_pos p :: file.lstrings
  (** [fast_add_fmt_string p] registers the string at position [p] as a
      format string of the current file, without going through [all_files]. *)
  let fast_add_fmt_string p =
    let file = !cur in
    let key = fmt_pos p in
    file.lstrings <- key :: file.lstrings
  (** [is_fmt_string p] tells whether the string at position [p] is
      registered as a format string. Files absent from [all_files] have
      none. *)
  let is_fmt_string p =
    Hashtbl.mem all_files p.pfile
    && List.mem (fmt_pos p) (Hashtbl.find all_files p.pfile).lstrings
  (** [remove_fmt_string p] drops every registration of the string at
      position [p]; it does nothing when [p.pfile] is not in [all_files]. *)
  let remove_fmt_string p =
    if Hashtbl.mem all_files p.pfile then begin
      let file = Hashtbl.find all_files p.pfile in
      let key = fmt_pos p in
      file.lstrings <- List.filter (fun other -> key <> other) file.lstrings
    end
  (** [find_line p f] returns [(line, column)] for offset [p] in file [f],
      where [column] is the distance from the start offset of the line
      found. The line is located by binary search in [f.lalines]. *)
  let find_line p f =
    (* the array copy is stale as soon as a line was added since its build *)
    if f.lmaxline <> f.lline then begin
      f.lmaxline <- f.lline;
      f.lalines <- Array.of_list (List.rev f.llines)
    end;
    let table = f.lalines in
    let rec search lo hi =
      let mid = (lo + hi) lsr 1 in
      let start, line = Array.unsafe_get table mid in
      if mid = lo then (line, p - start)
      else if start > p then search lo mid
      else search mid hi
    in
    search 0 (Array.length table)
  (** [resolve_pos file] builds the line table of a file that was not lexed
      (a non-haxe file) by reading it and counting line breaks.

      LF, CR LF and a lone CR each end exactly one line. A lone CR no longer
      swallows the byte that follows it, so two consecutive CR count as two
      lines. The channel is closed on every exit path.

      @raise Sys_error when the file cannot be opened or read. *)
  let resolve_pos file =
    let ch = open_in_bin file in
    let f = make_file file in
    (* record that a new line starts at offset [start] *)
    let add_line start =
      f.lline <- f.lline + 1;
      f.llines <- (start, f.lline) :: f.llines
    in
    let next () = try Some (input_char ch) with End_of_file -> None in
    (* [p] is the offset of the byte about to be read; [pending_cr] is true
       when the previous byte was a CR whose line has not been recorded yet *)
    let rec loop p pending_cr =
      match next () with
      | None ->
        if pending_cr then add_line p
      | Some '\n' ->
        (* covers both a plain LF and the second half of CR LF *)
        add_line (p + 1);
        loop (p + 1) false
      | Some c ->
        (* the pending CR stood alone: its line starts at this very byte *)
        if pending_cr then add_line p;
        loop (p + 1) (c = '\r')
    in
    (try loop 0 false with e -> close_in_noerr ch; raise e);
    close_in ch;
    f
  (** [find_file file] returns the record stored for [file] in [all_files].
      For an unknown file the line table is built with [resolve_pos], and an
      empty record is returned when that fails with [Sys_error]. Neither
      fallback result is stored in [all_files]. *)
  let find_file file =
    if Hashtbl.mem all_files file then Hashtbl.find all_files file
    else (try resolve_pos file with Sys_error _ -> make_file file)
  (** [find_pos p] returns the [(line, column)] pair of the start of [p]. *)
  let find_pos p =
    let file = find_file p.pfile in
    find_line p.pmin file
  (** [get_error_line p] returns the line number on which [p] starts. *)
  let get_error_line p = fst (find_pos p)
  (** [get_error_pos printer p] describes [p] for an error message.

      [printer file line] supplies the leading part of the text. It is
      followed by a single character index, a character range, or a line
      range, depending on whether [p] is empty, stays on one line, or spans
      several lines. A position whose [pmin] is -1 yields ["(unknown)"]. *)
  let get_error_pos printer p =
    if p.pmin = -1 then "(unknown)"
    else begin
      let file = find_file p.pfile in
      let l1, p1 = find_line p.pmin file in
      let l2, p2 = find_line p.pmax file in
      if l1 <> l2 then
        Printf.sprintf "%s lines %d-%d" (printer p.pfile l1) l1 l2
      else begin
        let s =
          if p1 = p2 then Printf.sprintf " %d" p1
          else Printf.sprintf "s %d-%d" p1 p2
        in
        Printf.sprintf "%s character%s" (printer p.pfile l1) s
      end
    end
  (** [reset ()] empties the shared scratch buffer. *)
  let reset () = Buffer.reset buf

  (** [contents ()] returns what the scratch buffer currently holds. *)
  let contents () = Buffer.contents buf

  (** [store lexbuf] appends the current lexeme to the scratch buffer. *)
  let store lexbuf =
    let text = lexeme lexbuf in
    Buffer.add_string buf text

  (** [add c] appends the string [c] to the scratch buffer. *)
  let add c = Buffer.add_string buf c
  (** [mk_tok t pmin pmax] pairs token [t] with the position going from
      [pmin] to [pmax] in the current file. *)
  let mk_tok t pmin pmax =
    let where = { pfile = !cur.lfile; pmin; pmax } in
    (t, where)
  (** [mk lexbuf t] pairs token [t] with the extent of the current lexeme. *)
  let mk lexbuf t =
    let first = lexeme_start lexbuf in
    let last = lexeme_end lexbuf in
    mk_tok t first last
  (** [mk_ident lexbuf] turns the current lexeme into a keyword token when
      it is listed in [keywords], and into an identifier constant
      otherwise. *)
  let mk_ident lexbuf =
    let name = lexeme lexbuf in
    let tok =
      if Hashtbl.mem keywords name then Kwd (Hashtbl.find keywords name)
      else Const (Ident name)
    in
    mk lexbuf tok
  (** [invalid_char lexbuf] raises [Error] with [Invalid_character] for the
      first character of the current lexeme, at the lexeme start. *)
  let invalid_char lexbuf =
    let offending = lexeme_char lexbuf 0 in
    error (Invalid_character offending) (lexeme_start lexbuf)
  180. }
  (* Lower-case identifier: optional underscores then a lower-case letter,
     or underscores alone, or underscores followed by a digit; any of these
     may continue with letters, digits and underscores (except the
     underscores-only form). *)
  let ident = (
      '_'* ['a'-'z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
    | '_'+
    | '_'+ ['0'-'9'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
  )

  (* Upper-case identifier: optional underscores, an upper-case letter, then
     letters, digits and underscores. *)
  let idtype = '_'* ['A'-'Z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*

  (* Decimal integer without superfluous leading zero. *)
  let integer = '0' | ['1'-'9'] ['0'-'9']*
  (* Consumes what may precede the first token: any number of UTF-8 byte
     order marks and of lines starting with a hash-bang. Stops without
     consuming anything else. *)
  rule skip_header = parse
    | "\239\187\191"
        { skip_header lexbuf }
    | "#!" [^'\n' '\r']*
        { skip_header lexbuf }
    | "" | eof
        { () }
  (* Main entry point: returns the next token paired with its position.
     Blanks are skipped; line breaks are skipped too, after being recorded
     with newline. Rule order matters whenever two rules match the same
     text, so it is kept as is. *)
  and token = parse
    | eof
        { mk lexbuf Eof }
    | [' ' '\t']+
        { token lexbuf }
    | "\r\n"
        { newline lexbuf; token lexbuf }
    | '\n' | '\r'
        { newline lexbuf; token lexbuf }

    (* numeric literals: the token carries the source text unchanged *)
    | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+
        { mk lexbuf (Const (Int (lexeme lexbuf))) }
    | integer
        { mk lexbuf (Const (Int (lexeme lexbuf))) }
    | integer '.' ['0'-'9']+
        { mk lexbuf (Const (Float (lexeme lexbuf))) }
    | '.' ['0'-'9']+
        { mk lexbuf (Const (Float (lexeme lexbuf))) }
    | integer ['e' 'E'] ['+' '-']? ['0'-'9']+
        { mk lexbuf (Const (Float (lexeme lexbuf))) }
    | integer '.' ['0'-'9']* ['e' 'E'] ['+' '-']? ['0'-'9']+
        { mk lexbuf (Const (Float (lexeme lexbuf))) }

    (* an integer directly followed by three dots: one token that carries
       the digits only *)
    | integer "..."
        {
          let text = lexeme lexbuf in
          let digits = String.sub text 0 (String.length text - 3) in
          mk lexbuf (IntInterval digits)
        }

    (* line comment: the token carries the text after the two slashes *)
    | "//" [^'\n' '\r']*
        {
          let text = lexeme lexbuf in
          let body = String.sub text 2 (String.length text - 2) in
          mk lexbuf (CommentLine body)
        }

    | "++"  { mk lexbuf (Unop Increment) }
    | "--"  { mk lexbuf (Unop Decrement) }
    | "~"   { mk lexbuf (Unop NegBits) }

    (* compound assignments *)
    | "%="  { mk lexbuf (Binop (OpAssignOp OpMod)) }
    | "&="  { mk lexbuf (Binop (OpAssignOp OpAnd)) }
    | "|="  { mk lexbuf (Binop (OpAssignOp OpOr)) }
    | "^="  { mk lexbuf (Binop (OpAssignOp OpXor)) }
    | "+="  { mk lexbuf (Binop (OpAssignOp OpAdd)) }
    | "-="  { mk lexbuf (Binop (OpAssignOp OpSub)) }
    | "*="  { mk lexbuf (Binop (OpAssignOp OpMult)) }
    | "/="  { mk lexbuf (Binop (OpAssignOp OpDiv)) }
    | "<<=" { mk lexbuf (Binop (OpAssignOp OpShl)) }
    | "||=" { mk lexbuf (Binop (OpAssignOp OpBoolOr)) }
    | "&&=" { mk lexbuf (Binop (OpAssignOp OpBoolAnd)) }
    (* the rules for operators starting with a greater-than sign are kept
       disabled: such input is always emitted as separate OpGt tokens *)
  (*//| ">>=" { mk lexbuf (Binop (OpAssignOp OpShr)) } *)
  (*//| ">>>=" { mk lexbuf (Binop (OpAssignOp OpUShr)) } *)

    (* comparisons, boolean operators, shifts, arrows *)
    | "=="  { mk lexbuf (Binop OpEq) }
    | "!="  { mk lexbuf (Binop OpNotEq) }
    | "<="  { mk lexbuf (Binop OpLte) }
  (*//| ">=" { mk lexbuf (Binop OpGte) }*)
    | "&&"  { mk lexbuf (Binop OpBoolAnd) }
    | "||"  { mk lexbuf (Binop OpBoolOr) }
    | "<<"  { mk lexbuf (Binop OpShl) }
    | "->"  { mk lexbuf Arrow }
    | "..." { mk lexbuf (Binop OpInterval) }
    | "=>"  { mk lexbuf (Binop OpArrow) }

    (* single-character operators and punctuation *)
    | "!"   { mk lexbuf (Unop Not) }
    | "<"   { mk lexbuf (Binop OpLt) }
    | ">"   { mk lexbuf (Binop OpGt) }
    | ";"   { mk lexbuf Semicolon }
    | ":"   { mk lexbuf DblDot }
    | ","   { mk lexbuf Comma }
    | "."   { mk lexbuf Dot }
    | "%"   { mk lexbuf (Binop OpMod) }
    | "&"   { mk lexbuf (Binop OpAnd) }
    | "|"   { mk lexbuf (Binop OpOr) }
    | "^"   { mk lexbuf (Binop OpXor) }
    | "+"   { mk lexbuf (Binop OpAdd) }
    | "*"   { mk lexbuf (Binop OpMult) }
    | "/"   { mk lexbuf (Binop OpDiv) }
    | "-"   { mk lexbuf (Binop OpSub) }
    | "="   { mk lexbuf (Binop OpAssign) }
    | "["   { mk lexbuf BkOpen }
    | "]"   { mk lexbuf BkClose }
    | "{"   { mk lexbuf BrOpen }
    | "}"   { mk lexbuf BrClose }
    | "("   { mk lexbuf POpen }
    | ")"   { mk lexbuf PClose }
    | "?"   { mk lexbuf Question }
    | "@"   { mk lexbuf At }

    (* block comment: the body is gathered in the scratch buffer by the
       comment rule, which returns the end offset *)
    | "/*"
        {
          reset ();
          let start = lexeme_start lexbuf in
          let stop =
            try comment lexbuf with Exit -> error Unclosed_comment start
          in
          mk_tok (Comment (contents ())) start stop
        }

    (* double-quoted string: gathered raw, then unescaped *)
    | '"'
        {
          reset ();
          let start = lexeme_start lexbuf in
          let stop =
            try string lexbuf with Exit -> error Unterminated_string start
          in
          let body =
            try unescape (contents ()) with Exit -> error Invalid_escape start
          in
          mk_tok (Const (String body)) start stop
        }

    (* single-quoted string: same, and its position is also registered as a
       format string of the current file *)
    | "'"
        {
          reset ();
          let start = lexeme_start lexbuf in
          let stop =
            try string2 lexbuf with Exit -> error Unterminated_string start
          in
          let body =
            try unescape (contents ()) with Exit -> error Invalid_escape start
          in
          let tok = mk_tok (Const (String body)) start stop in
          fast_add_fmt_string (snd tok);
          tok
        }

    (* regular expression literal: pattern text plus option letters *)
    | "~/"
        {
          reset ();
          let start = lexeme_start lexbuf in
          let options, stop =
            try regexp lexbuf with Exit -> error Unterminated_regexp start
          in
          let pattern = contents () in
          mk_tok (Const (Regexp (pattern, options))) start stop
        }

    (* hash or dollar followed by a name: the token carries the name
       without its leading character *)
    | '#' ident
        {
          let text = lexeme lexbuf in
          let name = String.sub text 1 (String.length text - 1) in
          mk lexbuf (Sharp name)
        }
    | '$' ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
        {
          let text = lexeme lexbuf in
          let name = String.sub text 1 (String.length text - 1) in
          mk lexbuf (Dollar name)
        }

    | ident
        { mk_ident lexbuf }
    | idtype
        { mk lexbuf (Const (Ident (lexeme lexbuf))) }
    | _
        { invalid_char lexbuf }
  (* Body of a block comment, entered after its opening delimiter. The text
     is appended to the scratch buffer, line breaks are recorded, and the
     offset just past the closing delimiter is returned. Raises Exit when
     the input ends first. *)
  and comment = parse
    | eof
        { raise Exit }
    | "\r\n" | '\n' | '\r'
        { newline lexbuf; store lexbuf; comment lexbuf }
    | "*/"
        { lexeme_end lexbuf }
    | '*' | [^'*' '\n' '\r']+
        { store lexbuf; comment lexbuf }
  (* Body of a double-quoted string, entered after the opening quote. The
     raw text, escape sequences included, is appended to the scratch buffer
     and the offset just past the closing quote is returned. Raises Exit
     when the input ends first. *)
  and string = parse
    | eof
        { raise Exit }
    | "\r\n" | '\n' | '\r'
        { newline lexbuf; store lexbuf; string lexbuf }
    (* an escaped quote or backslash is taken as a pair so that it cannot
       end the string; any other backslash is taken alone *)
    | "\\\"" | "\\\\" | '\\'
        { store lexbuf; string lexbuf }
    | '"'
        { lexeme_end lexbuf }
    | [^'"' '\\' '\r' '\n']+
        { store lexbuf; string lexbuf }
  (* Body of a single-quoted string, entered after the opening quote. Works
     like the double-quoted rule, and additionally hands every interpolated
     code section (dollar sign followed by an opening brace) over to
     code_string. Returns the offset just past the closing quote; raises
     Exit when the input ends first. *)
  and string2 = parse
    | eof
        { raise Exit }
    | "\r\n" | '\n' | '\r'
        { newline lexbuf; store lexbuf; string2 lexbuf }
    (* backslash forms and dollar forms that do not open a code section *)
    | '\\' | "\\\\" | "\\'" | "$$" | "\\$" | '$'
        { store lexbuf; string2 lexbuf }
    | "'"
        { lexeme_end lexbuf }
    | "${"
        {
          let start = lexeme_start lexbuf in
          store lexbuf;
          (try code_string lexbuf with Exit -> error Unclosed_code start);
          string2 lexbuf
        }
    | [^'\'' '\\' '\r' '\n' '$']+
        { store lexbuf; string2 lexbuf }
  (* Code section of an interpolated single-quoted string, entered after its
     opening brace. The text is appended verbatim to the scratch buffer, up
     to and including the closing brace that matches the opening one.
     Raises Exit when the input ends first.

     Braces nest: an opening brace is consumed together with everything up
     to its own closing brace before the enclosing section resumes, so only
     the matching closing brace ends the section.

     Nested strings and block comments are stored with their delimiters, so
     the buffer holds the code exactly as written. *)
  and code_string = parse
    | eof
        { raise Exit }
    | '\n' | '\r' | "\r\n"
        { newline lexbuf; store lexbuf; code_string lexbuf }
    | '{'
        {
          store lexbuf;
          (* first the nested block, through its own closing brace ... *)
          code_string lexbuf;
          (* ... then the remainder of the enclosing section *)
          code_string lexbuf
        }
    | '/'
        { store lexbuf; code_string lexbuf }
    | '}'
        { store lexbuf (* stop *) }
    | '"'
        {
          add "\"";
          let pmin = lexeme_start lexbuf in
          (try ignore (string lexbuf)
           with Exit -> error Unterminated_string pmin);
          add "\"";
          code_string lexbuf
        }
    | "'"
        {
          add "'";
          let pmin = lexeme_start lexbuf in
          let pmax =
            try string2 lexbuf with Exit -> error Unterminated_string pmin
          in
          add "'";
          (* a nested single-quoted string is a format string as well *)
          fast_add_fmt_string { pfile = !cur.lfile; pmin = pmin; pmax = pmax };
          code_string lexbuf
        }
    | "/*"
        {
          let pmin = lexeme_start lexbuf in
          (* the comment rule stores the body only: put the delimiters back
             so that the comment text cannot be read as code *)
          add "/*";
          (try ignore (comment lexbuf)
           with Exit -> error Unclosed_comment pmin);
          add "*/";
          code_string lexbuf
        }
    | "//" [^'\n' '\r']*
        { store lexbuf; code_string lexbuf }
    | [^'/' '"' '\'' '{' '}' '\n' '\r']+
        { store lexbuf; code_string lexbuf }
  (* Body of a regular expression literal, entered after its opening
     delimiter. The pattern is appended to the scratch buffer and the result
     is the pair of the option letters and the offset just past the closing
     slash. Raises Exit when the input or the line ends first. *)
  and regexp = parse
    | eof | '\n' | '\r'
        { raise Exit }
    (* escapes replaced by the character they stand for *)
    | '\\' '/'
        { add "/"; regexp lexbuf }
    | '\\' 'r'
        { add "\r"; regexp lexbuf }
    | '\\' 'n'
        { add "\n"; regexp lexbuf }
    | '\\' 't'
        { add "\t"; regexp lexbuf }
    (* escapes stored as written, backslash included *)
    | '\\' ['\\' '$' '.' '*' '+' '^' '|' '{' '}' '[' ']' '(' ')' '?' '-' '0'-'9']
    | '\\' ['w' 'W' 'b' 'B' 's' 'S' 'd' 'D' 'x']
    | '\\' ['u' 'U'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
        { add (lexeme lexbuf); regexp lexbuf }
    (* any other escape is rejected, pointing at the escaped character *)
    | '\\' [^ '\\']
        {
          let escaped = (lexeme lexbuf).[1] in
          error (Invalid_character escaped) (lexeme_end lexbuf - 1)
        }
    | '/'
        { regexp_options lexbuf, lexeme_end lexbuf }
    | [^ '\\' '/' '\r' '\n']+
        { store lexbuf; regexp lexbuf }
  (* Option letters following a regular expression literal. Returns the
     accepted letters (g, i, m, s, u) concatenated in source order; any
     other lower-case letter is an error. Stops, consuming nothing, at the
     first character that is not a lower-case letter. *)
  and regexp_options = parse
    | ['g' 'i' 'm' 's' 'u']
        {
          let flag = lexeme lexbuf in
          flag ^ regexp_options lexbuf
        }
    | ['a'-'z']
        { error Invalid_option (lexeme_start lexbuf) }
    | ""
        { "" }