123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- (*
- * Copyright (C)2005-2012 Haxe Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *)
- {
- open Lexing
- open Ast
- type error_msg =
- | Invalid_character of char
- | Unterminated_string
- | Unterminated_regexp
- | Unclosed_comment
- | Unclosed_code
- | Invalid_escape
- | Invalid_option
- exception Error of error_msg * pos
- let error_msg = function
- | Invalid_character c when int_of_char c > 32 && int_of_char c < 128 -> Printf.sprintf "Invalid character '%c'" c
- | Invalid_character c -> Printf.sprintf "Invalid character 0x%.2X" (int_of_char c)
- | Unterminated_string -> "Unterminated string"
- | Unterminated_regexp -> "Unterminated regular expression"
- | Unclosed_comment -> "Unclosed comment"
- | Unclosed_code -> "Unclosed code string"
- | Invalid_escape -> "Invalid escape sequence"
- | Invalid_option -> "Invalid regular expression option"
- type lexer_file = {
- lfile : string;
- mutable lline : int;
- mutable lmaxline : int;
- mutable llines : (int * int) list;
- mutable lalines : (int * int) array;
- mutable lstrings : int list;
- }
- let make_file file =
- {
- lfile = file;
- lline = 1;
- lmaxline = 1;
- llines = [0,1];
- lalines = [|0,1|];
- lstrings = [];
- }
- let cur = ref (make_file "")
- let all_files = Hashtbl.create 0
- let buf = Buffer.create 100
- let error e pos =
- raise (Error (e,{ pmin = pos; pmax = pos; pfile = !cur.lfile }))
- let keywords =
- let h = Hashtbl.create 3 in
- List.iter (fun k -> Hashtbl.add h (s_keyword k) k)
- [Function;Class;Static;Var;If;Else;While;Do;For;
- Break;Return;Continue;Extends;Implements;Import;
- Switch;Case;Default;Public;Private;Try;Untyped;
- Catch;New;This;Throw;Extern;Enum;In;Interface;
- Cast;Override;Dynamic;Typedef;Package;
- Inline;Using;Null;True;False;Abstract;Macro];
- h
- let init file do_add =
- let f = make_file file in
- cur := f;
- if do_add then Hashtbl.replace all_files file f
- let save() =
- !cur
- let restore c =
- cur := c
- let newline lexbuf =
- let cur = !cur in
- cur.lline <- cur.lline + 1;
- cur.llines <- (lexeme_end lexbuf,cur.lline) :: cur.llines
- let fmt_pos p =
- p.pmin + (p.pmax - p.pmin) * 1000000
- let add_fmt_string p =
- let file = (try
- Hashtbl.find all_files p.pfile
- with Not_found ->
- let f = make_file p.pfile in
- Hashtbl.replace all_files p.pfile f;
- f
- ) in
- file.lstrings <- (fmt_pos p) :: file.lstrings
- let fast_add_fmt_string p =
- let cur = !cur in
- cur.lstrings <- (fmt_pos p) :: cur.lstrings
- let is_fmt_string p =
- try
- let file = Hashtbl.find all_files p.pfile in
- List.mem (fmt_pos p) file.lstrings
- with Not_found ->
- false
- let remove_fmt_string p =
- try
- let file = Hashtbl.find all_files p.pfile in
- file.lstrings <- List.filter ((<>) (fmt_pos p)) file.lstrings
- with Not_found ->
- ()
- let find_line p f =
- (* rebuild cache if we have a new line *)
- if f.lmaxline <> f.lline then begin
- f.lmaxline <- f.lline;
- f.lalines <- Array.of_list (List.rev f.llines);
- end;
- let rec loop min max =
- let med = (min + max) lsr 1 in
- let lp, line = Array.unsafe_get f.lalines med in
- if med = min then
- line, p - lp
- else if lp > p then
- loop min med
- else
- loop med max
- in
- loop 0 (Array.length f.lalines)
- (* resolve a position within a non-haxe file by counting newlines *)
- let resolve_pos file =
- let ch = open_in_bin file in
- let f = make_file file in
- let rec loop p =
- let inc i () =
- f.lline <- f.lline + 1;
- f.llines <- (p + i,f.lline) :: f.llines;
- i
- in
- let i = match input_char ch with
- | '\n' -> inc 1
- | '\r' ->
- ignore(input_char ch);
- inc 2
- | _ -> fun () -> 1
- in
- loop (p + i())
- in
- try
- loop 0
- with End_of_file ->
- close_in ch;
- f
- let find_file file =
- try Hashtbl.find all_files file with Not_found -> try resolve_pos file with Sys_error _ -> make_file file
- let find_pos p =
- find_line p.pmin (find_file p.pfile)
- let get_error_line p =
- let l, _ = find_pos p in
- l
- let get_error_pos printer p =
- if p.pmin = -1 then
- "(unknown)"
- else
- let file = find_file p.pfile in
- let l1, p1 = find_line p.pmin file in
- let l2, p2 = find_line p.pmax file in
- if l1 = l2 then begin
- let s = (if p1 = p2 then Printf.sprintf " %d" p1 else Printf.sprintf "s %d-%d" p1 p2) in
- Printf.sprintf "%s character%s" (printer p.pfile l1) s
- end else
- Printf.sprintf "%s lines %d-%d" (printer p.pfile l1) l1 l2
- let reset() = Buffer.reset buf
- let contents() = Buffer.contents buf
- let store lexbuf = Buffer.add_string buf (lexeme lexbuf)
- let add c = Buffer.add_string buf c
- let mk_tok t pmin pmax =
- t , { pfile = !cur.lfile; pmin = pmin; pmax = pmax }
- let mk lexbuf t =
- mk_tok t (lexeme_start lexbuf) (lexeme_end lexbuf)
- let mk_ident lexbuf =
- let s = lexeme lexbuf in
- mk lexbuf (try Kwd (Hashtbl.find keywords s) with Not_found -> Const (Ident s))
- let invalid_char lexbuf =
- error (Invalid_character (lexeme_char lexbuf 0)) (lexeme_start lexbuf)
- }
- let ident = ('_'* ['a'-'z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* | '_'+ | '_'+ ['0'-'9'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']* )
- let idtype = '_'* ['A'-'Z'] ['_' 'a'-'z' 'A'-'Z' '0'-'9']*
- let integer = ['1'-'9'] ['0'-'9']* | '0'
- rule skip_header = parse
- | "\239\187\191" { skip_header lexbuf }
- | "#!" [^'\n' '\r']* { skip_header lexbuf }
- | "" | eof { }
- and token = parse
- | eof { mk lexbuf Eof }
- | [' ' '\t']+ { token lexbuf }
- | "\r\n" { newline lexbuf; token lexbuf }
- | '\n' | '\r' { newline lexbuf; token lexbuf }
- | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { mk lexbuf (Const (Int (lexeme lexbuf))) }
- | integer { mk lexbuf (Const (Int (lexeme lexbuf))) }
- | integer '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
- | '.' ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
- | integer ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
- | integer '.' ['0'-'9']* ['e' 'E'] ['+' '-']? ['0'-'9']+ { mk lexbuf (Const (Float (lexeme lexbuf))) }
- | integer "..." {
- let s = lexeme lexbuf in
- mk lexbuf (IntInterval (String.sub s 0 (String.length s - 3)))
- }
- | "//" [^'\n' '\r']* {
- let s = lexeme lexbuf in
- mk lexbuf (CommentLine (String.sub s 2 ((String.length s)-2)))
- }
- | "++" { mk lexbuf (Unop Increment) }
- | "--" { mk lexbuf (Unop Decrement) }
- | "~" { mk lexbuf (Unop NegBits) }
- | "%=" { mk lexbuf (Binop (OpAssignOp OpMod)) }
- | "&=" { mk lexbuf (Binop (OpAssignOp OpAnd)) }
- | "|=" { mk lexbuf (Binop (OpAssignOp OpOr)) }
- | "^=" { mk lexbuf (Binop (OpAssignOp OpXor)) }
- | "+=" { mk lexbuf (Binop (OpAssignOp OpAdd)) }
- | "-=" { mk lexbuf (Binop (OpAssignOp OpSub)) }
- | "*=" { mk lexbuf (Binop (OpAssignOp OpMult)) }
- | "/=" { mk lexbuf (Binop (OpAssignOp OpDiv)) }
- | "<<=" { mk lexbuf (Binop (OpAssignOp OpShl)) }
- (*//| ">>=" { mk lexbuf (Binop (OpAssignOp OpShr)) } *)
- (*//| ">>>=" { mk lexbuf (Binop (OpAssignOp OpUShr)) } *)
- | "==" { mk lexbuf (Binop OpEq) }
- | "!=" { mk lexbuf (Binop OpNotEq) }
- | "<=" { mk lexbuf (Binop OpLte) }
- (*//| ">=" { mk lexbuf (Binop OpGte) }*)
- | "&&" { mk lexbuf (Binop OpBoolAnd) }
- | "||" { mk lexbuf (Binop OpBoolOr) }
- | "<<" { mk lexbuf (Binop OpShl) }
- | "->" { mk lexbuf Arrow }
- | "..." { mk lexbuf (Binop OpInterval) }
- | "=>" { mk lexbuf (Binop OpArrow)}
- | "!" { mk lexbuf (Unop Not) }
- | "<" { mk lexbuf (Binop OpLt) }
- | ">" { mk lexbuf (Binop OpGt) }
- | ";" { mk lexbuf Semicolon }
- | ":" { mk lexbuf DblDot }
- | "," { mk lexbuf Comma }
- | "." { mk lexbuf Dot }
- | "%" { mk lexbuf (Binop OpMod) }
- | "&" { mk lexbuf (Binop OpAnd) }
- | "|" { mk lexbuf (Binop OpOr) }
- | "^" { mk lexbuf (Binop OpXor) }
- | "+" { mk lexbuf (Binop OpAdd) }
- | "*" { mk lexbuf (Binop OpMult) }
- | "/" { mk lexbuf (Binop OpDiv) }
- | "-" { mk lexbuf (Binop OpSub) }
- | "=" { mk lexbuf (Binop OpAssign) }
- | "[" { mk lexbuf BkOpen }
- | "]" { mk lexbuf BkClose }
- | "{" { mk lexbuf BrOpen }
- | "}" { mk lexbuf BrClose }
- | "(" { mk lexbuf POpen }
- | ")" { mk lexbuf PClose }
- | "?" { mk lexbuf Question }
- | "@" { mk lexbuf At }
- | "/*" {
- reset();
- let pmin = lexeme_start lexbuf in
- let pmax = (try comment lexbuf with Exit -> error Unclosed_comment pmin) in
- mk_tok (Comment (contents())) pmin pmax;
- }
- | '"' {
- reset();
- let pmin = lexeme_start lexbuf in
- let pmax = (try string lexbuf with Exit -> error Unterminated_string pmin) in
- let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
- mk_tok (Const (String str)) pmin pmax;
- }
- | "'" {
- reset();
- let pmin = lexeme_start lexbuf in
- let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
- let str = (try unescape (contents()) with Exit -> error Invalid_escape pmin) in
- let t = mk_tok (Const (String str)) pmin pmax in
- fast_add_fmt_string (snd t);
- t
- }
- | "~/" {
- reset();
- let pmin = lexeme_start lexbuf in
- let options, pmax = (try regexp lexbuf with Exit -> error Unterminated_regexp pmin) in
- let str = contents() in
- mk_tok (Const (Regexp (str,options))) pmin pmax;
- }
- | '#' ident {
- let v = lexeme lexbuf in
- let v = String.sub v 1 (String.length v - 1) in
- mk lexbuf (Sharp v)
- }
- | '$' ['_' 'a'-'z' 'A'-'Z' '0'-'9']* {
- let v = lexeme lexbuf in
- let v = String.sub v 1 (String.length v - 1) in
- mk lexbuf (Dollar v)
- }
- | ident { mk_ident lexbuf }
- | idtype { mk lexbuf (Const (Ident (lexeme lexbuf))) }
- | _ { invalid_char lexbuf }
- and comment = parse
- | eof { raise Exit }
- | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; comment lexbuf }
- | "*/" { lexeme_end lexbuf }
- | '*' { store lexbuf; comment lexbuf }
- | [^'*' '\n' '\r']+ { store lexbuf; comment lexbuf }
- and string = parse
- | eof { raise Exit }
- | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string lexbuf }
- | "\\\"" { store lexbuf; string lexbuf }
- | "\\\\" { store lexbuf; string lexbuf }
- | '\\' { store lexbuf; string lexbuf }
- | '"' { lexeme_end lexbuf }
- | [^'"' '\\' '\r' '\n']+ { store lexbuf; string lexbuf }
- and string2 = parse
- | eof { raise Exit }
- | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; string2 lexbuf }
- | '\\' { store lexbuf; string2 lexbuf }
- | "\\\\" { store lexbuf; string2 lexbuf }
- | "\\'" { store lexbuf; string2 lexbuf }
- | "'" { lexeme_end lexbuf }
- | "$$" | "\\$" | '$' { store lexbuf; string2 lexbuf }
- | "${" {
- let pmin = lexeme_start lexbuf in
- store lexbuf;
- (try code_string lexbuf with Exit -> error Unclosed_code pmin);
- string2 lexbuf;
- }
- | [^'\'' '\\' '\r' '\n' '$']+ { store lexbuf; string2 lexbuf }
- and code_string = parse
- | eof { raise Exit }
- | '\n' | '\r' | "\r\n" { newline lexbuf; store lexbuf; code_string lexbuf }
- | '{' | '/' { store lexbuf; code_string lexbuf }
- | '}' { store lexbuf; (* stop *) }
- | '"' {
- add "\"";
- let pmin = lexeme_start lexbuf in
- (try ignore(string lexbuf) with Exit -> error Unterminated_string pmin);
- add "\"";
- code_string lexbuf;
- }
- | "'" {
- add "'";
- let pmin = lexeme_start lexbuf in
- let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
- add "'";
- fast_add_fmt_string { pfile = !cur.lfile; pmin = pmin; pmax = pmax };
- code_string lexbuf;
- }
- | "/*" {
- let pmin = lexeme_start lexbuf in
- (try ignore(comment lexbuf) with Exit -> error Unclosed_comment pmin);
- code_string lexbuf;
- }
- | "//" [^'\n' '\r']* { store lexbuf; code_string lexbuf; }
- | [^'/' '"' '\'' '{' '}' '\n' '\r']+ { store lexbuf; code_string lexbuf; }
- and regexp = parse
- | eof | '\n' | '\r' { raise Exit }
- | '\\' '/' { add "/"; regexp lexbuf }
- | '\\' 'r' { add "\r"; regexp lexbuf }
- | '\\' 'n' { add "\n"; regexp lexbuf }
- | '\\' 't' { add "\t"; regexp lexbuf }
- | '\\' ['\\' '$' '.' '*' '+' '^' '|' '{' '}' '[' ']' '(' ')' '?' '-' '0'-'9'] { add (lexeme lexbuf); regexp lexbuf }
- | '\\' ['w' 'W' 'b' 'B' 's' 'S' 'd' 'D' 'x'] { add (lexeme lexbuf); regexp lexbuf }
- | '\\' ['u' 'U'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] { add (lexeme lexbuf); regexp lexbuf }
- | '\\' [^ '\\'] { error (Invalid_character (lexeme lexbuf).[1]) (lexeme_end lexbuf - 1) }
- | '/' { regexp_options lexbuf, lexeme_end lexbuf }
- | [^ '\\' '/' '\r' '\n']+ { store lexbuf; regexp lexbuf }
- and regexp_options = parse
- | 'g' | 'i' | 'm' | 's' | 'u' {
- let l = lexeme lexbuf in
- l ^ regexp_options lexbuf
- }
- | ['a' - 'z'] { error Invalid_option (lexeme_start lexbuf) }
- | "" { "" }
|