123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401 |
- (*
- The Haxe Compiler
- Copyright (C) 2005-2019 Haxe Foundation
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *)
- type t =
- | JString of string
- | JFloat of float
- | JInt of int
- | JObject of (string * t) list
- | JArray of t list
- | JBool of bool
- | JNull
- let write_iter f_el f_sep l =
- let rec rest = function
- | [] -> ()
- | v :: l ->
- f_sep();
- f_el v;
- rest l
- in
- match l with
- | [] -> ()
- | v :: l ->
- f_el v;
- rest l
- let write_sep w =
- w ","
- let rec write_json w v =
- match v with
- | JNull -> write_null w
- | JBool b -> write_bool w b
- | JString s -> write_string w s
- | JFloat f -> write_float w f
- | JInt i -> write_int w i
- | JObject o -> write_object w o
- | JArray a -> write_array w a
- and write_null w =
- w "null"
- and write_bool w b =
- w (if b then "true" else "false")
- and write_string w s =
- w "\"";
- let b = Buffer.create (String.length s) in
- for i = 0 to String.length s - 1 do
- match String.unsafe_get s i with
- | '"' -> Buffer.add_string b "\\\""
- | '\t' -> Buffer.add_string b "\\t"
- | '\r' -> Buffer.add_string b "\\r"
- | '\b' -> Buffer.add_string b "\\b"
- | '\n' -> Buffer.add_string b "\\n"
- | '\012' -> Buffer.add_string b "\\f"
- | '\\' -> Buffer.add_string b "\\\\"
- | '\x00'..'\x1F' | '\x7F' as c -> Buffer.add_string b (Printf.sprintf "\\u%04X" (int_of_char c))
- | c -> Buffer.add_char b c
- done;
- w (Buffer.contents b);
- w "\""
- and write_int w i =
- w (string_of_int i)
- and write_float w f =
- match classify_float f with
- | FP_nan | FP_infinite -> failwith "NaN and infinity floats are unsupported in JSON"
- | _ ->
- let s = Printf.sprintf "%.16g" f in
- let s = if float_of_string s = f then s else Printf.sprintf "%.17g" f in
- w s
- and write_array w a =
- w "[";
- write_iter (write_json w) (fun() -> write_sep w) a;
- w "]"
- and write_object w o =
- let write_el (k, v) =
- write_string w k;
- w ":";
- write_json w v
- in
- w "{";
- write_iter write_el (fun() -> write_sep w) o;
- w "}"
- let string_of_json json =
- let b = Buffer.create 0 in
- write_json (Buffer.add_string b) json;
- Buffer.contents b;
- module Reader = struct
- (*
- The following code is basically stripped down yojson (https://github.com/mjambon/yojson),
- adapted to our data structures and using sedlex instad of ocamllex.
- TODO: we could probably re-use utf-8 stuff from our extlib, but I don't know enough about it.
- *)
- open Sedlexing
- open Sedlexing.Utf8
- exception Json_error of string
- exception Int_overflow
- let dec c =
- Char.code c - 48
- let hex c =
- match (char_of_int c) with
- | '0'..'9' -> c - int_of_char '0'
- | 'a'..'f' -> c - int_of_char 'a' + 10
- | 'A'..'F' -> c - int_of_char 'A' + 10
- | _ -> assert false
- let min10 = min_int / 10 - (if min_int mod 10 = 0 then 0 else 1)
- let max10 = max_int / 10 + (if max_int mod 10 = 0 then 0 else 1)
- let json_error s = raise (Json_error s)
- let extract_positive_int lexbuf =
- let s = Sedlexing.Utf8.lexeme lexbuf in
- let n = ref 0 in
- for i = 0 to (lexeme_length lexbuf) - 1 do
- if !n >= max10 then
- raise Int_overflow
- else
- n := 10 * !n + dec s.[i]
- done;
- if !n < 0 then
- raise Int_overflow
- else
- !n
- let make_positive_int lexbuf =
- try JInt (extract_positive_int lexbuf)
- with Int_overflow -> JFloat (float_of_string (lexeme lexbuf))
- let extract_negative_int lexbuf =
- let s = Sedlexing.Utf8.lexeme lexbuf in
- let n = ref 0 in
- for i = 1 to (lexeme_length lexbuf) - 1 do
- if !n <= min10 then
- raise Int_overflow
- else
- n := 10 * !n - dec s.[i]
- done;
- if !n > 0 then
- raise Int_overflow
- else
- !n
- let make_negative_int lexbuf =
- try JInt (extract_negative_int lexbuf)
- with Int_overflow -> JFloat (float_of_string (lexeme lexbuf))
- let utf8_of_code buf x =
- let add = Buffer.add_char in
- (* Straight <= doesn't work with signed 31-bit ints *)
- let maxbits n x = x lsr n = 0 in
- if maxbits 7 x then
- (* 7 *)
- add buf (Char.chr x)
- else if maxbits 11 x then (
- (* 5 + 6 *)
- add buf (Char.chr (0b11000000 lor ((x lsr 6) land 0b00011111)));
- add buf (Char.chr (0b10000000 lor (x land 0b00111111)))
- )
- else if maxbits 16 x then (
- (* 4 + 6 + 6 *)
- add buf (Char.chr (0b11100000 lor ((x lsr 12) land 0b00001111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor (x land 0b00111111)))
- )
- else if maxbits 21 x then (
- (* 3 + 6 + 6 + 6 *)
- add buf (Char.chr (0b11110000 lor ((x lsr 18) land 0b00000111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
- )
- else if maxbits 26 x then (
- (* 2 + 6 + 6 + 6 + 6 *)
- add buf (Char.chr (0b11111000 lor ((x lsr 24) land 0b00000011)));
- add buf (Char.chr (0b10000000 lor ((x lsr 18) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
- )
- else (
- assert (maxbits 31 x);
- (* 1 + 6 + 6 + 6 + 6 + 6 *)
- add buf (Char.chr (0b11111100 lor ((x lsr 30) land 0b00000001)));
- add buf (Char.chr (0b10000000 lor ((x lsr 24) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 18) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
- add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
- )
- let code_of_surrogate_pair i j =
- let high10 = i - 0xD800 in
- let low10 = j - 0xDC00 in
- 0x10000 + ((high10 lsl 10) lor low10)
- let utf8_of_surrogate_pair buf i j =
- utf8_of_code buf (code_of_surrogate_pair i j)
- let space = [%sedlex.regexp? Plus (Chars " \t\r\n")]
- let digit = [%sedlex.regexp? '0' .. '9']
- let nonzero = [%sedlex.regexp? '1' .. '9']
- let digits = [%sedlex.regexp? Plus digit]
- let frac = [%sedlex.regexp? '.', digits]
- let e = [%sedlex.regexp? (Chars "eE"),(Opt (Chars "+-"))]
- let exp = [%sedlex.regexp? e, digits]
- let positive_int = [%sedlex.regexp? digit | (nonzero, digits)]
- let float = [%sedlex.regexp? (Opt '-'), positive_int, (frac | exp | (frac, exp))]
- let hex = [%sedlex.regexp? '0'..'9' | 'a'..'f' | 'A'..'F' ]
- let rec read_json lexbuf =
- match%sedlex lexbuf with
- | "true" ->
- JBool true
- | "false" ->
- JBool false
- | "null" ->
- JNull
- | '"' ->
- JString (finish_string (Buffer.create 0) lexbuf)
- | positive_int ->
- make_positive_int lexbuf
- | '-', positive_int ->
- make_negative_int lexbuf
- | float ->
- JFloat (float_of_string (lexeme lexbuf))
- | '{' ->
- let acc = ref [] in
- begin try
- skip_space lexbuf;
- read_object_end lexbuf;
- let field_name = read_string lexbuf in
- skip_space lexbuf;
- read_colon lexbuf;
- skip_space lexbuf;
- acc := (field_name, read_json lexbuf) :: !acc;
- while true do
- skip_space lexbuf;
- read_object_sep lexbuf;
- skip_space lexbuf;
- let field_name = read_string lexbuf in
- skip_space lexbuf;
- read_colon lexbuf;
- skip_space lexbuf;
- acc := (field_name, read_json lexbuf) :: !acc;
- done;
- assert false
- with Exit ->
- JObject (List.rev !acc)
- end
- | '[' ->
- let acc = ref [] in
- begin try
- skip_space lexbuf;
- read_array_end lexbuf;
- acc := read_json lexbuf :: !acc;
- while true do
- skip_space lexbuf;
- read_array_sep lexbuf;
- skip_space lexbuf;
- acc := read_json lexbuf :: !acc;
- done;
- assert false
- with Exit ->
- JArray (List.rev !acc)
- end
- | space ->
- read_json lexbuf
- | eof ->
- json_error "Unexpected end of input"
- | _ ->
- json_error "Invalid token"
- and finish_string buf lexbuf =
- match%sedlex lexbuf with
- | '"' -> Buffer.contents buf
- | '\\' ->
- finish_escaped_char buf lexbuf;
- finish_string buf lexbuf
- | Plus (Compl ('"' | '\\')) ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- finish_string buf lexbuf
- | eof -> json_error "Unexpected end of input"
- | _ -> assert false
- and finish_escaped_char buf lexbuf =
- match%sedlex lexbuf with
- | '"' | '\\' | '/' ->
- Buffer.add_char buf (Uchar.to_char (Sedlexing.lexeme_char lexbuf 0))
- | 'b' ->
- Buffer.add_char buf '\b'
- | 'f' ->
- Buffer.add_char buf '\012'
- | 'n' ->
- Buffer.add_char buf '\n'
- | 'r' ->
- Buffer.add_char buf '\r'
- | 't' ->
- Buffer.add_char buf '\t'
- | 'u', hex, hex, hex, hex ->
- let a,b,c,d =
- match Sedlexing.lexeme lexbuf with
- | [|_; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
- | _ -> assert false
- in
- let x =
- (hex a lsl 12) lor (hex b lsl 8) lor (hex c lsl 4) lor hex d
- in
- if x >= 0xD800 && x <= 0xDBFF then
- finish_surrogate_pair buf x lexbuf
- else
- utf8_of_code buf x
- | _ ->
- json_error "Invalid escape sequence"
- and finish_surrogate_pair buf x lexbuf =
- match%sedlex lexbuf with
- | "\\u", hex, hex, hex, hex ->
- let a,b,c,d =
- match Sedlexing.lexeme lexbuf with
- | [|_;_ ; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
- | _ -> assert false
- in
- let y =
- (hex a lsl 12) lor (hex b lsl 8) lor (hex c lsl 4) lor hex d
- in
- if y >= 0xDC00 && y <= 0xDFFF then
- utf8_of_surrogate_pair buf x y
- else
- json_error "Invalid low surrogate for code point beyond U+FFFF"
- | _ ->
- json_error "Missing escape sequence representing low surrogate for code point beyond U+FFFF"
- and skip_space lexbuf =
- match%sedlex lexbuf with
- | space | "" -> ()
- | _ -> assert false
- and read_string lexbuf =
- match%sedlex lexbuf with
- | '"' -> finish_string (Buffer.create 0) lexbuf
- | _ -> json_error "Expected string"
- and read_array_end lexbuf =
- match%sedlex lexbuf with
- | ']' -> raise Exit
- | "" -> ()
- | _ -> assert false
- and read_array_sep lexbuf =
- match%sedlex lexbuf with
- | ',' -> ()
- | ']' -> raise Exit
- | _ -> json_error "Expected ',' or ']'"
- and read_object_end lexbuf =
- match%sedlex lexbuf with
- | '}' -> raise Exit
- | "" -> ()
- | _ -> assert false
- and read_object_sep lexbuf =
- match%sedlex lexbuf with
- | ',' -> ()
- | '}' -> raise Exit
- | _ -> json_error "Expected ',' or '}'"
- and read_colon lexbuf =
- match%sedlex lexbuf with
- | ':' -> ()
- | _ -> json_error "Expected ':'"
- end
|