json.ml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. (*
  2. The Haxe Compiler
  3. Copyright (C) 2005-2019 Haxe Foundation
  4. This program is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU General Public License
  6. as published by the Free Software Foundation; either version 2
  7. of the License, or (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  15. *)
  16. type t =
  17. | JString of string
  18. | JFloat of float
  19. | JInt of int
  20. | JObject of (string * t) list
  21. | JArray of t list
  22. | JBool of bool
  23. | JNull
  24. let write_iter f_el f_sep l =
  25. let rec rest = function
  26. | [] -> ()
  27. | v :: l ->
  28. f_sep();
  29. f_el v;
  30. rest l
  31. in
  32. match l with
  33. | [] -> ()
  34. | v :: l ->
  35. f_el v;
  36. rest l
  37. let write_sep w =
  38. w ","
  39. let rec write_json w v =
  40. match v with
  41. | JNull -> write_null w
  42. | JBool b -> write_bool w b
  43. | JString s -> write_string w s
  44. | JFloat f -> write_float w f
  45. | JInt i -> write_int w i
  46. | JObject o -> write_object w o
  47. | JArray a -> write_array w a
  48. and write_null w =
  49. w "null"
  50. and write_bool w b =
  51. w (if b then "true" else "false")
  52. and write_string w s =
  53. w "\"";
  54. let b = Buffer.create (String.length s) in
  55. for i = 0 to String.length s - 1 do
  56. match String.unsafe_get s i with
  57. | '"' -> Buffer.add_string b "\\\""
  58. | '\t' -> Buffer.add_string b "\\t"
  59. | '\r' -> Buffer.add_string b "\\r"
  60. | '\b' -> Buffer.add_string b "\\b"
  61. | '\n' -> Buffer.add_string b "\\n"
  62. | '\012' -> Buffer.add_string b "\\f"
  63. | '\\' -> Buffer.add_string b "\\\\"
  64. | '\x00'..'\x1F' | '\x7F' as c -> Buffer.add_string b (Printf.sprintf "\\u%04X" (int_of_char c))
  65. | c -> Buffer.add_char b c
  66. done;
  67. w (Buffer.contents b);
  68. w "\""
  69. and write_int w i =
  70. w (string_of_int i)
  71. and write_float w f =
  72. match classify_float f with
  73. | FP_nan | FP_infinite -> failwith "NaN and infinity floats are unsupported in JSON"
  74. | _ ->
  75. let s = Printf.sprintf "%.16g" f in
  76. let s = if float_of_string s = f then s else Printf.sprintf "%.17g" f in
  77. w s
  78. and write_array w a =
  79. w "[";
  80. write_iter (write_json w) (fun() -> write_sep w) a;
  81. w "]"
  82. and write_object w o =
  83. let write_el (k, v) =
  84. write_string w k;
  85. w ":";
  86. write_json w v
  87. in
  88. w "{";
  89. write_iter write_el (fun() -> write_sep w) o;
  90. w "}"
  91. let string_of_json json =
  92. let b = Buffer.create 0 in
  93. write_json (Buffer.add_string b) json;
  94. Buffer.contents b;
  95. module Reader = struct
  96. (*
  97. The following code is basically stripped down yojson (https://github.com/mjambon/yojson),
  98. adapted to our data structures and using sedlex instad of ocamllex.
  99. TODO: we could probably re-use utf-8 stuff from our extlib, but I don't know enough about it.
  100. *)
  101. open Sedlexing
  102. open Sedlexing.Utf8
  103. exception Json_error of string
  104. exception Int_overflow
  105. let dec c =
  106. Char.code c - 48
  107. let hex c =
  108. match (char_of_int c) with
  109. | '0'..'9' -> c - int_of_char '0'
  110. | 'a'..'f' -> c - int_of_char 'a' + 10
  111. | 'A'..'F' -> c - int_of_char 'A' + 10
  112. | _ -> assert false
  113. let min10 = min_int / 10 - (if min_int mod 10 = 0 then 0 else 1)
  114. let max10 = max_int / 10 + (if max_int mod 10 = 0 then 0 else 1)
  115. let json_error s = raise (Json_error s)
  116. let extract_positive_int lexbuf =
  117. let s = Sedlexing.Utf8.lexeme lexbuf in
  118. let n = ref 0 in
  119. for i = 0 to (lexeme_length lexbuf) - 1 do
  120. if !n >= max10 then
  121. raise Int_overflow
  122. else
  123. n := 10 * !n + dec s.[i]
  124. done;
  125. if !n < 0 then
  126. raise Int_overflow
  127. else
  128. !n
  129. let make_positive_int lexbuf =
  130. try JInt (extract_positive_int lexbuf)
  131. with Int_overflow -> JFloat (float_of_string (lexeme lexbuf))
  132. let extract_negative_int lexbuf =
  133. let s = Sedlexing.Utf8.lexeme lexbuf in
  134. let n = ref 0 in
  135. for i = 1 to (lexeme_length lexbuf) - 1 do
  136. if !n <= min10 then
  137. raise Int_overflow
  138. else
  139. n := 10 * !n - dec s.[i]
  140. done;
  141. if !n > 0 then
  142. raise Int_overflow
  143. else
  144. !n
  145. let make_negative_int lexbuf =
  146. try JInt (extract_negative_int lexbuf)
  147. with Int_overflow -> JFloat (float_of_string (lexeme lexbuf))
  148. let utf8_of_code buf x =
  149. let add = Buffer.add_char in
  150. (* Straight <= doesn't work with signed 31-bit ints *)
  151. let maxbits n x = x lsr n = 0 in
  152. if maxbits 7 x then
  153. (* 7 *)
  154. add buf (Char.chr x)
  155. else if maxbits 11 x then (
  156. (* 5 + 6 *)
  157. add buf (Char.chr (0b11000000 lor ((x lsr 6) land 0b00011111)));
  158. add buf (Char.chr (0b10000000 lor (x land 0b00111111)))
  159. )
  160. else if maxbits 16 x then (
  161. (* 4 + 6 + 6 *)
  162. add buf (Char.chr (0b11100000 lor ((x lsr 12) land 0b00001111)));
  163. add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
  164. add buf (Char.chr (0b10000000 lor (x land 0b00111111)))
  165. )
  166. else if maxbits 21 x then (
  167. (* 3 + 6 + 6 + 6 *)
  168. add buf (Char.chr (0b11110000 lor ((x lsr 18) land 0b00000111)));
  169. add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
  170. add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
  171. add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
  172. )
  173. else if maxbits 26 x then (
  174. (* 2 + 6 + 6 + 6 + 6 *)
  175. add buf (Char.chr (0b11111000 lor ((x lsr 24) land 0b00000011)));
  176. add buf (Char.chr (0b10000000 lor ((x lsr 18) land 0b00111111)));
  177. add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
  178. add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
  179. add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
  180. )
  181. else (
  182. assert (maxbits 31 x);
  183. (* 1 + 6 + 6 + 6 + 6 + 6 *)
  184. add buf (Char.chr (0b11111100 lor ((x lsr 30) land 0b00000001)));
  185. add buf (Char.chr (0b10000000 lor ((x lsr 24) land 0b00111111)));
  186. add buf (Char.chr (0b10000000 lor ((x lsr 18) land 0b00111111)));
  187. add buf (Char.chr (0b10000000 lor ((x lsr 12) land 0b00111111)));
  188. add buf (Char.chr (0b10000000 lor ((x lsr 6) land 0b00111111)));
  189. add buf (Char.chr (0b10000000 lor (x land 0b00111111)));
  190. )
  191. let code_of_surrogate_pair i j =
  192. let high10 = i - 0xD800 in
  193. let low10 = j - 0xDC00 in
  194. 0x10000 + ((high10 lsl 10) lor low10)
  195. let utf8_of_surrogate_pair buf i j =
  196. utf8_of_code buf (code_of_surrogate_pair i j)
  197. let space = [%sedlex.regexp? Plus (Chars " \t\r\n")]
  198. let digit = [%sedlex.regexp? '0' .. '9']
  199. let nonzero = [%sedlex.regexp? '1' .. '9']
  200. let digits = [%sedlex.regexp? Plus digit]
  201. let frac = [%sedlex.regexp? '.', digits]
  202. let e = [%sedlex.regexp? (Chars "eE"),(Opt (Chars "+-"))]
  203. let exp = [%sedlex.regexp? e, digits]
  204. let positive_int = [%sedlex.regexp? digit | (nonzero, digits)]
  205. let float = [%sedlex.regexp? (Opt '-'), positive_int, (frac | exp | (frac, exp))]
  206. let hex = [%sedlex.regexp? '0'..'9' | 'a'..'f' | 'A'..'F' ]
  207. let rec read_json lexbuf =
  208. match%sedlex lexbuf with
  209. | "true" ->
  210. JBool true
  211. | "false" ->
  212. JBool false
  213. | "null" ->
  214. JNull
  215. | '"' ->
  216. JString (finish_string (Buffer.create 0) lexbuf)
  217. | positive_int ->
  218. make_positive_int lexbuf
  219. | '-', positive_int ->
  220. make_negative_int lexbuf
  221. | float ->
  222. JFloat (float_of_string (lexeme lexbuf))
  223. | '{' ->
  224. let acc = ref [] in
  225. begin try
  226. skip_space lexbuf;
  227. read_object_end lexbuf;
  228. let field_name = read_string lexbuf in
  229. skip_space lexbuf;
  230. read_colon lexbuf;
  231. skip_space lexbuf;
  232. acc := (field_name, read_json lexbuf) :: !acc;
  233. while true do
  234. skip_space lexbuf;
  235. read_object_sep lexbuf;
  236. skip_space lexbuf;
  237. let field_name = read_string lexbuf in
  238. skip_space lexbuf;
  239. read_colon lexbuf;
  240. skip_space lexbuf;
  241. acc := (field_name, read_json lexbuf) :: !acc;
  242. done;
  243. assert false
  244. with Exit ->
  245. JObject (List.rev !acc)
  246. end
  247. | '[' ->
  248. let acc = ref [] in
  249. begin try
  250. skip_space lexbuf;
  251. read_array_end lexbuf;
  252. acc := read_json lexbuf :: !acc;
  253. while true do
  254. skip_space lexbuf;
  255. read_array_sep lexbuf;
  256. skip_space lexbuf;
  257. acc := read_json lexbuf :: !acc;
  258. done;
  259. assert false
  260. with Exit ->
  261. JArray (List.rev !acc)
  262. end
  263. | space ->
  264. read_json lexbuf
  265. | eof ->
  266. json_error "Unexpected end of input"
  267. | _ ->
  268. json_error "Invalid token"
  269. and finish_string buf lexbuf =
  270. match%sedlex lexbuf with
  271. | '"' -> Buffer.contents buf
  272. | '\\' ->
  273. finish_escaped_char buf lexbuf;
  274. finish_string buf lexbuf
  275. | Plus (Compl ('"' | '\\')) ->
  276. Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
  277. finish_string buf lexbuf
  278. | eof -> json_error "Unexpected end of input"
  279. | _ -> assert false
  280. and finish_escaped_char buf lexbuf =
  281. match%sedlex lexbuf with
  282. | '"' | '\\' | '/' ->
  283. Buffer.add_char buf (Uchar.to_char (Sedlexing.lexeme_char lexbuf 0))
  284. | 'b' ->
  285. Buffer.add_char buf '\b'
  286. | 'f' ->
  287. Buffer.add_char buf '\012'
  288. | 'n' ->
  289. Buffer.add_char buf '\n'
  290. | 'r' ->
  291. Buffer.add_char buf '\r'
  292. | 't' ->
  293. Buffer.add_char buf '\t'
  294. | 'u', hex, hex, hex, hex ->
  295. let a,b,c,d =
  296. match Sedlexing.lexeme lexbuf with
  297. | [|_; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
  298. | _ -> assert false
  299. in
  300. let x =
  301. (hex a lsl 12) lor (hex b lsl 8) lor (hex c lsl 4) lor hex d
  302. in
  303. if x >= 0xD800 && x <= 0xDBFF then
  304. finish_surrogate_pair buf x lexbuf
  305. else
  306. utf8_of_code buf x
  307. | _ ->
  308. json_error "Invalid escape sequence"
  309. and finish_surrogate_pair buf x lexbuf =
  310. match%sedlex lexbuf with
  311. | "\\u", hex, hex, hex, hex ->
  312. let a,b,c,d =
  313. match Sedlexing.lexeme lexbuf with
  314. | [|_;_ ; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
  315. | _ -> assert false
  316. in
  317. let y =
  318. (hex a lsl 12) lor (hex b lsl 8) lor (hex c lsl 4) lor hex d
  319. in
  320. if y >= 0xDC00 && y <= 0xDFFF then
  321. utf8_of_surrogate_pair buf x y
  322. else
  323. json_error "Invalid low surrogate for code point beyond U+FFFF"
  324. | _ ->
  325. json_error "Missing escape sequence representing low surrogate for code point beyond U+FFFF"
  326. and skip_space lexbuf =
  327. match%sedlex lexbuf with
  328. | space | "" -> ()
  329. | _ -> assert false
  330. and read_string lexbuf =
  331. match%sedlex lexbuf with
  332. | '"' -> finish_string (Buffer.create 0) lexbuf
  333. | _ -> json_error "Expected string"
  334. and read_array_end lexbuf =
  335. match%sedlex lexbuf with
  336. | ']' -> raise Exit
  337. | "" -> ()
  338. | _ -> assert false
  339. and read_array_sep lexbuf =
  340. match%sedlex lexbuf with
  341. | ',' -> ()
  342. | ']' -> raise Exit
  343. | _ -> json_error "Expected ',' or ']'"
  344. and read_object_end lexbuf =
  345. match%sedlex lexbuf with
  346. | '}' -> raise Exit
  347. | "" -> ()
  348. | _ -> assert false
  349. and read_object_sep lexbuf =
  350. match%sedlex lexbuf with
  351. | ',' -> ()
  352. | '}' -> raise Exit
  353. | _ -> json_error "Expected ',' or '}'"
  354. and read_colon lexbuf =
  355. match%sedlex lexbuf with
  356. | ':' -> ()
  357. | _ -> json_error "Expected ':'"
  358. end