Browse Source

Fix compatibility with sedlex 2 (#8676)

* fix compatibility with sedlex 2

* rename UChar to UCharExt
Aleksandr Kuzmenko 6 years ago
parent
commit
ea42f4b801

+ 2 - 2
Makefile

@@ -34,7 +34,7 @@ STATICLINK?=0
 HAXE_DIRECTORIES=core core/json core/display syntax context context/display codegen codegen/gencommon generators generators/jvm optimization filters macro macro/eval macro/eval/bytes typing compiler
 EXTLIB_LIBS=extlib-leftovers extc neko javalib swflib ttflib ilib objsize pcre ziplib
 OCAML_LIBS=unix str threads dynlink
-OPAM_LIBS=sedlex xml-light extlib ptmap sha
+OPAM_LIBS=sedlex.ppx xml-light extlib ptmap sha
 
 FINDLIB_LIBS=$(OCAML_LIBS)
 FINDLIB_LIBS+=$(OPAM_LIBS)
@@ -141,7 +141,7 @@ _build/src/core/metaList.ml: src-json/meta.json prebuild
 build_src: | $(BUILD_SRC) _build/src/syntax/grammar.ml _build/src/compiler/version.ml _build/src/core/defineList.ml _build/src/core/metaList.ml
 
 prebuild: _build/src/core/json/json.ml _build/src/prebuild/main.ml
-	$(COMPILER) -safe-string -linkpkg -g -o $(PREBUILD_OUTPUT) -package sedlex -package extlib -I _build/src/core/json _build/src/core/json/json.ml _build/src/prebuild/main.ml
+	$(COMPILER) -safe-string -linkpkg -g -o $(PREBUILD_OUTPUT) -package sedlex.ppx -package extlib -I _build/src/core/json _build/src/core/json/json.ml _build/src/prebuild/main.ml
 
 haxe: build_src
 	$(MAKE) -f $(MAKEFILENAME) build_pass_1

+ 1 - 1
libs/extlib-leftovers/Makefile

@@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt
 OCAMLC=ocamlc
 
 MODULES = \
- multiArray rbuffer uChar uTF8
+ multiArray rbuffer uCharExt uTF8
 
 # the list is topologically sorted
 

+ 0 - 0
libs/extlib-leftovers/uChar.ml → libs/extlib-leftovers/uCharExt.ml


+ 0 - 0
libs/extlib-leftovers/uChar.mli → libs/extlib-leftovers/uCharExt.mli


+ 17 - 17
libs/extlib-leftovers/uTF8.ml

@@ -1,6 +1,6 @@
-(* 
+(*
  * UTF-8 - UTF-8 encoded Unicode string
- * Copyright 2002, 2003 (C) Yamagata Yoriyuki. 
+ * Copyright 2002, 2003 (C) Yamagata Yoriyuki.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -18,11 +18,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  *)
 
-open UChar
+open UCharExt
 
 type t = string
 type index = int
-  
+
 let look s i =
   let n' =
     let n = Char.code s.[i] in
@@ -42,7 +42,7 @@ let look s i =
       let n' = n' lsl 6 lor (0x7f land m) in
       let m = Char.code (String.unsafe_get s (i + 2)) in
       let n' = n' lsl 6 lor (0x7f land m) in
-      n' lsl 6 lor (0x7f land m0)     
+      n' lsl 6 lor (0x7f land m0)
     else if n <= 0xfb then
       let n' = n - 0xf8 in
       let m0 = Char.code s.[i + 4] in
@@ -52,7 +52,7 @@ let look s i =
       let n' = n' lsl 6 lor (0x7f land m) in
       let m = Char.code (String.unsafe_get s (i + 3)) in
       let n' = n' lsl 6 lor (0x7f land m) in
-      n' lsl 6 lor (0x7f land m0)     
+      n' lsl 6 lor (0x7f land m0)
     else if n <= 0xfd then
       let n' = n - 0xfc in
       let m0 = Char.code s.[i + 5] in
@@ -75,7 +75,7 @@ let rec search_head s i =
   if n < 0x80 || n >= 0xc2 then i else
   search_head s (i + 1)
 
-let next s i = 
+let next s i =
   let n = Char.code s.[i] in
   if n < 0x80 then i + 1 else
   if n < 0xc0 then search_head s (i + 1) else
@@ -121,7 +121,7 @@ let add_uchar buf u =
   let k = int_of_uchar u in
   if k < 0 || k >= 0x4000000 then begin
     Buffer.add_char buf (Char.chr (0xfc + (k lsr 30)));
-    Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 24) land masq))); 
+    Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 24) land masq)));
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq)));
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
@@ -146,7 +146,7 @@ let add_uchar buf u =
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
     Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
-  end 
+  end
 
 let init len f =
   let buf = Buffer.create len in
@@ -193,26 +193,26 @@ let validate s =
     let n = Char.code (String.unsafe_get s i) in
     if n < 0x80 then main (i + 1) else
     if n < 0xc2 then raise Malformed_code else
-    if n <= 0xdf then 
-      if trail 1 (i + 1) (n - 0xc0) < 0x80 then raise Malformed_code else 
+    if n <= 0xdf then
+      if trail 1 (i + 1) (n - 0xc0) < 0x80 then raise Malformed_code else
       main (i + 2)
-    else if n <= 0xef then 
-      if trail 2 (i + 1) (n - 0xe0) < 0x800 then raise Malformed_code else 
+    else if n <= 0xef then
+      if trail 2 (i + 1) (n - 0xe0) < 0x800 then raise Malformed_code else
       main (i + 3)
-    else if n <= 0xf7 then 
+    else if n <= 0xf7 then
       if trail 3 (i + 1) (n - 0xf0) < 0x10000 then raise Malformed_code else
       main (i + 4)
-    else if n <= 0xfb then 
+    else if n <= 0xfb then
       if trail 4 (i + 1) (n - 0xf8) < 0x200000 then raise Malformed_code else
       main (i + 5)
-    else if n <= 0xfd then 
+    else if n <= 0xfd then
       let n = trail 5 (i + 1) (n - 0xfc) in
       if n lsr 16 < 0x400 then raise Malformed_code else
       main (i + 6)
     else raise Malformed_code in
   main 0
 
-module Buf = 
+module Buf =
   struct
     include Buffer
     type buf = t

+ 1 - 1
libs/extlib-leftovers/uTF8.mli

@@ -23,7 +23,7 @@
    The Module for UTF-8 encoded Unicode strings.
 *)
 
-open UChar
+open UCharExt
 
 (** UTF-8 encoded Unicode strings. the type is normal string. *)
 type t = string

+ 1 - 1
libs/ttflib/tTFTools.ml

@@ -210,7 +210,7 @@ let parse_range_str str =
 	let range = ref false in
 	let lut = Hashtbl.create 0 in
 	UTF8.iter (fun code ->
-		let code = UChar.code code in
+		let code = UCharExt.code code in
 		if code = Char.code '-' && !last <> Char.code '\\' then
 			range := true
 		else if !range then begin

+ 1 - 1
opam

@@ -22,7 +22,7 @@ depends: [
   "ocaml"               {>= "4.02"}
   "ocamlfind"           {build}
   "camlp5"              {build}
-  "sedlex"              {build & <= "1.99.4"} #https://github.com/HaxeFoundation/haxe/issues/7958
+  "sedlex"              {build}
   "ppx_tools_versioned" {build & != "5.2.1"} #https://github.com/alainfrisch/sedlex/issues/64
   "xml-light"           {build}
   "extlib"              {build & >= "1.7.6"}

+ 3 - 3
src/context/common.ml

@@ -752,12 +752,12 @@ let to_utf8 str p =
 		UTF8.Malformed_code ->
 			(* ISO to utf8 *)
 			let b = UTF8.Buf.create 0 in
-			String.iter (fun c -> UTF8.Buf.add_char b (UChar.of_char c)) str;
+			String.iter (fun c -> UTF8.Buf.add_char b (UCharExt.of_char c)) str;
 			UTF8.Buf.contents b
 	in
 	let ccount = ref 0 in
 	UTF8.iter (fun c ->
-		let c = UChar.code c in
+		let c = UCharExt.code c in
 		if (c >= 0xD800 && c <= 0xDFFF) || c >= 0x110000 then abort "Invalid unicode char" p;
 		incr ccount;
 		if c > 0x10000 then incr ccount;
@@ -781,7 +781,7 @@ let utf16_add buf c =
 
 let utf8_to_utf16 str zt =
 	let b = Buffer.create (String.length str * 2) in
-	(try UTF8.iter (fun c -> utf16_add b (UChar.code c)) str with Invalid_argument _ | UChar.Out_of_range -> ()); (* if malformed *)
+	(try UTF8.iter (fun c -> utf16_add b (UCharExt.code c)) str with Invalid_argument _ | UCharExt.Out_of_range -> ()); (* if malformed *)
 	if zt then utf16_add b 0;
 	Buffer.contents b
 

+ 1 - 1
src/core/ast.ml

@@ -542,7 +542,7 @@ let unescape s =
 					in
 					if u >= 0xD800 && u < 0xE000 then
 						fail (Some "UTF-16 surrogates are not allowed in strings.");
-					UTF8.add_uchar b (UChar.uchar_of_int u);
+					UTF8.add_uchar b (UCharExt.uchar_of_int u);
 					inext := !inext + a;
 				| _ ->
 					fail None);

+ 3 - 3
src/core/json/json.ml

@@ -315,7 +315,7 @@ module Reader = struct
 	and finish_escaped_char buf lexbuf =
 		match%sedlex lexbuf with
 		| '"' | '\\' | '/' ->
-			Buffer.add_char buf (char_of_int (Sedlexing.lexeme_char lexbuf 0))
+			Buffer.add_char buf (Uchar.to_char (Sedlexing.lexeme_char lexbuf 0))
 		| 'b' ->
 			Buffer.add_char buf '\b'
 		| 'f' ->
@@ -329,7 +329,7 @@ module Reader = struct
 		| 'u', hex, hex, hex, hex ->
 			let a,b,c,d =
 				match Sedlexing.lexeme lexbuf with
-				| [|_; a; b; c; d|] -> a, b, c, d
+				| [|_; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
 				| _ -> assert false
 			in
 			let x =
@@ -347,7 +347,7 @@ module Reader = struct
 		| "\\u", hex, hex, hex, hex ->
 			let a,b,c,d =
 				match Sedlexing.lexeme lexbuf with
-				| [|_;_ ; a; b; c; d|] -> a, b, c, d
+				| [|_;_ ; a; b; c; d|] -> Uchar.to_int a, Uchar.to_int b, Uchar.to_int c, Uchar.to_int d
 				| _ -> assert false
 			in
 			let y =

+ 1 - 1
src/generators/gencpp.ml

@@ -1095,7 +1095,7 @@ let strq ctx s =
             | c when c > 0xFFFF -> Buffer.add_string b (Printf.sprintf "\\U%08x" c)
             | c -> Buffer.add_char b (Char.chr c)
       in
-      UTF8.iter (fun c -> add (UChar.code c) ) s;
+      UTF8.iter (fun c -> add (UCharExt.code c) ) s;
       "HX_W(u\"" ^ (Buffer.contents b) ^ "\"," ^ (gen_wqstring_hash s) ^ ")"
    else
        gen_str "HX_" gen_qstring_hash s

+ 1 - 1
src/generators/gencs.ml

@@ -1135,7 +1135,7 @@ let generate con =
 			let b = Buffer.create 0 in
 			(try
 				UTF8.validate s;
-				UTF8.iter (fun c -> escape (UChar.code c) b) s
+				UTF8.iter (fun c -> escape (UCharExt.code c) b) s
 			with
 				UTF8.Malformed_code ->
 					String.iter (fun c -> escape (Char.code c) b) s

+ 2 - 2
src/generators/genhl.ml

@@ -164,12 +164,12 @@ let to_utf8 str p =
 		UTF8.Malformed_code ->
 			(* ISO to utf8 *)
 			let b = UTF8.Buf.create 0 in
-			String.iter (fun c -> UTF8.Buf.add_char b (UChar.of_char c)) str;
+			String.iter (fun c -> UTF8.Buf.add_char b (UCharExt.of_char c)) str;
 			UTF8.Buf.contents b
 	in
 	let ccount = ref 0 in
 	UTF8.iter (fun c ->
-		let c = UChar.code c in
+		let c = UCharExt.code c in
 		if (c >= 0xD800 && c <= 0xDFFF) || c >= 0x110000 then abort "Invalid unicode char" p;
 		incr ccount;
 		if c >= 0x10000 then incr ccount;

+ 2 - 2
src/generators/genjava.ml

@@ -341,7 +341,7 @@ struct
 		(try
 			UTF8.validate s;
 			UTF8.iter (fun c ->
-				let c = (UChar.code c) in
+				let c = (UCharExt.code c) in
 				if c > 0xFFFF then
 					(h := Int32.add (Int32.mul thirtyone !h)
 						(Int32.of_int (high_surrogate c));
@@ -1352,7 +1352,7 @@ let generate con =
 		let b = Buffer.create 0 in
 		(try
 			UTF8.validate s;
-			UTF8.iter (fun c -> escape (UChar.code c) b) s
+			UTF8.iter (fun c -> escape (UCharExt.code c) b) s
 		with
 			UTF8.Malformed_code ->
 				String.iter (fun c -> escape (Char.code c) b) s

+ 1 - 1
src/generators/genswf9.ml

@@ -657,7 +657,7 @@ let to_utf8 str =
 	with
 		UTF8.Malformed_code ->
 			let b = UTF8.Buf.create 0 in
-			String.iter (fun c -> UTF8.Buf.add_char b (UChar.of_char c)) str;
+			String.iter (fun c -> UTF8.Buf.add_char b (UCharExt.of_char c)) str;
 			UTF8.Buf.contents b
 
 let gen_constant ctx c t p =

+ 1 - 1
src/generators/hlinterp.ml

@@ -329,7 +329,7 @@ let hl_to_caml str =
 		loop 0
 	in
 	let b = UTF8.Buf.create (String.length str / 2) in
-	utf16_iter (fun c -> UTF8.Buf.add_char b (UChar.chr c)) (utf16_eof str);
+	utf16_iter (fun c -> UTF8.Buf.add_char b (UCharExt.chr c)) (utf16_eof str);
 	UTF8.Buf.contents b
 
 let null_access() =

+ 1 - 1
src/generators/jvm/jvmConstantPool.ml

@@ -27,7 +27,7 @@ open JvmSignature
 let utf8jvm (input : string) : bytes =
 	let channel = IO.output_bytes () in
 	UTF8.iter (fun c ->
-		let code = UChar.code c in
+		let code = UCharExt.code c in
 		match code with
 			| b when (b > 0 && b <= 0x7F) ->
 			IO.write_byte channel b

+ 4 - 4
src/macro/eval/evalStdLib.ml

@@ -2995,7 +2995,7 @@ module StdUtf8 = struct
 		| v -> unexpected_value v "string"
 
 	let addChar = vifun1 (fun vthis c ->
-		UTF8.Buf.add_char (this vthis) (UChar.uchar_of_int (decode_int c));
+		UTF8.Buf.add_char (this vthis) (UCharExt.uchar_of_int (decode_int c));
 		vnull
 	)
 
@@ -3012,7 +3012,7 @@ module StdUtf8 = struct
 		let buf = Bytes.create (UTF8.length s) in
 		let i = ref 0 in
 		UTF8.iter (fun uc ->
-			Bytes.unsafe_set buf !i (UChar.char_of uc);
+			Bytes.unsafe_set buf !i (UCharExt.char_of uc);
 			incr i
 		) s;
 		let s = Bytes.unsafe_to_string buf in
@@ -3021,12 +3021,12 @@ module StdUtf8 = struct
 
 	let encode = vfun1 (fun s ->
 		let s = decode_string s in
-		create_unknown (UTF8.init (String.length s) (fun i -> UChar.of_char s.[i]))
+		create_unknown (UTF8.init (String.length s) (fun i -> UCharExt.of_char s.[i]))
 	)
 
 	let iter = vfun2 (fun s f ->
 		let s = decode_string s in
-		UTF8.iter (fun uc -> ignore(call_value f [vint (UChar.int_of_uchar uc)])) s;
+		UTF8.iter (fun uc -> ignore(call_value f [vint (UCharExt.int_of_uchar uc)])) s;
 		vnull
 	)
 

+ 3 - 3
src/macro/eval/evalString.ml

@@ -184,7 +184,7 @@ let char_at s c_index =
 	char
 
 let string_of_char_code i =
-	UTF8.init 1 (fun _ ->  UChar.uchar_of_int i)
+	UTF8.init 1 (fun _ ->  UCharExt.uchar_of_int i)
 
 let from_char_code i =
 	create_with_length (string_of_char_code i) 1
@@ -239,11 +239,11 @@ let case_map this upper =
 	let buf = UTF8.Buf.create 0 in
 	let a,m = if upper then EvalBytes.Unicase._UPPER,1022 else EvalBytes.Unicase._LOWER,1021 in
 	UTF8.iter (fun uc ->
-		let c = UChar.int_of_uchar uc in
+		let c = UCharExt.int_of_uchar uc in
 		let up = c lsr 6 in
 		let uc = if up < m then begin
 			let c = a.(up).(c land ((1 lsl 6) - 1)) in
-			if c <> 0 then UChar.uchar_of_int c
+			if c <> 0 then UCharExt.uchar_of_int c
 			else uc
 		end else
 			uc

+ 2 - 2
src/syntax/lexer.ml

@@ -272,7 +272,7 @@ let mk_keyword lexbuf kwd =
 	mk lexbuf (Kwd kwd)
 
 let invalid_char lexbuf =
-	error (Invalid_character (lexeme_char lexbuf 0)) (lexeme_start lexbuf)
+	error (Invalid_character (Uchar.to_int (lexeme_char lexbuf 0))) (lexeme_start lexbuf)
 
 let ident = [%sedlex.regexp?
 	(
@@ -556,7 +556,7 @@ and regexp lexbuf =
 	| '\\', ('\\' | '$' | '.' | '*' | '+' | '^' | '|' | '{' | '}' | '[' | ']' | '(' | ')' | '?' | '-' | '0'..'9') -> add (lexeme lexbuf); regexp lexbuf
 	| '\\', ('w' | 'W' | 'b' | 'B' | 's' | 'S' | 'd' | 'D' | 'x') -> add (lexeme lexbuf); regexp lexbuf
 	| '\\', ('u' | 'U'), ('0'..'9' | 'a'..'f' | 'A'..'F'), ('0'..'9' | 'a'..'f' | 'A'..'F'), ('0'..'9' | 'a'..'f' | 'A'..'F'), ('0'..'9' | 'a'..'f' | 'A'..'F') -> add (lexeme lexbuf); regexp lexbuf
-	| '\\', Compl '\\' -> error (Invalid_character (lexeme_char lexbuf 0)) (lexeme_end lexbuf - 1)
+	| '\\', Compl '\\' -> error (Invalid_character (Uchar.to_int (lexeme_char lexbuf 0))) (lexeme_end lexbuf - 1)
 	| '/' -> regexp_options lexbuf, lexeme_end lexbuf
 	| Plus (Compl ('\\' | '/' | '\r' | '\n')) -> store lexbuf; regexp lexbuf
 	| _ -> assert false

+ 1 - 1
src/typing/typer.ml

@@ -2450,7 +2450,7 @@ and type_expr ?(mode=MGet) ctx (e,p) (with_type:WithType.t) =
 	match e with
 	| EField ((EConst (String(s,_)),ps),"code") ->
 		if UTF8.length s <> 1 then error "String must be a single UTF8 char" ps;
-		mk (TConst (TInt (Int32.of_int (UChar.code (UTF8.get s 0))))) ctx.t.tint p
+		mk (TConst (TInt (Int32.of_int (UCharExt.code (UTF8.get s 0))))) ctx.t.tint p
 	| EField(_,n) when starts_with n '$' ->
 		error "Field names starting with $ are not allowed" p
 	| EConst (Ident s) ->