Browse Source

[parser] try to get markup names under control

closes #7558
closes #7764
Simon Krajewski 6 năm trước
mục cha
commit
ce480715f5

+ 6 - 6
src/syntax/grammar.mly

@@ -750,8 +750,8 @@ and parse_function_field doc meta al = parser
 and parse_var_field_assignment = parser
 	| [< '(Binop OpAssign,_); s >] ->
 		begin match s with parser
-		| [< '(Binop OpLt,p1); i = dollar_ident; s >] ->
-			let e = handle_xml_literal p1 i in
+		| [< '(Binop OpLt,p1); s >] ->
+			let e = handle_xml_literal p1 in
 			(* accept but don't expect semicolon *)
 			let p2 = match s with parser
 				| [< '(Semicolon,p) >] -> p
@@ -957,8 +957,8 @@ and parse_block_elt = parser
 		| [< e = secure_expr; _ = semicolon >] -> make_meta Meta.Inline [] e p1
 		| [< >] -> serror()
 		end
-	| [< '(Binop OpLt,p1); i = dollar_ident; s >] ->
-		let e = handle_xml_literal p1 i in
+	| [< '(Binop OpLt,p1); s >] ->
+		let e = handle_xml_literal p1 in
 		(* accept but don't expect semicolon *)
 		begin match s with parser
 			| [< '(Semicolon,_) >] -> ()
@@ -1122,8 +1122,8 @@ and expr = parser
 			let e = EConst (Ident "null"),null_pos in
 			make_meta name params e p
 		end
-	| [< '(Binop OpLt,p1); i = dollar_ident >] ->
-		handle_xml_literal p1 i
+	| [< '(Binop OpLt,p1) >] ->
+		handle_xml_literal p1
 	| [< '(BrOpen,p1); s >] ->
 		(match s with parser
 		| [< b = block1; s >] ->

+ 15 - 4
src/syntax/lexer.ml

@@ -280,7 +280,10 @@ let idtype = [%sedlex.regexp? Star '_', 'A'..'Z', Star ('_' | 'a'..'z' | 'A'..'Z
 
 let integer = [%sedlex.regexp? ('1'..'9', Star ('0'..'9')) | '0']
 
-let xml_ident = [%sedlex.regexp? Opt('$'), (ident | idtype)]
+(* https://www.w3.org/TR/xml/#sec-common-syn plus '$' for JSX *)
+let xml_name_start_char = [%sedlex.regexp? '$' | ':' | 'A'..'Z' | '_' | 'a'..'z' | 0xC0 .. 0xD6 | 0xD8 .. 0xF6 | 0xF8 .. 0x2FF | 0x370 .. 0x37D | 0x37F .. 0x1FFF | 0x200C .. 0x200D | 0x2070 .. 0x218F | 0x2C00 .. 0x2FEF | 0x3001 .. 0xD7FF | 0xF900 .. 0xFDCF | 0xFDF0 .. 0xFFFD | 0x10000 .. 0xEFFFF]
+let xml_name_char = [%sedlex.regexp? xml_name_start_char | '-' | '.' | '0'..'9' | 0xB7 | 0x0300 .. 0x036F | 0x203F .. 0x2040]
+let xml_name = [%sedlex.regexp? xml_name_start_char, Star xml_name_char]
 
 let rec skip_header lexbuf =
 	match%sedlex lexbuf with
@@ -551,7 +554,7 @@ and not_xml ctx depth in_open =
 		store lexbuf;
 		not_xml ctx depth in_open
 	(* closing tag *)
-	| '<','/',xml_ident,'>' ->
+	| '<','/',xml_name,'>' ->
 		let s = lexeme lexbuf in
 		Buffer.add_string buf s;
 		(* If it matches our document close tag, finish or decrease depth. *)
@@ -561,7 +564,7 @@ and not_xml ctx depth in_open =
 		end else
 			not_xml ctx depth false
 	(* opening tag *)
-	| '<',xml_ident ->
+	| '<',xml_name ->
 		let s = lexeme lexbuf in
 		Buffer.add_string buf s;
 		(* If it matches our document open tag, increase depth and set in_open to true. *)
@@ -585,7 +588,15 @@ and not_xml ctx depth in_open =
 	| _ ->
 		assert false
 
-let lex_xml p open_tag close_tag lexbuf =
+let lex_xml p lexbuf =
+	let name,pmin = match%sedlex lexbuf with
+	| xml_name -> lexeme lexbuf,lexeme_start lexbuf
+	| _ -> invalid_char lexbuf
+	in
+	if p + 1 <> pmin then invalid_char lexbuf;
+	Buffer.add_string buf ("<" ^ name);
+	let open_tag = "<" ^ name in
+	let close_tag = "</" ^ name ^ ">" in
 	let ctx = {
 		open_tag = open_tag;
 		close_tag = close_tag;

+ 2 - 6
src/syntax/parser.ml

@@ -224,13 +224,9 @@ let make_is e (t,p_t) p p_is =
 	let e2 = expr_of_type_path (t.tpackage,t.tname) p_t in
 	ECall(e_is,[e;e2]),p
 
-let handle_xml_literal p1 (name,pi) =
-	if p1.pmax <> pi.pmin then error (Custom("Unexpected <")) p1;
-	let open_tag = "<" ^ name in
-	let close_tag = "</" ^ name ^ ">" in
+let handle_xml_literal p1 =
 	Lexer.reset();
-	Buffer.add_string Lexer.buf ("<" ^ name);
-	let i = Lexer.lex_xml p1.pmin open_tag close_tag !code_ref in
+	let i = Lexer.lex_xml p1.pmin !code_ref in
 	let xml = Lexer.contents() in
 	let e = EConst (String xml),{p1 with pmax = i} in
 	let e = make_meta Meta.Markup [] e p1 in

+ 6 - 0
tests/unit/src/unitstd/InlineXml.unit.hx

@@ -13,6 +13,12 @@ unit.HelperMacros.pipeMarkupLiteral(<xml abc />) == "<xml abc />";
 // self-closing nested
 unit.HelperMacros.pipeMarkupLiteral(<xml><xml /></xml>) == "<xml><xml /></xml>";
 
+// special chars
+unit.HelperMacros.pipeMarkupLiteral(<xml-xml></xml-xml>) == "<xml-xml></xml-xml>";
+unit.HelperMacros.pipeMarkupLiteral(<:xml></:xml>) == "<:xml></:xml>";
+unit.HelperMacros.pipeMarkupLiteral(<xml:xml></xml:xml>) == "<xml:xml></xml:xml>";
+unit.HelperMacros.pipeMarkupLiteral(<foo.Bar_barf3-gnieh:blargh></foo.Bar_barf3-gnieh:blargh>) == "<foo.Bar_barf3-gnieh:blargh></foo.Bar_barf3-gnieh:blargh>";
+
 // No check for string literal balancing
 unit.HelperMacros.pipeMarkupLiteral(<xml a=" </xml>) == "<xml a=\" </xml>";
 unit.HelperMacros.pipeMarkupLiteral(<xml a=' </xml>) == "<xml a=' </xml>";