|
@@ -280,7 +280,10 @@ let idtype = [%sedlex.regexp? Star '_', 'A'..'Z', Star ('_' | 'a'..'z' | 'A'..'Z
|
|
|
|
|
|
let integer = [%sedlex.regexp? ('1'..'9', Star ('0'..'9')) | '0'] (* decimal integer: a lone '0', or a digit '1'..'9' followed by any number of digits *)
|
|
|
|
|
|
-let xml_ident = [%sedlex.regexp? Opt('$'), (ident | idtype)]
|
|
|
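+(* XML Name productions from https://www.w3.org/TR/xml/#sec-common-syn; '$' is added to the start-character set (and so is accepted at any position in a name) for JSX *)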
|
|
|
+let xml_name_start_char = [%sedlex.regexp? '$' | ':' | 'A'..'Z' | '_' | 'a'..'z' | 0xC0 .. 0xD6 | 0xD8 .. 0xF6 | 0xF8 .. 0x2FF | 0x370 .. 0x37D | 0x37F .. 0x1FFF | 0x200C .. 0x200D | 0x2070 .. 0x218F | 0x2C00 .. 0x2FEF | 0x3001 .. 0xD7FF | 0xF900 .. 0xFDCF | 0xFDF0 .. 0xFFFD | 0x10000 .. 0xEFFFF] (* characters allowed as the first character of a tag name: '$', ':', '_', ASCII letters and the listed code point ranges *)
|
|
|
+let xml_name_char = [%sedlex.regexp? xml_name_start_char | '-' | '.' | '0'..'9' | 0xB7 | 0x0300 .. 0x036F | 0x203F .. 0x2040] (* characters allowed after the first one: every start character plus '-', '.', ASCII digits, 0xB7 and the two listed code point ranges *)
|
|
|
+let xml_name = [%sedlex.regexp? xml_name_start_char, Star xml_name_char] (* a tag name: one start character followed by zero or more name characters; used by the opening/closing tag rules and by lex_xml *)
|
|
|
|
|
|
let rec skip_header lexbuf =
|
|
|
match%sedlex lexbuf with
|
|
@@ -551,7 +554,7 @@ and not_xml ctx depth in_open =
|
|
|
store lexbuf;
|
|
|
not_xml ctx depth in_open
|
|
|
(* closing tag *)
|
|
|
- | '<','/',xml_ident,'>' ->
|
|
|
+ | '<','/',xml_name,'>' ->
|
|
|
let s = lexeme lexbuf in
|
|
|
Buffer.add_string buf s;
|
|
|
(* If it matches our document close tag, finish or decrease depth. *)
|
|
@@ -561,7 +564,7 @@ and not_xml ctx depth in_open =
|
|
|
end else
|
|
|
not_xml ctx depth false
|
|
|
(* opening tag *)
|
|
|
- | '<',xml_ident ->
|
|
|
+ | '<',xml_name ->
|
|
|
let s = lexeme lexbuf in
|
|
|
Buffer.add_string buf s;
|
|
|
(* If it matches our document open tag, increase depth and set in_open to true. *)
|
|
@@ -585,7 +588,15 @@ and not_xml ctx depth in_open =
|
|
|
| _ ->
|
|
|
assert false
|
|
|
|
|
|
-let lex_xml p open_tag close_tag lexbuf =
|
|
|
+let lex_xml p lexbuf =
|
|
|
+ let name,pmin = match%sedlex lexbuf with
|
|
|
+ | xml_name -> lexeme lexbuf,lexeme_start lexbuf
|
|
|
+ | _ -> invalid_char lexbuf
|
|
|
+ in
|
|
|
+ if p + 1 <> pmin then invalid_char lexbuf;
|
|
|
+ Buffer.add_string buf ("<" ^ name);
|
|
|
+ let open_tag = "<" ^ name in
|
|
|
+ let close_tag = "</" ^ name ^ ">" in
|
|
|
let ctx = {
|
|
|
open_tag = open_tag;
|
|
|
close_tag = close_tag;
|