123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- # This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
- # features that are not compatible with the 8-bit library, or which give
- # different output in 16-bit or 32-bit mode. The output for the two widths is
- # different, so they have separate output files.
-
- #forbid_utf
- #newline_default LF ANY ANYCRLF
- /[^\x{c4}]/IB
-
- /\x{100}/I
- / (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* # optional leading comment
- (?: (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- |
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- ) # initial word
- (?: (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- |
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- ) )* # further okay, if led by a period
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* @ (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # initial subdomain
- (?: #
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. # if led by a period...
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # ...further okay
- )*
- # address
- | # or
- (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- |
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- ) # one word, optionally followed by....
- (?:
- [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
- \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) | # comments, or...
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- # quoted strings
- )*
- < (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* # leading <
- (?: @ (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # initial subdomain
- (?: #
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. # if led by a period...
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # ...further okay
- )*
- (?: (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* , (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* @ (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # initial subdomain
- (?: #
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. # if led by a period...
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # ...further okay
- )*
- )* # further okay, if led by comma
- : # closing colon
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* )? # optional route
- (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- |
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- ) # initial word
- (?: (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- |
- " (?: # opening quote...
- [^\\\x80-\xff\n\015"] # Anything except backslash and quote
- | # or
- \\ [^\x80-\xff] # Escaped something (something != CR)
- )* " # closing quote
- ) )* # further okay, if led by a period
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* @ (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # initial subdomain
- (?: #
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* \. # if led by a period...
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* (?:
- [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
- (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
- | \[ # [
- (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
- \] # ]
- ) # ...further okay
- )*
- # address spec
- (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* > # trailing >
- # name and address
- ) (?: [\040\t] | \(
- (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
- \) )* # optional trailing comment
- /Ix
- /[\h]/B
- >\x09<
- /[\h]+/B
- >\x09\x20\xa0<
- /[\v]/B
- /[^\h]/B
- /\h+/I
- \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
- \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
- /[\h\x{dc00}]+/IB
- \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
- \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
- /\H+/I
- \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
- \x{2000}\x{200a}\x{1fff}\x{200b}
- \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
- \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
- /[\H\x{d800}]+/
- \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
- \x{2000}\x{200a}\x{1fff}\x{200b}
- \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
- \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
- /\v+/I
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
- /[\v\x{dc00}]+/IB
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
- /\V+/I
- \x{2028}\x{2029}\x{2027}\x{2030}
- \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
- /[\V\x{d800}]+/
- \x{2028}\x{2029}\x{2027}\x{2030}
- \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
- /\R+/I,bsr=unicode
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
- /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
- \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
- /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
- /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
- /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
- /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
- /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
- XX
-
- /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
- XX
- /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
- /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
- /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
- /^\x{ffff}+/i
- \x{ffff}
- /^\x{ffff}?/i
- \x{ffff}
- /^\x{ffff}*/i
- \x{ffff}
- /^\x{ffff}{3}/i
- \x{ffff}\x{ffff}\x{ffff}
- /^\x{ffff}{0,3}/i
- \x{ffff}
- /[^\x00-a]{12,}[^b-\xff]*/B
- /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
- /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
- /^[\x{1234}\x{4321}]{2,4}?/
- \x{1234}\x{1234}\x{1234}
- # Check maximum non-UTF character size for the 16-bit library.
- /\x{ffff}/
- A\x{ffff}B
- /\x{10000}/
- /\o{20000}/
- # Check maximum character size for the 32-bit library. These will all give
- # errors in the 16-bit library.
- /\x{110000}/
- /\x{7fffffff}/
- /\x{80000000}/
- /\x{ffffffff}/
- /\x{100000000}/
- /\o{17777777777}/
- /\o{20000000000}/
- /\o{37777777777}/
- /\o{40000000000}/
- /\x{7fffffff}\x{7fffffff}/I
- /\x{80000000}\x{80000000}/I
- /\x{ffffffff}\x{ffffffff}/I
- # Non-UTF characters
- /.{2,3}/
- \x{400000}\x{400001}\x{400002}\x{400003}
- /\x{400000}\x{800000}/IBi
- # Check character ranges
- /[\H]/IB
- /[\V]/IB
- /(*THEN:\[A]{65501})/expand
- # We can use pcre2test's utf8_input modifier to create wide pattern characters,
- # even though this test is run when UTF is not supported.
- /abý¿¿¿¿¿z/utf8_input
- abý¿¿¿¿¿z
- ab\x{7fffffff}z
- /abÿý¿¿¿¿¿z/utf8_input
- abÿý¿¿¿¿¿z
- ab\x{ffffffff}z
- /abÿAz/utf8_input
- abÿAz
- ab\x{80000041}z
- /(?i:A{1,}\6666666666)/
- A\x{1b6}6666666
- # End of testinput11
|