123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648 |
- # This set of tests is for UTF-8 support and Unicode property support, with
- # relevance only for the 8-bit library.
- #newline_default lf any anycrlf
- # The next 5 patterns have UTF-8 errors
- /[Ã]/utf
- /Ã/utf
- /ÃÃÃxxx/utf
- /‚‚‚‚‚‚‚Ã/utf
- /‚‚‚‚‚‚‚Ã/match_invalid_utf
- # Now test subjects
- /badutf/utf
- \= Expect UTF-8 errors
- X\xdf
- XX\xef
- XXX\xef\x80
- X\xf7
- XX\xf7\x80
- XXX\xf7\x80\x80
- \xfb
- \xfb\x80
- \xfb\x80\x80
- \xfb\x80\x80\x80
- \xfd
- \xfd\x80
- \xfd\x80\x80
- \xfd\x80\x80\x80
- \xfd\x80\x80\x80\x80
- \xdf\x7f
- \xef\x7f\x80
- \xef\x80\x7f
- \xf7\x7f\x80\x80
- \xf7\x80\x7f\x80
- \xf7\x80\x80\x7f
- \xfb\x7f\x80\x80\x80
- \xfb\x80\x7f\x80\x80
- \xfb\x80\x80\x7f\x80
- \xfb\x80\x80\x80\x7f
- \xfd\x7f\x80\x80\x80\x80
- \xfd\x80\x7f\x80\x80\x80
- \xfd\x80\x80\x7f\x80\x80
- \xfd\x80\x80\x80\x7f\x80
- \xfd\x80\x80\x80\x80\x7f
- \xed\xa0\x80
- \xc0\x8f
- \xe0\x80\x8f
- \xf0\x80\x80\x8f
- \xf8\x80\x80\x80\x8f
- \xfc\x80\x80\x80\x80\x8f
- \x80
- \xfe
- \xff
- /badutf/utf
- \= Expect UTF-8 errors
- XX\xfb\x80\x80\x80\x80
- XX\xfd\x80\x80\x80\x80\x80
- XX\xf7\xbf\xbf\xbf
- /shortutf/utf
- \= Expect UTF-8 errors
- XX\xdf\=ph
- XX\xef\=ph
- XX\xef\x80\=ph
- \xf7\=ph
- \xf7\x80\=ph
- \xf7\x80\x80\=ph
- \xfb\=ph
- \xfb\x80\=ph
- \xfb\x80\x80\=ph
- \xfb\x80\x80\x80\=ph
- \xfd\=ph
- \xfd\x80\=ph
- \xfd\x80\x80\=ph
- \xfd\x80\x80\x80\=ph
- \xfd\x80\x80\x80\x80\=ph
- /anything/utf
- \= Expect UTF-8 errors
- X\xc0\x80
- XX\xc1\x8f
- XXX\xe0\x9f\x80
- \xf0\x8f\x80\x80
- \xf8\x87\x80\x80\x80
- \xfc\x83\x80\x80\x80\x80
- \xfe\x80\x80\x80\x80\x80
- \xff\x80\x80\x80\x80\x80
- \xf8\x88\x80\x80\x80
- \xf9\x87\x80\x80\x80
- \xfc\x84\x80\x80\x80\x80
- \xfd\x83\x80\x80\x80\x80
- \= Expect no match
- \xc3\x8f
- \xe0\xaf\x80
- \xe1\x80\x80
- \xf0\x9f\x80\x80
- \xf1\x8f\x80\x80
- \xf8\x88\x80\x80\x80\=no_utf_check
- \xf9\x87\x80\x80\x80\=no_utf_check
- \xfc\x84\x80\x80\x80\x80\=no_utf_check
- \xfd\x83\x80\x80\x80\x80\=no_utf_check
-
- # Similar tests with offsets
- /badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- X\xdfabcd\=offset=1
- \= Expect no match
- X\xdfabcd\=offset=2
- /(?<=x)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- X\xdfabcd\=offset=1
- X\xdfabcd\=offset=2
- X\xdfabcd\xdf\=offset=3
- \= Expect no match
- X\xdfabcd\=offset=3
- /(?<=xx)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- X\xdfabcd\=offset=1
- X\xdfabcd\=offset=2
- X\xdfabcd\=offset=3
- /(?<=xxxx)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- X\xdfabcd\=offset=1
- X\xdfabcd\=offset=2
- X\xdfabcd\=offset=3
- X\xdfabc\xdf\=offset=6
- X\xdfabc\xdf\=offset=7
- \= Expect no match
- X\xdfabcd\=offset=6
-
- /\x{100}/IB,utf
- /\x{1000}/IB,utf
- /\x{10000}/IB,utf
- /\x{100000}/IB,utf
- /\x{10ffff}/IB,utf
- /[\x{ff}]/IB,utf
- /[\x{100}]/IB,utf
- /\x80/IB,utf
- /\xff/IB,utf
- /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
- \x{D55c}\x{ad6d}\x{C5B4}
- /\x{65e5}\x{672c}\x{8a9e}/IB,utf
- \x{65e5}\x{672c}\x{8a9e}
- /\x{80}/IB,utf
- /\x{084}/IB,utf
- /\x{104}/IB,utf
- /\x{861}/IB,utf
- /\x{212ab}/IB,utf
- /[^ab\xC0-\xF0]/IB,utf
- \x{f1}
- \x{bf}
- \x{100}
- \x{1000}
- \= Expect no match
- \x{c0}
- \x{f0}
- /Ä€{3,4}/IB,utf
- \x{100}\x{100}\x{100}\x{100\x{100}
- /(\x{100}+|x)/IB,utf
- /(\x{100}*a|x)/IB,utf
- /(\x{100}{0,2}a|x)/IB,utf
- /(\x{100}{1,2}a|x)/IB,utf
- /\x{100}/IB,utf
- /a\x{100}\x{101}*/IB,utf
- /a\x{100}\x{101}+/IB,utf
- /[^\x{c4}]/IB
- /[\x{100}]/IB,utf
- \x{100}
- Z\x{100}
- \x{100}Z
- /[\xff]/IB,utf
- >\x{ff}<
- /[^\xff]/IB,utf
- /\x{100}abc(xyz(?1))/IB,utf
- /\777/I,utf
- \x{1ff}
- \777
- /\x{100}+\x{200}/IB,utf
- /\x{100}+X/IB,utf
- /^[\QĀ\E-\Q�\E/B,utf
- # This tests the stricter UTF-8 check according to RFC 3629.
- /X/utf
- \= Expect UTF-8 errors
- \x{d800}
- \x{da00}
- \x{dfff}
- \x{110000}
- \x{2000000}
- \x{7fffffff}
- \= Expect no match
- \x{d800}\=no_utf_check
- \x{da00}\=no_utf_check
- \x{dfff}\=no_utf_check
- \x{110000}\=no_utf_check
- \x{2000000}\=no_utf_check
- \x{7fffffff}\=no_utf_check
- /(*UTF8)\x{1234}/
- abcd\x{1234}pqr
- /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
- /\h/I,utf
- ABC\x{09}
- ABC\x{20}
- ABC\x{a0}
- ABC\x{1680}
- ABC\x{180e}
- ABC\x{2000}
- ABC\x{202f}
- ABC\x{205f}
- ABC\x{3000}
- /\v/I,utf
- ABC\x{0a}
- ABC\x{0b}
- ABC\x{0c}
- ABC\x{0d}
- ABC\x{85}
- ABC\x{2028}
- /\h*A/I,utf
- CDBABC
- /\v+A/I,utf
- /\s?xxx\s/I,utf
- /\sxxx\s/I,utf,tables=2
- AB\x{85}xxx\x{a0}XYZ
- AB\x{a0}xxx\x{85}XYZ
- /\S \S/I,utf,tables=2
- \x{a2} \x{84}
- A Z
- /a+/utf
- a\x{123}aa\=offset=1
- a\x{123}aa\=offset=3
- a\x{123}aa\=offset=4
- \= Expect bad offset value
- a\x{123}aa\=offset=6
- \= Expect bad UTF-8 offset
- a\x{123}aa\=offset=2
- \= Expect no match
- a\x{123}aa\=offset=5
- /\x{1234}+/Ii,utf
- /\x{1234}+?/Ii,utf
- /\x{1234}++/Ii,utf
- /\x{1234}{2}/Ii,utf
- /[^\x{c4}]/IB,utf
- /X+\x{200}/IB,utf
- /\R/I,utf
- /\777/IB,utf
- /\w+\x{C4}/B,utf
- a\x{C4}\x{C4}
- /\w+\x{C4}/B,utf,tables=2
- a\x{C4}\x{C4}
- /\W+\x{C4}/B,utf
- !\x{C4}
- /\W+\x{C4}/B,utf,tables=2
- !\x{C4}
- /\W+\x{A1}/B,utf
- !\x{A1}
- /\W+\x{A1}/B,utf,tables=2
- !\x{A1}
- /X\s+\x{A0}/B,utf
- X\x20\x{A0}\x{A0}
- /X\s+\x{A0}/B,utf,tables=2
- X\x20\x{A0}\x{A0}
- /\S+\x{A0}/B,utf
- X\x{A0}\x{A0}
- /\S+\x{A0}/B,utf,tables=2
- X\x{A0}\x{A0}
- /\x{a0}+\s!/B,utf
- \x{a0}\x20!
- /\x{a0}+\s!/B,utf,tables=2
- \x{a0}\x20!
- /A/utf
- \x{ff000041}
- \x{7f000041}
- /(*UTF8)abc/never_utf
- /abc/utf,never_utf
- /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
- /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
- /AB\x{1fb0}/IB,utf
- /AB\x{1fb0}/IBi,utf
- /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
- \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
- \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
- /[â±¥]/Bi,utf
- /[^â±¥]/Bi,utf
- /\h/I
- /\v/I
- /\R/I
- /[[:blank:]]/B,ucp
- /\x{212a}+/Ii,utf
- KKkk\x{212a}
- /s+/Ii,utf
- SSss\x{17f}
- /\x{100}*A/IB,utf
- A
- /\x{100}*\d(?R)/IB,utf
- /[Z\x{100}]/IB,utf
- Z\x{100}
- \x{100}
- \x{100}Z
- /[z-\x{100}]/IB,utf
- /[z\Qa-d]Ä€\E]/IB,utf
- \x{100}
- Ā
- /[ab\x{100}]abc(xyz(?1))/IB,utf
- /\x{100}*\s/IB,utf
- /\x{100}*\d/IB,utf
- /\x{100}*\w/IB,utf
- /\x{100}*\D/IB,utf
- /\x{100}*\S/IB,utf
- /\x{100}*\W/IB,utf
- /[\x{105}-\x{109}]/IBi,utf
- \x{104}
- \x{105}
- \x{109}
- \= Expect no match
- \x{100}
- \x{10a}
-
- /[z-\x{100}]/IBi,utf
- Z
- z
- \x{39c}
- \x{178}
- |
- \x{80}
- \x{ff}
- \x{100}
- \x{101}
- \= Expect no match
- \x{102}
- Y
- y
- /[z-\x{100}]/IBi,utf
- /\x{3a3}B/IBi,utf
- /abc/utf,replace=Ã
- abc
- /(?<=(a)(?-1))x/I,utf
- a\x80zx\=offset=3
- /[\W\p{Any}]/B
- abc
- 123
- /[\W\pL]/B
- abc
- \= Expect no match
- 123
- /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf
- /[\s[:^ascii:]]/B,ucp
- # A special extra option allows escaped surrogate code points in 8-bit mode,
- # but subjects containing them must not be UTF-checked.
- /\x{d800}/I,utf,allow_surrogate_escapes
- \x{d800}\=no_utf_check
- /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
- \x{dfff}\x{df01}\=no_utf_check
-
- # This has different starting code units in 8-bit mode.
- /^[^ab]/IB,utf
- c
- \x{ff}
- \x{100}
- \= Expect no match
- aaa
-
- # Offsets are different in 8-bit mode.
- /(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
-
- # Check name length with non-ASCII characters
- /(?'ABáC678901234567890123456789012'...)/utf
- /(?'ABáC6789012345678901234567890123'...)/utf
- /(?'ABZC6789012345678901234567890123'...)/utf
- /(?(n/utf
- /(?(á/utf
- # Invalid UTF-8 tests
- /.../g,match_invalid_utf
- abcd\x80wxzy\x80pqrs
- abcd\x{80}wxzy\x80pqrs
- /abc/match_invalid_utf
- ab\x80ab\=ph
- \= Expect no match
- ab\x80cdef\=ph
- /.a/match_invalid_utf
- ab\=ph
- ab\=ps
- b\xf0\x91\x88b\=ph
- b\xf0\x91\x88b\=ps
- b\xf0\x91\x88\xb4a
- \= Expect no match
- b\x80\=ph
- b\x80\=ps
- b\xf0\x91\x88\=ph
- b\xf0\x91\x88\=ps
- /.a$/match_invalid_utf
- ab\=ph
- ab\=ps
- \= Expect no match
- b\xf0\x91\x98\=ph
- b\xf0\x91\x98\=ps
- /ab$/match_invalid_utf
- ab\x80cdeab
- \= Expect no match
- ab\x80cde
- /.../g,match_invalid_utf
- abcd\x{80}wxzy\x80pqrs
- /(?<=x)../g,match_invalid_utf
- abcd\x{80}wxzy\x80pqrs
- abcd\x{80}wxzy\x80xpqrs
-
- /X$/match_invalid_utf
- \= Expect no match
- X\xc4
-
- /(?<=..)X/match_invalid_utf,aftertext
- AB\x80AQXYZ
- AB\x80AQXYZ\=offset=5
- AB\x80\x80AXYZXC\=offset=5
- \= Expect no match
- AB\x80XYZ
- AB\x80XYZ\=offset=3
- AB\xfeXYZ
- AB\xffXYZ\=offset=3
- AB\x80AXYZ
- AB\x80AXYZ\=offset=4
- AB\x80\x80AXYZ\=offset=5
- /.../match_invalid_utf
- AB\xc4CCC
- \= Expect no match
- A\x{d800}B
- A\x{110000}B
- A\xc4B
- /\bX/match_invalid_utf
- A\x80X
- /\BX/match_invalid_utf
- \= Expect no match
- A\x80X
-
- /(?<=...)X/match_invalid_utf
- AAA\x80BBBXYZ
- \= Expect no match
- AAA\x80BXYZ
- AAA\x80BBXYZ
- # -------------------------------------
- /(*UTF)(?=\x{123})/I
- /[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
- /[󿾟,]/BI,utf
- /[\x{fff4}-\x{ffff8}]/I,utf
- /[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
- /[\xff\x{ffff}]/I,utf
- /[\xff\x{ff}]/I,utf
- abc\x{ff}def
- /[\xff\x{ff}]/I
- abc\x{ff}def
- /[Ss]/I
- /[Ss]/I,utf
- /(?:\x{ff}|\x{3000})/I,utf
- /x/utf
- abxyz
- \x80\=startchar
- abc\x80\=startchar
- abc\x80\=startchar,offset=3
- /\x{c1}+\x{e1}/iIB,ucp
- \x{c1}\x{c1}\x{c1}
- \x{e1}\x{e1}\x{e1}
- /a|\x{c1}/iI,ucp
- \x{e1}xxx
- /a|\x{c1}/iI,utf
- \x{e1}xxx
- /\x{c1}|\x{e1}/iI,ucp
- /X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
- X\x{e1}Y
- /X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
- X\x{c1}Y
- # Without UTF or UCP, characters > 127 have only one case in the default locale.
- /X(\x{e1})Y/replace=>\U$1<,substitute_extended
- X\x{e1}Y
- /A/utf,match_invalid_utf,caseless
- \xe5A
- /\bch\b/utf,match_invalid_utf
- qchq\=ph
- qchq\=ps
- /line1\nbreak/firstline,utf,match_invalid_utf
- line1\nbreak
- line0\nline1\nbreak
- /A\z/utf,match_invalid_utf
- A\x80\x42\n
- # End of testinput10
|