12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544 |
- # This set of tests checks the API, internals, and non-Perl stuff for UTF
- # support, including Unicode properties. However, tests that give different
- # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
- # 12).
- #newline_default lf any anycrlf
- # PCRE2 and Perl disagree about the characteristics of certain Unicode
- # characters. For example, 061C was considered by Perl to be Arabic, though
- # it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
- # However, it *is* in that file for Unicode 10, but when I came to re-check,
- # Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
- # 2066-2069 are graphic and printable according to Perl, though they are
- # actually "isolate" control characters. That is why the following tests are
- # here rather than in test 4.
- /^[\p{Arabic}]/utf
- \x{061c}
- /^[[:graph:]]+$/utf,ucp
- \= Expect no match
- \x{61c}
- \x{2066}
- \x{2067}
- \x{2068}
- \x{2069}
- /^[[:print:]]+$/utf,ucp
- \= Expect no match
- \x{61c}
- \x{2066}
- \x{2067}
- \x{2068}
- \x{2069}
- /^[[:^graph:]]+$/utf,ucp
- \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
- \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
- /^[[:^print:]]+$/utf,ucp
- \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
- \x{2068}\x{2069}
- # Perl does not consider U+180e to be a space character. It is true that it
- # does not appear in the Unicode PropList.txt file as such, but in many other
- # sources it is listed as a space, and has been treated as such in PCRE for
- # a long time.
- /^>[[:blank:]]*/utf,ucp
- >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
- /^A\s+Z/utf,ucp
- A\x{85}\x{180e}\x{2005}Z
- /^A[\s]+Z/utf,ucp
- A\x{2005}Z
- A\x{85}\x{2005}Z
- /^[[:graph:]]+$/utf,ucp
- \= Expect no match
- \x{180e}
- /^[[:print:]]+$/utf,ucp
- \x{180e}
- /^[[:^graph:]]+$/utf,ucp
- \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
- /^[[:^print:]]+$/utf,ucp
- \= Expect no match
- \x{180e}
- # End of U+180E tests.
- # ---------------------------------------------------------------------
- /\x{110000}/IB,utf
- /\o{4200000}/IB,utf
- /\x{ffffffff}/utf
- /\o{37777777777}/utf
- /\x{100000000}/utf
- /\o{77777777777}/utf
- /\x{d800}/utf
- /\o{154000}/utf
- /\x{dfff}/utf
- /\o{157777}/utf
- /\x{d7ff}/utf
- /\o{153777}/utf
- /\x{e000}/utf
- /\o{170000}/utf
- /^\x{100}a\x{1234}/utf
- \x{100}a\x{1234}bcd
- /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
- \x{0041}\x{2262}\x{0391}\x{002e}
- /.{3,5}X/IB,utf
- \x{212ab}\x{212ab}\x{212ab}\x{861}X
- /.{3,5}?/IB,utf
- \x{212ab}\x{212ab}\x{212ab}\x{861}
- /^[ab]/IB,utf
- bar
- \= Expect no match
- c
- \x{ff}
- \x{100}
- /\x{100}*(\d+|"(?1)")/utf
- 1234
- "1234"
- \x{100}1234
- "\x{100}1234"
- \x{100}\x{100}12ab
- \x{100}\x{100}"12"
- \= Expect no match
- \x{100}\x{100}abcd
- /\x{100}*/IB,utf
- /a\x{100}*/IB,utf
- /ab\x{100}*/IB,utf
- /[\x{200}-\x{100}]/utf
- /[Ā-Ą]/utf
- \x{100}
- \x{104}
- \= Expect no match
- \x{105}
- \x{ff}
- /[\xFF]/IB
- >\xff<
- /[^\xFF]/IB
- /[Ä-Ü]/utf
- Ö # Matches without Study
- \x{d6}
- /[Ä-Ü]/utf
- Ö <-- Same with Study
- \x{d6}
- /[\x{c4}-\x{dc}]/utf
- Ö # Matches without Study
- \x{d6}
- /[\x{c4}-\x{dc}]/utf
- Ö <-- Same with Study
- \x{d6}
- /[^\x{100}]abc(xyz(?1))/IB,utf
- /(\x{100}(b(?2)c))?/IB,utf
- /(\x{100}(b(?2)c)){0,2}/IB,utf
- /(\x{100}(b(?1)c))?/IB,utf
- /(\x{100}(b(?1)c)){0,2}/IB,utf
- /\W/utf
- A.B
- A\x{100}B
- /\w/utf
- \x{100}X
- # Use no_start_optimize because the first code unit is different in 8-bit from
- # the wider modes.
- /^\ሴ/IB,utf,no_start_optimize
- /()()()()()()()()()()
- ()()()()()()()()()()
- ()()()()()()()()()()
- ()()()()()()()()()()
- A (x) (?41) B/x,utf
- AxxB
- /^[\x{100}\E-\Q\E\x{150}]/B,utf
- /^[\QĀ\E-\QŐ\E]/B,utf
- /^abc./gmx,newline=any,utf
- abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
- /abc.$/gmx,newline=any,utf
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
- /^a\Rb/bsr=unicode,utf
- a\nb
- a\rb
- a\r\nb
- a\x0bb
- a\x0cb
- a\x{85}b
- a\x{2028}b
- a\x{2029}b
- \= Expect no match
- a\n\rb
- /^a\R*b/bsr=unicode,utf
- ab
- a\nb
- a\rb
- a\r\nb
- a\x0bb
- a\x0c\x{2028}\x{2029}b
- a\x{85}b
- a\n\rb
- a\n\r\x{85}\x0cb
- /^a\R+b/bsr=unicode,utf
- a\nb
- a\rb
- a\r\nb
- a\x0bb
- a\x0c\x{2028}\x{2029}b
- a\x{85}b
- a\n\rb
- a\n\r\x{85}\x0cb
- \= Expect no match
- ab
- /^a\R{1,3}b/bsr=unicode,utf
- a\nb
- a\n\rb
- a\n\r\x{85}b
- a\r\n\r\nb
- a\r\n\r\n\r\nb
- a\n\r\n\rb
- a\n\n\r\nb
- \= Expect no match
- a\n\n\n\rb
- a\r
- /\H\h\V\v/utf
- X X\x0a
- X\x09X\x0b
- \= Expect no match
- \x{a0} X\x0a
- /\H*\h+\V?\v{3,4}/utf
- \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
- \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
- \x09\x20\x{a0}\x0a\x0b\x0c
- \= Expect no match
- \x09\x20\x{a0}\x0a\x0b
- /\H\h\V\v/utf
- \x{3001}\x{3000}\x{2030}\x{2028}
- X\x{180e}X\x{85}
- \= Expect no match
- \x{2009} X\x0a
- /\H*\h+\V?\v{3,4}/utf
- \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
- \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
- \x09\x20\x{202f}\x0a\x0b\x0c
- \= Expect no match
- \x09\x{200a}\x{a0}\x{2028}\x0b
- /[\h]/B,utf
- >\x{1680}
- /[\h]{3,}/B,utf
- >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
- /[\v]/B,utf
- /[\H]/B,utf
- /[\V]/B,utf
- /.*$/newline=any,utf
- \x{1ec5}
- /a\Rb/I,bsr=anycrlf,utf
- a\rb
- a\nb
- a\r\nb
- \= Expect no match
- a\x{85}b
- a\x0bb
- /a\Rb/I,bsr=unicode,utf
- a\rb
- a\nb
- a\r\nb
- a\x{85}b
- a\x0bb
- /a\R?b/I,bsr=anycrlf,utf
- a\rb
- a\nb
- a\r\nb
- \= Expect no match
- a\x{85}b
- a\x0bb
- /a\R?b/I,bsr=unicode,utf
- a\rb
- a\nb
- a\r\nb
- a\x{85}b
- a\x0bb
- /.*a.*=.b.*/utf,newline=any
- QQQ\x{2029}ABCaXYZ=!bPQR
- \= Expect no match
- a\x{2029}b
- \x61\xe2\x80\xa9\x62
- /[[:a\x{100}b:]]/utf
- /a[^]b/utf,allow_empty_class,match_unset_backref
- a\x{1234}b
- a\nb
- \= Expect no match
- ab
- /a[^]+b/utf,allow_empty_class,match_unset_backref
- aXb
- a\nX\nX\x{1234}b
- \= Expect no match
- ab
- /(\x{de})\1/
- \x{de}\x{de}
- /X/newline=any,utf,firstline
- A\x{1ec5}ABCXYZ
- /Xa{2,4}b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /Xa{2,4}?b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /Xa{2,4}+b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X\x{123}{2,4}b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X\x{123}{2,4}?b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X\x{123}{2,4}+b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X\x{123}{2,4}b/utf
- \= Expect no match
- Xx\=ps
- X\x{123}x\=ps
- X\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}\x{123}x\=ps
- /X\x{123}{2,4}?b/utf
- \= Expect no match
- Xx\=ps
- X\x{123}x\=ps
- X\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}\x{123}x\=ps
- /X\x{123}{2,4}+b/utf
- \= Expect no match
- Xx\=ps
- X\x{123}x\=ps
- X\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}x\=ps
- X\x{123}\x{123}\x{123}\x{123}x\=ps
- /X\d{2,4}b/utf
- X\=ps
- X3\=ps
- X33\=ps
- X333\=ps
- X3333\=ps
- /X\d{2,4}?b/utf
- X\=ps
- X3\=ps
- X33\=ps
- X333\=ps
- X3333\=ps
- /X\d{2,4}+b/utf
- X\=ps
- X3\=ps
- X33\=ps
- X333\=ps
- X3333\=ps
- /X\D{2,4}b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X\D{2,4}?b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X\D{2,4}+b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X\D{2,4}b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X\D{2,4}?b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X\D{2,4}+b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[abc]{2,4}b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X[abc]{2,4}?b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X[abc]{2,4}+b/utf
- X\=ps
- Xa\=ps
- Xaa\=ps
- Xaaa\=ps
- Xaaaa\=ps
- /X[abc\x{123}]{2,4}b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[abc\x{123}]{2,4}?b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[abc\x{123}]{2,4}+b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[^a]{2,4}b/utf
- X\=ps
- Xz\=ps
- Xzz\=ps
- Xzzz\=ps
- Xzzzz\=ps
- /X[^a]{2,4}?b/utf
- X\=ps
- Xz\=ps
- Xzz\=ps
- Xzzz\=ps
- Xzzzz\=ps
- /X[^a]{2,4}+b/utf
- X\=ps
- Xz\=ps
- Xzz\=ps
- Xzzz\=ps
- Xzzzz\=ps
- /X[^a]{2,4}b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[^a]{2,4}?b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /X[^a]{2,4}+b/utf
- X\=ps
- X\x{123}\=ps
- X\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\=ps
- X\x{123}\x{123}\x{123}\x{123}\=ps
- /(Y)X\1{2,4}b/utf
- YX\=ps
- YXY\=ps
- YXYY\=ps
- YXYYY\=ps
- YXYYYY\=ps
- /(Y)X\1{2,4}?b/utf
- YX\=ps
- YXY\=ps
- YXYY\=ps
- YXYYY\=ps
- YXYYYY\=ps
- /(Y)X\1{2,4}+b/utf
- YX\=ps
- YXY\=ps
- YXYY\=ps
- YXYYY\=ps
- YXYYYY\=ps
- /(\x{123})X\1{2,4}b/utf
- \x{123}X\=ps
- \x{123}X\x{123}\=ps
- \x{123}X\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
- /(\x{123})X\1{2,4}?b/utf
- \x{123}X\=ps
- \x{123}X\x{123}\=ps
- \x{123}X\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
- /(\x{123})X\1{2,4}+b/utf
- \x{123}X\=ps
- \x{123}X\x{123}\=ps
- \x{123}X\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\=ps
- \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
- /\bthe cat\b/utf
- the cat\=ps
- the cat\=ph
- /abcd*/utf
- xxxxabcd\=ps
- xxxxabcd\=ph
- /abcd*/i,utf
- xxxxabcd\=ps
- xxxxabcd\=ph
- XXXXABCD\=ps
- XXXXABCD\=ph
- /abc\d*/utf
- xxxxabc1\=ps
- xxxxabc1\=ph
- /(a)bc\1*/utf
- xxxxabca\=ps
- xxxxabca\=ph
- /abc[de]*/utf
- xxxxabcde\=ps
- xxxxabcde\=ph
- /X\W{3}X/utf
- X\=ps
- /\sxxx\s/utf,tables=2
- AB\x{85}xxx\x{a0}XYZ
- AB\x{a0}xxx\x{85}XYZ
- /\S \S/utf,tables=2
- \x{a2} \x{84}
- 'A#хц'Bx,newline=any,utf
- 'A#хц
- PQ'Bx,newline=any,utf
- /a+#хaa
- z#XX?/Bx,newline=any,utf
- /a+#хaa
- z#х?/Bx,newline=any,utf
- /\g{A}xxx#bXX(?'A'123)
(?'A'456)/Bx,newline=any,utf
- /\g{A}xxx#bх(?'A'123)
(?'A'456)/Bx,newline=any,utf
- /^\cģ/utf
- /(\R*)(.)/s,utf
- \r\n
- \r\r\n\n\r
- \r\r\n\n\r\n
- /(\R)*(.)/s,utf
- \r\n
- \r\r\n\n\r
- \r\r\n\n\r\n
- /[^\x{1234}]+/Ii,utf
- /[^\x{1234}]+?/Ii,utf
- /[^\x{1234}]++/Ii,utf
- /[^\x{1234}]{2}/Ii,utf
- /f.*/
- for\=ph
- /f.*/s
- for\=ph
- /f.*/utf
- for\=ph
- /f.*/s,utf
- for\=ph
- /\x{d7ff}\x{e000}/utf
- /\x{d800}/utf
- /\x{dfff}/utf
- /\h+/utf
- \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
- \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
- /[\h\x{e000}]+/B,utf
- \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
- \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
- /\H+/utf
- \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
- \x{2000}\x{200a}\x{1fff}\x{200b}
- \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
- \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
- /[\H\x{d7ff}]+/B,utf
- \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
- \x{2000}\x{200a}\x{1fff}\x{200b}
- \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
- \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
- /\v+/utf
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
- /[\v\x{e000}]+/B,utf
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
- /\V+/utf
- \x{2028}\x{2029}\x{2027}\x{2030}
- \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
- /[\V\x{d7ff}]+/B,utf
- \x{2028}\x{2029}\x{2027}\x{2030}
- \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
- /\R+/bsr=unicode,utf
- \x{2027}\x{2030}\x{2028}\x{2029}
- \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
- /(..)\1/utf
- ab\=ps
- aba\=ps
- abab\=ps
- /(..)\1/i,utf
- ab\=ps
- abA\=ps
- aBAb\=ps
- /(..)\1{2,}/utf
- ab\=ps
- aba\=ps
- abab\=ps
- ababa\=ps
- ababab\=ps
- ababab\=ph
- abababa\=ps
- abababa\=ph
- /(..)\1{2,}/i,utf
- ab\=ps
- aBa\=ps
- aBAb\=ps
- AbaBA\=ps
- abABAb\=ps
- aBAbaB\=ph
- abABabA\=ps
- abaBABa\=ph
- /(..)\1{2,}?x/i,utf
- ab\=ps
- abA\=ps
- aBAb\=ps
- abaBA\=ps
- abAbaB\=ps
- abaBabA\=ps
- abAbABaBx\=ps
- /./utf,newline=crlf
- \r\=ps
- \r\=ph
- /.{2,3}/utf,newline=crlf
- \r\=ps
- \r\=ph
- \r\r\=ps
- \r\r\=ph
- \r\r\r\=ps
- \r\r\r\=ph
- /.{2,3}?/utf,newline=crlf
- \r\=ps
- \r\=ph
- \r\r\=ps
- \r\r\=ph
- \r\r\r\=ps
- \r\r\r\=ph
- /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
- /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
- /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
- /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
- /(?<=\x{1234}\x{1234})\bxy/I,utf
- /(?<!^)ETA/utf
- \= Expect no match
- ETA
- /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
- /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
- /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
- /^\u{0000000000010ffff}/utf,extra_alt_bsux
- \x{10ffff}
- /\u{ 1bb1}/utf,extra_alt_bsux
- u{ 1bb1}
- \= Expect no match
- \x{1bb1}
- /\u/utf,alt_bsux
- \\u
- /^a+[a\x{200}]/B,utf
- aa
- /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
- /[\p{L}]/IB
- /[\p{^L}]/IB
- /[\P{L}]/IB
- /[\P{^L}]/IB
- /[abc\p{L}\x{0660}]/IB,utf
- /[\p{Nd}]/IB,utf
- 1234
- /[\p{Nd}+-]+/IB,utf
- 1234
- 12-34
- 12+\x{661}-34
- \= Expect no match
- abcd
- /(?:[\PPa*]*){8,}/
- /[\P{Any}]/B
- /[\P{Any}\E]/B
- /(\P{Yi}+\277)/
- /(\P{Yi}+\277)?/
- /(?<=\P{Yi}{3}A)X/
- /\p{Yi}+(\P{Yi}+)(?1)/
- /(\P{Yi}{2}\277)?/
- /[\P{Yi}A]/
- /[\P{Yi}\P{Yi}\P{Yi}A]/
- /[^\P{Yi}A]/
- /[^\P{Yi}\P{Yi}\P{Yi}A]/
- /(\P{Yi}*\277)*/
- /(\P{Yi}*?\277)*/
- /(\p{Yi}*+\277)*/
- /(\P{Yi}?\277)*/
- /(\P{Yi}??\277)*/
- /(\p{Yi}?+\277)*/
- /(\P{Yi}{0,3}\277)*/
- /(\P{Yi}{0,3}?\277)*/
- /(\p{Yi}{0,3}+\277)*/
- /\p{Zl}{2,3}+/B,utf
- 


- \x{2028}\x{2028}\x{2028}
- /\p{Zl}/B,utf
- /\p{Lu}{3}+/B,utf
- /\pL{2}+/B,utf
- /\p{Cc}{2}+/B,utf
- /^\p{Cf}/utf
- \x{180e}
- \x{061c}
- \x{2066}
- \x{2067}
- \x{2068}
- \x{2069}
- /^\p{Cs}/utf
- \x{dfff}\=no_utf_check
- \= Expect no match
- \x{09f}
- /^\p{Mn}/utf
- \x{1a1b}
- /^\p{Pe}/utf
- \x{2309}
- \x{230b}
- /^\p{Ps}/utf
- \x{2308}
- \x{230a}
- /^\p{Sc}+/utf
- $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
- \x{9f2}
- \= Expect no match
- X
- \x{2c2}
- /^\p{Zs}/utf
- \ \
- \x{a0}
- \x{1680}
- \x{2000}
- \x{2001}
- \= Expect no match
- \x{2028}
- \x{200d}
- # These are here because Perl has problems with the negative versions of the
- # properties and has changed how it behaves for caseless matching.
- /\p{^Lu}/i,utf
- 1234
- \= Expect no match
- ABC
- /\P{Lu}/i,utf
- 1234
- \= Expect no match
- ABC
- /\p{Ll}/i,utf
- a
- Az
- \= Expect no match
- ABC
- /\p{Lu}/i,utf
- A
- a\x{10a0}B
- \= Expect no match
- a
- \x{1d00}
- /\p{Lu}/i,utf
- A
- aZ
- \= Expect no match
- abc
- /[\x{c0}\x{391}]/i,utf
- \x{c0}
- \x{e0}
- # The next two are special cases where the lengths of the different cases of
- # the same character differ. The first went wrong with heap frame storage; the
- # second was broken in all cases.
- /^\x{023a}+?(\x{0130}+)/i,utf
- \x{023a}\x{2c65}\x{0130}
- /^\x{023a}+([^X])/i,utf
- \x{023a}\x{2c65}X
- /\x{c0}+\x{116}+/i,utf
- \x{c0}\x{e0}\x{116}\x{117}
- /[\x{c0}\x{116}]+/i,utf
- \x{c0}\x{e0}\x{116}\x{117}
- /(\x{de})\1/i,utf
- \x{de}\x{de}
- \x{de}\x{fe}
- \x{fe}\x{fe}
- \x{fe}\x{de}
- /^\x{c0}$/i,utf
- \x{c0}
- \x{e0}
- /^\x{e0}$/i,utf
- \x{c0}
- \x{e0}
- # The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
- # will match it only with UCP support, because without that it has no notion
- # of case for anything other than the ASCII letters.
- /((?i)[\x{c0}])/utf
- \x{c0}
- \x{e0}
- /(?i:[\x{c0}])/utf
- \x{c0}
- \x{e0}
- # These are PCRE's extra properties to help with Unicodizing \d etc.
- /^\p{Xan}/utf
- ABCD
- 1234
- \x{6ca}
- \x{a6c}
- \x{10a7}
- \= Expect no match
- _ABC
- /^\p{Xan}+/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- \= Expect no match
- _ABC
- /^\p{Xan}+?/utf
- \x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xan}*/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xan}{2,9}/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xan}{2,9}?/utf
- \x{6ca}\x{a6c}\x{10a7}_
- /^[\p{Xan}]/utf
- ABCD1234_
- 1234abcd_
- \x{6ca}
- \x{a6c}
- \x{10a7}
- \= Expect no match
- _ABC
- /^[\p{Xan}]+/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- \= Expect no match
- _ABC
- /^>\p{Xsp}/utf
- >\x{1680}\x{2028}\x{0b}
- >\x{a0}
- \= Expect no match
- \x{0b}
- /^>\p{Xsp}+/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xsp}+?/utf
- >\x{1680}\x{2028}\x{0b}
- /^>\p{Xsp}*/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xsp}{2,9}/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xsp}{2,9}?/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>[\p{Xsp}]/utf
- >\x{2028}\x{0b}
- /^>[\p{Xsp}]+/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xps}/utf
- >\x{1680}\x{2028}\x{0b}
- >\x{a0}
- \= Expect no match
- \x{0b}
- /^>\p{Xps}+/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xps}+?/utf
- >\x{1680}\x{2028}\x{0b}
- /^>\p{Xps}*/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xps}{2,9}/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>\p{Xps}{2,9}?/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^>[\p{Xps}]/utf
- >\x{2028}\x{0b}
- /^>[\p{Xps}]+/utf
- > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
- /^\p{Xwd}/utf
- ABCD
- 1234
- \x{6ca}
- \x{a6c}
- \x{10a7}
- _ABC
- \= Expect no match
- []
- /^\p{Xwd}+/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xwd}+?/utf
- \x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xwd}*/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- /^\p{Xwd}{2,9}/utf
- A_B12\x{6ca}\x{a6c}\x{10a7}
- /^\p{Xwd}{2,9}?/utf
- \x{6ca}\x{a6c}\x{10a7}_
- /^[\p{Xwd}]/utf
- ABCD1234_
- 1234abcd_
- \x{6ca}
- \x{a6c}
- \x{10a7}
- _ABC
- \= Expect no match
- []
- /^[\p{Xwd}]+/utf
- ABCD1234\x{6ca}\x{a6c}\x{10a7}_
- # A check not in UTF-8 mode
- /^[\p{Xwd}]+/
- ABCD1234_
- # Some negative checks
- /^[\P{Xwd}]+/utf
- !.+\x{019}\x{482}AB
- /^[\p{^Xwd}]+/utf
- !.+\x{019}\x{589}AB
- /[\D]/B,utf,ucp
- 1\x{3c8}2
- /[\d]/B,utf,ucp
- >\x{6f4}<
- /[\S]/B,utf,ucp
- \x{1680}\x{6f4}\x{1680}
- /[\s]/B,utf,ucp
- >\x{1680}<
- /[\W]/B,utf,ucp
- A\x{1735}B
- /[\w]/B,utf,ucp
- >\x{1723}<
- /\D/B,utf,ucp
- 1\x{3c8}2
- /\d/B,utf,ucp
- >\x{6f4}<
- /\S/B,utf,ucp
- \x{1680}\x{6f4}\x{1680}
- /\s/B,utf,ucp
- >\x{1680}>
- /\W/B,utf,ucp
- A\x{1735}B
- /\w/B,utf,ucp
- >\x{1723}<
- /[[:alpha:]]/B,ucp
- /[[:lower:]]/B,ucp
- /[[:upper:]]/B,ucp
- /[[:alnum:]]/B,ucp
- /[[:ascii:]]/B,ucp
- /[[:cntrl:]]/B,ucp
- /[[:digit:]]/B,ucp
- /[[:digit:]]/B,ucp,ascii_digit
- /[[:graph:]]/B,ucp
- /[[:print:]]/B,ucp
- /[[:punct:]]/B,ucp
- /[[:space:]]/B,ucp
- /[[:word:]]/B,ucp
- /[[:xdigit:]]/B,ucp
- /[[:xdigit:]]/B,ucp,ascii_digit
- # Unicode properties for \b and \B
- /\b...\B/utf,ucp
- abc_
- \x{37e}abc\x{376}
- \x{37e}\x{376}\x{371}\x{393}\x{394}
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
- # Without PCRE_UCP, non-ASCII characters always fail, even if < 256
- /\b...\B/utf
- abc_
- \= Expect no match
- \x{37e}abc\x{376}
- \x{37e}\x{376}\x{371}\x{393}\x{394}
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
- # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
- /\b...\B/ucp
- abc_
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
- # Some of these are silly, but they check various combinations
- /[[:^alpha:][:^cntrl:]]+/B,utf,ucp
- 123
- abc
- /[[:^cntrl:][:^alpha:]]+/B,utf,ucp
- 123
- abc
- /[[:alpha:]]+/B,utf,ucp
- abc
- /[[:^alpha:]\S]+/B,utf,ucp
- 123
- abc
- /[^\d]+/B,utf,ucp
- abc123
- abc\x{123}
- \x{660}abc
- /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
- /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
- /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
- /\p{Han}+X\p{Greek}+\x{370}/B,utf
- /\p{Xan}+!\p{Xan}+A/B
- /\p{Xsp}+!\p{Xsp}\t/B
- /\p{Xps}+!\p{Xps}\t/B
- /\p{Xwd}+!\p{Xwd}_/B
- /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
- # These behaved oddly in Perl, so they are kept in this test
- /(\x{23a}\x{23a}\x{23a})?\1/i,utf
- \= Expect no match
- \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
- /(ȺȺȺ)?\1/i,utf
- \= Expect no match
- ȺȺȺⱥⱥ
- /(\x{23a}\x{23a}\x{23a})?\1/i,utf
- \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
- /(ȺȺȺ)?\1/i,utf
- ȺȺȺⱥⱥⱥ
- /(\x{23a}\x{23a}\x{23a})\1/i,utf
- \= Expect no match
- \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
- /(ȺȺȺ)\1/i,utf
- \= Expect no match
- ȺȺȺⱥⱥ
- /(\x{23a}\x{23a}\x{23a})\1/i,utf
- \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
- /(ȺȺȺ)\1/i,utf
- ȺȺȺⱥⱥⱥ
- /(\x{2c65}\x{2c65})\1/i,utf
- \x{2c65}\x{2c65}\x{23a}\x{23a}
- /(ⱥⱥ)\1/i,utf
- ⱥⱥȺȺ
- /(\x{23a}\x{23a}\x{23a})\1Y/i,utf
- X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
- /(\x{2c65}\x{2c65})\1Y/i,utf
- X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
- # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
- /^[\p{Batak}]/utf
- \x{1bc0}
- \x{1bff}
- \= Expect no match
- \x{1bf4}
- /^[\p{Brahmi}]/utf
- \x{11000}
- \x{1106f}
- \= Expect no match
- \x{1104e}
- /^[\p{Mandaic}]/utf
- \x{840}
- \x{85e}
- \= Expect no match
- \x{85c}
- \x{85d}
- /(\X*)(.)/s,utf
- A\x{300}
- /^S(\X*)e(\X*)$/utf
- Stéréo
- /^\X/utf
- ́réo
- /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
- aX41z
- \= Expect no match
- aAz
- /\X/
- a\=ps
- a\=ph
- /\Xa/
- aa\=ps
- aa\=ph
- /\X{2}/
- aa\=ps
- aa\=ph
- /\X+a/
- a\=ps
- aa\=ps
- aa\=ph
- /\X+?a/
- a\=ps
- ab\=ps
- aa\=ps
- aa\=ph
- aba\=ps
- # These Unicode 6.1.0 scripts are not known to Perl.
- /\p{Chakma}\d/utf,ucp
- \x{11100}\x{1113c}
- /\p{Takri}\d/utf,ucp
- \x{11680}\x{116c0}
- /^\X/utf
- A\=ps
- A\=ph
- A\x{300}\x{301}\=ps
- A\x{300}\x{301}\=ph
- A\x{301}\=ps
- A\x{301}\=ph
- /^\X{2,3}/utf
- A\=ps
- A\=ph
- AA\=ps
- AA\=ph
- A\x{300}\x{301}\=ps
- A\x{300}\x{301}\=ph
- A\x{300}\x{301}A\x{300}\x{301}\=ps
- A\x{300}\x{301}A\x{300}\x{301}\=ph
- /^\X{2}/utf
- AA\=ps
- AA\=ph
- A\x{300}\x{301}A\x{300}\x{301}\=ps
- A\x{300}\x{301}A\x{300}\x{301}\=ph
- /^\X+/utf
- AA\=ps
- AA\=ph
- /^\X+?Z/utf
- AA\=ps
- AA\=ph
- /A\x{3a3}B/IBi,utf
- /[\x{3a3}]/Bi,utf
- /[^\x{3a3}]/Bi,utf
- /[\x{3a3}]+/Bi,utf
- /[^\x{3a3}]+/Bi,utf
- /a*\x{3a3}/Bi,utf
- /\x{3a3}+a/Bi,utf
- /\x{3a3}*\x{3c2}/Bi,utf
- /\x{3a3}{3}/i,utf,aftertext
- \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
- /\x{3a3}{2,4}/i,utf,aftertext
- \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
- /\x{3a3}{2,4}?/i,utf,aftertext
- \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
- /\x{3a3}+./i,utf,aftertext
- \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
- /\x{3a3}++./i,utf,aftertext
- \= Expect no match
- \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
- /\x{3a3}*\x{3c2}/Bi,utf
- /[^\x{3a3}]*\x{3c2}/Bi,utf
- /[^a]*\x{3c2}/Bi,utf
- /ist/Bi,utf
- \= Expect no match
- ikt
- /is+t/i,utf
- iSs\x{17f}t
- \= Expect no match
- ikt
- /is+?t/i,utf
- \= Expect no match
- ikt
- /is?t/i,utf
- \= Expect no match
- ikt
- /is{2}t/i,utf
- \= Expect no match
- iskt
- # This property is a PCRE special
- /^\p{Xuc}/utf
- $abc
- @abc
- `abc
- \x{1234}abc
- \= Expect no match
- abc
- /^\p{Xuc}+/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\p{Xuc}+?/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\p{Xuc}+?\*/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\p{Xuc}++/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\p{Xuc}{3,5}/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\p{Xuc}{3,5}?/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^[\p{Xuc}]/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^[\p{Xuc}]+/utf
- $@`\x{a0}\x{1234}\x{e000}**
- \= Expect no match
- \x{9f}
- /^\P{Xuc}/utf
- abc
- \= Expect no match
- $abc
- @abc
- `abc
- \x{1234}abc
- /^[\P{Xuc}]/utf
- abc
- \= Expect no match
- $abc
- @abc
- `abc
- \x{1234}abc
- # Some auto-possessification tests
- /\pN+\z/B
- /\PN+\z/B
- /\pN+/B
- /\PN+/B
- /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
- /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
- /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
- /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
- /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
- /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
- /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
- /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
- /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
- /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
- /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
- /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
- /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
- /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
- /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
- /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
- /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
- /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
- /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
- /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
- # End auto-possessification tests
- /\w+/B,utf,ucp,auto_callout
- abcd
- /[\p{N}]?+/B,no_auto_possess
- /[\p{L}ab]{2,3}+/B,no_auto_possess
- /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
- /.+\X/Bsx
- /\X+$/Bmx
- /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
- /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
- /[RST]+/Bi,utf,ucp
- /[R-T]+/Bi,utf,ucp
- /[Q-U]+/Bi,utf,ucp
- /^s?c/Iim,utf
- scat
- /\X?abc/utf,no_start_optimize
- \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
- /\x{100}\x{200}\K\x{300}/utf,startchar
- \x{100}\x{200}\x{300}
- # Test UTF characters in a substitution
- /ábc/utf,replace=XሴZ
- 123ábc123
- /(?<=abc)(|def)/g,utf,replace=<$0>
- 123abcáyzabcdef789abcሴqr
- /[A-`]/iB,utf
- abcdefghijklmno
- /(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk
- \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
- /(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk
- \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
- "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
- /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
- "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
- /[\pS#moq]/
- =
- /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
- cxxxz
- /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
- abcd
- /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
- a\x{e0}\x{101}\x{c0}\x{102}
- /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
- ab12cde
- /(*UCP)(*UTF)[[:>:]]X/B
- /abc/utf,replace=xyz
- abc\=zero_terminate
- /a[[:punct:]b]/ucp,bincode
- /a[[:punct:]b]/utf,ucp,bincode
- /a[b[:punct:]]/utf,ucp,bincode
- /[[:^ascii:]]/utf,ucp,bincode
- /[[:^ascii:]\w]/utf,ucp,bincode
- /[\w[:^ascii:]]/utf,ucp,bincode
- /[^[:ascii:]\W]/utf,ucp,bincode
- \x{de}
- \x{200}
- \= Expect no match
- \x{589}
- \x{37e}
- /[[:^ascii:]a]/utf,ucp,bincode
- /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
- /L(?#(|++<!(2)?/B,utf,ucp,auto_callout
- /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
- /[\D]/utf
- \x{1d7cf}
- /[\D\P{Nd}]/utf
- \x{1d7cf}
- /[^\D]/utf
- a9b
- \= Expect no match
- \x{1d7cf}
- /[^\D\P{Nd}]/utf
- a9b
- \x{1d7cf}
- \= Expect no match
- \x{10000}
- # Hex uses pattern length, not zero-terminated. This tests for overrunning
- # the given length of a pattern.
- /'(*UTF)'/hex
- /'#('/hex,extended,utf
- /a(?<=A\XB)/utf
- /../utf,auto_callout
- \n\x{123}\x{123}\x{123}\x{123}
- # This tests processing wide characters in extended mode.
- /XÈ€/x,utf
- # These three test a bug fix that was not clearing up after a locale setting
- # when the test or a subsequent one matched a wide character.
- //locale=C
- /[\P{Yi}]/utf
- \x{2f000}
- /[\P{Yi}]/utf,locale=C
- \x{2f000}
- /^(?<!(?=􃡜))/B,utf
- # Horizontal and vertical space lists ignore caseless
- /[\HH]/Bi,utf
- /[^\HH]/Bi,utf
- //g,utf
- \=zero_terminate
- /^(?1)\p{Nd}{3}(a)/
- a123a
- /\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
- # ---------------------------------------------------------------------------
- # A bunch of tests that hit lines of code that others do not (at least when
- # these were created).
- /^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- bbb
- cc
- /^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- aaa\x{100}
- /^X\X/no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{L&}+?/no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{L}+?/no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{Lu}+?/no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{Arabic}+?/no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\s+?/ucp,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- XX
- /^X\S+?/ucp,no_start_optimize,no_auto_possess
- XX
- \= Expect no match
- X
- /^X\w+?/ucp,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\R+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\H+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\V+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\s+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- XX
- /^X\S+?/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- /^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
- XYYYZ
- \= Expect no match
- XY
- XYY
- XYYY
- XYYYYZ
- /^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- /^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- /^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- /^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- XY\x{2f00}!
- /^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- /^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- X\n
- X\n!
- X\n\n!
- /^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XYY\n
- /^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- XYY!
- /^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- X\x{b5}
- X\x{b5}\x{b5}Y
- /^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- X$
- X@@Y
- /(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect partial match
- XYY\r\=ph
- \= Expect no match
- X
- /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X
- XYY
- /^X\R+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X\nX
- X\n\rX
- X\n\r\nX
- X\n\n
- X\n\x{0c}
- /(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X\nX
- X\n\rX
- X\n\r\nX
- X\n\n
- X\n\x{0c}
- /^X\H+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- XY\t
- XYY
- /^X\h+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X\t\t
- X\tY
- /^X\V+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- XY\n
- XYY
- /^X\v+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X\n\n
- X\nY
- /^X\D+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- XY9
- XYY
- /^X\d+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X99
- X9Y
- /^X\S+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- XY\n
- XYY
- /^X\s+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X\n\n
- X\nY
- /^X\W+?Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X.A
- X++
- /^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- XY!
- /^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- /^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
- \= Expect no match
- XY
- /^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
- \= Expect no match
- XYY
- /^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
- \= Expect no match
- X$
- # ----------------------------------------------------------------------
- # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
- /\x{d800}/B,utf,bad_escape_is_literal
- /\ud800/B,utf,alt_bsux,bad_escape_is_literal
- # ----------------------------------------------------------------------
- /Aሴ+B/literal,utf,no_utf_check
- Aሴ+B
-
- # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it
- # doesn't recognize all these scripts. In time these three tests can be moved
- # to test 4.
- /^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
- (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
- (\p{Zanabazar_Square}+)/x,utf
- \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
- /^\x{1E900}\x{104B0}/i,utf
- \x{1E900}\x{104B0}
- \x{1E922}\x{104D8}
-
- /^(?:(\X)(?C))+$/utf
- \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
- # Similarly for Unicode 11.0.0
- /^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
- (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
- \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
- # These two are here because of differences from Perl.
- /^\X/utf
- A\x{200d}B A ZWJ
- \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic
- \x{261D}\x{1F3FB}B Extended_Pictographic Extend
- \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
- \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P
- \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P
- # Regional indicators
- /^(\X)(\X)/utf,aftertext
- \x{1F1E6}\x{1F1E7}\x{1F1E7}B
- \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
-
- # More differences from Perl
- /^\p{Common}/utf
- \x{60c}
- \x{61f}
- \x{964}
- \x{965}
- /^\p{Inherited}/utf
- \x{64b}
- \x{654}
- \x{655}
- \x{1D1AA}
- /\N{U+}/
- /\N{U+}/utf
- /\N{U}/
- # This tests the non-UTF Unicode NEL pattern whitespace character, only
- # recognized by PCRE2 with /x when there is Unicode support.
- /A
-
…B/x
- AB
-
- # This tests Unicode Pattern White Space characters in verb names when they
- # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
- # with code points greater than 255 between A, B, and C in the pattern.
- /(*: A‎B
C)abc/x,utf,mark,alt_verbnames
- abc
-
- # Script run tests: auto-possessification
- /^(*sr:.*)/B,utf
- paypаl.com A classic example of why script run checks are a good thing
- /^(*sr:.*(*ACCEPT))/utf
- paypаl.com But *ACCEPT breaks things
- /^(*sr:\x{2e80}*)/B,utf
- /^(*sr:\x{2e80}*)\x{2e80}/B,utf
- /(?<!)(*sr:)/B
- /(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
- abcXBXYCCC!
- # Some script run patterns are broken in Perl 5.28.0. These can be moved into
- # test 4 when a mended version of Perl is released.
- /^(*sr:.{4})/utf
- \x{0980}12\x{0993} Bengali Common-digits Bengali
- \x{0780}12\x{07b1} Thaana Common-digits Thaana
- \x{0e01}12\x{0e5b} Thai Common-digits Thai
- \x{1780}12\x{19ff} Khmer Common-digits Khmer
- \x{0904}12\x{0939} Devanagari Common-digits Devanagari
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
-
- # These ones involve non-ASCII but nevertheless Common digits. As of October
- # 2018 even blead Perl wasn't handling all of these - but is going to.
- /^(*sr:.{4})/utf
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
- A\x{ff10}BC Latin Common-notascii-digit Latin Latin
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
- \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
- A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
-
- # Some Unicode 12.1.0 new script characters
- /\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
- \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
- # Some Unicode 13.0.0 new script characters
- /\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
- \x{10FB0}\x{11900}\x{18B00}\x{10E80}
- # -------
- # Test reference and errors in non-ASCII characters in group names
- /(?'ð‘ …ABC'...)/I,utf
- abcde\=copy=ð‘ …ABC
- # Bad ones
- /(?'AB�C'...)\g{AB�C}/utf
- /(?'Ù ABC'...)/utf
- /(?'²ABC'...)/utf
- /(?'X²ABC'...)/utf
- # -------
- /\p{Any}*xyz/I
- /(|ß)7/caseless,ucp
- /(\xc1)\1/i,ucp
- \xc1\xe1\=no_jit
-
- /\p{L&}+\p{bidi_control}/B
- /\p{bidi_control}+\p{L&}/B
- /\p{han}/B
- /\p{script:han}/B
- /\p{sc:han}/B
- /\p{script extensions:han}/B
- /\p{scx:han}/B
- # Test error - invalid script name
- /\p{sc:L}/
- # Some Boolean property tests that differ from Perl
- /\p{emojimodifierbase}\p{ebase}/g,utf
- >AN<>\x{261d}\x{1faf6}<>yz<
- /\p{graphemelink}\p{grlink}/g,utf
- >AN<>\x{11d97}\x{94d}<>yz<
-
- /\p{soft dotted}\p{sd}/g,utf
- >AF23<>\x{1df1a}\x{69}<>yz<
-
- # ------------------------------------------------
- /\p{\2b[:xäigi:t:_/
- # Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
- # the restriction.
- /AskZ/i,utf,caseless_restrict
- AskZ
- aSKz
- \= Expect no match
- A\x{17f}kZ
- As\x{212a}Z
- /AskZ/i,utf
- AskZ
- aSKz
- A\x{17f}kZ
- As\x{212a}Z
- /A\x{17f}\x{212a}Z/ir,utf
- \= Expect no match
- AskZ
- /A\x{17f}\x{212a}Z/i,utf
- AskZ
- /[AskZ]+/i,utf,caseless_restrict
- AskZ
- aSKz
- A\x{17f}kZ
- As\x{212a}Z
- /[AskZ]+/i,utf
- AskZ
- aSKz
- A\x{17f}kZ
- As\x{212a}Z
- /[\x{17f}\x{212a}]+/ir,utf
- \= Expect no match
- AskZ
- /[\x{17f}\x{212a}]+/i,utf
- AskZ
- /[^s]+/ir,utf
- A\x{17f}Z
- /[^s]+/i,utf
- A\x{17f}Z
- /[^k]+/ir,utf
- A\x{212a}Z
-
- /[^k]+/i,utf
- A\x{212a}Z
-
- /[^sk]+/ir,utf
- A\x{17f}\x{212a}Z
- /[^sk]+/i,utf
- A\x{17f}\x{212a}Z
- /[^\x{17f}]+/ir,utf
- AsSZ
- /[^\x{17f}]+/i,utf
- AsSZ
- /[Ss]+/irB,utf
- Sss\x{17f}ss
- /[Ss]+/iB,utf
- Sss\x{17f}ss
- /[S\x{17f}]/irB,utf
- /[S\x{17f}]/iB,utf
- /[\x{17f}s]/irB,utf
- /[\x{17f}s]/iB,utf
- /[\x{4b}\x{6b}]/irB,utf
- /[\x{4b}\x{6b}]/iB,utf
- /s(?r)s(?-r)s(?r:s)s/i,utf
- \x{17f}S\x{17f}S\x{17f}
- \= Expect no match
- \x{17f}\x{17f}\x{17f}S\x{17f}
- \x{17f}S\x{17f}\x{17f}\x{17f}
- /k(?^i)k/ir,utf
- K\x{212a}
- \= Expect no match
- \x{212a}\x{212a}
- # End caseless restrict tests
- # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
- # DIGITS
- /\d+/i,utf
- 123\x{660}456
- /\d+/i,utf,ucp
- 123\x{660}456
- /\d+/i,utf,ucp,ascii_bsd
- 123\x{660}456
- /[\d]+/i,utf
- 123\x{660}456
- /[\d]+/i,utf,ucp
- 123\x{660}456
- /[\d]+/i,utf,ucp,ascii_bsd
- 123\x{660}456
- /\d(?aD)\d(?-aD)\d/utf,ucp
- \x{660}9\x{660}
- \= Expect no match
- \x{660}\x{660}\x{660}
- /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
- 999
- 9\x{660}9
- /\d(?a)\d(?-a)\d/utf,ucp
- \x{660}9\x{660}
- \= Expect no match
- \x{660}\x{660}\x{660}
- /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
- 999
- 9\x{660}9
- # SPACES
- />\s+</i,utf
- > <
- \= Expect no match
- >\x{a0} <
- />\s+</i,utf,ucp
- > <
- >\x{a0} <
- />\s+</i,utf,ucp,ascii_bss
- > <
- \= Expect no match
- >\x{a0} <
- />[\s]+</i,utf
- > <
- \= Expect no match
- >\x{a0} <
- />[\s]+</i,utf,ucp
- > <
- >\x{a0} <
- />[\s]+</i,utf,ucp,ascii_bss
- > <
- \= Expect no match
- >\x{a0} <
- />\s(?aS)\s(?-aS)\s</utf,ucp
- >\x{a0} \x{a0}<
- \= Expect no match
- >\x{a0}\x{a0}\x{a0}<
- />\s(?a)\s(?-a)\s</utf,ucp
- >\x{a0} \x{a0}<
- \= Expect no match
- >\x{a0}\x{a0}\x{a0}<
-
- # WORDS
- /\w+/i,utf
- 123\x{660}abc
- /\w+/i,utf,ucp
- 123\x{660}abc
- /\w+/i,utf,ucp,ascii_bsw
- 123\x{660}abc
- /[\w]+/i,utf
- 123\x{660}abc
- /[\w]+/i,utf,ucp
- 123\x{660}abc
- /[\w]+/i,utf,ucp,ascii_bsw
- 123\x{660}abc
- /\w(?aW)\w(?-aW)\w/utf,ucp
- \x{660}A\x{c0}
- \= Expect no match
- \x{660}\x{c0}\x{c0}
- /\w(?a)\w(?-a)\w/utf,ucp
- \x{660}A\x{c0}
- \= Expect no match
- \x{660}\x{c0}\x{c0}
-
- # WORD BOUNDARY
- /\bABC\b/utf
- \x{c0}ABC\x{d0}
- /\bABC\b/utf,ucp
- \= Expect no match
- \x{c0}ABC\x{d0}
- /\bABC\b/utf,ucp,ascii_bsw
- \x{c0}ABC\x{d0}
- /\bABC\b/utf,ucp,ascii_all
- \x{c0}ABC\x{d0}
-
- # POSIX
- /^[[:digit:]]+$/utf,ucp
- 123456
- 123\x{660}456
- /^[[:digit:]]+$/utf,ucp,ascii_digit
- 123456
- \= Expect no match
- 123\x{660}456
- /[[:digit:]]+/g,utf,ucp,ascii_digit
- 123\x{660}456
- /(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
- 11
- \x{ff11}1
- \= Expect no match
- 1\x{ff11}
- /(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
- 11
- \x{ff11}1
- \= Expect no match
- 1\x{ff11}
- /(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
- 11
- \= Expect no match
- \x{ff11}1
- 1\x{ff11}
- /[[:digit:]]+/utf,ucp,ascii_posix
- 123\x{660}456
- /(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
- 11
- \x{ff11}1
- \= Expect no match
- 1\x{ff11}
- /(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
- 11
- \x{ff11}1
- \= Expect no match
- 1\x{ff11}
- /(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
- 11
- \x{ff11}1
- \= Expect no match
- 1\x{ff11}
- /^[[:xdigit:]]+$/utf,ucp
- f0
- 1A
- d\x{ff10}
- \x{ff26}8
- \= Expect no match
- 8g\=no_jit
- /^[[:xdigit:]]+$/utf,ucp,ascii_digit
- f0
- 1A
- \= Expect no match
- d\x{ff10}
- \x{ff26}8
- 8g
- />[[:space:]]+</utf,ucp
- >\x{a0} \x{a0}<
- >\x{a0}\x{a0}\x{a0}<
- />[[:space:]]+</utf,ucp,ascii_posix
- \= Expect no match
- >\x{a0} \x{a0}<
- /(?aP)[[:alnum:]]+/i,ucp,utf
- abcáxyz
- abc\x{660}xyz
- /(?aP)[[:alnum:]\d]+/i,ucp,utf
- abc\x{660}xyz
-
- /(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
- \x{660}A\x{660}
- \= Expect no match
- \x{660}\x{660}\x{660}
-
- # VARIOUS
- /[\d\s\w]+/a,ucp,utf
- 9 A\x{660}À
- 9 AÀ\x{660}
- # End PCRE2_EXTRA_ASCII_xxx tests
- /(?<!(|l ))/utf
- (?<!(|l ))
- # End of testinput5
|