12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928 |
- # This set of tests is for UTF-8 support and Unicode property support, with
- # relevance only for the 8-bit library.
- #newline_default lf any anycrlf
- # The next 5 patterns have UTF-8 errors
- /[Ã]/utf
- Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
- /Ã/utf
- Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
- /ÃÃÃxxx/utf
- Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
- /‚‚‚‚‚‚‚Ã/utf
- Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
- /‚‚‚‚‚‚‚Ã/match_invalid_utf
- Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
- # Now test subjects
- /badutf/utf
- \= Expect UTF-8 errors
- X\xdf
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
- XX\xef
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
- XXX\xef\x80
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
- X\xf7
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
- XX\xf7\x80
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
- XXX\xf7\x80\x80
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
- \xfb
- Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
- \xfb\x80
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
- \xfb\x80\x80
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
- \xfb\x80\x80\x80
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
- \xfd
- Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
- \xfd\x80
- Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
- \xfd\x80\x80
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
- \xfd\x80\x80\x80
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
- \xfd\x80\x80\x80\x80
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
- \xdf\x7f
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
- \xef\x7f\x80
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
- \xef\x80\x7f
- Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
- \xf7\x7f\x80\x80
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
- \xf7\x80\x7f\x80
- Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
- \xf7\x80\x80\x7f
- Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
- \xfb\x7f\x80\x80\x80
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
- \xfb\x80\x7f\x80\x80
- Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
- \xfb\x80\x80\x7f\x80
- Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
- \xfb\x80\x80\x80\x7f
- Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
- \xfd\x7f\x80\x80\x80\x80
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
- \xfd\x80\x7f\x80\x80\x80
- Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
- \xfd\x80\x80\x7f\x80\x80
- Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
- \xfd\x80\x80\x80\x7f\x80
- Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
- \xfd\x80\x80\x80\x80\x7f
- Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0
- \xed\xa0\x80
- Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \xc0\x8f
- Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0
- \xe0\x80\x8f
- Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0
- \xf0\x80\x80\x8f
- Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
- \xf8\x80\x80\x80\x8f
- Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
- \xfc\x80\x80\x80\x80\x8f
- Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
- \x80
- Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
- \xfe
- Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
- \xff
- Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
- /badutf/utf
- \= Expect UTF-8 errors
- XX\xfb\x80\x80\x80\x80
- Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
- XX\xfd\x80\x80\x80\x80\x80
- Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2
- XX\xf7\xbf\xbf\xbf
- Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
- /shortutf/utf
- \= Expect UTF-8 errors
- XX\xdf\=ph
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
- XX\xef\=ph
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
- XX\xef\x80\=ph
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
- \xf7\=ph
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
- \xf7\x80\=ph
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
- \xf7\x80\x80\=ph
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
- \xfb\=ph
- Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
- \xfb\x80\=ph
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
- \xfb\x80\x80\=ph
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
- \xfb\x80\x80\x80\=ph
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
- \xfd\=ph
- Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
- \xfd\x80\=ph
- Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
- \xfd\x80\x80\=ph
- Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
- \xfd\x80\x80\x80\=ph
- Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
- \xfd\x80\x80\x80\x80\=ph
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
- /anything/utf
- \= Expect UTF-8 errors
- X\xc0\x80
- Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
- XX\xc1\x8f
- Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2
- XXX\xe0\x9f\x80
- Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3
- \xf0\x8f\x80\x80
- Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
- \xf8\x87\x80\x80\x80
- Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
- \xfc\x83\x80\x80\x80\x80
- Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
- \xfe\x80\x80\x80\x80\x80
- Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
- \xff\x80\x80\x80\x80\x80
- Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
- \xf8\x88\x80\x80\x80
- Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \xf9\x87\x80\x80\x80
- Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \xfc\x84\x80\x80\x80\x80
- Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
- \xfd\x83\x80\x80\x80\x80
- Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
- \= Expect no match
- \xc3\x8f
- No match
- \xe0\xaf\x80
- No match
- \xe1\x80\x80
- No match
- \xf0\x9f\x80\x80
- No match
- \xf1\x8f\x80\x80
- No match
- \xf8\x88\x80\x80\x80\=no_utf_check
- No match
- \xf9\x87\x80\x80\x80\=no_utf_check
- No match
- \xfc\x84\x80\x80\x80\x80\=no_utf_check
- No match
- \xfd\x83\x80\x80\x80\x80\=no_utf_check
- No match
-
- # Similar tests with offsets
- /badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=1
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- \= Expect no match
- X\xdfabcd\=offset=2
- No match
- /(?<=x)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=1
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=2
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\xdf\=offset=3
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
- \= Expect no match
- X\xdfabcd\=offset=3
- No match
- /(?<=xx)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=1
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=2
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=3
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- /(?<=xxxx)badutf/utf
- \= Expect UTF-8 errors
- X\xdfabcd
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=1
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=2
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabcd\=offset=3
- Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
- X\xdfabc\xdf\=offset=6
- Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
- X\xdfabc\xdf\=offset=7
- Failed: error -33: bad offset value
- \= Expect no match
- X\xdfabcd\=offset=6
- No match
-
- /\x{100}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 1
- /\x{1000}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{1000}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xe1
- Last code unit = \x80
- Subject length lower bound = 1
- /\x{10000}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{10000}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xf0
- Last code unit = \x80
- Subject length lower bound = 1
- /\x{100000}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100000}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xf4
- Last code unit = \x80
- Subject length lower bound = 1
- /\x{10ffff}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{10ffff}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xf4
- Last code unit = \xbf
- Subject length lower bound = 1
- /[\x{ff}]/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{ff}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc3
- Last code unit = \xbf
- Subject length lower bound = 1
- /[\x{100}]/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 1
- /\x80/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{80}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc2
- Last code unit = \x80
- Subject length lower bound = 1
- /\xff/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{ff}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc3
- Last code unit = \xbf
- Subject length lower bound = 1
- /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{d55c}\x{ad6d}\x{c5b4}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xed
- Last code unit = \xb4
- Subject length lower bound = 3
- \x{D55c}\x{ad6d}\x{C5B4}
- 0: \x{d55c}\x{ad6d}\x{c5b4}
- /\x{65e5}\x{672c}\x{8a9e}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{65e5}\x{672c}\x{8a9e}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xe6
- Last code unit = \x9e
- Subject length lower bound = 3
- \x{65e5}\x{672c}\x{8a9e}
- 0: \x{65e5}\x{672c}\x{8a9e}
- /\x{80}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{80}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc2
- Last code unit = \x80
- Subject length lower bound = 1
- /\x{084}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{84}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc2
- Last code unit = \x84
- Subject length lower bound = 1
- /\x{104}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{104}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x84
- Subject length lower bound = 1
- /\x{861}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{861}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xe0
- Last code unit = \xa1
- Subject length lower bound = 1
- /\x{212ab}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{212ab}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xf0
- Last code unit = \xab
- Subject length lower bound = 1
- /[^ab\xC0-\xF0]/IB,utf
- ------------------------------------------------------------------
- Bra
- [\x00-`c-\xbf\xf1-\xff] (neg)
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
- \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
- \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
- 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
- Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
- \xfe \xff
- Subject length lower bound = 1
- \x{f1}
- 0: \x{f1}
- \x{bf}
- 0: \x{bf}
- \x{100}
- 0: \x{100}
- \x{1000}
- 0: \x{1000}
- \= Expect no match
- \x{c0}
- No match
- \x{f0}
- No match
- /Ä€{3,4}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}{3}
- \x{100}?+
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 3
- \x{100}\x{100}\x{100}\x{100\x{100}
- 0: \x{100}\x{100}\x{100}
- /(\x{100}+|x)/IB,utf
- ------------------------------------------------------------------
- Bra
- CBra 1
- \x{100}++
- Alt
- x
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- Starting code units: x \xc4
- Subject length lower bound = 1
- /(\x{100}*a|x)/IB,utf
- ------------------------------------------------------------------
- Bra
- CBra 1
- \x{100}*+
- a
- Alt
- x
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- Starting code units: a x \xc4
- Subject length lower bound = 1
- /(\x{100}{0,2}a|x)/IB,utf
- ------------------------------------------------------------------
- Bra
- CBra 1
- \x{100}{0,2}+
- a
- Alt
- x
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- Starting code units: a x \xc4
- Subject length lower bound = 1
- /(\x{100}{1,2}a|x)/IB,utf
- ------------------------------------------------------------------
- Bra
- CBra 1
- \x{100}
- \x{100}{0,1}+
- a
- Alt
- x
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- Starting code units: x \xc4
- Subject length lower bound = 1
- /\x{100}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 1
- /a\x{100}\x{101}*/IB,utf
- ------------------------------------------------------------------
- Bra
- a\x{100}
- \x{101}*+
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = 'a'
- Last code unit = \x80
- Subject length lower bound = 2
- /a\x{100}\x{101}+/IB,utf
- ------------------------------------------------------------------
- Bra
- a\x{100}
- \x{101}++
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = 'a'
- Last code unit = \x81
- Subject length lower bound = 3
- /[^\x{c4}]/IB
- ------------------------------------------------------------------
- Bra
- [^\x{c4}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Subject length lower bound = 1
- /[\x{100}]/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 1
- \x{100}
- 0: \x{100}
- Z\x{100}
- 0: \x{100}
- \x{100}Z
- 0: \x{100}
- /[\xff]/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{ff}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc3
- Last code unit = \xbf
- Subject length lower bound = 1
- >\x{ff}<
- 0: \x{ff}
- /[^\xff]/IB,utf
- ------------------------------------------------------------------
- Bra
- [^\x{ff}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Subject length lower bound = 1
- /\x{100}abc(xyz(?1))/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}abc
- CBra 1
- xyz
- Recurse
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- First code unit = \xc4
- Last code unit = 'z'
- Subject length lower bound = 7
- /\777/I,utf
- Capture group count = 0
- Options: utf
- First code unit = \xc7
- Last code unit = \xbf
- Subject length lower bound = 1
- \x{1ff}
- 0: \x{1ff}
- \777
- 0: \x{1ff}
- /\x{100}+\x{200}/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}++
- \x{200}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = \x80
- Subject length lower bound = 2
- /\x{100}+X/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}++
- X
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc4
- Last code unit = 'X'
- Subject length lower bound = 2
- /^[\QĀ\E-\Q�\E/B,utf
- Failed: error 106 at offset 15: missing terminating ] for character class
- # This tests the stricter UTF-8 check according to RFC 3629.
- /X/utf
- \= Expect UTF-8 errors
- \x{d800}
- Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{da00}
- Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{dfff}
- Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{110000}
- Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
- \x{2000000}
- Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \x{7fffffff}
- Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
- \= Expect no match
- \x{d800}\=no_utf_check
- No match
- \x{da00}\=no_utf_check
- No match
- \x{dfff}\=no_utf_check
- No match
- \x{110000}\=no_utf_check
- No match
- \x{2000000}\=no_utf_check
- No match
- \x{7fffffff}\=no_utf_check
- No match
- /(*UTF8)\x{1234}/
- abcd\x{1234}pqr
- 0: \x{1234}
- /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
- Capture group count = 0
- Compile options: <none>
- Overall options: utf
- \R matches any Unicode newline
- Forced newline is CRLF
- First code unit = 'a'
- Last code unit = 'b'
- Subject length lower bound = 3
- /\h/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3
- Subject length lower bound = 1
- ABC\x{09}
- 0: \x{09}
- ABC\x{20}
- 0:
- ABC\x{a0}
- 0: \x{a0}
- ABC\x{1680}
- 0: \x{1680}
- ABC\x{180e}
- 0: \x{180e}
- ABC\x{2000}
- 0: \x{2000}
- ABC\x{202f}
- 0: \x{202f}
- ABC\x{205f}
- 0: \x{205f}
- ABC\x{3000}
- 0: \x{3000}
- /\v/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
- Subject length lower bound = 1
- ABC\x{0a}
- 0: \x{0a}
- ABC\x{0b}
- 0: \x{0b}
- ABC\x{0c}
- 0: \x{0c}
- ABC\x{0d}
- 0: \x{0d}
- ABC\x{85}
- 0: \x{85}
- ABC\x{2028}
- 0: \x{2028}
- /\h*A/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
- Last code unit = 'A'
- Subject length lower bound = 1
- CDBABC
- 0: A
- /\v+A/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
- Last code unit = 'A'
- Subject length lower bound = 2
- /\s?xxx\s/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
- Last code unit = 'x'
- Subject length lower bound = 4
- /\sxxx\s/I,utf,tables=2
- Capture group count = 0
- Options: utf
- Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
- Last code unit = 'x'
- Subject length lower bound = 5
- AB\x{85}xxx\x{a0}XYZ
- 0: \x{85}xxx\x{a0}
- AB\x{a0}xxx\x{85}XYZ
- 0: \x{a0}xxx\x{85}
- /\S \S/I,utf,tables=2
- Capture group count = 0
- Options: utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
- \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
- \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
- D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
- i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
- \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
- \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
- \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
- \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
- Last code unit = ' '
- Subject length lower bound = 3
- \x{a2} \x{84}
- 0: \x{a2} \x{84}
- A Z
- 0: A Z
- /a+/utf
- a\x{123}aa\=offset=1
- 0: aa
- a\x{123}aa\=offset=3
- 0: aa
- a\x{123}aa\=offset=4
- 0: a
- \= Expect bad offset value
- a\x{123}aa\=offset=6
- Failed: error -33: bad offset value
- \= Expect bad UTF-8 offset
- a\x{123}aa\=offset=2
- Error -36 (bad UTF-8 offset)
- \= Expect no match
- a\x{123}aa\=offset=5
- No match
- /\x{1234}+/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xe1
- Subject length lower bound = 1
- /\x{1234}+?/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xe1
- Subject length lower bound = 1
- /\x{1234}++/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xe1
- Subject length lower bound = 1
- /\x{1234}{2}/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xe1
- Subject length lower bound = 2
- /[^\x{c4}]/IB,utf
- ------------------------------------------------------------------
- Bra
- [^\x{c4}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Subject length lower bound = 1
- /X+\x{200}/IB,utf
- ------------------------------------------------------------------
- Bra
- X++
- \x{200}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = 'X'
- Last code unit = \x80
- Subject length lower bound = 2
- /\R/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
- Subject length lower bound = 1
- /\777/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{1ff}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = \xc7
- Last code unit = \xbf
- Subject length lower bound = 1
- /\w+\x{C4}/B,utf
- ------------------------------------------------------------------
- Bra
- \w++
- \x{c4}
- Ket
- End
- ------------------------------------------------------------------
- a\x{C4}\x{C4}
- 0: a\x{c4}
- /\w+\x{C4}/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- \w+
- \x{c4}
- Ket
- End
- ------------------------------------------------------------------
- a\x{C4}\x{C4}
- 0: a\x{c4}\x{c4}
- /\W+\x{C4}/B,utf
- ------------------------------------------------------------------
- Bra
- \W+
- \x{c4}
- Ket
- End
- ------------------------------------------------------------------
- !\x{C4}
- 0: !\x{c4}
- /\W+\x{C4}/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- \W++
- \x{c4}
- Ket
- End
- ------------------------------------------------------------------
- !\x{C4}
- 0: !\x{c4}
- /\W+\x{A1}/B,utf
- ------------------------------------------------------------------
- Bra
- \W+
- \x{a1}
- Ket
- End
- ------------------------------------------------------------------
- !\x{A1}
- 0: !\x{a1}
- /\W+\x{A1}/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- \W+
- \x{a1}
- Ket
- End
- ------------------------------------------------------------------
- !\x{A1}
- 0: !\x{a1}
- /X\s+\x{A0}/B,utf
- ------------------------------------------------------------------
- Bra
- X
- \s++
- \x{a0}
- Ket
- End
- ------------------------------------------------------------------
- X\x20\x{A0}\x{A0}
- 0: X \x{a0}
- /X\s+\x{A0}/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- X
- \s+
- \x{a0}
- Ket
- End
- ------------------------------------------------------------------
- X\x20\x{A0}\x{A0}
- 0: X \x{a0}\x{a0}
- /\S+\x{A0}/B,utf
- ------------------------------------------------------------------
- Bra
- \S+
- \x{a0}
- Ket
- End
- ------------------------------------------------------------------
- X\x{A0}\x{A0}
- 0: X\x{a0}\x{a0}
- /\S+\x{A0}/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- \S++
- \x{a0}
- Ket
- End
- ------------------------------------------------------------------
- X\x{A0}\x{A0}
- 0: X\x{a0}
- /\x{a0}+\s!/B,utf
- ------------------------------------------------------------------
- Bra
- \x{a0}++
- \s
- !
- Ket
- End
- ------------------------------------------------------------------
- \x{a0}\x20!
- 0: \x{a0} !
- /\x{a0}+\s!/B,utf,tables=2
- ------------------------------------------------------------------
- Bra
- \x{a0}+
- \s
- !
- Ket
- End
- ------------------------------------------------------------------
- \x{a0}\x20!
- 0: \x{a0} !
- /A/utf
- \x{ff000041}
- ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
- \x{7f000041}
- Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
- /(*UTF8)abc/never_utf
- Failed: error 174 at offset 7: using UTF is disabled by the application
- /abc/utf,never_utf
- Failed: error 174 at offset 0: using UTF is disabled by the application
- /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
- ------------------------------------------------------------------
- Bra
- /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- First code unit = 'A' (caseless)
- Subject length lower bound = 5
- /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
- ------------------------------------------------------------------
- Bra
- A\x{391}\x{10427}\x{ff3a}\x{1fb0}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = 'A'
- Last code unit = \xb0
- Subject length lower bound = 5
- /AB\x{1fb0}/IB,utf
- ------------------------------------------------------------------
- Bra
- AB\x{1fb0}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- First code unit = 'A'
- Last code unit = \xb0
- Subject length lower bound = 3
- /AB\x{1fb0}/IBi,utf
- ------------------------------------------------------------------
- Bra
- /i AB\x{1fb0}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- First code unit = 'A' (caseless)
- Last code unit = 'B' (caseless)
- Subject length lower bound = 3
- /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xd0 \xd1
- Subject length lower bound = 17
- \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
- 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
- \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
- 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
- /[ⱥ]/Bi,utf
- ------------------------------------------------------------------
- Bra
- /i \x{2c65}
- Ket
- End
- ------------------------------------------------------------------
- /[^ⱥ]/Bi,utf
- ------------------------------------------------------------------
- Bra
- /i [^\x{2c65}]
- Ket
- End
- ------------------------------------------------------------------
- /\h/I
- Capture group count = 0
- Starting code units: \x09 \x20 \xa0
- Subject length lower bound = 1
- /\v/I
- Capture group count = 0
- Starting code units: \x0a \x0b \x0c \x0d \x85
- Subject length lower bound = 1
- /\R/I
- Capture group count = 0
- Starting code units: \x0a \x0b \x0c \x0d \x85
- Subject length lower bound = 1
- /[[:blank:]]/B,ucp
- ------------------------------------------------------------------
- Bra
- [\x09 \xa0]
- Ket
- End
- ------------------------------------------------------------------
- /\x{212a}+/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: K k \xe2
- Subject length lower bound = 1
- KKkk\x{212a}
- 0: KKkk\x{212a}
- /s+/Ii,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: S s \xc5
- Subject length lower bound = 1
- SSss\x{17f}
- 0: SSss\x{17f}
- /\x{100}*A/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*+
- A
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: A \xc4
- Last code unit = 'A'
- Subject length lower bound = 1
- A
- 0: A
- /\x{100}*\d(?R)/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*+
- \d
- Recurse
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
- Subject length lower bound = 1
- /[Z\x{100}]/IB,utf
- ------------------------------------------------------------------
- Bra
- [Z\x{100}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: Z \xc4
- Subject length lower bound = 1
- Z\x{100}
- 0: Z
- \x{100}
- 0: \x{100}
- \x{100}Z
- 0: \x{100}
- /[z-\x{100}]/IB,utf
- ------------------------------------------------------------------
- Bra
- [z-\xff\x{100}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4
- Subject length lower bound = 1
- /[z\Qa-d]Ā\E]/IB,utf
- ------------------------------------------------------------------
- Bra
- [\-\]adz\x{100}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: - ] a d z \xc4
- Subject length lower bound = 1
- \x{100}
- 0: \x{100}
- Ā
- 0: \x{100}
- /[ab\x{100}]abc(xyz(?1))/IB,utf
- ------------------------------------------------------------------
- Bra
- [ab\x{100}]
- abc
- CBra 1
- xyz
- Recurse
- Ket
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 1
- Options: utf
- Starting code units: a b \xc4
- Last code unit = 'z'
- Subject length lower bound = 7
- /\x{100}*\s/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*+
- \s
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4
- Subject length lower bound = 1
- /\x{100}*\d/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*+
- \d
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
- Subject length lower bound = 1
- /\x{100}*\w/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*+
- \w
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
- Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
- \xc4
- Subject length lower bound = 1
- /\x{100}*\D/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*
- \D
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
- \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
- \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
- ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
- d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2
- \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
- \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
- \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
- \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
- \xff
- Subject length lower bound = 1
- /\x{100}*\S/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*
- \S
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
- \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
- \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
- D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
- i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
- \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
- \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
- \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
- \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
- Subject length lower bound = 1
- /\x{100}*\W/IB,utf
- ------------------------------------------------------------------
- Bra
- \x{100}*
- \W
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
- \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
- \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
- ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
- \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
- \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7
- \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6
- \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
- Subject length lower bound = 1
- /[\x{105}-\x{109}]/IBi,utf
- ------------------------------------------------------------------
- Bra
- [\x{104}-\x{109}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xc4
- Subject length lower bound = 1
- \x{104}
- 0: \x{104}
- \x{105}
- 0: \x{105}
- \x{109}
- 0: \x{109}
- \= Expect no match
- \x{100}
- No match
- \x{10a}
- No match
-
- /[z-\x{100}]/IBi,utf
- ------------------------------------------------------------------
- Bra
- [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
- Subject length lower bound = 1
- Z
- 0: Z
- z
- 0: z
- \x{39c}
- 0: \x{39c}
- \x{178}
- 0: \x{178}
- |
- 0: |
- \x{80}
- 0: \x{80}
- \x{ff}
- 0: \x{ff}
- \x{100}
- 0: \x{100}
- \x{101}
- 0: \x{101}
- \= Expect no match
- \x{102}
- No match
- Y
- No match
- y
- No match
- /[z-\x{100}]/IBi,utf
- ------------------------------------------------------------------
- Bra
- [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
- Subject length lower bound = 1
- /\x{3a3}B/IBi,utf
- ------------------------------------------------------------------
- Bra
- clist 03a3 03c2 03c3
- /i B
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless utf
- Starting code units: \xce \xcf
- Last code unit = 'B' (caseless)
- Subject length lower bound = 2
- /abc/utf,replace=Ã
- abc
- Failed: error -3: UTF-8 error: 1 byte missing at end
- /(?<=(a)(?-1))x/I,utf
- Capture group count = 1
- Max lookbehind = 2
- Options: utf
- First code unit = 'x'
- Subject length lower bound = 1
- a\x80zx\=offset=3
- Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
- /[\W\p{Any}]/B
- ------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{Any}]
- Ket
- End
- ------------------------------------------------------------------
- abc
- 0: a
- 123
- 0: 1
- /[\W\pL]/B
- ------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{L}]
- Ket
- End
- ------------------------------------------------------------------
- abc
- 0: a
- \= Expect no match
- 123
- No match
- /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
- Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
- /[\s[:^ascii:]]/B,ucp
- ------------------------------------------------------------------
- Bra
- [\x80-\xff\p{Xsp}]
- Ket
- End
- ------------------------------------------------------------------
- # A special extra option allows escaped surrogate code points in 8-bit mode,
- # but subjects containing them must not be UTF-checked.
- /\x{d800}/I,utf,allow_surrogate_escapes
- Capture group count = 0
- Options: utf
- Extra options: allow_surrogate_escapes
- First code unit = \xed
- Last code unit = \x80
- Subject length lower bound = 1
- \x{d800}\=no_utf_check
- 0: \x{d800}
- /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
- \x{dfff}\x{df01}\=no_utf_check
- 0: \x{dfff}\x{df01}
-
- # This has different starting code units in 8-bit mode.
- /^[^ab]/IB,utf
- ------------------------------------------------------------------
- Bra
- ^
- [\x00-`c-\xff] (neg)
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Compile options: utf
- Overall options: anchored utf
- Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
- \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
- \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
- 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
- Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
- \xfe \xff
- Subject length lower bound = 1
- c
- 0: c
- \x{ff}
- 0: \x{ff}
- \x{100}
- 0: \x{100}
- \= Expect no match
- aaa
- No match
-
- # Offsets are different in 8-bit mode.
- /(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
- 1(2) Old 6 6 "" New 6 8 "<>"
- 2(2) Old 13 13 "" New 15 17 "<>"
- 3(2) Old 13 16 "def" New 17 22 "<def>"
- 4(2) Old 22 22 "" New 28 30 "<>"
- 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
-
- # Check name length with non-ASCII characters
- /(?'ABáC678901234567890123456789012'...)/utf
- /(?'ABáC6789012345678901234567890123'...)/utf
- Failed: error 148 at offset 36: subpattern name is too long (maximum 32 code units)
- /(?'ABZC6789012345678901234567890123'...)/utf
- /(?(n/utf
- Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
- /(?(á/utf
- Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
- # Invalid UTF-8 tests
- /.../g,match_invalid_utf
- abcd\x80wxzy\x80pqrs
- 0: abc
- 0: wxz
- 0: pqr
- abcd\x{80}wxzy\x80pqrs
- 0: abc
- 0: d\x{80}w
- 0: xzy
- 0: pqr
- /abc/match_invalid_utf
- ab\x80ab\=ph
- Partial match: ab
- \= Expect no match
- ab\x80cdef\=ph
- No match
- /.a/match_invalid_utf
- ab\=ph
- Partial match: b
- ab\=ps
- Partial match: b
- b\xf0\x91\x88b\=ph
- Partial match: b
- b\xf0\x91\x88b\=ps
- Partial match: b
- b\xf0\x91\x88\xb4a
- 0: \x{11234}a
- \= Expect no match
- b\x80\=ph
- No match
- b\x80\=ps
- No match
- b\xf0\x91\x88\=ph
- No match
- b\xf0\x91\x88\=ps
- No match
- /.a$/match_invalid_utf
- ab\=ph
- Partial match: b
- ab\=ps
- Partial match: b
- \= Expect no match
- b\xf0\x91\x98\=ph
- No match
- b\xf0\x91\x98\=ps
- No match
- /ab$/match_invalid_utf
- ab\x80cdeab
- 0: ab
- \= Expect no match
- ab\x80cde
- No match
- /.../g,match_invalid_utf
- abcd\x{80}wxzy\x80pqrs
- 0: abc
- 0: d\x{80}w
- 0: xzy
- 0: pqr
- /(?<=x)../g,match_invalid_utf
- abcd\x{80}wxzy\x80pqrs
- 0: zy
- abcd\x{80}wxzy\x80xpqrs
- 0: zy
- 0: pq
-
- /X$/match_invalid_utf
- \= Expect no match
- X\xc4
- No match
-
- /(?<=..)X/match_invalid_utf,aftertext
- AB\x80AQXYZ
- 0: X
- 0+ YZ
- AB\x80AQXYZ\=offset=5
- 0: X
- 0+ YZ
- AB\x80\x80AXYZXC\=offset=5
- 0: X
- 0+ C
- \= Expect no match
- AB\x80XYZ
- No match
- AB\x80XYZ\=offset=3
- No match
- AB\xfeXYZ
- No match
- AB\xffXYZ\=offset=3
- No match
- AB\x80AXYZ
- No match
- AB\x80AXYZ\=offset=4
- No match
- AB\x80\x80AXYZ\=offset=5
- No match
- /.../match_invalid_utf
- AB\xc4CCC
- 0: CCC
- \= Expect no match
- A\x{d800}B
- No match
- A\x{110000}B
- No match
- A\xc4B
- No match
- /\bX/match_invalid_utf
- A\x80X
- 0: X
- /\BX/match_invalid_utf
- \= Expect no match
- A\x80X
- No match
-
- /(?<=...)X/match_invalid_utf
- AAA\x80BBBXYZ
- 0: X
- \= Expect no match
- AAA\x80BXYZ
- No match
- AAA\x80BBXYZ
- No match
- # -------------------------------------
- /(*UTF)(?=\x{123})/I
- Capture group count = 0
- May match empty string
- Compile options: <none>
- Overall options: utf
- First code unit = \xc4
- Last code unit = \xa3
- Subject length lower bound = 1
- /[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xc3
- Last code unit = 'X'
- Subject length lower bound = 3
- /[󿾟,]/BI,utf
- ------------------------------------------------------------------
- Bra
- [,\x{fff9f}]
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: utf
- Starting code units: , \xf3
- Subject length lower bound = 1
- /[\x{fff4}-\x{ffff8}]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xef \xf0 \xf1 \xf2 \xf3
- Subject length lower bound = 1
- /[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xef \xf0 \xf1 \xf2 \xf4
- Subject length lower bound = 1
- /[\xff\x{ffff}]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xc3 \xef
- Subject length lower bound = 1
- /[\xff\x{ff}]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xc3
- Subject length lower bound = 1
- abc\x{ff}def
- 0: \x{ff}
- /[\xff\x{ff}]/I
- Capture group count = 0
- First code unit = \xff
- Subject length lower bound = 1
- abc\x{ff}def
- 0: \xff
- /[Ss]/I
- Capture group count = 0
- First code unit = 'S' (caseless)
- Subject length lower bound = 1
- /[Ss]/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: S s
- Subject length lower bound = 1
- /(?:\x{ff}|\x{3000})/I,utf
- Capture group count = 0
- Options: utf
- Starting code units: \xc3 \xe3
- Subject length lower bound = 1
- /x/utf
- abxyz
- 0: x
- \x80\=startchar
- Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
- abc\x80\=startchar
- Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
- abc\x80\=startchar,offset=3
- Error -36 (bad UTF-8 offset)
- /\x{c1}+\x{e1}/iIB,ucp
- ------------------------------------------------------------------
- Bra
- /i \x{c1}+
- /i \x{e1}
- Ket
- End
- ------------------------------------------------------------------
- Capture group count = 0
- Options: caseless ucp
- First code unit = \xc1 (caseless)
- Last code unit = \xe1 (caseless)
- Subject length lower bound = 2
- \x{c1}\x{c1}\x{c1}
- 0: \xc1\xc1\xc1
- \x{e1}\x{e1}\x{e1}
- 0: \xe1\xe1\xe1
- /a|\x{c1}/iI,ucp
- Capture group count = 0
- Options: caseless ucp
- Starting code units: A a \xc1 \xe1
- Subject length lower bound = 1
- \x{e1}xxx
- 0: \xe1
- /a|\x{c1}/iI,utf
- Capture group count = 0
- Options: caseless utf
- Starting code units: A a \xc3
- Subject length lower bound = 1
- \x{e1}xxx
- 0: \x{e1}
- /\x{c1}|\x{e1}/iI,ucp
- Capture group count = 0
- Options: caseless ucp
- First code unit = \xc1 (caseless)
- Subject length lower bound = 1
- /X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
- X\x{e1}Y
- 1: >\xc1<
- /X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
- X\x{c1}Y
- 1: >\xe1<
- # Without UTF or UCP, characters > 127 have only one case in the default locale.
- /X(\x{e1})Y/replace=>\U$1<,substitute_extended
- X\x{e1}Y
- 1: >\xe1<
- /A/utf,match_invalid_utf,caseless
- \xe5A
- 0: A
- /\bch\b/utf,match_invalid_utf
- qchq\=ph
- Partial match:
- qchq\=ps
- Partial match:
- /line1\nbreak/firstline,utf,match_invalid_utf
- line1\nbreak
- 0: line1\x{0a}break
- line0\nline1\nbreak
- No match
- /A\z/utf,match_invalid_utf
- A\x80\x42\n
- No match
- # End of testinput10
|