testinput10 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. # This set of tests is for UTF-8 support and Unicode property support, with
  2. # relevance only for the 8-bit library.
  3. # The next 5 patterns have UTF-8 errors
  4. /[Ã]/utf
  5. /Ã/utf
  6. /ÃÃÃxxx/utf
  7. /‚‚‚‚‚‚‚Ã/utf
  8. /‚‚‚‚‚‚‚Ã/match_invalid_utf
  9. # Now test subjects
  10. /badutf/utf
  11. \= Expect UTF-8 errors
  12. X\xdf
  13. XX\xef
  14. XXX\xef\x80
  15. X\xf7
  16. XX\xf7\x80
  17. XXX\xf7\x80\x80
  18. \xfb
  19. \xfb\x80
  20. \xfb\x80\x80
  21. \xfb\x80\x80\x80
  22. \xfd
  23. \xfd\x80
  24. \xfd\x80\x80
  25. \xfd\x80\x80\x80
  26. \xfd\x80\x80\x80\x80
  27. \xdf\x7f
  28. \xef\x7f\x80
  29. \xef\x80\x7f
  30. \xf7\x7f\x80\x80
  31. \xf7\x80\x7f\x80
  32. \xf7\x80\x80\x7f
  33. \xfb\x7f\x80\x80\x80
  34. \xfb\x80\x7f\x80\x80
  35. \xfb\x80\x80\x7f\x80
  36. \xfb\x80\x80\x80\x7f
  37. \xfd\x7f\x80\x80\x80\x80
  38. \xfd\x80\x7f\x80\x80\x80
  39. \xfd\x80\x80\x7f\x80\x80
  40. \xfd\x80\x80\x80\x7f\x80
  41. \xfd\x80\x80\x80\x80\x7f
  42. \xed\xa0\x80
  43. \xc0\x8f
  44. \xe0\x80\x8f
  45. \xf0\x80\x80\x8f
  46. \xf8\x80\x80\x80\x8f
  47. \xfc\x80\x80\x80\x80\x8f
  48. \x80
  49. \xfe
  50. \xff
  51. /badutf/utf
  52. \= Expect UTF-8 errors
  53. XX\xfb\x80\x80\x80\x80
  54. XX\xfd\x80\x80\x80\x80\x80
  55. XX\xf7\xbf\xbf\xbf
  56. /shortutf/utf
  57. \= Expect UTF-8 errors
  58. XX\xdf\=ph
  59. XX\xef\=ph
  60. XX\xef\x80\=ph
  61. \xf7\=ph
  62. \xf7\x80\=ph
  63. \xf7\x80\x80\=ph
  64. \xfb\=ph
  65. \xfb\x80\=ph
  66. \xfb\x80\x80\=ph
  67. \xfb\x80\x80\x80\=ph
  68. \xfd\=ph
  69. \xfd\x80\=ph
  70. \xfd\x80\x80\=ph
  71. \xfd\x80\x80\x80\=ph
  72. \xfd\x80\x80\x80\x80\=ph
  73. /anything/utf
  74. \= Expect UTF-8 errors
  75. X\xc0\x80
  76. XX\xc1\x8f
  77. XXX\xe0\x9f\x80
  78. \xf0\x8f\x80\x80
  79. \xf8\x87\x80\x80\x80
  80. \xfc\x83\x80\x80\x80\x80
  81. \xfe\x80\x80\x80\x80\x80
  82. \xff\x80\x80\x80\x80\x80
  83. \xf8\x88\x80\x80\x80
  84. \xf9\x87\x80\x80\x80
  85. \xfc\x84\x80\x80\x80\x80
  86. \xfd\x83\x80\x80\x80\x80
  87. \= Expect no match
  88. \xc3\x8f
  89. \xe0\xaf\x80
  90. \xe1\x80\x80
  91. \xf0\x9f\x80\x80
  92. \xf1\x8f\x80\x80
  93. \xf8\x88\x80\x80\x80\=no_utf_check
  94. \xf9\x87\x80\x80\x80\=no_utf_check
  95. \xfc\x84\x80\x80\x80\x80\=no_utf_check
  96. \xfd\x83\x80\x80\x80\x80\=no_utf_check
  97. # Similar tests with offsets
  98. /badutf/utf
  99. \= Expect UTF-8 errors
  100. X\xdfabcd
  101. X\xdfabcd\=offset=1
  102. \= Expect no match
  103. X\xdfabcd\=offset=2
  104. /(?<=x)badutf/utf
  105. \= Expect UTF-8 errors
  106. X\xdfabcd
  107. X\xdfabcd\=offset=1
  108. X\xdfabcd\=offset=2
  109. X\xdfabcd\xdf\=offset=3
  110. \= Expect no match
  111. X\xdfabcd\=offset=3
  112. /(?<=xx)badutf/utf
  113. \= Expect UTF-8 errors
  114. X\xdfabcd
  115. X\xdfabcd\=offset=1
  116. X\xdfabcd\=offset=2
  117. X\xdfabcd\=offset=3
  118. /(?<=xxxx)badutf/utf
  119. \= Expect UTF-8 errors
  120. X\xdfabcd
  121. X\xdfabcd\=offset=1
  122. X\xdfabcd\=offset=2
  123. X\xdfabcd\=offset=3
  124. X\xdfabc\xdf\=offset=6
  125. X\xdfabc\xdf\=offset=7
  126. \= Expect no match
  127. X\xdfabcd\=offset=6
  128. /\x{100}/IB,utf
  129. /\x{1000}/IB,utf
  130. /\x{10000}/IB,utf
  131. /\x{100000}/IB,utf
  132. /\x{10ffff}/IB,utf
  133. /[\x{ff}]/IB,utf
  134. /[\x{100}]/IB,utf
  135. /\x80/IB,utf
  136. /\xff/IB,utf
  137. /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
  138. \x{D55c}\x{ad6d}\x{C5B4}
  139. /\x{65e5}\x{672c}\x{8a9e}/IB,utf
  140. \x{65e5}\x{672c}\x{8a9e}
  141. /\x{80}/IB,utf
  142. /\x{084}/IB,utf
  143. /\x{104}/IB,utf
  144. /\x{861}/IB,utf
  145. /\x{212ab}/IB,utf
  146. /[^ab\xC0-\xF0]/IB,utf
  147. \x{f1}
  148. \x{bf}
  149. \x{100}
  150. \x{1000}
  151. \= Expect no match
  152. \x{c0}
  153. \x{f0}
  154. /Ā{3,4}/IB,utf
  155. \x{100}\x{100}\x{100}\x{100\x{100}
  156. /(\x{100}+|x)/IB,utf
  157. /(\x{100}*a|x)/IB,utf
  158. /(\x{100}{0,2}a|x)/IB,utf
  159. /(\x{100}{1,2}a|x)/IB,utf
  160. /\x{100}/IB,utf
  161. /a\x{100}\x{101}*/IB,utf
  162. /a\x{100}\x{101}+/IB,utf
  163. /[^\x{c4}]/IB
  164. /[\x{100}]/IB,utf
  165. \x{100}
  166. Z\x{100}
  167. \x{100}Z
  168. /[\xff]/IB,utf
  169. >\x{ff}<
  170. /[^\xff]/IB,utf
  171. /\x{100}abc(xyz(?1))/IB,utf
  172. /\777/I,utf
  173. \x{1ff}
  174. \777
  175. /\x{100}+\x{200}/IB,utf
  176. /\x{100}+X/IB,utf
  177. /^[\QĀ\E-\QŐ\E/B,utf
  178. # This tests the stricter UTF-8 check according to RFC 3629.
  179. /X/utf
  180. \= Expect UTF-8 errors
  181. \x{d800}
  182. \x{da00}
  183. \x{dfff}
  184. \x{110000}
  185. \x{2000000}
  186. \x{7fffffff}
  187. \= Expect no match
  188. \x{d800}\=no_utf_check
  189. \x{da00}\=no_utf_check
  190. \x{dfff}\=no_utf_check
  191. \x{110000}\=no_utf_check
  192. \x{2000000}\=no_utf_check
  193. \x{7fffffff}\=no_utf_check
  194. /(*UTF8)\x{1234}/
  195. abcd\x{1234}pqr
  196. /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
  197. /\h/I,utf
  198. ABC\x{09}
  199. ABC\x{20}
  200. ABC\x{a0}
  201. ABC\x{1680}
  202. ABC\x{180e}
  203. ABC\x{2000}
  204. ABC\x{202f}
  205. ABC\x{205f}
  206. ABC\x{3000}
  207. /\v/I,utf
  208. ABC\x{0a}
  209. ABC\x{0b}
  210. ABC\x{0c}
  211. ABC\x{0d}
  212. ABC\x{85}
  213. ABC\x{2028}
  214. /\h*A/I,utf
  215. CDBABC
  216. /\v+A/I,utf
  217. /\s?xxx\s/I,utf
  218. /\sxxx\s/I,utf,tables=2
  219. AB\x{85}xxx\x{a0}XYZ
  220. AB\x{a0}xxx\x{85}XYZ
  221. /\S \S/I,utf,tables=2
  222. \x{a2} \x{84}
  223. A Z
  224. /a+/utf
  225. a\x{123}aa\=offset=1
  226. a\x{123}aa\=offset=3
  227. a\x{123}aa\=offset=4
  228. \= Expect bad offset value
  229. a\x{123}aa\=offset=6
  230. \= Expect bad UTF-8 offset
  231. a\x{123}aa\=offset=2
  232. \= Expect no match
  233. a\x{123}aa\=offset=5
  234. /\x{1234}+/Ii,utf
  235. /\x{1234}+?/Ii,utf
  236. /\x{1234}++/Ii,utf
  237. /\x{1234}{2}/Ii,utf
  238. /[^\x{c4}]/IB,utf
  239. /X+\x{200}/IB,utf
  240. /\R/I,utf
  241. /\777/IB,utf
  242. /\w+\x{C4}/B,utf
  243. a\x{C4}\x{C4}
  244. /\w+\x{C4}/B,utf,tables=2
  245. a\x{C4}\x{C4}
  246. /\W+\x{C4}/B,utf
  247. !\x{C4}
  248. /\W+\x{C4}/B,utf,tables=2
  249. !\x{C4}
  250. /\W+\x{A1}/B,utf
  251. !\x{A1}
  252. /\W+\x{A1}/B,utf,tables=2
  253. !\x{A1}
  254. /X\s+\x{A0}/B,utf
  255. X\x20\x{A0}\x{A0}
  256. /X\s+\x{A0}/B,utf,tables=2
  257. X\x20\x{A0}\x{A0}
  258. /\S+\x{A0}/B,utf
  259. X\x{A0}\x{A0}
  260. /\S+\x{A0}/B,utf,tables=2
  261. X\x{A0}\x{A0}
  262. /\x{a0}+\s!/B,utf
  263. \x{a0}\x20!
  264. /\x{a0}+\s!/B,utf,tables=2
  265. \x{a0}\x20!
  266. /A/utf
  267. \x{ff000041}
  268. \x{7f000041}
  269. /(*UTF8)abc/never_utf
  270. /abc/utf,never_utf
  271. /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
  272. /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
  273. /AB\x{1fb0}/IB,utf
  274. /AB\x{1fb0}/IBi,utf
  275. /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
  276. \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
  277. \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
  278. /[ⱥ]/Bi,utf
  279. /[^ⱥ]/Bi,utf
  280. /\h/I
  281. /\v/I
  282. /\R/I
  283. /[[:blank:]]/B,ucp
  284. /\x{212a}+/Ii,utf
  285. KKkk\x{212a}
  286. /s+/Ii,utf
  287. SSss\x{17f}
  288. /\x{100}*A/IB,utf
  289. A
  290. /\x{100}*\d(?R)/IB,utf
  291. /[Z\x{100}]/IB,utf
  292. Z\x{100}
  293. \x{100}
  294. \x{100}Z
  295. /[z-\x{100}]/IB,utf
  296. /[z\Qa-d]Ā\E]/IB,utf
  297. \x{100}
  298. Ā
  299. /[ab\x{100}]abc(xyz(?1))/IB,utf
  300. /\x{100}*\s/IB,utf
  301. /\x{100}*\d/IB,utf
  302. /\x{100}*\w/IB,utf
  303. /\x{100}*\D/IB,utf
  304. /\x{100}*\S/IB,utf
  305. /\x{100}*\W/IB,utf
  306. /[\x{105}-\x{109}]/IBi,utf
  307. \x{104}
  308. \x{105}
  309. \x{109}
  310. \= Expect no match
  311. \x{100}
  312. \x{10a}
  313. /[z-\x{100}]/IBi,utf
  314. Z
  315. z
  316. \x{39c}
  317. \x{178}
  318. |
  319. \x{80}
  320. \x{ff}
  321. \x{100}
  322. \x{101}
  323. \= Expect no match
  324. \x{102}
  325. Y
  326. y
  327. /[z-\x{100}]/IBi,utf
  328. /\x{3a3}B/IBi,utf
  329. /abc/utf,replace=Ã
  330. abc
  331. /(?<=(a)(?-1))x/I,utf
  332. a\x80zx\=offset=3
  333. /[\W\p{Any}]/B
  334. abc
  335. 123
  336. /[\W\pL]/B
  337. abc
  338. \= Expect no match
  339. 123
  340. /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
  341. /[\s[:^ascii:]]/B,ucp
  342. # A special extra option allows escaped surrogate code points in 8-bit mode,
  343. # but subjects containing them must not be UTF-checked.
  344. /\x{d800}/I,utf,allow_surrogate_escapes
  345. \x{d800}\=no_utf_check
  346. /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
  347. \x{dfff}\x{df01}\=no_utf_check
  348. # This has different starting code units in 8-bit mode.
  349. /^[^ab]/IB,utf
  350. c
  351. \x{ff}
  352. \x{100}
  353. \= Expect no match
  354. aaa
  355. # Offsets are different in 8-bit mode.
  356. /(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
  357. 123abcáyzabcdef789abcሴqr
  358. # Check name length with non-ASCII characters
  359. /(?'ABáC678901234567890123456789012'...)/utf
  360. /(?'ABáC6789012345678901234567890123'...)/utf
  361. /(?'ABZC6789012345678901234567890123'...)/utf
  362. /(?(n/utf
  363. /(?(á/utf
  364. # Invalid UTF-8 tests
  365. /.../g,match_invalid_utf
  366. abcd\x80wxzy\x80pqrs
  367. abcd\x{80}wxzy\x80pqrs
  368. /abc/match_invalid_utf
  369. ab\x80ab\=ph
  370. \= Expect no match
  371. ab\x80cdef\=ph
  372. /ab$/match_invalid_utf
  373. ab\x80cdeab
  374. \= Expect no match
  375. ab\x80cde
  376. /.../g,match_invalid_utf
  377. abcd\x{80}wxzy\x80pqrs
  378. /(?<=x)../g,match_invalid_utf
  379. abcd\x{80}wxzy\x80pqrs
  380. abcd\x{80}wxzy\x80xpqrs
  381. /X$/match_invalid_utf
  382. \= Expect no match
  383. X\xc4
  384. /(?<=..)X/match_invalid_utf,aftertext
  385. AB\x80AQXYZ
  386. AB\x80AQXYZ\=offset=5
  387. AB\x80\x80AXYZXC\=offset=5
  388. \= Expect no match
  389. AB\x80XYZ
  390. AB\x80XYZ\=offset=3
  391. AB\xfeXYZ
  392. AB\xffXYZ\=offset=3
  393. AB\x80AXYZ
  394. AB\x80AXYZ\=offset=4
  395. AB\x80\x80AXYZ\=offset=5
  396. /.../match_invalid_utf
  397. AB\xc4CCC
  398. \= Expect no match
  399. A\x{d800}B
  400. A\x{110000}B
  401. A\xc4B
  402. /\bX/match_invalid_utf
  403. A\x80X
  404. /\BX/match_invalid_utf
  405. \= Expect no match
  406. A\x80X
  407. /(?<=...)X/match_invalid_utf
  408. AAA\x80BBBXYZ
  409. \= Expect no match
  410. AAA\x80BXYZ
  411. AAA\x80BBXYZ
  412. # -------------------------------------
  413. /(*UTF)(?=\x{123})/I
  414. /[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
  415. /[󿾟,]/BI,utf
  416. /[\x{fff4}-\x{ffff8}]/I,utf
  417. /[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
  418. /[\xff\x{ffff}]/I,utf
  419. /[\xff\x{ff}]/I,utf
  420. abc\x{ff}def
  421. /[\xff\x{ff}]/I
  422. abc\x{ff}def
  423. /[Ss]/I
  424. /[Ss]/I,utf
  425. /(?:\x{ff}|\x{3000})/I,utf
  426. /x/utf
  427. abxyz
  428. \x80\=startchar
  429. abc\x80\=startchar
  430. abc\x80\=startchar,offset=3
  431. /\x{c1}+\x{e1}/iIB,ucp
  432. \x{c1}\x{c1}\x{c1}
  433. \x{e1}\x{e1}\x{e1}
  434. /a|\x{c1}/iI,ucp
  435. \x{e1}xxx
  436. /a|\x{c1}/iI,utf
  437. \x{e1}xxx
  438. /\x{c1}|\x{e1}/iI,ucp
  439. /X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
  440. X\x{e1}Y
  441. /X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
  442. X\x{c1}Y
  443. # Without UTF or UCP characters > 127 have only one case in the default locale.
  444. /X(\x{e1})Y/replace=>\U$1<,substitute_extended
  445. X\x{e1}Y
  446. /A/utf,match_invalid_utf,caseless
  447. \xe5A
  448. /\bch\b/utf,match_invalid_utf
  449. qchq\=ph
  450. qchq\=ps
  451. # End of testinput10