pm.lua 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. -- $Id: testes/pm.lua $
  2. -- See Copyright Notice in file lua.h
  3. -- UTF-8 file
  4. print('testing pattern matching')
  -- NOTE(review): 'global <const> *' looks like the Lua 5.5 global-declaration
  -- syntax that treats every global name as read-only for the rest of the
  -- chunk; confirm against the interpreter version this suite targets.
  5. global <const> *
  -- Call f(...) in protected mode and require that it fails with an error
  -- message matching the Lua pattern 'msg'.
  local function checkerror (msg, f, ...)
    local ok, message = pcall(f, ...)
    -- the call must not succeed...
    assert(not ok)
    -- ...and its error message must match the expected pattern
    assert(string.find(message, msg))
  end
  -- Return the part of 's' matched by pattern 'p'; return nothing when the
  -- pattern does not match.
  local function f (s, p)
    local first, last = string.find(s, p)
    if not first then return end
    return string.sub(s, first, last)
  end
  -- Basic string.find / string.match checks: empty patterns, subjects and
  -- patterns with embedded '\0' bytes, the 'init' argument, and the '*', '+'
  -- and '?' repetition operators.
  14. local a,b = string.find('', '') -- empty patterns are tricky
  15. assert(a == 1 and b == 0);
  16. a,b = string.find('alo', '')
  17. assert(a == 1 and b == 0)
  18. a,b = string.find('a\0o a\0o a\0o', 'a', 1) -- first position
  19. assert(a == 1 and b == 1)
  20. a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2) -- starts in the middle
  21. assert(a == 5 and b == 7)
  22. a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9) -- starts at the last occurrence
  23. assert(a == 9 and b == 11)
  24. a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2); -- finds at the end
  25. assert(a == 9 and b == 11);
  26. a,b = string.find('a\0a\0a\0a\0\0ab', 'b') -- last position
  27. assert(a == 11 and b == 11)
  28. assert(not string.find('a\0a\0a\0a\0\0ab', 'b\0')) -- check ending
  29. assert(not string.find('', '\0'))
  30. assert(string.find('alo123alo', '12') == 4)
  31. assert(not string.find('alo123alo', '^12'))
  -- greedy '*' / '+' and optional '?' combined with '.'
  32. assert(string.match("aaab", ".*b") == "aaab")
  33. assert(string.match("aaa", ".*a") == "aaa")
  34. assert(string.match("b", ".*b") == "b")
  35. assert(string.match("aaab", ".+b") == "aaab")
  36. assert(string.match("aaa", ".+a") == "aaa")
  37. assert(not string.match("b", ".+b"))
  38. assert(string.match("aaab", ".?b") == "ab")
  39. assert(string.match("aaa", ".?a") == "aa")
  40. assert(string.match("b", ".?b") == "b")
  -- character classes, via the helper 'f' that returns the matched text
  41. assert(f('aloALO', '%l*') == 'alo')
  42. assert(f('aLo_ALO', '%a*') == 'aLo')
  43. assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu")
  -- Adapt a pattern written with UTF-8 characters so that '?' and '.'
  -- apply to whole characters instead of single bytes.
  local function PU (p)
    local charpat = utf8.charpattern

    -- Append '?' to every byte of one character
    -- (for instance, "á" becomes "\195?\161?").
    local function spread (c)
      local bytes = string.gsub(c, ".", "%0?")
      return bytes
    end

    -- first distribute each '?' over the bytes of the character before it
    local optional = string.gsub(p, "(" .. charpat .. ")%?", spread)
    -- then turn every '.' into the pattern for one UTF-8 character
    local adapted = string.gsub(optional, "%.", charpat)
    return adapted
  end
  -- Repetition operators ('*', '+', '-', '?'), the '$' anchor and character
  -- sets, all checked through the helper 'f' (which returns the matched text
  -- or nothing).
  55. assert(f('aaab', 'a*') == 'aaa');
  56. assert(f('aaa', '^.*$') == 'aaa');
  57. assert(f('aaa', 'b*') == '');
  58. assert(f('aaa', 'ab*a') == 'aa')
  59. assert(f('aba', 'ab*a') == 'aba')
  60. assert(f('aaab', 'a+') == 'aaa')
  61. assert(f('aaa', '^.+$') == 'aaa')
  62. assert(not f('aaa', 'b+'))
  63. assert(not f('aaa', 'ab+a'))
  64. assert(f('aba', 'ab+a') == 'aba')
  -- '$' anchors only at the very end of the pattern; elsewhere (or escaped
  -- as '%$') it matches a literal '$'
  65. assert(f('a$a', '.$') == 'a')
  66. assert(f('a$a', '.%$') == 'a$')
  67. assert(f('a$a', '.$.') == 'a$a')
  68. assert(not f('a$a', '$$'))
  69. assert(not f('a$b', 'a$'))
  70. assert(f('a$a', '$') == '')
  71. assert(f('', 'b*') == '')
  72. assert(not f('aaa', 'bb*'))
  -- '-' takes the shortest match, so an unanchored 'a-' matches nothing
  73. assert(f('aaab', 'a-') == '')
  74. assert(f('aaa', '^.-$') == 'aaa')
  75. assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
  76. assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
  77. assert(f('alo xo', '.o$') == 'xo')
  78. assert(f(' \n isto é assim', '%S%S*') == 'isto')
  79. assert(f(' \n isto é assim', '%S*$') == 'assim')
  80. assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
  81. assert(f('um caracter ? extra', '[^%sa-z]') == '?')
  82. assert(f('', 'a?') == '')
  -- patterns with non-ASCII characters go through PU so '?' covers the
  -- whole multi-byte character
  83. assert(f('á', PU'á?') == 'á')
  84. assert(f('ábl', PU'á?b?l?') == 'ábl')
  85. assert(f(' ábl', PU'á?b?l?') == '')
  86. assert(f('aa', '^aa?a?a') == 'aa')
  87. assert(f(']]]áb', '[^]]+') == 'áb')
  88. assert(f("0alo alo", "%x*") == "0a")
  89. assert(f("alo alo", "%C+") == "alo alo")
  90. print('+')
  -- Return the text of 's' covered by the whole match of pattern 'p'.
  -- The pattern is wrapped in two position captures, so every existing
  -- back-reference has to be shifted up by one.
  local function f1 (s, p)
    -- "%N" becomes "%(N+1)" to make room for the leading position capture
    local function bump (digit)
      return "%" .. (tonumber(digit) + 1)
    end
    local shifted = string.gsub(p, "%%([0-9])", bump)
    -- position capture at the start (after a leading '^', if present)
    local opened = string.gsub(shifted, "^(^?)", "%1()", 1)
    -- position capture at the end (before a trailing '$', if present)
    local closed = string.gsub(opened, "($?)$", "()%1", 1)
    local caps = {string.match(s, closed)}
    local first = caps[1]
    local last = caps[#caps] - 1
    return string.sub(s, first, last)
  end
  -- Back-references ('%1', '%2'): f1 returns the text covered by the whole
  -- match, so each expected value is the full matched span.
  100. assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o")
  101. assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3')
  102. assert(f1('=======', '^(=*)=%1$') == '=======')
  -- ten '=' cannot be split as X .. '=' .. X, so there is no match
  103. assert(not string.match('==========', '^([=]*)=%1$'))
  -- Return the values i, i+1, ..., j as multiple results
  -- (no results at all unless i <= j).
  local function range (i, j)
    if not (i <= j) then return end
    local following = i + 1
    return i, range(following, j)
  end
  -- 'abc' holds every byte value from 0 to 255 once, in increasing order;
  -- it is built from two string.char calls of 128 arguments each.
  109. local abc = string.char(range(0, 127)) .. string.char(range(128, 255));
  110. assert(string.len(abc) == 256)
  -- Return the concatenation of everything in 'abc' that pattern 'p'
  -- matches, in order of occurrence.
  local function strset (p)
    local acc = ''
    local function collect (c)
      acc = acc .. c
    end
    -- gsub is used only to visit the matches; collect returns nothing,
    -- so no replacement takes place
    string.gsub(abc, p, collect)
    return acc
  end;
  -- Character sets and classes: strset(p) collects every byte of 'abc'
  -- that 'p' matches, so these compare the members of each set.
  116. assert(string.len(strset('[\200-\210]')) == 11)
  117. assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz")
  118. assert(strset('[a-z%d]') == strset('[%da-uu-z]'))
  -- a '-' right before the closing ']' is a literal '-'
  119. assert(strset('[a-]') == "-a")
  120. assert(strset('[^%W]') == strset('[%w]'))
  -- a ']' right after the opening '[' is a literal ']'
  121. assert(strset('[]%%]') == '%]')
  122. assert(strset('[a%-z]') == '-az')
  123. assert(strset('[%^%[%-a%]%-b]') == '-[]^ab')
  124. assert(strset('%Z') == strset('[\1-\255]'))
  125. assert(strset('.') == strset('[\1-\255%z]'))
  126. print('+');
  -- Captures returned by string.match, including empty, nested and
  -- position captures.
  127. assert(string.match("alo xyzK", "(%w+)K") == "xyz")
  128. assert(string.match("254 K", "(%d*)K") == "")
  129. assert(string.match("alo ", "(%w*)$") == "")
  130. assert(not string.match("alo ", "(%w+)$"))
  131. assert(string.find("(álo)", "%(á") == 1)
  -- nested captures come back in the order of their opening parentheses;
  -- there are only four of them, so 'e' stays nil
  132. local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$")
  133. assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
  -- '()' is a position capture: here it yields 11, one past the last byte
  134. a, b, c, d = string.match('0123456789', '(.+(.?)())')
  135. assert(a == '0123456789' and b == '' and c == 11 and d == nil)
  136. print('+')
  -- string.gsub with string replacements. Where gsub is compared directly
  -- with '==', only its first result (the new string) takes part in the
  -- comparison; the substitution count is dropped.
  137. assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
  138. assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim
  139. assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim
  140. assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
  141. local t = "abç d"
  -- here the second result (number of substitutions) is checked as well
  142. a, b = string.gsub(t, PU'(.)', '%1@')
  143. assert(a == "a@b@ç@ @d@" and b == 5)
  -- fourth argument limits the number of substitutions
  144. a, b = string.gsub('abçd', PU'(.)', '%0@', 2)
  145. assert(a == 'a@b@çd' and b == 2)
  146. assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
  147. assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
  148. "xyz=abc-abc=xyz")
  -- with no explicit capture, '%1' stands for the whole match, like '%0'
  149. assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
  150. assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
  151. assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
  152. assert(string.gsub('', '^', 'r') == 'r')
  153. assert(string.gsub('', '$', 'r') == 'r')
  154. print('+')
  -- Empty matches in gsub and gmatch: both runs below must produce the
  -- same "-a-b-c-d-" result.
  155. do -- new (5.3.3) semantics for empty matches
  156. assert(string.gsub("a b cd", " *", "-") == "-a-b-c-d-")
  157. local res = ""
  158. local sub = "a \nbc\t\td"
  159. local i = 1
  -- 'p' and 'e' are the positions before and after each run of spaces;
  -- the text between the previous run and this one is copied, then a '-'
  160. for p, e in string.gmatch(sub, "()%s*()") do
  161. res = res .. string.sub(sub, i, p - 1) .. "-"
  162. i = e
  163. end
  164. assert(res == "-a-b-c-d-")
  165. end
  -- string.gsub with a function as the replacement value.
  166. assert(string.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
  167. "um (DOIS) tres (QUATRO)")
  168. do
  -- the callback receives both captures (name, value) and stores them in
  -- the global table through rawset; it returns nothing
  169. local function setglobal (n,v) rawset(_G, n, v) end
  170. string.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", setglobal)
  171. assert(_G.a=="roberto" and _G.roberto=="a")
  -- remove the globals created above
  172. _G.a = nil; _G.roberto = nil
  173. end
  -- Replace every character of 'a' with the replacement value 'b'.
  -- Returns both results of string.gsub (new string and match count).
  -- Note: this assigns to the existing variable 'f'; it is not a new local.
  function f(a,b)
    return string.gsub(a, '.', b)
  end
  -- each "|text|repl|" group is handed to 'f' as two captures; only f's
  -- first result (the rewritten text) is used as the replacement
  175. assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
  176. "trocar tudo em bbbbb é alalalalalal")
  -- Compile the string 's' as a chunk, run it, and return its first result;
  -- a false or nil result is replaced by the empty string.
  local function dostring (s)
    local result = load(s, "")()
    if result then
      return result
    end
    return ""
  end
  -- gsub running embedded code: each "$...$" span is passed to dostring,
  -- and the span is replaced by whatever that code returns ("" if nothing).
  178. assert(string.gsub("alo $a='x'$ novamente $return a$",
  179. "$([^$]*)%$",
  180. dostring) == "alo novamente x")
  181. local x = string.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
  182. "$([^$]*)%$", dostring)
  183. assert(x == ' assim vai para ALO')
  -- both fields end up nil: the single value on the right goes to _G.a
  -- and _G.x receives the implicit nil
  184. _G.a, _G.x = nil
  185. local t = {}
  186. local s = 'a alo jose joao'
  -- position captures around each word: t[start] = word length; the
  -- callback returns nothing, so the subject is left unchanged
  187. local r = string.gsub(s, '()(%w+)()', function (a,w,b)
  188. assert(string.len(w) == b-a);
  189. t[a] = b-a;
  190. end)
  191. assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4)
  -- Tell whether the parentheses in 's' are balanced: after removing every
  -- balanced "(...)" group, no parenthesis may remain.
  local function isbalanced (s)
    local stripped = string.gsub(s, "%b()", "")
    local leftover = string.find(stripped, "[()]")
    return not leftover
  end
  -- '%b' balanced matching, gsub callbacks that keep state in a table, the
  -- substitution limit, and the error messages for bad replacement values.
  195. assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
  196. assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))
  197. assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo')
  198. local t = {"apple", "orange", "lime"; n=0}
  -- each match is replaced by the next entry of 't'
  199. assert(string.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
  200. == "apple and orange and lime")
  201. t = {n=0}
  -- the callback only records the words; it returns nothing
  202. string.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
  203. assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3)
  204. t = {n=0}
  -- with a limit of 2, the third word never reaches the callback
  205. assert(string.gsub("first second word", "%w+",
  206. function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word")
  -- NOTE(review): 'undef' is not defined in this file; presumably an
  -- undefined global that evaluates to nil -- confirm
  207. assert(t[1] == "first" and t[2] == "second" and t[3] == undef)
  -- invalid replacement values and capture indices must raise these errors
  208. checkerror("invalid replacement value %(a table%)",
  209. string.gsub, "alo", ".", {a = {}})
  210. checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2")
  211. checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a")
  212. checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a")
  213. checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x")
  -- Matching against very long subjects; skipped when '_soft' is set.
  -- NOTE(review): '_soft' is not defined in this file; presumably set by
  -- the test driver to skip heavy tests -- confirm.
  214. if not _soft then
  215. print("big strings")
  216. local a = string.rep('a', 300000)
  217. assert(string.find(a, '^a*.?$'))
  218. assert(not string.find(a, '^a*.?b$'))
  219. assert(string.find(a, '^a-.?$'))
  220. -- bug in 5.1.2
  221. a = string.rep('a', 10000) .. string.rep('b', 10000)
  -- gsub is called without a replacement argument, so it must raise an error
  222. assert(not pcall(string.gsub, a, 'b'))
  223. end
  -- Recursive nest of gsubs: reverse 's' by moving its first character
  -- behind the reversed remainder. Returns both results of string.gsub
  -- (the new string and the number of substitutions).
  local function rev (s)
    local function swap (head, tail)
      local reversed = rev(tail)
      return reversed .. head
    end
    return string.gsub(s, "(.)(.+)", swap)
  end
  -- reversing twice gives back the original string
  228. local x = "abcdef"
  229. assert(rev(rev(x)) == x)
  230. -- gsub with tables
  -- the first capture (or the whole match) is the lookup key; a missing
  -- or false entry leaves the match unchanged
  231. assert(string.gsub("alo alo", ".", {}) == "alo alo")
  232. assert(string.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo")
  233. assert(string.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo")
  234. assert(string.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo")
  -- position captures index the table by number
  235. assert(string.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo")
  -- the lookup goes through '__index', so every match gets upper-cased
  236. t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
  237. assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")
  238. -- tests for gmatch
  239. local a = 0
  -- an empty position capture matches at every position 1..6 of 'abcde'
  240. for i in string.gmatch('abcde', '()') do assert(i == a+1); a=i end
  241. assert(a==6)
  242. t = {n=0}
  243. for w in string.gmatch("first second word", "%w+") do
  244. t.n=t.n+1; t[t.n] = w
  245. end
  246. assert(t[1] == "first" and t[2] == "second" and t[3] == "word")
  -- expected start positions of the doubled characters, consumed in order
  247. t = {3, 6, 9}
  248. for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do
  249. assert(i == table.remove(t, 1))
  250. end
  251. assert(#t == 0)
  252. t = {}
  -- collect "key = value" number pairs; only pairs joined by '=' match
  253. for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do
  254. t[tonumber(i)] = tonumber(j)
  255. end
  256. a = 0
  257. for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
  258. assert(a == 3)
  -- The optional third argument of string.gmatch sets where the search
  -- starts; negative values count from the end of the subject.
  259. do -- init parameter in gmatch
  260. local s = 0
  -- starting at position 3 skips "10": 20 + 30
  261. for k in string.gmatch("10 20 30", "%d+", 3) do
  262. s = s + tonumber(k)
  263. end
  264. assert(s == 50)
  265. s = 0
  -- -4 starts at the final "1" of "21": 1 + 31
  266. for k in string.gmatch("11 21 31", "%d+", -4) do
  267. s = s + tonumber(k)
  268. end
  269. assert(s == 32)
  270. -- there is an empty string at the end of the subject
  271. s = 0
  272. for k in string.gmatch("11 21 31", "%w*", 9) do
  273. s = s + 1
  274. end
  275. assert(s == 1)
  276. -- there are no empty strings after the end of the subject
  277. s = 0
  278. for k in string.gmatch("11 21 31", "%w*", 10) do
  279. s = s + 1
  280. end
  281. assert(s == 0)
  282. end
  283. -- tests for `%f' (`frontiers')
  -- '%f[set]' matches an empty string at a position where the next
  -- character is in 'set' and the previous one is not
  284. assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
  285. assert(string.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[")
  286. assert(string.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3")
  287. assert(string.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.")
  288. assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction")
  289. assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.")
  -- the cases below depend on how the two ends of the subject are handled
  -- (the '%z' cases match at the start and one past the end)
  290. assert(string.find("a", "%f[a]") == 1)
  291. assert(string.find("a", "%f[^%z]") == 1)
  292. assert(string.find("a", "%f[^%l]") == 2)
  293. assert(string.find("aba", "%f[a%z]") == 3)
  294. assert(string.find("aba", "%f[%z]") == 4)
  295. assert(not string.find("aba", "%f[%l%z]"))
  296. assert(not string.find("aba", "%f[^%l%z]"))
  297. local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
  298. assert(i == 2 and e == 5)
  299. local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
  300. assert(k == 'alo ')
  -- expected positions of each word start, consumed in order
  301. local a = {1, 5, 9, 14, 17,}
  302. for k in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
  303. assert(table.remove(a, 1) == k)
  304. end
  305. assert(#a == 0)
  306. -- malformed patterns
  -- Check that using 'p' as a pattern raises an error whose message
  -- matches 'm' (default: "malformed").
  local function malform (p, m)
    local expected = m or "malformed"
    local ok, message = pcall(string.find, "a", p)
    -- the search itself must fail...
    assert(not ok)
    -- ...with the expected kind of message
    assert(string.find(message, expected))
  end
  -- each of these patterns must be rejected; when no second argument is
  -- given, the error message has to contain "malformed"
  312. malform("(.", "unfinished capture")
  313. malform(".)", "invalid pattern capture")
  314. malform("[a")
  315. malform("[]")
  316. malform("[^]")
  317. malform("[a%]")
  318. malform("[a%")
  319. malform("%b")
  320. malform("%ba")
  321. malform("%")
  322. malform("%f", "missing")
  323. -- \0 in patterns
  -- a '\0' byte inside a pattern is an ordinary character: usable in
  -- ranges, after '%', as a '%b' delimiter and before '?' / '+'
  324. assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2")
  325. assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0")
  326. assert(string.find("b$a", "$\0?") == 2)
  327. assert(string.find("abc\0efg", "%\0") == 4)
  328. assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1")
  329. assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0")
  330. assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
  331. -- magic char after \0
  332. assert(string.find("abc\0\0","\0.") == 4)
  333. assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
  -- When gsub makes no substitution it is expected to hand back the very
  -- same string object; identity is checked by comparing the "%p"
  -- renderings of the subject and the result.
  334. do -- test reuse of original string in gsub
  335. local s = string.rep("a", 100)
  336. local r = string.gsub(s, "b", "c") -- no match
  337. assert(string.format("%p", s) == string.format("%p", r))
  338. r = string.gsub(s, ".", {x = "y"}) -- no substitutions
  339. assert(string.format("%p", s) == string.format("%p", r))
  340. local count = 0
  -- the callback runs for every character but never supplies a replacement
  341. r = string.gsub(s, ".", function (x)
  342. assert(x == "a")
  343. count = count + 1
  344. return nil -- no substitution
  345. end)
  346. r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst.
  347. assert(count == 100)
  348. assert(string.format("%p", s) == string.format("%p", r))
  349. count = 0
  -- here every character is replaced by itself: the content is equal,
  -- but a substitution did take place
  350. r = string.gsub(s, ".", function (x)
  351. assert(x == "a")
  352. count = count + 1
  353. return x -- substitution...
  354. end)
  355. assert(count == 100)
  356. -- no reuse in this case
  357. assert(r == s and string.format("%p", s) ~= string.format("%p", r))
  358. end
  359. print('OK')