Browse Source

More control over encoding of test files

The few UTF-8 test files are marked as such in a comment, and there is only one
non-UTF-8 test file (to test non-UTF-8 sources).
Roberto Ierusalimschy 1 năm trước
mục cha
commit
f4211a5ea4
6 tập tin đã thay đổi với 49 bổ sung và 28 xóa
  1. 3 3
      testes/db.lua
  2. 4 4
      testes/files.lua
  3. 36 20
      testes/pm.lua
  4. 1 1
      testes/sort.lua
  5. 3 0
      testes/strings.lua
  6. 2 0
      testes/utf8.lua

+ 3 - 3
testes/db.lua

@@ -345,7 +345,7 @@ function f(a,b)
   local _, y = debug.getlocal(1, 2)
   assert(x == a and y == b)
   assert(debug.setlocal(2, 3, "pera") == "AA".."AA")
-  assert(debug.setlocal(2, 4, "maçã") == "B")
+  assert(debug.setlocal(2, 4, "manga") == "B")
   x = debug.getinfo(2)
   assert(x.func == g and x.what == "Lua" and x.name == 'g' and
          x.nups == 2 and string.find(x.source, "^@.*db%.lua$"))
@@ -373,9 +373,9 @@ function g (...)
   local arg = {...}
   do local a,b,c; a=math.sin(40); end
   local feijao
-  local AAAA,B = "xuxu", "mamão"
+  local AAAA,B = "xuxu", "abacate"
   f(AAAA,B)
-  assert(AAAA == "pera" and B == "maçã")
+  assert(AAAA == "pera" and B == "manga")
   do
      local B = 13
      local x,y = debug.getlocal(1,5)

+ 4 - 4
testes/files.lua

@@ -92,8 +92,8 @@ assert(io.output():seek("end") == string.len("alo joao"))
 
 assert(io.output():seek("set") == 0)
 
-assert(io.write('"álo"', "{a}\n", "second line\n", "third line \n"))
-assert(io.write('çfourth_line'))
+assert(io.write('"alo"', "{a}\n", "second line\n", "third line \n"))
+assert(io.write('Xfourth_line'))
 io.output(io.stdout)
 collectgarbage()  -- file should be closed by GC
 assert(io.input() == io.stdin and rawequal(io.output(), io.stdout))
@@ -300,14 +300,14 @@ do  -- test error returns
 end
 checkerr("invalid format", io.read, "x")
 assert(io.read(0) == "")   -- not eof
-assert(io.read(5, 'l') == '"álo"')
+assert(io.read(5, 'l') == '"alo"')
 assert(io.read(0) == "")
 assert(io.read() == "second line")
 local x = io.input():seek()
 assert(io.read() == "third line ")
 assert(io.input():seek("set", x))
 assert(io.read('L') == "third line \n")
-assert(io.read(1) == "ç")
+assert(io.read(1) == "X")
 assert(io.read(string.len"fourth_line") == "fourth_line")
 assert(io.input():seek("cur", -string.len"fourth_line"))
 assert(io.read() == "fourth_line")

+ 36 - 20
testes/pm.lua

@@ -1,6 +1,9 @@
 -- $Id: testes/pm.lua $
 -- See Copyright Notice in file all.lua
 
+-- UTF-8 file
+
+
 print('testing pattern matching')
 
 local function checkerror (msg, f, ...)
@@ -50,6 +53,19 @@ assert(f('aLo_ALO', '%a*') == 'aLo')
 
 assert(f("  \n\r*&\n\r   xuxu  \n\n", "%g%g%g+") == "xuxu")
 
+
+-- Adapt a pattern to UTF-8: rewrite '?' and '.' in a pattern so they cover multi-byte UTF-8 characters
+local function PU (p)  -- 'p' is a pattern string; the adapted pattern string is returned
+  -- break '?' into each individual byte of a character (a character followed by '?' gets '?' after every byte)
+  p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c)
+    return string.gsub(c, ".", "%0?")  -- '.' matches one byte here; '%0?' re-emits it followed by '?'
+  end)
+  -- change '.' to utf-8 character patterns (every literal '.' found in 'p' is replaced)
+  p = string.gsub(p, "%.", utf8.charpattern)
+  return p
+end
+
+
 assert(f('aaab', 'a*') == 'aaa');
 assert(f('aaa', '^.*$') == 'aaa');
 assert(f('aaa', 'b*') == '');
@@ -73,16 +89,16 @@ assert(f('aaa', '^.-$') == 'aaa')
 assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
 assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
 assert(f('alo xo', '.o$') == 'xo')
-assert(f(' \n isto é assim', '%S%S*') == 'isto')
-assert(f(' \n isto é assim', '%S*$') == 'assim')
-assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
+assert(f(' \n isto é assim', '%S%S*') == 'isto')
+assert(f(' \n isto é assim', '%S*$') == 'assim')
+assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
 assert(f('um caracter ? extra', '[^%sa-z]') == '?')
 assert(f('', 'a?') == '')
-assert(f('á', 'á?') == 'á')
-assert(f('ábl', 'á?b?l?') == 'ábl')
-assert(f('  ábl', 'á?b?l?') == '')
+assert(f('á', PU'á?') == 'á')
+assert(f('ábl', PU'á?b?l?') == 'ábl')
+assert(f('  ábl', PU'á?b?l?') == '')
 assert(f('aa', '^aa?a?a') == 'aa')
-assert(f(']]]áb', '[^]]') == 'á')
+assert(f(']]]áb', '[^]]+') == 'áb')
 assert(f("0alo alo", "%x*") == "0a")
 assert(f("alo alo", "%C+") == "alo alo")
 print('+')
@@ -136,28 +152,28 @@ assert(string.match("alo xyzK", "(%w+)K") == "xyz")
 assert(string.match("254 K", "(%d*)K") == "")
 assert(string.match("alo ", "(%w*)$") == "")
 assert(not string.match("alo ", "(%w+)$"))
-assert(string.find("(álo)", "%(á") == 1)
-local a, b, c, d, e = string.match("âlo alo", "^(((.).).* (%w*))$")
-assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
+assert(string.find("(álo)", "%(á") == 1)
+local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$")
+assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
 a, b, c, d  = string.match('0123456789', '(.+(.?)())')
 assert(a == '0123456789' and b == '' and c == 11 and d == nil)
 print('+')
 
-assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
-assert(string.gsub('alo úlo  ', ' +$', '') == 'alo úlo')  -- trim
+assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
+assert(string.gsub('alo úlo  ', ' +$', '') == 'alo úlo')  -- trim
 assert(string.gsub('  alo alo  ', '^%s*(.-)%s*$', '%1') == 'alo alo')  -- double trim
 assert(string.gsub('alo  alo  \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
-local t = "abç d"
-a, b = string.gsub(t, '(.)', '%1@')
-assert('@'..a == string.gsub(t, '', '@') and b == 5)
-a, b = string.gsub('abçd', '(.)', '%0@', 2)
-assert(a == 'a@b@çd' and b == 2)
+local t = "abç d"
+a, b = string.gsub(t, PU'(.)', '%1@')
+assert(a == "a@b@ç@ @d@" and b == 5)
+a, b = string.gsub('abçd', PU'(.)', '%0@', 2)
+assert(a == 'a@b@çd' and b == 2)
 assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
 assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
               "xyz=abc-abc=xyz")
 assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
 assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
-assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
+assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
 assert(string.gsub('', '^', 'r') == 'r')
 assert(string.gsub('', '$', 'r') == 'r')
 print('+')
@@ -188,8 +204,8 @@ do
 end
 
 function f(a,b) return string.gsub(a,'.',b) end
-assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
-            "trocar tudo em bbbbb é alalalalalal")
+assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
+            "trocar tudo em bbbbb é alalalalalal")
 
 local function dostring (s) return load(s, "")() or "" end
 assert(string.gsub("alo $a='x'$ novamente $return a$",

+ 1 - 1
testes/sort.lua

@@ -289,7 +289,7 @@ timesort(a, limit,  function(x,y) return nil end, "equal")
 
 for i,v in pairs(a) do assert(v == false) end
 
-AA = {"álo", "\0first :-)", "alo", "then this one", "45", "and a new"}
+AA = {"\xE1lo", "\0first :-)", "alo", "then this one", "45", "and a new"}
 table.sort(AA)
 check(AA)
 

+ 3 - 0
testes/strings.lua

@@ -1,6 +1,9 @@
 -- $Id: testes/strings.lua $
 -- See Copyright Notice in file all.lua
 
+-- ISO Latin encoding
+
+
 print('testing strings and string library')
 
 local maxi <const> = math.maxinteger

+ 2 - 0
testes/utf8.lua

@@ -1,6 +1,8 @@
 -- $Id: testes/utf8.lua $
 -- See Copyright Notice in file all.lua
 
+-- UTF-8 file
+
 print "testing UTF-8 library"
 
 local utf8 = require'utf8'