瀏覽代碼

Merge branch 'master' into nextversion

Roberto Ierusalimschy 1 年之前
父節點
當前提交
c815c2f0eb
共有 22 個文件被更改,包括 134 次插入88 次删除
  1. 2 2
      lapi.c
  2. 25 3
      ldebug.c
  3. 1 0
      ldebug.h
  4. 10 6
      lgc.c
  5. 1 1
      lobject.c
  6. 8 10
      lobject.h
  7. 6 6
      lparser.c
  8. 1 1
      lstate.c
  9. 1 1
      lstate.h
  10. 6 5
      lstring.c
  11. 1 1
      lundump.c
  12. 1 2
      lundump.h
  13. 13 17
      lvm.c
  14. 4 0
      manual/manual.of
  15. 1 1
      testes/calls.lua
  16. 3 3
      testes/db.lua
  17. 4 4
      testes/errors.lua
  18. 4 4
      testes/files.lua
  19. 36 20
      testes/pm.lua
  20. 1 1
      testes/sort.lua
  21. 3 0
      testes/strings.lua
  22. 2 0
      testes/utf8.lua

+ 2 - 2
lapi.c

@@ -417,9 +417,9 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) {
     o = index2value(L, idx);  /* previous call may reallocate the stack */
   }
   if (len != NULL)
-    *len = vslen(o);
+    *len = tsslen(tsvalue(o));
   lua_unlock(L);
-  return svalue(o);
+  return getstr(tsvalue(o));
 }
 
 

+ 25 - 3
ldebug.c

@@ -426,7 +426,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg,
 */
 static void kname (const Proto *p, int c, const char **name) {
   TValue *kvalue = &p->k[c];
-  *name = (ttisstring(kvalue)) ? svalue(kvalue) : "?";
+  *name = (ttisstring(kvalue)) ? getstr(tsvalue(kvalue)) : "?";
 }
 
 
@@ -569,7 +569,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg,
         int b = (op == OP_LOADK) ? GETARG_Bx(i)
                                  : GETARG_Ax(p->code[pc + 1]);
         if (ttisstring(&p->k[b])) {
-          *name = svalue(&p->k[b]);
+          *name = getstr(tsvalue(&p->k[b]));
           return "constant";
         }
         break;
@@ -627,7 +627,7 @@ static const char *funcnamefromcode (lua_State *L, const Proto *p,
     default:
       return NULL;  /* cannot find a reasonable name */
   }
-  *name = getstr(G(L)->tmname[tm]) + 2;
+  *name = getshrstr(G(L)->tmname[tm]) + 2;
   return "metamethod";
 }
 
@@ -865,6 +865,28 @@ static int changedline (const Proto *p, int oldpc, int newpc) {
 }
 
 
+/*
+** Traces Lua calls. If code is running the first instruction of a function,
+** and function is not vararg, and it is not coming from a yield,
+** calls 'luaD_hookcall'. (Vararg functions will call 'luaD_hookcall'
+** after adjusting their variable arguments; otherwise, they could call
+** a line/count hook before the call hook. Functions coming from
+** a yield already called 'luaD_hookcall' before yielding.)
+*/
+int luaG_tracecall (lua_State *L) {
+  CallInfo *ci = L->ci;
+  Proto *p = ci_func(ci)->p;
+  ci->u.l.trap = 1;  /* ensure hooks will be checked */
+  if (ci->u.l.savedpc == p->code) {  /* first instruction (not resuming)? */
+    if (p->is_vararg)
+      return 0;  /* hooks will start at VARARGPREP instruction */
+    else if (!(ci->callstatus & CIST_HOOKYIELD))  /* not yielded? */
+      luaD_hookcall(L, ci);  /* check 'call' hook */
+  }
+  return 1;  /* keep 'trap' on */
+}
+
+
 /*
 ** Traces the execution of a Lua function. Called before the execution
 ** of each opcode, when debug is on. 'L->oldpc' stores the last

+ 1 - 0
ldebug.h

@@ -58,6 +58,7 @@ LUAI_FUNC const char *luaG_addinfo (lua_State *L, const char *msg,
                                                   TString *src, int line);
 LUAI_FUNC l_noret luaG_errormsg (lua_State *L);
 LUAI_FUNC int luaG_traceexec (lua_State *L, const Instruction *pc);
+LUAI_FUNC int luaG_tracecall (lua_State *L);
 
 
 #endif

+ 10 - 6
lgc.c

@@ -533,10 +533,12 @@ static void traversestrongtable (global_State *g, Table *h) {
 static void traversetable (global_State *g, Table *h) {
   const char *weakkey, *weakvalue;
   const TValue *mode = gfasttm(g, h->metatable, TM_MODE);
+  TString *smode;
   markobjectN(g, h->metatable);
-  if (mode && ttisstring(mode) &&  /* is there a weak mode? */
-      (cast_void(weakkey = strchr(svalue(mode), 'k')),
-       cast_void(weakvalue = strchr(svalue(mode), 'v')),
+  if (mode && ttisshrstring(mode) &&  /* is there a weak mode? */
+      (cast_void(smode = tsvalue(mode)),
+       cast_void(weakkey = strchr(getshrstr(smode), 'k')),
+       cast_void(weakvalue = strchr(getshrstr(smode), 'v')),
        (weakkey || weakvalue))) {  /* is really weak? */
     if (!weakkey)  /* strong keys? */
       traverseweakvalue(g, h);
@@ -624,7 +626,9 @@ static void traversethread (global_State *g, lua_State *th) {
   for (uv = th->openupval; uv != NULL; uv = uv->u.open.next)
     markobject(g, uv);  /* open upvalues cannot be collected */
   if (g->gcstate == GCSatomic) {  /* final traversal? */
-    for (; o < th->stack_last.p + EXTRA_STACK; o++)
+    if (!g->gcemergency)
+      luaD_shrinkstack(th); /* do not change stack in emergency cycle */
+    for (o = th->top.p; o < th->stack_last.p + EXTRA_STACK; o++)
       setnilvalue(s2v(o));  /* clear dead stack slice */
     /* 'remarkupvals' may have removed thread from 'twups' list */
     if (!isintwups(th) && th->openupval != NULL) {
@@ -632,8 +636,6 @@ static void traversethread (global_State *g, lua_State *th) {
       g->twups = th;
     }
   }
-  else if (!g->gcemergency)
-    luaD_shrinkstack(th); /* do not change stack in emergency cycle */
 }
 
 
@@ -1644,6 +1646,8 @@ static void fullinc (lua_State *L, global_State *g) {
     entersweep(L); /* sweep everything to turn them back to white */
   /* finish any pending sweep phase to start a new cycle */
   luaC_runtilstate(L, bitmask(GCSpause));
+  luaC_runtilstate(L, bitmask(GCSpropagate));  /* start new cycle */
+  g->gcstate = GCSenteratomic;  /* go straight to atomic phase ??? */
   luaC_runtilstate(L, bitmask(GCScallfin));  /* run up to finalizers */
   /* 'marked' must be correct after a full GC cycle */
   lua_assert(g->marked == gettotalobjs(g));

+ 1 - 1
lobject.c

@@ -542,7 +542,7 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
   addstr2buff(&buff, fmt, strlen(fmt));  /* rest of 'fmt' */
   clearbuff(&buff);  /* empty buffer into the stack */
   lua_assert(buff.pushed == 1);
-  return svalue(s2v(L->top.p - 1));
+  return getstr(tsvalue(s2v(L->top.p - 1)));
 }
 
 

+ 8 - 10
lobject.h

@@ -386,7 +386,7 @@ typedef struct GCObject {
 typedef struct TString {
   CommonHeader;
   lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
-  lu_byte shrlen;  /* length for short strings */
+  lu_byte shrlen;  /* length for short strings, 0xFF for long strings */
   unsigned int hash;
   union {
     size_t lnglen;  /* length for long strings */
@@ -398,19 +398,17 @@ typedef struct TString {
 
 
 /*
-** Get the actual string (array of bytes) from a 'TString'.
+** Get the actual string (array of bytes) from a 'TString'. (Generic
+** version and specialized versions for long and short strings.)
 */
-#define getstr(ts)  ((ts)->contents)
+#define getstr(ts)	((ts)->contents)
+#define getlngstr(ts)	check_exp((ts)->shrlen == 0xFF, (ts)->contents)
+#define getshrstr(ts)	check_exp((ts)->shrlen != 0xFF, (ts)->contents)
 
 
-/* get the actual string (array of bytes) from a Lua value */
-#define svalue(o)       getstr(tsvalue(o))
-
 /* get string length from 'TString *s' */
-#define tsslen(s)	((s)->tt == LUA_VSHRSTR ? (s)->shrlen : (s)->u.lnglen)
-
-/* get string length from 'TValue *o' */
-#define vslen(o)	tsslen(tsvalue(o))
+#define tsslen(s)  \
+	((s)->shrlen != 0xFF ? (s)->shrlen : (s)->u.lnglen)
 
 /* }================================================================== */
 

+ 6 - 6
lparser.c

@@ -1030,10 +1030,11 @@ static int explist (LexState *ls, expdesc *v) {
 }
 
 
-static void funcargs (LexState *ls, expdesc *f, int line) {
+static void funcargs (LexState *ls, expdesc *f) {
   FuncState *fs = ls->fs;
   expdesc args;
   int base, nparams;
+  int line = ls->linenumber;
   switch (ls->t.token) {
     case '(': {  /* funcargs -> '(' [ explist ] ')' */
       luaX_next(ls);
@@ -1071,8 +1072,8 @@ static void funcargs (LexState *ls, expdesc *f, int line) {
   }
   init_exp(f, VCALL, luaK_codeABC(fs, OP_CALL, base, nparams+1, 2));
   luaK_fixline(fs, line);
-  fs->freereg = base+1;  /* call remove function and arguments and leaves
-                            (unless changed) one result */
+  fs->freereg = base+1;  /* call removes function and arguments and leaves
+                            one result (unless changed later) */
 }
 
 
@@ -1111,7 +1112,6 @@ static void suffixedexp (LexState *ls, expdesc *v) {
   /* suffixedexp ->
        primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */
   FuncState *fs = ls->fs;
-  int line = ls->linenumber;
   primaryexp(ls, v);
   for (;;) {
     switch (ls->t.token) {
@@ -1131,12 +1131,12 @@ static void suffixedexp (LexState *ls, expdesc *v) {
         luaX_next(ls);
         codename(ls, &key);
         luaK_self(fs, v, &key);
-        funcargs(ls, v, line);
+        funcargs(ls, v);
         break;
       }
       case '(': case TK_STRING: case '{': {  /* funcargs */
         luaK_exp2nextreg(fs, v);
-        funcargs(ls, v, line);
+        funcargs(ls, v);
         break;
       }
       default: return;

+ 1 - 1
lstate.c

@@ -396,7 +396,7 @@ void luaE_warning (lua_State *L, const char *msg, int tocont) {
 void luaE_warnerror (lua_State *L, const char *where) {
   TValue *errobj = s2v(L->top.p - 1);  /* error object */
   const char *msg = (ttisstring(errobj))
-                  ? svalue(errobj)
+                  ? getstr(tsvalue(errobj))
                   : "error object is not a string";
   /* produce warning "error in %s (%s)" (where, msg) */
   luaE_warning(L, "error in ", 1);

+ 1 - 1
lstate.h

@@ -182,7 +182,7 @@ struct CallInfo {
   union {
     struct {  /* only for Lua functions */
       const Instruction *savedpc;
-      volatile l_signalT trap;
+      volatile l_signalT trap;  /* function is tracing lines/counts */
       int nextraargs;  /* # of extra arguments in vararg functions */
     } l;
     struct {  /* only for C functions */

+ 6 - 5
lstring.c

@@ -36,7 +36,7 @@ int luaS_eqlngstr (TString *a, TString *b) {
   lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
   return (a == b) ||  /* same instance or... */
     ((len == b->u.lnglen) &&  /* equal length and ... */
-     (memcmp(getstr(a), getstr(b), len) == 0));  /* equal contents */
+     (memcmp(getlngstr(a), getlngstr(b), len) == 0));  /* equal contents */
 }
 
 
@@ -52,7 +52,7 @@ unsigned int luaS_hashlongstr (TString *ts) {
   lua_assert(ts->tt == LUA_VLNGSTR);
   if (ts->extra == 0) {  /* no hash? */
     size_t len = ts->u.lnglen;
-    ts->hash = luaS_hash(getstr(ts), len, ts->hash);
+    ts->hash = luaS_hash(getlngstr(ts), len, ts->hash);
     ts->extra = 1;  /* now it has its hash */
   }
   return ts->hash;
@@ -157,6 +157,7 @@ static TString *createstrobj (lua_State *L, size_t l, int tag, unsigned int h) {
 TString *luaS_createlngstrobj (lua_State *L, size_t l) {
   TString *ts = createstrobj(L, l, LUA_VLNGSTR, G(L)->seed);
   ts->u.lnglen = l;
+  ts->shrlen = 0xFF;  /* signals that it is a long string */
   return ts;
 }
 
@@ -193,7 +194,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
   TString **list = &tb->hash[lmod(h, tb->size)];
   lua_assert(str != NULL);  /* otherwise 'memcmp'/'memcpy' are undefined */
   for (ts = *list; ts != NULL; ts = ts->u.hnext) {
-    if (l == ts->shrlen && (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
+    if (l == ts->shrlen && (memcmp(str, getshrstr(ts), l * sizeof(char)) == 0)) {
       /* found! */
       if (isdead(g, ts))  /* dead (but not collected yet)? */
         changewhite(ts);  /* resurrect it */
@@ -206,7 +207,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
     list = &tb->hash[lmod(h, tb->size)];  /* rehash with new size */
   }
   ts = createstrobj(L, l, LUA_VSHRSTR, h);
-  memcpy(getstr(ts), str, l * sizeof(char));
+  memcpy(getshrstr(ts), str, l * sizeof(char));
   ts->shrlen = cast_byte(l);
   ts->u.hnext = *list;
   *list = ts;
@@ -226,7 +227,7 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
     if (l_unlikely(l >= (MAX_SIZE - sizeof(TString))/sizeof(char)))
       luaM_toobig(L);
     ts = luaS_createlngstrobj(L, l);
-    memcpy(getstr(ts), str, l * sizeof(char));
+    memcpy(getlngstr(ts), str, l * sizeof(char));
     return ts;
   }
 }

+ 1 - 1
lundump.c

@@ -131,7 +131,7 @@ static TString *loadStringN (LoadState *S, Proto *p) {
     ts = luaS_createlngstrobj(L, size);  /* create string */
     setsvalue2s(L, L->top.p, ts);  /* anchor it ('loadVector' can GC) */
     luaD_inctop(L);
-    loadVector(S, getstr(ts), size);  /* load directly in final place */
+    loadVector(S, getlngstr(ts), size);  /* load directly in final place */
     L->top.p--;  /* pop string */
   }
   luaC_objbarrier(L, p, ts);

+ 1 - 2
lundump.h

@@ -21,8 +21,7 @@
 /*
 ** Encode major-minor version in one byte, one nibble for each
 */
-#define MYINT(s)	(s[0]-'0')  /* assume one-digit numerals */
-#define LUAC_VERSION	(MYINT(LUA_VERSION_MAJOR)*16+MYINT(LUA_VERSION_MINOR))
+#define LUAC_VERSION	(LUA_VERSION_MAJOR_N*16+LUA_VERSION_MINOR_N)
 
 #define LUAC_FORMAT	0	/* this is the official format */
 

+ 13 - 17
lvm.c

@@ -91,8 +91,10 @@ static int l_strton (const TValue *obj, TValue *result) {
   lua_assert(obj != result);
   if (!cvt2num(obj))  /* is object not a string? */
     return 0;
-  else
-    return (luaO_str2num(svalue(obj), result) == vslen(obj) + 1);
+  else {
+  TString *st = tsvalue(obj);
+    return (luaO_str2num(getstr(st), result) == tsslen(st) + 1);
+  }
 }
 
 
@@ -627,8 +629,9 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
 static void copy2buff (StkId top, int n, char *buff) {
   size_t tl = 0;  /* size already copied */
   do {
-    size_t l = vslen(s2v(top - n));  /* length of string being copied */
-    memcpy(buff + tl, svalue(s2v(top - n)), l * sizeof(char));
+    TString *st = tsvalue(s2v(top - n));
+    size_t l = tsslen(st);  /* length of string being copied */
+    memcpy(buff + tl, getstr(st), l * sizeof(char));
     tl += l;
   } while (--n > 0);
 }
@@ -654,11 +657,11 @@ void luaV_concat (lua_State *L, int total) {
     }
     else {
       /* at least two non-empty string values; get as many as possible */
-      size_t tl = vslen(s2v(top - 1));
+      size_t tl = tsslen(tsvalue(s2v(top - 1)));
       TString *ts;
       /* collect total length and number of strings */
       for (n = 1; n < total && tostring(L, s2v(top - n - 1)); n++) {
-        size_t l = vslen(s2v(top - n - 1));
+        size_t l = tsslen(tsvalue(s2v(top - n - 1)));
         if (l_unlikely(l >= (MAX_SIZE/sizeof(char)) - tl)) {
           L->top.p = top - total;  /* pop strings to avoid wasting stack */
           luaG_runerror(L, "string length overflow");
@@ -672,7 +675,7 @@ void luaV_concat (lua_State *L, int total) {
       }
       else {  /* long string; copy strings directly to final result */
         ts = luaS_createlngstrobj(L, tl);
-        copy2buff(top, n, getstr(ts));
+        copy2buff(top, n, getlngstr(ts));
       }
       setsvalue2s(L, top - n, ts);  /* create result */
     }
@@ -1158,18 +1161,11 @@ void luaV_execute (lua_State *L, CallInfo *ci) {
  startfunc:
   trap = L->hookmask;
  returning:  /* trap already set */
-  cl = clLvalue(s2v(ci->func.p));
+  cl = ci_func(ci);
   k = cl->p->k;
   pc = ci->u.l.savedpc;
-  if (l_unlikely(trap)) {
-    if (pc == cl->p->code) {  /* first instruction (not resuming)? */
-      if (cl->p->is_vararg)
-        trap = 0;  /* hooks will start after VARARGPREP instruction */
-      else  /* check 'call' hook */
-        luaD_hookcall(L, ci);
-    }
-    ci->u.l.trap = 1;  /* assume trap is on, for now */
-  }
+  if (l_unlikely(trap))
+    trap = luaG_tracecall(L);
   base = ci->func.p + 1;
   /* main loop of interpreter */
   for (;;) {

+ 4 - 0
manual/manual.of

@@ -9033,6 +9033,10 @@ Lua does not consult any environment variables.
 In particular,
 the values of @Lid{package.path} and @Lid{package.cpath}
 are set with the default paths defined in @id{luaconf.h}.
+To signal to the libraries that this option is on,
+the stand-alone interpreter sets the field
+@idx{"LUA_NOENV"} in the registry to a true value.
+Other libraries may consult this field for the same purpose.
 
 The options @T{-e}, @T{-l}, and @T{-W} are handled in
 the order they appear.

+ 1 - 1
testes/calls.lua

@@ -342,7 +342,7 @@ do   -- another bug (in 5.4.0)
 end
 
 
-do   -- another bug (since 5.2)
+if not _port then   -- another bug (since 5.2)
   -- corrupted binary dump: list of upvalue names is larger than number
   -- of upvalues, overflowing the array of upvalues.
   local code =

+ 3 - 3
testes/db.lua

@@ -345,7 +345,7 @@ function f(a,b)
   local _, y = debug.getlocal(1, 2)
   assert(x == a and y == b)
   assert(debug.setlocal(2, 3, "pera") == "AA".."AA")
-  assert(debug.setlocal(2, 4, "maçã") == "B")
+  assert(debug.setlocal(2, 4, "manga") == "B")
   x = debug.getinfo(2)
   assert(x.func == g and x.what == "Lua" and x.name == 'g' and
          x.nups == 2 and string.find(x.source, "^@.*db%.lua$"))
@@ -373,9 +373,9 @@ function g (...)
   local arg = {...}
   do local a,b,c; a=math.sin(40); end
   local feijao
-  local AAAA,B = "xuxu", "mamão"
+  local AAAA,B = "xuxu", "abacate"
   f(AAAA,B)
-  assert(AAAA == "pera" and B == "maçã")
+  assert(AAAA == "pera" and B == "manga")
   do
      local B = 13
      local x,y = debug.getlocal(1,5)

+ 4 - 4
testes/errors.lua

@@ -392,19 +392,19 @@ lineerror("a\n=\n-\n\nprint\n;", 3)
 
 lineerror([[
 a
-(
+(     -- <<
 23)
-]], 1)
+]], 2)
 
 lineerror([[
 local a = {x = 13}
 a
 .
 x
-(
+(     -- <<
 23
 )
-]], 2)
+]], 5)
 
 lineerror([[
 local a = {x = 13}

+ 4 - 4
testes/files.lua

@@ -92,8 +92,8 @@ assert(io.output():seek("end") == string.len("alo joao"))
 
 assert(io.output():seek("set") == 0)
 
-assert(io.write('"álo"', "{a}\n", "second line\n", "third line \n"))
-assert(io.write('çfourth_line'))
+assert(io.write('"alo"', "{a}\n", "second line\n", "third line \n"))
+assert(io.write('Xfourth_line'))
 io.output(io.stdout)
 collectgarbage()  -- file should be closed by GC
 assert(io.input() == io.stdin and rawequal(io.output(), io.stdout))
@@ -300,14 +300,14 @@ do  -- test error returns
 end
 checkerr("invalid format", io.read, "x")
 assert(io.read(0) == "")   -- not eof
-assert(io.read(5, 'l') == '"álo"')
+assert(io.read(5, 'l') == '"alo"')
 assert(io.read(0) == "")
 assert(io.read() == "second line")
 local x = io.input():seek()
 assert(io.read() == "third line ")
 assert(io.input():seek("set", x))
 assert(io.read('L') == "third line \n")
-assert(io.read(1) == "ç")
+assert(io.read(1) == "X")
 assert(io.read(string.len"fourth_line") == "fourth_line")
 assert(io.input():seek("cur", -string.len"fourth_line"))
 assert(io.read() == "fourth_line")

+ 36 - 20
testes/pm.lua

@@ -1,6 +1,9 @@
 -- $Id: testes/pm.lua $
 -- See Copyright Notice in file all.lua
 
+-- UTF-8 file
+
+
 print('testing pattern matching')
 
 local function checkerror (msg, f, ...)
@@ -50,6 +53,19 @@ assert(f('aLo_ALO', '%a*') == 'aLo')
 
 assert(f("  \n\r*&\n\r   xuxu  \n\n", "%g%g%g+") == "xuxu")
 
+
+-- Adapt a pattern to UTF-8
+local function PU (p)
+  -- break '?' into each individual byte of a character
+  p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c)
+    return string.gsub(c, ".", "%0?")
+  end)
+  -- change '.' to utf-8 character patterns
+  p = string.gsub(p, "%.", utf8.charpattern)
+  return p
+end
+
+
 assert(f('aaab', 'a*') == 'aaa');
 assert(f('aaa', '^.*$') == 'aaa');
 assert(f('aaa', 'b*') == '');
@@ -73,16 +89,16 @@ assert(f('aaa', '^.-$') == 'aaa')
 assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
 assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
 assert(f('alo xo', '.o$') == 'xo')
-assert(f(' \n isto é assim', '%S%S*') == 'isto')
-assert(f(' \n isto é assim', '%S*$') == 'assim')
-assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
+assert(f(' \n isto é assim', '%S%S*') == 'isto')
+assert(f(' \n isto é assim', '%S*$') == 'assim')
+assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
 assert(f('um caracter ? extra', '[^%sa-z]') == '?')
 assert(f('', 'a?') == '')
-assert(f('á', 'á?') == 'á')
-assert(f('ábl', 'á?b?l?') == 'ábl')
-assert(f('  ábl', 'á?b?l?') == '')
+assert(f('á', PU'á?') == 'á')
+assert(f('ábl', PU'á?b?l?') == 'ábl')
+assert(f('  ábl', PU'á?b?l?') == '')
 assert(f('aa', '^aa?a?a') == 'aa')
-assert(f(']]]áb', '[^]]') == 'á')
+assert(f(']]]áb', '[^]]+') == 'áb')
 assert(f("0alo alo", "%x*") == "0a")
 assert(f("alo alo", "%C+") == "alo alo")
 print('+')
@@ -136,28 +152,28 @@ assert(string.match("alo xyzK", "(%w+)K") == "xyz")
 assert(string.match("254 K", "(%d*)K") == "")
 assert(string.match("alo ", "(%w*)$") == "")
 assert(not string.match("alo ", "(%w+)$"))
-assert(string.find("(álo)", "%(á") == 1)
-local a, b, c, d, e = string.match("âlo alo", "^(((.).).* (%w*))$")
-assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
+assert(string.find("(álo)", "%(á") == 1)
+local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$")
+assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
 a, b, c, d  = string.match('0123456789', '(.+(.?)())')
 assert(a == '0123456789' and b == '' and c == 11 and d == nil)
 print('+')
 
-assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
-assert(string.gsub('alo úlo  ', ' +$', '') == 'alo úlo')  -- trim
+assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
+assert(string.gsub('alo úlo  ', ' +$', '') == 'alo úlo')  -- trim
 assert(string.gsub('  alo alo  ', '^%s*(.-)%s*$', '%1') == 'alo alo')  -- double trim
 assert(string.gsub('alo  alo  \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
-local t = "abç d"
-a, b = string.gsub(t, '(.)', '%1@')
-assert('@'..a == string.gsub(t, '', '@') and b == 5)
-a, b = string.gsub('abçd', '(.)', '%0@', 2)
-assert(a == 'a@b@çd' and b == 2)
+local t = "abç d"
+a, b = string.gsub(t, PU'(.)', '%1@')
+assert(a == "a@b@ç@ @d@" and b == 5)
+a, b = string.gsub('abçd', PU'(.)', '%0@', 2)
+assert(a == 'a@b@çd' and b == 2)
 assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
 assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
               "xyz=abc-abc=xyz")
 assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
 assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
-assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
+assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
 assert(string.gsub('', '^', 'r') == 'r')
 assert(string.gsub('', '$', 'r') == 'r')
 print('+')
@@ -188,8 +204,8 @@ do
 end
 
 function f(a,b) return string.gsub(a,'.',b) end
-assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
-            "trocar tudo em bbbbb é alalalalalal")
+assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
+            "trocar tudo em bbbbb é alalalalalal")
 
 local function dostring (s) return load(s, "")() or "" end
 assert(string.gsub("alo $a='x'$ novamente $return a$",

+ 1 - 1
testes/sort.lua

@@ -289,7 +289,7 @@ timesort(a, limit,  function(x,y) return nil end, "equal")
 
 for i,v in pairs(a) do assert(v == false) end
 
-AA = {"álo", "\0first :-)", "alo", "then this one", "45", "and a new"}
+AA = {"\xE1lo", "\0first :-)", "alo", "then this one", "45", "and a new"}
 table.sort(AA)
 check(AA)
 

+ 3 - 0
testes/strings.lua

@@ -1,6 +1,9 @@
 -- $Id: testes/strings.lua $
 -- See Copyright Notice in file all.lua
 
+-- ISO Latin encoding
+
+
 print('testing strings and string library')
 
 local maxi <const> = math.maxinteger

+ 2 - 0
testes/utf8.lua

@@ -1,6 +1,8 @@
 -- $Id: testes/utf8.lua $
 -- See Copyright Notice in file all.lua
 
+-- UTF-8 file
+
 print "testing UTF-8 library"
 
 local utf8 = require'utf8'