Browse Source

Short strings can be external, too

That complicates object equality a little (and therefore table access
for long strings), but the old behavior was somewhat weird. (Short
strings, a concept otherwise absent from the manual, could not be
external.)
Roberto Ierusalimschy 2 weeks ago
parent
commit
60b6599e83
9 changed files with 168 additions and 120 deletions
  1. 2 2
      loadlib.c
  2. 1 0
      lobject.h
  3. 20 26
      lstring.c
  4. 2 1
      lstring.h
  5. 50 32
      ltable.c
  6. 5 1
      ltests.c
  7. 64 42
      lvm.c
  8. 3 9
      manual/manual.of
  9. 21 7
      testes/attrib.lua

+ 2 - 2
loadlib.c

@@ -345,8 +345,8 @@ static void *freelib (void *ud, void *ptr, size_t osize, size_t nsize) {
 ** Create a library string that, when deallocated, will unload 'plib'
 */
 static void createlibstr (lua_State *L, void *plib) {
-  static const char dummy[] =  /* common long body for all library strings */
-    "01234567890123456789012345678901234567890123456789";
+  /* common content for all library strings */
+  static const char dummy[] = "01234567890";
   lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib);
 }
 

+ 1 - 0
lobject.h

@@ -418,6 +418,7 @@ typedef struct TString {
 
 
 #define strisshr(ts)	((ts)->shrlen >= 0)
+#define isextstr(ts)	(ttislngstring(ts) && tsvalue(ts)->shrlen != LSTRREG)
 
 
 /*

+ 20 - 26
lstring.c

@@ -39,14 +39,14 @@
 
 
 /*
-** equality for long strings
+** generic equality for strings
 */
-int luaS_eqlngstr (TString *a, TString *b) {
-  size_t len = a->u.lnglen;
-  lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
-  return (a == b) ||  /* same instance or... */
-    ((len == b->u.lnglen) &&  /* equal length and ... */
-     (memcmp(getlngstr(a), getlngstr(b), len) == 0));  /* equal contents */
+int luaS_eqstr (TString *a, TString *b) {
+  size_t len1, len2;
+  const char *s1 = getlstr(a, len1);
+  const char *s2 = getlstr(b, len2);
+  return ((len1 == len2) &&  /* equal length and ... */
+          (memcmp(s1, s2, len1) == 0));  /* equal contents */
 }
 
 
@@ -315,28 +315,9 @@ static void f_newext (lua_State *L, void *ud) {
 }
 
 
-static void f_pintern (lua_State *L, void *ud) {
-  struct NewExt *ne = cast(struct NewExt *, ud);
-  ne->ts = internshrstr(L, ne->s, ne->len);
-}
-
-
 TString *luaS_newextlstr (lua_State *L,
 	          const char *s, size_t len, lua_Alloc falloc, void *ud) {
   struct NewExt ne;
-  if (len <= LUAI_MAXSHORTLEN) {  /* short string? */
-    ne.s = s; ne.len = len;
-    if (!falloc)
-      f_pintern(L, &ne);  /* just internalize string */
-    else {
-      TStatus status = luaD_rawrunprotected(L, f_pintern, &ne);
-      (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* free external string */
-      if (status != LUA_OK)  /* memory error? */
-        luaM_error(L);  /* re-raise memory error */
-    }
-    return ne.ts;
-  }
-  /* "normal" case: long strings */
   if (!falloc) {
     ne.kind = LSTRFIX;
     f_newext(L, &ne);  /* just create header */
@@ -357,3 +338,16 @@ TString *luaS_newextlstr (lua_State *L,
 }
 
 
+/*
+** Normalize an external string: If it is short, internalize it.
+*/
+TString *luaS_normstr (lua_State *L, TString *ts) {
+  size_t len = ts->u.lnglen;
+  if (len > LUAI_MAXSHORTLEN)
+    return ts;  /* long string; keep the original */
+  else {
+    const char *str = getlngstr(ts);
+    return internshrstr(L, str, len);
+  }
+}
+

+ 2 - 1
lstring.h

@@ -56,7 +56,7 @@
 
 LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed);
 LUAI_FUNC unsigned luaS_hashlongstr (TString *ts);
-LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
+LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
 LUAI_FUNC void luaS_clearcache (global_State *g);
 LUAI_FUNC void luaS_init (lua_State *L);
@@ -69,5 +69,6 @@ LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
 LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
 		const char *s, size_t len, lua_Alloc falloc, void *ud);
 LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
+LUAI_FUNC TString *luaS_normstr (lua_State *L, TString *ts);
 
 #endif

+ 50 - 32
ltable.c

@@ -234,41 +234,51 @@ l_sinline Node *mainpositionfromnode (const Table *t, Node *nd) {
 ** Check whether key 'k1' is equal to the key in node 'n2'. This
 ** equality is raw, so there are no metamethods. Floats with integer
 ** values have been normalized, so integers cannot be equal to
-** floats. It is assumed that 'eqshrstr' is simply pointer equality, so
-** that short strings are handled in the default case.
-** A true 'deadok' means to accept dead keys as equal to their original
-** values. All dead keys are compared in the default case, by pointer
-** identity. (Only collectable objects can produce dead keys.) Note that
-** dead long strings are also compared by identity.
-** Once a key is dead, its corresponding value may be collected, and
-** then another value can be created with the same address. If this
-** other value is given to 'next', 'equalkey' will signal a false
-** positive. In a regular traversal, this situation should never happen,
-** as all keys given to 'next' came from the table itself, and therefore
-** could not have been collected. Outside a regular traversal, we
-** have garbage in, garbage out. What is relevant is that this false
-** positive does not break anything.  (In particular, 'next' will return
-** some other valid item on the table or nil.)
+** floats. It is assumed that 'eqshrstr' is simply pointer equality,
+** so that short strings are handled in the default case.  The flag
+** 'deadok' means to accept dead keys as equal to their original values.
+** (Only collectable objects can produce dead keys.) Note that dead
+** long strings are also compared by identity.  Once a key is dead,
+** its corresponding value may be collected, and then another value
+** can be created with the same address. If this other value is given
+** to 'next', 'equalkey' will signal a false positive. In a regular
+** traversal, this situation should never happen, as all keys given to
+** 'next' came from the table itself, and therefore could not have been
+** collected. Outside a regular traversal, we have garbage in, garbage
+** out. What is relevant is that this false positive does not break
+** anything.  (In particular, 'next' will return some other valid item
+** on the table or nil.)
 */
 static int equalkey (const TValue *k1, const Node *n2, int deadok) {
-  if ((rawtt(k1) != keytt(n2)) &&  /* not the same variants? */
-       !(deadok && keyisdead(n2) && iscollectable(k1)))
-   return 0;  /* cannot be same key */
-  switch (keytt(n2)) {
-    case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
-      return 1;
-    case LUA_VNUMINT:
-      return (ivalue(k1) == keyival(n2));
-    case LUA_VNUMFLT:
-      return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
-    case LUA_VLIGHTUSERDATA:
-      return pvalue(k1) == pvalueraw(keyval(n2));
-    case LUA_VLCF:
-      return fvalue(k1) == fvalueraw(keyval(n2));
-    case ctb(LUA_VLNGSTR):
-      return luaS_eqlngstr(tsvalue(k1), keystrval(n2));
-    default:
+  if (rawtt(k1) != keytt(n2)) {  /* not the same variants? */
+    if (keyisshrstr(n2) && ttislngstring(k1)) {
+      /* an external string can be equal to a short-string key */
+      return luaS_eqstr(tsvalue(k1), keystrval(n2));
+    }
+    else if (deadok && keyisdead(n2) && iscollectable(k1)) {
+      /* a collectable value can be equal to a dead key */
       return gcvalue(k1) == gcvalueraw(keyval(n2));
+   }
+   else
+     return 0;  /* otherwise, different variants cannot be equal */
+  }
+  else {  /* equal variants */
+    switch (keytt(n2)) {
+      case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
+        return 1;
+      case LUA_VNUMINT:
+        return (ivalue(k1) == keyival(n2));
+      case LUA_VNUMFLT:
+        return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
+      case LUA_VLIGHTUSERDATA:
+        return pvalue(k1) == pvalueraw(keyval(n2));
+      case LUA_VLCF:
+        return fvalue(k1) == fvalueraw(keyval(n2));
+      case ctb(LUA_VLNGSTR):
+        return luaS_eqstr(tsvalue(k1), keystrval(n2));
+      default:
+        return gcvalue(k1) == gcvalueraw(keyval(n2));
+    }
   }
 }
 
@@ -1158,6 +1168,14 @@ void luaH_finishset (lua_State *L, Table *t, const TValue *key,
       else if (l_unlikely(luai_numisnan(f)))
         luaG_runerror(L, "table index is NaN");
     }
+    else if (isextstr(key)) {  /* external string? */
+      /* If the string is short, it must be internalized to be a table key */
+      TString *ts = luaS_normstr(L, tsvalue(key));
+      setsvalue2s(L, L->top.p++, ts);  /* anchor 'ts' (EXTRA_STACK) */
+      luaH_newkey(L, t, s2v(L->top.p - 1), value);
+      L->top.p--;
+      return;
+    }
     luaH_newkey(L, t, key, value);
   }
   else if (hres > 0) {  /* regular Node? */

+ 5 - 1
ltests.c

@@ -1066,8 +1066,12 @@ static int tracegc (lua_State *L) {
 
 static int hash_query (lua_State *L) {
   if (lua_isnone(L, 2)) {
+    TString *ts;
     luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected");
-    lua_pushinteger(L, cast_int(tsvalue(obj_at(L, 1))->hash));
+    ts = tsvalue(obj_at(L, 1));
+    if (ts->tt == LUA_VLNGSTR)
+      luaS_hashlongstr(ts);  /* make sure long string has a hash */
+    lua_pushinteger(L, cast_int(ts->hash));
   }
   else {
     TValue *o = obj_at(L, 1);

+ 64 - 42
lvm.c

@@ -573,52 +573,74 @@ int luaV_lessequal (lua_State *L, const TValue *l, const TValue *r) {
 */
 int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
   const TValue *tm;
-  if (ttypetag(t1) != ttypetag(t2)) {  /* not the same variant? */
-    if (ttype(t1) != ttype(t2) || ttype(t1) != LUA_TNUMBER)
-      return 0;  /* only numbers can be equal with different variants */
-    else {  /* two numbers with different variants */
-      /* One of them is an integer. If the other does not have an
-         integer value, they cannot be equal; otherwise, compare their
-         integer values. */
-      lua_Integer i1, i2;
-      return (luaV_tointegerns(t1, &i1, F2Ieq) &&
-              luaV_tointegerns(t2, &i2, F2Ieq) &&
-              i1 == i2);
+  if (ttype(t1) != ttype(t2))  /* not the same type? */
+    return 0;
+  else if (ttypetag(t1) != ttypetag(t2)) {
+    switch (ttypetag(t1)) {
+      case LUA_VNUMINT: {  /* integer == float? */
+        /* integer and float can only be equal if float has an integer
+           value equal to the integer */
+        lua_Integer i2;
+        return (luaV_flttointeger(fltvalue(t2), &i2, F2Ieq) &&
+                ivalue(t1) == i2);
+      }
+      case LUA_VNUMFLT: {  /* float == integer? */
+        lua_Integer i1;  /* see comment in previous case */
+        return (luaV_flttointeger(fltvalue(t1), &i1, F2Ieq) &&
+                i1 == ivalue(t2));
+      }
+      case LUA_VSHRSTR: case LUA_VLNGSTR: {
+        /* compare two strings with different variants: they can be
+           equal when one string is a short string and the other is
+           an external string  */
+        return luaS_eqstr(tsvalue(t1), tsvalue(t2));
+      }
+      default:
+        /* only numbers (integer/float) and strings (long/short) can have
+           equal values with different variants */
+        return 0;
     }
   }
-  /* values have same type and same variant */
-  switch (ttypetag(t1)) {
-    case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE: return 1;
-    case LUA_VNUMINT: return (ivalue(t1) == ivalue(t2));
-    case LUA_VNUMFLT: return luai_numeq(fltvalue(t1), fltvalue(t2));
-    case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
-    case LUA_VLCF: return fvalue(t1) == fvalue(t2);
-    case LUA_VSHRSTR: return eqshrstr(tsvalue(t1), tsvalue(t2));
-    case LUA_VLNGSTR: return luaS_eqlngstr(tsvalue(t1), tsvalue(t2));
-    case LUA_VUSERDATA: {
-      if (uvalue(t1) == uvalue(t2)) return 1;
-      else if (L == NULL) return 0;
-      tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
-      if (tm == NULL)
-        tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
-      break;  /* will try TM */
+  else {  /* equal variants */
+    switch (ttypetag(t1)) {
+      case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
+        return 1;
+      case LUA_VNUMINT:
+        return (ivalue(t1) == ivalue(t2));
+      case LUA_VNUMFLT:
+        return (fltvalue(t1) == fltvalue(t2));
+      case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
+      case LUA_VSHRSTR:
+        return eqshrstr(tsvalue(t1), tsvalue(t2));
+      case LUA_VLNGSTR:
+        return luaS_eqstr(tsvalue(t1), tsvalue(t2));
+      case LUA_VUSERDATA: {
+        if (uvalue(t1) == uvalue(t2)) return 1;
+        else if (L == NULL) return 0;
+        tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
+        if (tm == NULL)
+          tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
+        break;  /* will try TM */
+      }
+      case LUA_VTABLE: {
+        if (hvalue(t1) == hvalue(t2)) return 1;
+        else if (L == NULL) return 0;
+        tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
+        if (tm == NULL)
+          tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
+        break;  /* will try TM */
+      }
+      case LUA_VLCF:
+        return (fvalue(t1) == fvalue(t2));
+      default:  /* functions and threads */
+        return (gcvalue(t1) == gcvalue(t2));
     }
-    case LUA_VTABLE: {
-      if (hvalue(t1) == hvalue(t2)) return 1;
-      else if (L == NULL) return 0;
-      tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
-      if (tm == NULL)
-        tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
-      break;  /* will try TM */
+    if (tm == NULL)  /* no TM? */
+      return 0;  /* objects are different */
+    else {
+      int tag = luaT_callTMres(L, tm, t1, t2, L->top.p);  /* call TM */
+      return !tagisfalse(tag);
     }
-    default:
-      return gcvalue(t1) == gcvalue(t2);
-  }
-  if (tm == NULL)  /* no TM? */
-    return 0;  /* objects are different */
-  else {
-    int tag = luaT_callTMres(L, tm, t1, t2, L->top.p);  /* call TM */
-    return !tagisfalse(tag);
   }
 }
 

+ 3 - 9
manual/manual.of

@@ -2419,8 +2419,8 @@ for instance @T{foo(e1, e2, e3)} @see{functioncall}.}
 @item{A multiple assignment,
 for instance @T{a , b, c = e1, e2, e3} @see{assignment}.}
 
-@item{A local declaration,
-for instance @T{local a , b, c = e1, e2, e3} @see{localvar}.}
+@item{A local or global declaration,
+which is a special case of multiple assignment.}
 
 @item{The initial values in a generic @rw{for} loop,
 for instance @T{for k in e1, e2, e3 do ... end} @see{for}.}
@@ -2431,8 +2431,7 @@ the list of values from the list of expressions
 must be @emph{adjusted} to a specific length:
 the number of parameters in a call to a non-variadic function
 @see{func-def},
-the number of variables in a multiple assignment or
-a local declaration,
+the number of variables in a multiple assignment or a declaration,
 and exactly four values for a generic @rw{for} loop.
 The @def{adjustment} follows these rules:
 If there are more values than needed,
@@ -4075,11 +4074,6 @@ the string @id{s} as the block,
 the length plus one (to account for the ending zero) as the old size,
 and 0 as the new size.
 
-Lua always @x{internalizes} strings with lengths up to 40 characters.
-So, for strings in that range,
-this function will immediately internalize the string
-and call @id{falloc} to free the buffer.
-
 Even when using an external buffer,
 Lua still has to allocate a header for the string.
 In case of a memory-allocation error,

+ 21 - 7
testes/attrib.lua

@@ -300,12 +300,6 @@ else
   assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2")
   assert(lib2.id("x") == true)   -- a different "id" implementation
 
-  for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
-    local str = string.rep("a", len)
-    local str1 = lib2.newstr(str)
-    assert(str == str1)
-  end
-
   -- test C submodules
   local fs, ext = require"lib1.sub"
   assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1")
@@ -314,11 +308,11 @@ else
   _ENV.x, _ENV.y = nil
 end
 
+
 _ENV = _G
 
 
 -- testing preload
-
 do
   local p = package
   package = {}
@@ -337,6 +331,26 @@ do
   assert(type(package.path) == "string")
 end
 
+
+do  print("testing external strings")
+  package.cpath = DC"?"
+  local lib2 = require"lib2-v2"
+  local t = {}
+  for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
+    local str = string.rep("a", len)
+    local str1 = lib2.newstr(str)
+    assert(str == str1)
+    assert(not T or T.hash(str) == T.hash(str1))
+    t[str1] = 20; assert(t[str] == 20 and t[str1] == 20)
+    t[str] = 10; assert(t[str1] == 10)
+    local tt = {[str1] = str1}
+    assert(next(tt) == str1 and next(tt, str1) == nil)
+    assert(tt[str] == str)
+    local str2 = lib2.newstr(str1)
+    assert(str == str2 and t[str2] == 10 and tt[str2] == str)
+  end
+end
+
 print('+')
 
 end  --]