浏览代码

Short strings can be external, too

That slightly complicates object equality (and therefore table access
for long strings), but the old behavior was somewhat weird. (Short
strings, a concept otherwise absent from the manual, could not be
external.)
Roberto Ierusalimschy 2 周之前
父节点
当前提交
60b6599e83
共有 9 个文件被更改,包括 168 次插入和 120 次删除
  1. 2 2
      loadlib.c
  2. 1 0
      lobject.h
  3. 20 26
      lstring.c
  4. 2 1
      lstring.h
  5. 50 32
      ltable.c
  6. 5 1
      ltests.c
  7. 64 42
      lvm.c
  8. 3 9
      manual/manual.of
  9. 21 7
      testes/attrib.lua

+ 2 - 2
loadlib.c

@@ -345,8 +345,8 @@ static void *freelib (void *ud, void *ptr, size_t osize, size_t nsize) {
 ** Create a library string that, when deallocated, will unload 'plib'
 ** Create a library string that, when deallocated, will unload 'plib'
 */
 */
 static void createlibstr (lua_State *L, void *plib) {
 static void createlibstr (lua_State *L, void *plib) {
-  static const char dummy[] =  /* common long body for all library strings */
-    "01234567890123456789012345678901234567890123456789";
+  /* common content for all library strings */
+  static const char dummy[] = "01234567890";
   lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib);
   lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib);
 }
 }
 
 

+ 1 - 0
lobject.h

@@ -418,6 +418,7 @@ typedef struct TString {
 
 
 
 
 #define strisshr(ts)	((ts)->shrlen >= 0)
 #define strisshr(ts)	((ts)->shrlen >= 0)
+#define isextstr(ts)	(ttislngstring(ts) && tsvalue(ts)->shrlen != LSTRREG)
 
 
 
 
 /*
 /*

+ 20 - 26
lstring.c

@@ -39,14 +39,14 @@
 
 
 
 
 /*
 /*
-** equality for long strings
+** generic equality for strings
 */
 */
-int luaS_eqlngstr (TString *a, TString *b) {
-  size_t len = a->u.lnglen;
-  lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
-  return (a == b) ||  /* same instance or... */
-    ((len == b->u.lnglen) &&  /* equal length and ... */
-     (memcmp(getlngstr(a), getlngstr(b), len) == 0));  /* equal contents */
+int luaS_eqstr (TString *a, TString *b) {
+  size_t len1, len2;
+  const char *s1 = getlstr(a, len1);
+  const char *s2 = getlstr(b, len2);
+  return ((len1 == len2) &&  /* equal length and ... */
+          (memcmp(s1, s2, len1) == 0));  /* equal contents */
 }
 }
 
 
 
 
@@ -315,28 +315,9 @@ static void f_newext (lua_State *L, void *ud) {
 }
 }
 
 
 
 
-static void f_pintern (lua_State *L, void *ud) {
-  struct NewExt *ne = cast(struct NewExt *, ud);
-  ne->ts = internshrstr(L, ne->s, ne->len);
-}
-
-
 TString *luaS_newextlstr (lua_State *L,
 TString *luaS_newextlstr (lua_State *L,
 	          const char *s, size_t len, lua_Alloc falloc, void *ud) {
 	          const char *s, size_t len, lua_Alloc falloc, void *ud) {
   struct NewExt ne;
   struct NewExt ne;
-  if (len <= LUAI_MAXSHORTLEN) {  /* short string? */
-    ne.s = s; ne.len = len;
-    if (!falloc)
-      f_pintern(L, &ne);  /* just internalize string */
-    else {
-      TStatus status = luaD_rawrunprotected(L, f_pintern, &ne);
-      (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* free external string */
-      if (status != LUA_OK)  /* memory error? */
-        luaM_error(L);  /* re-raise memory error */
-    }
-    return ne.ts;
-  }
-  /* "normal" case: long strings */
   if (!falloc) {
   if (!falloc) {
     ne.kind = LSTRFIX;
     ne.kind = LSTRFIX;
     f_newext(L, &ne);  /* just create header */
     f_newext(L, &ne);  /* just create header */
@@ -357,3 +338,16 @@ TString *luaS_newextlstr (lua_State *L,
 }
 }
 
 
 
 
+/*
+** Normalize an external string: If it is short, internalize it.
+*/
+TString *luaS_normstr (lua_State *L, TString *ts) {
+  size_t len = ts->u.lnglen;
+  if (len > LUAI_MAXSHORTLEN)
+    return ts;  /* long string; keep the original */
+  else {
+    const char *str = getlngstr(ts);
+    return internshrstr(L, str, len);
+  }
+}
+

+ 2 - 1
lstring.h

@@ -56,7 +56,7 @@
 
 
 LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed);
 LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed);
 LUAI_FUNC unsigned luaS_hashlongstr (TString *ts);
 LUAI_FUNC unsigned luaS_hashlongstr (TString *ts);
-LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
+LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
 LUAI_FUNC void luaS_clearcache (global_State *g);
 LUAI_FUNC void luaS_clearcache (global_State *g);
 LUAI_FUNC void luaS_init (lua_State *L);
 LUAI_FUNC void luaS_init (lua_State *L);
@@ -69,5 +69,6 @@ LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
 LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
 LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
 		const char *s, size_t len, lua_Alloc falloc, void *ud);
 		const char *s, size_t len, lua_Alloc falloc, void *ud);
 LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
 LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
+LUAI_FUNC TString *luaS_normstr (lua_State *L, TString *ts);
 
 
 #endif
 #endif

+ 50 - 32
ltable.c

@@ -234,41 +234,51 @@ l_sinline Node *mainpositionfromnode (const Table *t, Node *nd) {
 ** Check whether key 'k1' is equal to the key in node 'n2'. This
 ** Check whether key 'k1' is equal to the key in node 'n2'. This
 ** equality is raw, so there are no metamethods. Floats with integer
 ** equality is raw, so there are no metamethods. Floats with integer
 ** values have been normalized, so integers cannot be equal to
 ** values have been normalized, so integers cannot be equal to
-** floats. It is assumed that 'eqshrstr' is simply pointer equality, so
-** that short strings are handled in the default case.
-** A true 'deadok' means to accept dead keys as equal to their original
-** values. All dead keys are compared in the default case, by pointer
-** identity. (Only collectable objects can produce dead keys.) Note that
-** dead long strings are also compared by identity.
-** Once a key is dead, its corresponding value may be collected, and
-** then another value can be created with the same address. If this
-** other value is given to 'next', 'equalkey' will signal a false
-** positive. In a regular traversal, this situation should never happen,
-** as all keys given to 'next' came from the table itself, and therefore
-** could not have been collected. Outside a regular traversal, we
-** have garbage in, garbage out. What is relevant is that this false
-** positive does not break anything.  (In particular, 'next' will return
-** some other valid item on the table or nil.)
+** floats. It is assumed that 'eqshrstr' is simply pointer equality,
+** so that short strings are handled in the default case.  The flag
+** 'deadok' means to accept dead keys as equal to their original values.
+** (Only collectable objects can produce dead keys.) Note that dead
+** long strings are also compared by identity.  Once a key is dead,
+** its corresponding value may be collected, and then another value
+** can be created with the same address. If this other value is given
+** to 'next', 'equalkey' will signal a false positive. In a regular
+** traversal, this situation should never happen, as all keys given to
+** 'next' came from the table itself, and therefore could not have been
+** collected. Outside a regular traversal, we have garbage in, garbage
+** out. What is relevant is that this false positive does not break
+** anything.  (In particular, 'next' will return some other valid item
+** on the table or nil.)
 */
 */
 static int equalkey (const TValue *k1, const Node *n2, int deadok) {
 static int equalkey (const TValue *k1, const Node *n2, int deadok) {
-  if ((rawtt(k1) != keytt(n2)) &&  /* not the same variants? */
-       !(deadok && keyisdead(n2) && iscollectable(k1)))
-   return 0;  /* cannot be same key */
-  switch (keytt(n2)) {
-    case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
-      return 1;
-    case LUA_VNUMINT:
-      return (ivalue(k1) == keyival(n2));
-    case LUA_VNUMFLT:
-      return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
-    case LUA_VLIGHTUSERDATA:
-      return pvalue(k1) == pvalueraw(keyval(n2));
-    case LUA_VLCF:
-      return fvalue(k1) == fvalueraw(keyval(n2));
-    case ctb(LUA_VLNGSTR):
-      return luaS_eqlngstr(tsvalue(k1), keystrval(n2));
-    default:
+  if (rawtt(k1) != keytt(n2)) {  /* not the same variants? */
+    if (keyisshrstr(n2) && ttislngstring(k1)) {
+      /* an external string can be equal to a short-string key */
+      return luaS_eqstr(tsvalue(k1), keystrval(n2));
+    }
+    else if (deadok && keyisdead(n2) && iscollectable(k1)) {
+      /* a collectable value can be equal to a dead key */
       return gcvalue(k1) == gcvalueraw(keyval(n2));
       return gcvalue(k1) == gcvalueraw(keyval(n2));
+   }
+   else
+     return 0;  /* otherwise, different variants cannot be equal */
+  }
+  else {  /* equal variants */
+    switch (keytt(n2)) {
+      case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
+        return 1;
+      case LUA_VNUMINT:
+        return (ivalue(k1) == keyival(n2));
+      case LUA_VNUMFLT:
+        return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
+      case LUA_VLIGHTUSERDATA:
+        return pvalue(k1) == pvalueraw(keyval(n2));
+      case LUA_VLCF:
+        return fvalue(k1) == fvalueraw(keyval(n2));
+      case ctb(LUA_VLNGSTR):
+        return luaS_eqstr(tsvalue(k1), keystrval(n2));
+      default:
+        return gcvalue(k1) == gcvalueraw(keyval(n2));
+    }
   }
   }
 }
 }
 
 
@@ -1158,6 +1168,14 @@ void luaH_finishset (lua_State *L, Table *t, const TValue *key,
       else if (l_unlikely(luai_numisnan(f)))
       else if (l_unlikely(luai_numisnan(f)))
         luaG_runerror(L, "table index is NaN");
         luaG_runerror(L, "table index is NaN");
     }
     }
+    else if (isextstr(key)) {  /* external string? */
+      /* If string is short, must internalize it to be used as table key */
+      TString *ts = luaS_normstr(L, tsvalue(key));
+      setsvalue2s(L, L->top.p++, ts);  /* anchor 'ts' (EXTRA_STACK) */
+      luaH_newkey(L, t, s2v(L->top.p - 1), value);
+      L->top.p--;
+      return;
+    }
     luaH_newkey(L, t, key, value);
     luaH_newkey(L, t, key, value);
   }
   }
   else if (hres > 0) {  /* regular Node? */
   else if (hres > 0) {  /* regular Node? */

+ 5 - 1
ltests.c

@@ -1066,8 +1066,12 @@ static int tracegc (lua_State *L) {
 
 
 static int hash_query (lua_State *L) {
 static int hash_query (lua_State *L) {
   if (lua_isnone(L, 2)) {
   if (lua_isnone(L, 2)) {
+    TString *ts;
     luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected");
     luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected");
-    lua_pushinteger(L, cast_int(tsvalue(obj_at(L, 1))->hash));
+    ts = tsvalue(obj_at(L, 1));
+    if (ts->tt == LUA_VLNGSTR)
+      luaS_hashlongstr(ts);  /* make sure long string has a hash */
+    lua_pushinteger(L, cast_int(ts->hash));
   }
   }
   else {
   else {
     TValue *o = obj_at(L, 1);
     TValue *o = obj_at(L, 1);

+ 64 - 42
lvm.c

@@ -573,52 +573,74 @@ int luaV_lessequal (lua_State *L, const TValue *l, const TValue *r) {
 */
 */
 int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
 int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
   const TValue *tm;
   const TValue *tm;
-  if (ttypetag(t1) != ttypetag(t2)) {  /* not the same variant? */
-    if (ttype(t1) != ttype(t2) || ttype(t1) != LUA_TNUMBER)
-      return 0;  /* only numbers can be equal with different variants */
-    else {  /* two numbers with different variants */
-      /* One of them is an integer. If the other does not have an
-         integer value, they cannot be equal; otherwise, compare their
-         integer values. */
-      lua_Integer i1, i2;
-      return (luaV_tointegerns(t1, &i1, F2Ieq) &&
-              luaV_tointegerns(t2, &i2, F2Ieq) &&
-              i1 == i2);
+  if (ttype(t1) != ttype(t2))  /* not the same type? */
+    return 0;
+  else if (ttypetag(t1) != ttypetag(t2)) {
+    switch (ttypetag(t1)) {
+      case LUA_VNUMINT: {  /* integer == float? */
+        /* integer and float can only be equal if float has an integer
+           value equal to the integer */
+        lua_Integer i2;
+        return (luaV_flttointeger(fltvalue(t2), &i2, F2Ieq) &&
+                ivalue(t1) == i2);
+      }
+      case LUA_VNUMFLT: {  /* float == integer? */
+        lua_Integer i1;  /* see comment in previous case */
+        return (luaV_flttointeger(fltvalue(t1), &i1, F2Ieq) &&
+                i1 == ivalue(t2));
+      }
+      case LUA_VSHRSTR: case LUA_VLNGSTR: {
+        /* compare two strings with different variants: they can be
+           equal when one string is a short string and the other is
+           an external string  */
+        return luaS_eqstr(tsvalue(t1), tsvalue(t2));
+      }
+      default:
+        /* only numbers (integer/float) and strings (long/short) can have
+           equal values with different variants */
+        return 0;
     }
     }
   }
   }
-  /* values have same type and same variant */
-  switch (ttypetag(t1)) {
-    case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE: return 1;
-    case LUA_VNUMINT: return (ivalue(t1) == ivalue(t2));
-    case LUA_VNUMFLT: return luai_numeq(fltvalue(t1), fltvalue(t2));
-    case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
-    case LUA_VLCF: return fvalue(t1) == fvalue(t2);
-    case LUA_VSHRSTR: return eqshrstr(tsvalue(t1), tsvalue(t2));
-    case LUA_VLNGSTR: return luaS_eqlngstr(tsvalue(t1), tsvalue(t2));
-    case LUA_VUSERDATA: {
-      if (uvalue(t1) == uvalue(t2)) return 1;
-      else if (L == NULL) return 0;
-      tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
-      if (tm == NULL)
-        tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
-      break;  /* will try TM */
+  else {  /* equal variants */
+    switch (ttypetag(t1)) {
+      case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
+        return 1;
+      case LUA_VNUMINT:
+        return (ivalue(t1) == ivalue(t2));
+      case LUA_VNUMFLT:
+        return (fltvalue(t1) == fltvalue(t2));
+      case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
+      case LUA_VSHRSTR:
+        return eqshrstr(tsvalue(t1), tsvalue(t2));
+      case LUA_VLNGSTR:
+        return luaS_eqstr(tsvalue(t1), tsvalue(t2));
+      case LUA_VUSERDATA: {
+        if (uvalue(t1) == uvalue(t2)) return 1;
+        else if (L == NULL) return 0;
+        tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
+        if (tm == NULL)
+          tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
+        break;  /* will try TM */
+      }
+      case LUA_VTABLE: {
+        if (hvalue(t1) == hvalue(t2)) return 1;
+        else if (L == NULL) return 0;
+        tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
+        if (tm == NULL)
+          tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
+        break;  /* will try TM */
+      }
+      case LUA_VLCF:
+        return (fvalue(t1) == fvalue(t2));
+      default:  /* functions and threads */
+        return (gcvalue(t1) == gcvalue(t2));
     }
     }
-    case LUA_VTABLE: {
-      if (hvalue(t1) == hvalue(t2)) return 1;
-      else if (L == NULL) return 0;
-      tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
-      if (tm == NULL)
-        tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
-      break;  /* will try TM */
+    if (tm == NULL)  /* no TM? */
+      return 0;  /* objects are different */
+    else {
+      int tag = luaT_callTMres(L, tm, t1, t2, L->top.p);  /* call TM */
+      return !tagisfalse(tag);
     }
     }
-    default:
-      return gcvalue(t1) == gcvalue(t2);
-  }
-  if (tm == NULL)  /* no TM? */
-    return 0;  /* objects are different */
-  else {
-    int tag = luaT_callTMres(L, tm, t1, t2, L->top.p);  /* call TM */
-    return !tagisfalse(tag);
   }
   }
 }
 }
 
 

+ 3 - 9
manual/manual.of

@@ -2419,8 +2419,8 @@ for instance @T{foo(e1, e2, e3)} @see{functioncall}.}
 @item{A multiple assignment,
 @item{A multiple assignment,
 for instance @T{a , b, c = e1, e2, e3} @see{assignment}.}
 for instance @T{a , b, c = e1, e2, e3} @see{assignment}.}
 
 
-@item{A local declaration,
-for instance @T{local a , b, c = e1, e2, e3} @see{localvar}.}
+@item{A local or global declaration,
+which is a special case of multiple assignment.}
 
 
 @item{The initial values in a generic @rw{for} loop,
 @item{The initial values in a generic @rw{for} loop,
 for instance @T{for k in e1, e2, e3 do ... end} @see{for}.}
 for instance @T{for k in e1, e2, e3 do ... end} @see{for}.}
@@ -2431,8 +2431,7 @@ the list of values from the list of expressions
 must be @emph{adjusted} to a specific length:
 must be @emph{adjusted} to a specific length:
 the number of parameters in a call to a non-variadic function
 the number of parameters in a call to a non-variadic function
 @see{func-def},
 @see{func-def},
-the number of variables in a multiple assignment or
-a local declaration,
+the number of variables in a multiple assignment or a declaration,
 and exactly four values for a generic @rw{for} loop.
 and exactly four values for a generic @rw{for} loop.
 The @def{adjustment} follows these rules:
 The @def{adjustment} follows these rules:
 If there are more values than needed,
 If there are more values than needed,
@@ -4075,11 +4074,6 @@ the string @id{s} as the block,
 the length plus one (to account for the ending zero) as the old size,
 the length plus one (to account for the ending zero) as the old size,
 and 0 as the new size.
 and 0 as the new size.
 
 
-Lua always @x{internalizes} strings with lengths up to 40 characters.
-So, for strings in that range,
-this function will immediately internalize the string
-and call @id{falloc} to free the buffer.
-
 Even when using an external buffer,
 Even when using an external buffer,
 Lua still has to allocate a header for the string.
 Lua still has to allocate a header for the string.
 In case of a memory-allocation error,
 In case of a memory-allocation error,

+ 21 - 7
testes/attrib.lua

@@ -300,12 +300,6 @@ else
   assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2")
   assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2")
   assert(lib2.id("x") == true)   -- a different "id" implementation
   assert(lib2.id("x") == true)   -- a different "id" implementation
 
 
-  for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
-    local str = string.rep("a", len)
-    local str1 = lib2.newstr(str)
-    assert(str == str1)
-  end
-
   -- test C submodules
   -- test C submodules
   local fs, ext = require"lib1.sub"
   local fs, ext = require"lib1.sub"
   assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1")
   assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1")
@@ -314,11 +308,11 @@ else
   _ENV.x, _ENV.y = nil
   _ENV.x, _ENV.y = nil
 end
 end
 
 
+
 _ENV = _G
 _ENV = _G
 
 
 
 
 -- testing preload
 -- testing preload
-
 do
 do
   local p = package
   local p = package
   package = {}
   package = {}
@@ -337,6 +331,26 @@ do
   assert(type(package.path) == "string")
   assert(type(package.path) == "string")
 end
 end
 
 
+
+do  print("testing external strings")
+  package.cpath = DC"?"
+  local lib2 = require"lib2-v2"
+  local t = {}
+  for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
+    local str = string.rep("a", len)
+    local str1 = lib2.newstr(str)
+    assert(str == str1)
+    assert(not T or T.hash(str) == T.hash(str1))
+    t[str1] = 20; assert(t[str] == 20 and t[str1] == 20)
+    t[str] = 10; assert(t[str1] == 10)
+    local tt = {[str1] = str1}
+    assert(next(tt) == str1 and next(tt, str1) == nil)
+    assert(tt[str] == str)
+    local str2 = lib2.newstr(str1)
+    assert(str == str2 and t[str2] == 10 and tt[str2] == str)
+  end
+end
+
 print('+')
 print('+')
 
 
 end  --]
 end  --]