瀏覽代碼

first implementation of long strings

Roberto Ierusalimschy 13 年之前
父節點
當前提交
a4b96ce9a3
共有 9 個文件被更改,包括 144 次插入和 47 次删除
  1. 16 10
      lgc.c
  2. 10 1
      llimits.h
  3. 12 3
      lobject.h
  4. 3 2
      lstate.h
  5. 77 19
      lstring.c
  6. 6 3
      lstring.h
  7. 15 6
      ltable.c
  8. 3 2
      ltests.c
  9. 2 1
      lvm.c

+ 16 - 10
lgc.c

@@ -65,7 +65,11 @@
 #define white2gray(x)	resetbits(gch(x)->marked, WHITEBITS)
 #define white2gray(x)	resetbits(gch(x)->marked, WHITEBITS)
 #define black2gray(x)	resetbit(gch(x)->marked, BLACKBIT)
 #define black2gray(x)	resetbit(gch(x)->marked, BLACKBIT)
 
 
-#define stringmark(s)	((void)((s) && resetbits((s)->tsv.marked, WHITEBITS)))
+/*
+** dirty trick: we know that 'reallymarkobject' does not use 'g' when
+** object is a string
+*/
+#define stringmark(s)	markobject(NULL, s)
 
 
 
 
 #define isfinalized(x)		testbit(gch(x)->marked, FINALIZEDBIT)
 #define isfinalized(x)		testbit(gch(x)->marked, FINALIZEDBIT)
@@ -240,18 +244,18 @@ GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list,
 
 
 
 
 /*
 /*
-** mark an object. Userdata and closed upvalues are visited and turned
-** black here. Strings remain gray (it is the same as making them
-** black). Other objects are marked gray and added to appropriate list
-** to be visited (and turned black) later. (Open upvalues are already
-** linked in 'headuv' list.)
+** mark an object. Userdata, strings, and closed upvalues are visited
+** and turned black here. Other objects are marked gray and added
+** to appropriate list to be visited (and turned black) later. (Open
+** upvalues are already linked in 'headuv' list.)
 */
 */
 static void reallymarkobject (global_State *g, GCObject *o) {
 static void reallymarkobject (global_State *g, GCObject *o) {
-  lua_assert(iswhite(o) && !isdead(g, o));
   white2gray(o);
   white2gray(o);
   switch (gch(o)->tt) {
   switch (gch(o)->tt) {
-    case LUA_TSTRING: {
-      return;  /* for strings, gray is as good as black */
+    case LUA_TSHRSTR:
+    case LUA_TLNGSTR: {
+      gray2black(o);
+      return;  /* nothing else to mark */
     }
     }
     case LUA_TUSERDATA: {
     case LUA_TUSERDATA: {
       Table *mt = gco2u(o)->metatable;
       Table *mt = gco2u(o)->metatable;
@@ -663,8 +667,10 @@ static void freeobj (lua_State *L, GCObject *o) {
     case LUA_TTABLE: luaH_free(L, gco2t(o)); break;
     case LUA_TTABLE: luaH_free(L, gco2t(o)); break;
     case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
     case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
     case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
     case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
-    case LUA_TSTRING: {
+    case LUA_TSHRSTR: 
       G(L)->strt.nuse--;
       G(L)->strt.nuse--;
+      /* go through */
+    case LUA_TLNGSTR: {
       luaM_freemem(L, o, sizestring(gco2ts(o)));
       luaM_freemem(L, o, sizestring(gco2ts(o)));
       break;
       break;
     }
     }

+ 10 - 1
llimits.h

@@ -1,5 +1,5 @@
 /*
 /*
-** $Id: llimits.h,v 1.94 2011/11/29 15:39:48 roberto Exp roberto $
+** $Id: llimits.h,v 1.95 2011/12/06 16:58:36 roberto Exp roberto $
 ** Limits, basic types, and some other `installation-dependent' definitions
 ** Limits, basic types, and some other `installation-dependent' definitions
 ** See Copyright Notice in lua.h
 ** See Copyright Notice in lua.h
 */
 */
@@ -124,6 +124,15 @@ typedef LUAI_UACNUMBER l_uacNumber;
 #define MAXUPVAL	UCHAR_MAX
 #define MAXUPVAL	UCHAR_MAX
 
 
 
 
+/*
+** maximum length for short strings, that is, strings that are
+** internalized. (Cannot be smaller than reserved words or tags
+** for metamethods; #"function" = 8, #"__newindex" = 10; should
+** not be larger than 255, to allow future changes)
+*/
+#define LUA_MAXSHORTLEN		(8 * sizeof(void*))
+
+
 /*
 /*
 ** type for virtual-machine instructions
 ** type for virtual-machine instructions
 ** must be an unsigned with (at least) 4 bytes (see details in lopcodes.h)
 ** must be an unsigned with (at least) 4 bytes (see details in lopcodes.h)

+ 12 - 3
lobject.h

@@ -52,6 +52,12 @@
 #define LUA_TCCL	(LUA_TFUNCTION | (2 << 4))  /* C closure */
 #define LUA_TCCL	(LUA_TFUNCTION | (2 << 4))  /* C closure */
 
 
 
 
+/*
+** LUA_TSTRING variants */
+#define LUA_TSHRSTR	(LUA_TSTRING | (0 << 4))  /* short strings */
+#define LUA_TLNGSTR	(LUA_TSTRING | (1 << 4))  /* long strings */
+
+
 /* Bit mark for collectable types */
 /* Bit mark for collectable types */
 #define BIT_ISCOLLECTABLE	(1 << 6)
 #define BIT_ISCOLLECTABLE	(1 << 6)
 
 
@@ -129,7 +135,9 @@ typedef struct lua_TValue TValue;
 #define ttisnil(o)		checktag((o), LUA_TNIL)
 #define ttisnil(o)		checktag((o), LUA_TNIL)
 #define ttisboolean(o)		checktag((o), LUA_TBOOLEAN)
 #define ttisboolean(o)		checktag((o), LUA_TBOOLEAN)
 #define ttislightuserdata(o)	checktag((o), LUA_TLIGHTUSERDATA)
 #define ttislightuserdata(o)	checktag((o), LUA_TLIGHTUSERDATA)
-#define ttisstring(o)		checktag((o), ctb(LUA_TSTRING))
+#define ttisstring(o)		checktype((o), LUA_TSTRING)
+#define ttisshrstring(o)	checktag((o), ctb(LUA_TSHRSTR))
+#define ttislngstring(o)	checktag((o), ctb(LUA_TLNGSTR))
 #define ttistable(o)		checktag((o), ctb(LUA_TTABLE))
 #define ttistable(o)		checktag((o), ctb(LUA_TTABLE))
 #define ttisfunction(o)		checktype(o, LUA_TFUNCTION)
 #define ttisfunction(o)		checktype(o, LUA_TFUNCTION)
 #define ttisclosure(o)		((rttype(o) & 0x1F) == LUA_TFUNCTION)
 #define ttisclosure(o)		((rttype(o) & 0x1F) == LUA_TFUNCTION)
@@ -199,7 +207,8 @@ typedef struct lua_TValue TValue;
 
 
 #define setsvalue(L,obj,x) \
 #define setsvalue(L,obj,x) \
   { TValue *io=(obj); \
   { TValue *io=(obj); \
-    val_(io).gc=cast(GCObject *, (x)); settt_(io, ctb(LUA_TSTRING)); \
+    TString *x_ = (x); \
+    val_(io).gc=cast(GCObject *, x_); settt_(io, ctb(x_->tsv.tt)); \
     checkliveness(G(L),io); }
     checkliveness(G(L),io); }
 
 
 #define setuvalue(L,obj,x) \
 #define setuvalue(L,obj,x) \
@@ -409,7 +418,7 @@ typedef union TString {
   L_Umaxalign dummy;  /* ensures maximum alignment for strings */
   L_Umaxalign dummy;  /* ensures maximum alignment for strings */
   struct {
   struct {
     CommonHeader;
     CommonHeader;
-    lu_byte extra;  /* reserved words for strings */
+    lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
     unsigned int hash;
     unsigned int hash;
     size_t len;  /* number of characters in string */
     size_t len;  /* number of characters in string */
   } tsv;
   } tsv;

+ 3 - 2
lstate.h

@@ -1,5 +1,5 @@
 /*
 /*
-** $Id: lstate.h,v 2.74 2011/09/30 12:45:07 roberto Exp roberto $
+** $Id: lstate.h,v 2.75 2012/01/20 22:05:50 roberto Exp roberto $
 ** Global State
 ** Global State
 ** See Copyright Notice in lua.h
 ** See Copyright Notice in lua.h
 */
 */
@@ -193,7 +193,8 @@ union GCObject {
 #define gch(o)		(&(o)->gch)
 #define gch(o)		(&(o)->gch)
 
 
 /* macros to convert a GCObject into a specific value */
 /* macros to convert a GCObject into a specific value */
-#define rawgco2ts(o)	check_exp((o)->gch.tt == LUA_TSTRING, &((o)->ts))
+#define rawgco2ts(o)  \
+	check_exp(novariant((o)->gch.tt) == LUA_TSTRING, &((o)->ts))
 #define gco2ts(o)	(&rawgco2ts(o)->tsv)
 #define gco2ts(o)	(&rawgco2ts(o)->tsv)
 #define rawgco2u(o)	check_exp((o)->gch.tt == LUA_TUSERDATA, &((o)->u))
 #define rawgco2u(o)	check_exp((o)->gch.tt == LUA_TUSERDATA, &((o)->u))
 #define gco2u(o)	(&rawgco2u(o)->uv)
 #define gco2u(o)	(&rawgco2u(o)->uv)

+ 77 - 19
lstring.c

@@ -18,7 +18,37 @@
 #include "lstring.h"
 #include "lstring.h"
 
 
 
 
+/*
+** equality for long strings
+*/
+int luaS_eqlngstr (TString *a, TString *b) {
+  size_t len = a->tsv.len;
+  lua_assert(a->tsv.tt == LUA_TLNGSTR && b->tsv.tt == LUA_TLNGSTR);
+  return (len == b->tsv.len) && (memcmp(getstr(a), getstr(b), len) == 0);
+}
+
+
+/*
+** equality for strings
+*/
+int luaS_eqstr (TString *a, TString *b) {
+  return (a->tsv.tt == b->tsv.tt) &&
+         (a->tsv.tt == LUA_TSHRSTR ? eqshrstr(a, b) : luaS_eqlngstr(a, b));
+}
+
+
+unsigned int luaS_hash (const char *str, size_t l) {
+  unsigned int h = cast(unsigned int, l);  /* seed */
+  size_t l1;
+  for (l1 = 0; l1 < l; l1++)
+    h = h ^ ((h<<5) + (h>>2) + cast_byte(str[l1]));
+  return h;
+}
+
 
 
+/*
+** resizes the string table
+*/
 void luaS_resize (lua_State *L, int newsize) {
 void luaS_resize (lua_State *L, int newsize) {
   int i;
   int i;
   stringtable *tb = &G(L)->strt;
   stringtable *tb = &G(L)->strt;
@@ -50,36 +80,47 @@ void luaS_resize (lua_State *L, int newsize) {
 }
 }
 
 
 
 
-static TString *newlstr (lua_State *L, const char *str, size_t l,
-                                       unsigned int h) {
-  size_t totalsize;  /* total size of TString object */
-  GCObject **list;  /* (pointer to) list where it will be inserted */
+/*
+** creates a new string object
+*/
+static TString *createstrobj (lua_State *L, const char *str, size_t l,
+                              int tag, unsigned int h, GCObject **list) {
   TString *ts;
   TString *ts;
-  stringtable *tb = &G(L)->strt;
-  if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
-    luaM_toobig(L);
-  if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
-    luaS_resize(L, tb->size*2);  /* too crowded */
+  size_t totalsize;  /* total size of TString object */
   totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
   totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
-  list = &tb->hash[lmod(h, tb->size)];
-  ts = &luaC_newobj(L, LUA_TSTRING, totalsize, list, 0)->ts;
+  ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts;
   ts->tsv.len = l;
   ts->tsv.len = l;
   ts->tsv.hash = h;
   ts->tsv.hash = h;
   ts->tsv.extra = 0;
   ts->tsv.extra = 0;
   memcpy(ts+1, str, l*sizeof(char));
   memcpy(ts+1, str, l*sizeof(char));
   ((char *)(ts+1))[l] = '\0';  /* ending 0 */
   ((char *)(ts+1))[l] = '\0';  /* ending 0 */
-  tb->nuse++;
   return ts;
   return ts;
 }
 }
 
 
 
 
-TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
+/*
+** creates a new short string, inserting it into string table
+*/
+static TString *newshrstr (lua_State *L, const char *str, size_t l,
+                                       unsigned int h) {
+  GCObject **list;  /* (pointer to) list where it will be inserted */
+  stringtable *tb = &G(L)->strt;
+  TString *s;
+  if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
+    luaS_resize(L, tb->size*2);  /* too crowded */
+  list = &tb->hash[lmod(h, tb->size)];
+  s = createstrobj(L, str, l, LUA_TSHRSTR, h, list);
+  tb->nuse++;
+  return s;
+}
+
+
+/*
+** checks whether short string exists and reuses it or creates a new one
+*/
+static TString *internshrstr (lua_State *L, const char *str, size_t l) {
   GCObject *o;
   GCObject *o;
-  unsigned int h = cast(unsigned int, l);  /* seed */
-  size_t step = (l>>5)+1;  /* if string is too long, don't hash all its chars */
-  size_t l1;
-  for (l1=l; l1>=step; l1-=step)  /* compute hash */
-    h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
+  unsigned int h = luaS_hash(str, l);
   for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
   for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
        o != NULL;
        o != NULL;
        o = gch(o)->next) {
        o = gch(o)->next) {
@@ -92,10 +133,27 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
       return ts;
       return ts;
     }
     }
   }
   }
-  return newlstr(L, str, l, h);  /* not found; create a new string */
+  return newshrstr(L, str, l, h);  /* not found; create a new string */
 }
 }
 
 
 
 
+/*
+** new string (with explicit length)
+*/
+TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
+  if (l <= LUA_MAXSHORTLEN)  /* short string? */
+    return internshrstr(L, str, l);
+  else {
+    if (l + 1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
+      luaM_toobig(L);
+    return createstrobj(L, str, l, LUA_TLNGSTR, 0, NULL);
+  }
+}
+
+
+/*
+** new zero-terminated string
+*/
 TString *luaS_new (lua_State *L, const char *str) {
 TString *luaS_new (lua_State *L, const char *str) {
   return luaS_newlstr(L, str, strlen(str));
   return luaS_newlstr(L, str, strlen(str));
 }
 }

+ 6 - 3
lstring.h

@@ -25,15 +25,18 @@
 /*
 /*
 ** test whether a string is a reserved word
 ** test whether a string is a reserved word
 */
 */
-#define isreserved(s)	((s)->tsv.extra > 0)
+#define isreserved(s)	((s)->tsv.tt == LUA_TSHRSTR && (s)->tsv.extra > 0)
 
 
 
 
 /*
 /*
-** equality for strings, which are always internalized
+** equality for short strings, which are always internalized
 */
 */
-#define luaS_eqstr(a,b)		((a) == (b))
+#define eqshrstr(a,b)	check_exp((a)->tsv.tt == LUA_TSHRSTR, (a) == (b))
 
 
 
 
+LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l);
+LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
+LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
 LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
 LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);

+ 15 - 6
ltable.c

@@ -50,7 +50,7 @@
 
 
 #define hashpow2(t,n)      (gnode(t, lmod((n), sizenode(t))))
 #define hashpow2(t,n)      (gnode(t, lmod((n), sizenode(t))))
 
 
-#define hashstr(t,str)  hashpow2(t, (str)->tsv.hash)
+#define hashstr(t,str)		hashpow2(t, (str)->tsv.hash)
 #define hashboolean(t,p)        hashpow2(t, p)
 #define hashboolean(t,p)        hashpow2(t, p)
 
 
 
 
@@ -98,7 +98,15 @@ static Node *mainposition (const Table *t, const TValue *key) {
   switch (ttype(key)) {
   switch (ttype(key)) {
     case LUA_TNUMBER:
     case LUA_TNUMBER:
       return hashnum(t, nvalue(key));
       return hashnum(t, nvalue(key));
-    case LUA_TSTRING:
+    case LUA_TLNGSTR: {
+      TString *s = rawtsvalue(key);
+      if (s->tsv.extra == 0) {  /* no hash? */
+        s->tsv.hash = luaS_hash(getstr(s), s->tsv.len);
+        s->tsv.extra = 1;  /* now it has its hash */
+      }
+      return hashstr(t, rawtsvalue(key));
+    }
+    case LUA_TSHRSTR:
       return hashstr(t, rawtsvalue(key));
       return hashstr(t, rawtsvalue(key));
     case LUA_TBOOLEAN:
     case LUA_TBOOLEAN:
       return hashboolean(t, bvalue(key));
       return hashboolean(t, bvalue(key));
@@ -453,12 +461,13 @@ const TValue *luaH_getint (Table *t, int key) {
 
 
 
 
 /*
 /*
-** search function for strings
+** search function for short strings
 */
 */
 const TValue *luaH_getstr (Table *t, TString *key) {
 const TValue *luaH_getstr (Table *t, TString *key) {
   Node *n = hashstr(t, key);
   Node *n = hashstr(t, key);
+  lua_assert(key->tsv.tt == LUA_TSHRSTR);
   do {  /* check whether `key' is somewhere in the chain */
   do {  /* check whether `key' is somewhere in the chain */
-    if (ttisstring(gkey(n)) && luaS_eqstr(rawtsvalue(gkey(n)), key))
+    if (ttisshrstring(gkey(n)) && eqshrstr(rawtsvalue(gkey(n)), key))
       return gval(n);  /* that's it */
       return gval(n);  /* that's it */
     else n = gnext(n);
     else n = gnext(n);
   } while (n);
   } while (n);
@@ -470,9 +479,9 @@ const TValue *luaH_getstr (Table *t, TString *key) {
 ** main search function
 ** main search function
 */
 */
 const TValue *luaH_get (Table *t, const TValue *key) {
 const TValue *luaH_get (Table *t, const TValue *key) {
-  switch (ttypenv(key)) {
+  switch (ttype(key)) {
     case LUA_TNIL: return luaO_nilobject;
     case LUA_TNIL: return luaO_nilobject;
-    case LUA_TSTRING: return luaH_getstr(t, rawtsvalue(key));
+    case LUA_TSHRSTR: return luaH_getstr(t, rawtsvalue(key));
     case LUA_TNUMBER: {
     case LUA_TNUMBER: {
       int k;
       int k;
       lua_Number n = nvalue(key);
       lua_Number n = nvalue(key);

+ 3 - 2
ltests.c

@@ -1,5 +1,5 @@
 /*
 /*
-** $Id: ltests.c,v 2.124 2011/11/09 19:08:07 roberto Exp roberto $
+** $Id: ltests.c,v 2.125 2012/01/20 22:05:50 roberto Exp roberto $
 ** Internal Module for Debugging of the Lua Implementation
 ** Internal Module for Debugging of the Lua Implementation
 ** See Copyright Notice in lua.h
 ** See Copyright Notice in lua.h
 */
 */
@@ -360,7 +360,8 @@ static void checkobject (global_State *g, GCObject *o) {
         checkproto(g, gco2p(o));
         checkproto(g, gco2p(o));
         break;
         break;
       }
       }
-      case LUA_TSTRING: break;
+      case LUA_TSHRSTR:
+      case LUA_TLNGSTR: break;
       default: lua_assert(0);
       default: lua_assert(0);
     }
     }
   }
   }

+ 2 - 1
lvm.c

@@ -258,7 +258,8 @@ int luaV_equalobj_ (lua_State *L, const TValue *t1, const TValue *t2) {
     case LUA_TBOOLEAN: return bvalue(t1) == bvalue(t2);  /* true must be 1 !! */
     case LUA_TBOOLEAN: return bvalue(t1) == bvalue(t2);  /* true must be 1 !! */
     case LUA_TLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
     case LUA_TLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
     case LUA_TLCF: return fvalue(t1) == fvalue(t2);
     case LUA_TLCF: return fvalue(t1) == fvalue(t2);
-    case LUA_TSTRING: return luaS_eqstr(rawtsvalue(t1), rawtsvalue(t2));
+    case LUA_TSHRSTR: return eqshrstr(rawtsvalue(t1), rawtsvalue(t2));
+    case LUA_TLNGSTR: return luaS_eqlngstr(rawtsvalue(t1), rawtsvalue(t2));
     case LUA_TUSERDATA: {
     case LUA_TUSERDATA: {
       if (uvalue(t1) == uvalue(t2)) return 1;
       if (uvalue(t1) == uvalue(t2)) return 1;
       else if (L == NULL) return 0;
       else if (L == NULL) return 0;