Explorar o código

String buffers, part 4a: Add metatable serialization dictionary.

Sponsored by fmad.io.
Mike Pall hai 4 anos
pai
achega
15ed84bd49
Modificáronse 6 ficheiros con 116 adicións e 41 borrados
  1. 28 17
      doc/ext_buffer.html
  2. 12 5
      src/lib_buffer.c
  3. 2 1
      src/lj_buf.h
  4. 4 2
      src/lj_gc.c
  5. 68 15
      src/lj_serialize.c
  6. 2 1
      src/lj_serialize.h

+ 28 - 17
doc/ext_buffer.html

@@ -127,7 +127,7 @@ space.
 <p>
 Buffers operate like a FIFO (first-in first-out) data structure. Data
 can be appended (written) to the end of the buffer and consumed (read)
-from the front of the buffer. These operations can be freely mixed.
+from the front of the buffer. These operations may be freely mixed.
 </p>
 <p>
 The buffer space that holds the characters is managed automatically
@@ -199,7 +199,7 @@ may be reused.
 <h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
 <p>
 The buffer space of the buffer object is freed. The object itself
-remains intact, empty and it may be reused.
+remains intact, empty and may be reused.
 </p>
 <p>
 Note: you normally don't need to use this method. The garbage collector
@@ -404,8 +404,8 @@ speed is mostly constrained by object creation cost.
 </p>
 <p>
 The serializer handles most Lua types, common FFI number types and
-nested structures. Functions, thread objects, other FFI cdata, full
-userdata and associated metatables cannot be serialized (yet).
+nested structures. Functions, thread objects, other FFI cdata and full
+userdata cannot be serialized (yet).
 </p>
 <p>
 The encoder serializes nested structures as trees. Multiple references
@@ -461,21 +461,31 @@ commonly occur as table keys of objects you are serializing. These keys
 are compactly encoded as indexes during serialization. A well chosen
 dictionary saves space and improves serialization performance.
 </li>
+<li>
+<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
+for the table objects you are serializing.
+</li>
 </ul>
 <p>
-<tt>dict</tt> needs to be an array of strings, starting at index 1 and
-without holes (no <tt>nil</tt> inbetween). The table is anchored in the
-buffer object and internally modified into a two-way index (don't do
-this yourself, just pass a plain array). The table must not be modified
-after it has been passed to <tt>buffer.new()</tt>.
+<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
+to be an array of tables. Both starting at index 1 and without holes (no
+<tt>nil</tt> inbetween). The tables are anchored in the buffer object and
+internally modified into a two-way index (don't do this yourself, just pass
+a plain array). The tables must not be modified after they have been passed
+to <tt>buffer.new()</tt>.
+</p>
+<p>
+The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
+decoder must be the same. Put the most common entries at the front. Extend
+at the end to ensure backwards-compatibility &mdash; older encodings can
+then still be read. You may also set some indexes to <tt>false</tt> to
+explicitly drop backwards-compatibility. Old encodings that use these
+indexes will throw an error when decoded.
 </p>
 <p>
-The <tt>dict</tt> tables used by the encoder and decoder must be the
-same. Put the most common entries at the front. Extend at the end to
-ensure backwards-compatibility &mdash; older encodings can then still be
-read. You may also set some indexes to <tt>false</tt> to explicitly drop
-backwards-compatibility. Old encodings that use these indexes will throw
-an error when decoded.
+Metatables that are not found in the <tt>metatable</tt> dictionary are
+ignored when encoding. Decoding returns a table with a <tt>nil</tt>
+metatable.
 </p>
 <p>
 Note: parsing and preparation of the options table is somewhat
@@ -564,7 +574,7 @@ suffix.
 <pre>
 object    → nil | false | true
           | null | lightud32 | lightud64
-          | int | num | tab
+          | int | num | tab | tab_mt
           | int64 | uint64 | complex
           | string
 
@@ -585,13 +595,14 @@ tab       → 0x08                                   // Empty table
           | 0x0b a.U a*object h.U h*{object object}      // Mixed
           | 0x0c a.U (a-1)*object                // 1-based array
           | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
+tab_mt    → 0x0e (index-1).U tab          // Metatable dict entry
 
 int64     → 0x10 int.L                             // FFI int64_t
 uint64    → 0x11 uint.L                           // FFI uint64_t
 complex   → 0x12 re.L im.L                         // FFI complex
 
 string    → (0x20+len).U len*char.B
-          | 0x0f (index-1).U                        // Dict entry
+          | 0x0f (index-1).U                 // String dict entry
 
 .B = 8 bit
 .I = 32 bit little-endian

+ 12 - 5
src/lib_buffer.c

@@ -288,7 +288,7 @@ LJLIB_CF(buffer_new)
 {
   MSize sz = 0;
   int targ = 1;
-  GCtab *env, *dict = NULL;
+  GCtab *env, *dict_str = NULL, *dict_mt = NULL;
   GCudata *ud;
   SBufExt *sbx;
   if (L->base < L->top && !tvistab(L->base)) {
@@ -298,10 +298,16 @@ LJLIB_CF(buffer_new)
   }
   if (L->base+targ-1 < L->top) {
     GCtab *options = lj_lib_checktab(L, targ);
-    cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
+    cTValue *opt_dict, *opt_mt;
+    opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
     if (opt_dict && tvistab(opt_dict)) {
-      dict = tabV(opt_dict);
-      lj_serialize_dict_prep(L, dict);
+      dict_str = tabV(opt_dict);
+      lj_serialize_dict_prep_str(L, dict_str);
+    }
+    opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
+    if (opt_mt && tvistab(opt_mt)) {
+      dict_mt = tabV(opt_mt);
+      lj_serialize_dict_prep_mt(L, dict_mt);
     }
   }
   env = tabref(curr_func(L)->c.env);
@@ -312,7 +318,8 @@ LJLIB_CF(buffer_new)
   setudataV(L, L->top++, ud);
   sbx = (SBufExt *)uddata(ud);
   lj_bufx_init(L, sbx);
-  setgcref(sbx->dict, obj2gco(dict));
+  setgcref(sbx->dict_str, obj2gco(dict_str));
+  setgcref(sbx->dict_mt, obj2gco(dict_mt));
   if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
   return 1;
 }

+ 2 - 1
src/lj_buf.h

@@ -27,7 +27,8 @@ typedef struct SBufExt {
     MRef bsb;		/* Borrowed string buffer. */
   };
   char *r;		/* Read pointer. */
-  GCRef dict;		/* Serialization string dictionary table. */
+  GCRef dict_str;	/* Serialization string dictionary table. */
+  GCRef dict_mt;	/* Serialization metatable dictionary table. */
   int depth;		/* Remaining recursion depth. */
 } SBufExt;
 

+ 4 - 2
src/lj_gc.c

@@ -69,8 +69,10 @@ static void gc_mark(global_State *g, GCobj *o)
       SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
       if (sbufiscow(sbx) && gcref(sbx->cowref))
 	gc_markobj(g, gcref(sbx->cowref));
-      if (gcref(sbx->dict))
-	gc_markobj(g, gcref(sbx->dict));
+      if (gcref(sbx->dict_str))
+	gc_markobj(g, gcref(sbx->dict_str));
+      if (gcref(sbx->dict_mt))
+	gc_markobj(g, gcref(sbx->dict_mt));
     }
   } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
     GCupval *uv = gco2uv(o);

+ 68 - 15
src/lj_serialize.c

@@ -34,8 +34,8 @@ enum {
   SER_TAG_INT,
   SER_TAG_NUM,
   SER_TAG_TAB,		/* 0x08 */
-  SER_TAG_0x0e = SER_TAG_TAB+6,
-  SER_TAG_DICT,
+  SER_TAG_DICT_MT = SER_TAG_TAB+6,
+  SER_TAG_DICT_STR,
   SER_TAG_INT64,	/* 0x10 */
   SER_TAG_UINT64,
   SER_TAG_COMPLEX,
@@ -124,7 +124,7 @@ static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
 }
 
 /* Prepare string dictionary for use (once). */
-void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
+void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
 {
   if (!dict->hmask) {  /* No hash part means not prepared, yet. */
     MSize i, len = lj_tab_len(dict);
@@ -143,6 +143,26 @@ void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
   }
 }
 
+/* Prepare metatable dictionary for use (once). */
+void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
+{
+  if (!dict->hmask) {  /* No hash part means not prepared, yet. */
+    MSize i, len = lj_tab_len(dict);
+    if (!len) return;
+    lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
+    for (i = 1; i <= len && i < dict->asize; i++) {
+      cTValue *o = arrayslot(dict, i);
+      if (tvistab(o)) {
+	if (tvisnil(lj_tab_get(L, dict, o))) {  /* Ignore dups. */
+	  lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
+	}
+      } else if (!tvisfalse(o)) {
+	lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+      }
+    }
+  }
+}
+
 /* -- Internal serializer ------------------------------------------------- */
 
 /* Put serialized object into buffer. */
@@ -185,6 +205,22 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
       for (i = 0; i <= hmask; i++)
 	nhash += !tvisnil(&node[i].val);
     }
+    /* Write metatable index. */
+    if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
+      TValue mto;
+      Node *n;
+      settabV(sbufL(sbx), &mto, tabref(t->metatable));
+      n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
+      do {
+	if (n->key.u64 == mto.u64) {
+	  uint32_t idx = n->val.u32.lo;
+	  w = serialize_more(w, sbx, 1+5);
+	  *w++ = SER_TAG_DICT_MT;
+	  w = serialize_wu124(w, idx);
+	  break;
+	}
+      } while ((n = nextnode(n)));
+    }
     /* Write number of array slots and hash slots. */
     w = serialize_more(w, sbx, 1+2*5);
     *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
@@ -197,19 +233,19 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
     }
     if (nhash) {  /* Write hash entries. */
       const Node *node = noderef(t->node) + t->hmask;
-      GCtab *dict = tabref(sbx->dict);
-      if (LJ_UNLIKELY(dict)) {
+      GCtab *dict_str = tabref(sbx->dict_str);
+      if (LJ_UNLIKELY(dict_str)) {
 	for (;; node--)
 	  if (!tvisnil(&node->val)) {
 	    if (LJ_LIKELY(tvisstr(&node->key))) {
 	      /* Inlined lj_tab_getstr is 30% faster. */
 	      const GCstr *str = strV(&node->key);
-	      Node *n = hashstr(dict, str);
+	      Node *n = hashstr(dict_str, str);
 	      do {
 		if (tvisstr(&n->key) && strV(&n->key) == str) {
 		  uint32_t idx = n->val.u32.lo;
 		  w = serialize_more(w, sbx, 1+5);
-		  *w++ = SER_TAG_DICT;
+		  *w++ = SER_TAG_DICT_STR;
 		  w = serialize_wu124(w, idx);
 		  break;
 		}
@@ -322,19 +358,32 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
     if (!tvisnum(o)) setnanV(o);
   } else if (tp <= SER_TAG_TRUE) {
     setpriV(o, ~tp);
-  } else if (tp == SER_TAG_DICT) {
-    GCtab *dict;
+  } else if (tp == SER_TAG_DICT_STR) {
+    GCtab *dict_str;
     uint32_t idx;
     r = serialize_ru124(r, w, &idx);
     idx++;
-    dict = tabref(sbx->dict);
-    if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx)))
-      copyTV(sbufL(sbx), o, arrayslot(dict, idx));
+    dict_str = tabref(sbx->dict_str);
+    if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
+      copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
     else
       lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
-  } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
+  } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
     uint32_t narray = 0, nhash = 0;
-    GCtab *t;
+    GCtab *t, *mt = NULL;
+    if (tp == SER_TAG_DICT_MT) {
+      GCtab *dict_mt;
+      uint32_t idx;
+      r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+      idx++;
+      dict_mt = tabref(sbx->dict_mt);
+      if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
+	mt = tabV(arrayslot(dict_mt, idx));
+      else
+	lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+      r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+      if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
+    }
     if (tp >= SER_TAG_TAB+2) {
       r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
     }
@@ -342,6 +391,8 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
       r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
     }
     t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
+    /* NOBARRIER: The table is new (marked white). */
+    setgcref(t->metatable, obj2gco(mt));
     settabV(sbufL(sbx), o, t);
     if (narray) {
       TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
@@ -395,6 +446,7 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
     setrawlightudV(o, (void *)ud);
 #endif
   } else {
+badtag:
     lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
   }
   return r;
@@ -460,10 +512,11 @@ LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
     case SER_TAG_NUM: return IRT_NUM;
     case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
     case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
+    case SER_TAG_DICT_MT:
       return IRT_TAB;
     case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
       return IRT_CDATA;
-    case SER_TAG_DICT:
+    case SER_TAG_DICT_STR:
     default:
       return IRT_STR;
     }

+ 2 - 1
src/lj_serialize.h

@@ -13,7 +13,8 @@
 
 #define LJ_SERIALIZE_DEPTH	100	/* Default depth. */
 
-LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
 LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
 LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
 LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);