Sfoglia il codice sorgente

String buffers, part 1: object serialization.

Sponsored by fmad.io.
Mike Pall 4 anni fa
parent
commit
4c6b669c41

+ 2 - 0
doc/contact.html

@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 275 - 0
doc/ext_buffer.html

@@ -0,0 +1,275 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>String Buffers</title>
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>String Buffers</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a class="current" href="ext_buffer.html">String Buffers</a>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+
+The string buffer library allows <b>high-performance manipulation of
+string-like data</b>.
+
+</p>
+<p>
+
+Unlike Lua strings, which are constants, string buffers are
+<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
+can be stored, formatted and encoded into a string buffer and later
+converted, decoded or extracted.
+
+</p>
+<p>
+
+The convenient string buffer API simplifies common string manipulation
+tasks, that would otherwise require creating many intermediate strings.
+String buffers improve performance by eliminating redundant memory
+copies, object creation, string interning and garbage collection
+overhead. In conjunction with the FFI library, they allow zero-copy
+operations.
+
+</p>
+
+<h2 id="load">Using the String Buffer Library</h2>
+<p>
+The string buffer library is built into LuaJIT by default, but it's not
+loaded by default. Add this to the start of every Lua file that needs
+one of its functions:
+</p>
+<pre class="code">
+local buffer = require("string.buffer")
+</pre>
+
+<h2 id="wip" style="color:#ff0000">Work in Progress</h2>
+
+<p>
+
+<b style="color:#ff0000">This library is a work in progress. More
+functions will be added soon.</b>
+
+</p>
+
+<h2 id="serialize">Serialization of Lua Objects</h2>
+<p>
+
+The following functions and methods allow <b>high-speed serialization</b>
+(encoding) of a Lua object into a string and decoding it back to a Lua
+object. This allows convenient storage and transport of <b>structured
+data</b>.
+
+</p>
+<p>
+
+The encoded data is in an <a href="#serialize_format">internal binary
+format</a>. The data can be stored in files, binary-transparent
+databases or transmitted to other LuaJIT instances across threads,
+processes or networks.
+
+</p>
+<p>
+
+Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
+server-class system, even when serializing many small objects. Decoding
+speed is mostly constrained by object creation cost.
+
+</p>
+<p>
+
+The serializer handles most Lua types, common FFI number types and
+nested structures. Functions, thread objects, other FFI cdata, full
+userdata and associated metatables cannot be serialized (yet).
+
+</p>
+<p>
+
+The encoder serializes nested structures as trees. Multiple references
+to a single object will be stored separately and create distinct objects
+after decoding. Circular references cause an error.
+
+
+</p>
+
+<h3 id="buffer_encode"><tt>str = buffer.encode(obj)</tt></h3>
+<p>
+
+Serializes (encodes) the Lua object <tt>obj</tt> into the string
+<tt>str</tt>.
+
+</p>
+<p>
+
+<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
+need to be a Lua table.
+
+</p>
+<p>
+
+This function may throw an error when attempting to serialize
+unsupported object types, circular references or deeply nested tables.
+
+</p>
+
+<h3 id="buffer_decode"><tt>obj = buffer.decode(str)</tt></h3>
+<p>
+
+De-serializes (decodes) the string <tt>str</tt> into the Lua object
+<tt>obj</tt>.
+
+</p>
+<p>
+
+The returned object may be any of the supported Lua types &mdash;
+even <tt>nil</tt>.
+
+</p>
+<p>
+
+This function may throw an error when fed with malformed or incomplete
+encoded data. The standalone function throws when there's left-over data
+after decoding a single top-level object.
+
+</p>
+
+<h2 id="serialize_format">Serialization Format Specification</h2>
+<p>
+
+This serialization format is designed for <b>internal use</b> by LuaJIT
+applications. Serialized data is upwards-compatible and portable across
+all supported LuaJIT platforms.
+
+</p>
+<p>
+
+It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
+embedded zeroes and stores embedded Lua string objects unmodified, which
+are 8-bit-clean, too. Encoded data can be safely concatenated for
+streaming and later decoded one top-level object at a time.
+
+</p>
+<p>
+
+The encoding is reasonably compact, but tuned for maximum performance,
+not for minimum space usage. It compresses well with any of the common
+byte-oriented data compression algorithms.
+
+</p>
+<p>
+
+Although documented here for reference, this format is explicitly
+<b>not</b> intended to be a 'public standard' for structured data
+interchange across computer languages (like JSON or MessagePack). Please
+do not use it as such.
+
+</p>
+<p>
+
+The specification is given below as a context-free grammar with a
+top-level <tt>object</tt> as the starting point. Alternatives are
+separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
+Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
+either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
+suffix.
+
+</p>
+<pre>
+object    → nil | false | true
+          | null | lightud32 | lightud64
+          | int | num | tab
+          | int64 | uint64 | complex
+          | string
+
+nil       → 0x00
+false     → 0x01
+true      → 0x02
+
+null      → 0x03                            // NULL lightuserdata
+lightud32 → 0x04 data.I                   // 32 bit lightuserdata
+lightud64 → 0x05 data.L                   // 64 bit lightuserdata
+
+int       → 0x06 int.I                                 // int32_t
+num       → 0x07 double.L
+
+tab       → 0x08                                   // Empty table
+          | 0x09 h.U h*{object object}          // Key/value hash
+          | 0x0a a.U a*object                    // 0-based array
+          | 0x0b a.U a*object h.U h*{object object}      // Mixed
+          | 0x0c a.U (a-1)*object                // 1-based array
+          | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
+
+int64     → 0x10 int.L                             // FFI int64_t
+uint64    → 0x11 uint.L                           // FFI uint64_t
+complex   → 0x12 re.L im.L                         // FFI complex
+
+string    → (0x20+len).U len*char.B
+
+.B = 8 bit
+.I = 32 bit little-endian
+.L = 64 bit little-endian
+.U = prefix-encoded 32 bit unsigned number n:
+     0x00..0xdf   → n.B
+     0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
+   0x1fe0..       → 0xff n.I
+</pre>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2021
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 2 - 0
doc/ext_c_api.html

@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_ffi.html

@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_ffi_api.html

@@ -42,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_ffi_semantics.html

@@ -42,6 +42,8 @@ td.convop { font-style: italic; width: 40%; }
 <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_ffi_tutorial.html

@@ -44,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_jit.html

@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/ext_profiler.html

@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/extensions.html

@@ -54,6 +54,8 @@ td.excinterop {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/faq.html

@@ -40,6 +40,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/install.html

@@ -65,6 +65,8 @@ td.compatno {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/luajit.html

@@ -122,6 +122,8 @@ table.feature small {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/running.html

@@ -59,6 +59,8 @@ td.param_default {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 2 - 0
doc/status.html

@@ -40,6 +40,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
src/Makefile

@@ -482,13 +482,15 @@ LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 
 LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
-	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
+	 lib_buffer.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
-	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
+	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
+	  lj_api.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \

+ 21 - 15
src/Makefile.dep

@@ -10,6 +10,9 @@ lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
  lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
  lj_ffdef.h lj_lib.h lj_libdef.h
+lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_buf.h lj_str.h lj_serialize.h lj_lib.h \
+ lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
@@ -170,15 +173,18 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
  lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
  lj_vm.h lj_vmevent.h
+lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
  lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
-lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
+lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_udata.h lj_ctype.h lj_cdata.h lj_serialize.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
@@ -189,7 +195,7 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
  lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
 lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
 lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
@@ -204,7 +210,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
  lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
 lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_udata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
 lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
  lj_vm.h lj_vmevent.h
@@ -216,23 +222,23 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
  lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
  lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
- lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
- lj_prng.c lj_prng.h lj_state.c lj_lex.h lj_alloc.h luajit.h \
+ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
+ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
  lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
- lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c \
- lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \
- lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \
- lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
- lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
- lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
+ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
  lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lib_buffer.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

+ 66 - 0
src/lib_buffer.c

@@ -0,0 +1,66 @@
+/*
+** Buffer library.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_buffer_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_gc.h"
+#include "lj_buf.h"
+#include "lj_serialize.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_buffer
+
+/* Note: this uses interim structs until the SBuf reorg. */
+
+LJLIB_CF(buffer_encode)
+{
+  cTValue *o = lj_lib_checkany(L, 1);
+  StrBuf sbuf;
+  sbuf.sb = lj_buf_tmp_(L);
+  lj_serialize_put(&sbuf, o);
+  setstrV(L, L->top++, lj_buf_str(L, sbuf.sb));
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_decode)
+{
+  GCstr *str = lj_lib_checkstr(L, 1);
+  const char *p = strdata(str);
+  SBuf sb;
+  StrBuf sbuf;
+  setsbufL(&sb, L);
+  setmref(sb.b, p);
+  setmref(sb.p, p + str->len);
+  setmref(sb.e, p + str->len);
+  sbuf.sb = &sb;
+  sbuf.r = (char *)p;
+  setnilV(L->top++);
+  lj_serialize_get(&sbuf, L->top-1);
+  lj_gc_check(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+int luaopen_string_buffer(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, buffer);
+  return 1;
+}
+
+#endif

+ 3 - 0
src/lib_string.c

@@ -743,6 +743,9 @@ LUALIB_API int luaopen_string(lua_State *L)
   setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
   settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
   mt->nomm = (uint8_t)(~(1u<<MM_index));
+#if LJ_HASBUFFER
+  lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
+#endif
   return 1;
 }
 

+ 7 - 0
src/lj_arch.h

@@ -549,6 +549,13 @@
 #define LJ_HASFFI		1
 #endif
 
+/* Disable or enable the string buffer extension. */
+#if defined(LUAJIT_DISABLE_BUFFER)
+#define LJ_HASBUFFER		0
+#else
+#define LJ_HASBUFFER		1
+#endif
+
 #if defined(LUAJIT_DISABLE_PROFILE)
 #define LJ_HASPROFILE		0
 #elif LJ_TARGET_POSIX

+ 8 - 1
src/lj_buf.h

@@ -10,7 +10,7 @@
 #include "lj_gc.h"
 #include "lj_str.h"
 
-/* Resizable string buffers. Struct definition in lj_obj.h. */
+/* Resizable string buffers. SBuf struct definition in lj_obj.h. */
 #define sbufB(sb)	(mref((sb)->b, char))
 #define sbufP(sb)	(mref((sb)->p, char))
 #define sbufE(sb)	(mref((sb)->e, char))
@@ -100,4 +100,11 @@ static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
   return lj_str_new(L, sbufB(sb), sbuflen(sb));
 }
 
+/* Interim user-accessible string buffer. */
+typedef struct StrBuf {
+  SBuf *sb;		/* Pointer to system buffer. */
+  char *r;		/* String buffer read pointer. */
+  int depth;		/* Remaining recursion depth. */
+} StrBuf;
+
 #endif

+ 10 - 0
src/lj_errmsg.h

@@ -179,6 +179,16 @@ ERRDEF(FFI_NYIPACKBIT,	"NYI: packed bit fields")
 ERRDEF(FFI_NYICALL,	"NYI: cannot call this C function (yet)")
 #endif
 
+#if LJ_HASBUFFER
+/* String buffer errors. */
+ERRDEF(BUFFER_BADENC,	"cannot serialize " LUA_QS)
+ERRDEF(BUFFER_BADDEC,	"cannot deserialize tag 0x%02x")
+ERRDEF(BUFFER_DEPTH,	"too deep to serialize")
+ERRDEF(BUFFER_DUPKEY,	"duplicate table key")
+ERRDEF(BUFFER_EOB,	"unexpected end of buffer")
+ERRDEF(BUFFER_LEFTOV,	"left-over data in buffer")
+#endif
+
 #undef ERRDEF
 
 /* Detecting unused error messages:

+ 351 - 0
src/lj_serialize.c

@@ -0,0 +1,351 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_serialize_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#endif
+#include "lj_serialize.h"
+
+/* Tags for internal serialization format. */
+enum {
+  SER_TAG_NIL,		/* 0x00 */
+  SER_TAG_FALSE,
+  SER_TAG_TRUE,
+  SER_TAG_NULL,
+  SER_TAG_LIGHTUD32,
+  SER_TAG_LIGHTUD64,
+  SER_TAG_INT,
+  SER_TAG_NUM,
+  SER_TAG_TAB,		/* 0x08 */
+  SER_TAG_0x0e = SER_TAG_TAB+6,
+  SER_TAG_0x0f,
+  SER_TAG_INT64,	/* 0x10 */
+  SER_TAG_UINT64,
+  SER_TAG_COMPLEX,
+  SER_TAG_0x13,
+  SER_TAG_0x14,
+  SER_TAG_0x15,
+  SER_TAG_0x16,
+  SER_TAG_0x17,
+  SER_TAG_0x18,		/* 0x18 */
+  SER_TAG_0x19,
+  SER_TAG_0x1a,
+  SER_TAG_0x1b,
+  SER_TAG_0x1c,
+  SER_TAG_0x1d,
+  SER_TAG_0x1e,
+  SER_TAG_0x1f,
+  SER_TAG_STR,		/* 0x20 + str->len */
+};
+LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
+
+/* -- Helper functions ---------------------------------------------------- */
+
+static LJ_AINLINE char *serialize_more(char *w, StrBuf *sbuf, MSize sz)
+{
+  if (LJ_UNLIKELY(sz > (MSize)(sbufE(sbuf->sb) - w))) {
+    setsbufP(sbuf->sb, w);
+    w = lj_buf_more2(sbuf->sb, sz);
+  }
+  return w;
+}
+
+/* Write U124 to buffer. */
+static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
+{
+  if (v < 0x1fe0) {
+    v -= 0xe0;
+    *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
+  } else {
+    *w++ = (char)0xff;
+#if LJ_BE
+    v = lj_bswap(v);
+#endif
+    memcpy(w, &v, 4); w += 4;
+  }
+  return w;
+}
+
+static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
+{
+  if (LJ_LIKELY(v < 0xe0)) {
+    *w++ = (char)v;
+    return w;
+  } else {
+    return serialize_wu124_(w, v);
+  }
+}
+
+static LJ_NOINLINE char *serialize_ru124_(char *r, char *e, uint32_t *pv)
+{
+  uint32_t v = *pv;
+  if (v != 0xff) {
+    if (r >= e) return NULL;
+    v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
+  } else {
+    if (r + 4 > e) return NULL;
+    v = lj_getu32(r); r += 4;
+#if LJ_BE
+    v = lj_bswap(v);
+#endif
+  }
+  *pv = v;
+  return r;
+}
+
+static LJ_AINLINE char *serialize_ru124(char *r, char *e, uint32_t *pv)
+{
+  if (LJ_LIKELY(r < e)) {
+    uint32_t v = *(uint8_t *)r; r++;
+    *pv = v;
+    if (LJ_UNLIKELY(v >= 0xe0)) {
+      r = serialize_ru124_(r, e, pv);
+    }
+    return r;
+  }
+  return NULL;
+}
+
+/* -- Internal serializer ------------------------------------------------- */
+
+/* Put serialized object into buffer. */
+static char *serialize_put(char *w, StrBuf *sbuf, cTValue *o)
+{
+  if (LJ_LIKELY(tvisstr(o))) {
+    const GCstr *str = strV(o);
+    MSize len = str->len;
+    w = serialize_more(w, sbuf, 5+len);
+    w = serialize_wu124(w, SER_TAG_STR + len);
+    w = lj_buf_wmem(w, strdata(str), len);
+  } else if (tvisint(o)) {
+    uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
+    w = serialize_more(w, sbuf, 1+4);
+    *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
+  } else if (tvisnum(o)) {
+    uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
+    w = serialize_more(w, sbuf, 1+sizeof(lua_Number));
+    *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
+  } else if (tvispri(o)) {
+    w = serialize_more(w, sbuf, 1);
+    *w++ = (char)(SER_TAG_NIL + ~itype(o));
+  } else if (tvistab(o)) {
+    const GCtab *t = tabV(o);
+    uint32_t narray = 0, nhash = 0, one = 2;
+    if (sbuf->depth <= 0) lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DEPTH);
+    sbuf->depth--;
+    if (t->asize > 0) {  /* Determine max. length of array part. */
+      ptrdiff_t i;
+      TValue *array = tvref(t->array);
+      for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
+	if (!tvisnil(&array[i]))
+	  break;
+      narray = (uint32_t)(i+1);
+      if (narray && tvisnil(&array[0])) one = 4;
+    }
+    if (t->hmask > 0) {  /* Count number of used hash slots. */
+      uint32_t i, hmask = t->hmask;
+      Node *node = noderef(t->node);
+      for (i = 0; i <= hmask; i++)
+	nhash += !tvisnil(&node[i].val);
+    }
+    /* Write number of array slots and hash slots. */
+    w = serialize_more(w, sbuf, 1+2*5);
+    *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
+    if (narray) w = serialize_wu124(w, narray);
+    if (nhash) w = serialize_wu124(w, nhash);
+    if (narray) {  /* Write array entries. */
+      cTValue *oa = tvref(t->array) + (one >> 2);
+      cTValue *oe = tvref(t->array) + narray;
+      while (oa < oe) w = serialize_put(w, sbuf, oa++);
+    }
+    if (nhash) {  /* Write hash entries. */
+      const Node *node = noderef(t->node) + t->hmask;
+      for (;; node--)
+	if (!tvisnil(&node->val)) {
+	  w = serialize_put(w, sbuf, &node->key);
+	  w = serialize_put(w, sbuf, &node->val);
+	  if (--nhash == 0) break;
+	}
+    }
+    sbuf->depth++;
+#if LJ_HASFFI
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(sbufL(sbuf->sb));
+    CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
+    uint8_t *sp = cdataptr(cdataV(o));
+    if (ctype_isinteger(s->info) && s->size == 8) {
+      w = serialize_more(w, sbuf, 1+8);
+      *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
+#if LJ_BE
+      { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
+#else
+      memcpy(w, sp, 8);
+#endif
+      w += 8;
+    } else if (ctype_iscomplex(s->info) && s->size == 16) {
+      w = serialize_more(w, sbuf, 1+16);
+      *w++ = SER_TAG_COMPLEX;
+#if LJ_BE
+      {  /* Only swap the doubles. The re/im order stays the same. */
+	uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
+	u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
+      }
+#else
+      memcpy(w, sp, 16);
+#endif
+      w += 16;
+    } else {
+      goto badenc;  /* NYI other cdata */
+    }
+#endif
+  } else if (tvislightud(o)) {
+    uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbuf->sb)), o);
+    w = serialize_more(w, sbuf, 1+sizeof(ud));
+    if (ud == 0) {
+      *w++ = SER_TAG_NULL;
+    } else if (LJ_32 || checku32(ud)) {
+#if LJ_BE && LJ_64
+      ud = lj_bswap64(ud);
+#elif LJ_BE
+      ud = lj_bswap(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
+    } else {
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
+    }
+  } else {
+    /* NYI userdata */
+#if LJ_HASFFI
+  badenc:
+#endif
+    lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADENC, lj_typename(o));
+  }
+  return w;
+}
+
+/* Get serialized object from buffer. */
+static char *serialize_get(char *r, StrBuf *sbuf, TValue *o)
+{
+  char *e = sbufE(sbuf->sb);
+  uint32_t tp;
+  r = serialize_ru124(r, e, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+  if (LJ_LIKELY(tp >= SER_TAG_STR)) {
+    uint32_t len = tp - SER_TAG_STR;
+    if (LJ_UNLIKELY(len > (uint32_t)(e - r))) goto eob;
+    setstrV(sbufL(sbuf->sb), o, lj_str_new(sbufL(sbuf->sb), r, len));
+    r += len;
+  } else if (tp == SER_TAG_INT) {
+    if (LJ_UNLIKELY(r + 4 > e)) goto eob;
+    setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
+    r += 4;
+  } else if (tp == SER_TAG_NUM) {
+    if (LJ_UNLIKELY(r + 8 > e)) goto eob;
+    memcpy(o, r, 8); r += 8;
+#if LJ_BE
+    o->u64 = lj_bswap64(o->u64);
+#endif
+    if (!tvisnum(o)) setnanV(o);
+  } else if (tp <= SER_TAG_TRUE) {
+    setpriV(o, ~tp);
+  } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
+    uint32_t narray = 0, nhash = 0;
+    GCtab *t;
+    if (tp >= SER_TAG_TAB+2) {
+      r = serialize_ru124(r, e, &narray); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    if ((tp & 1)) {
+      r = serialize_ru124(r, e, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    t = lj_tab_new(sbufL(sbuf->sb), narray, hsize2hbits(nhash));
+    settabV(sbufL(sbuf->sb), o, t);
+    if (narray) {
+      TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
+      TValue *oe = tvref(t->array) + narray;
+      while (oa < oe) r = serialize_get(r, sbuf, oa++);
+    }
+    if (nhash) {
+      do {
+	TValue k, *v;
+	r = serialize_get(r, sbuf, &k);
+	v = lj_tab_set(sbufL(sbuf->sb), t, &k);
+	if (LJ_UNLIKELY(!tvisnil(v)))
+	  lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DUPKEY);
+	r = serialize_get(r, sbuf, v);
+      } while (--nhash);
+    }
+#if LJ_HASFFI
+  } else if (tp >= SER_TAG_INT64 &&  tp <= SER_TAG_COMPLEX) {
+    uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
+    GCcdata *cd;
+    if (LJ_UNLIKELY(r + sz > e)) goto eob;
+    cd = lj_cdata_new_(sbufL(sbuf->sb),
+	   tp == SER_TAG_INT64 ? CTID_INT64 :
+	   tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
+	   sz);
+    memcpy(cdataptr(cd), r, sz); r += sz;
+#if LJ_BE
+    *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
+    if (sz == 16)
+      ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
+#endif
+    setcdataV(sbufL(sbuf->sb), o, cd);
+#endif
+  } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
+    uintptr_t ud = 0;
+    if (tp == SER_TAG_LIGHTUD32) {
+      if (LJ_UNLIKELY(r + 4 > e)) goto eob;
+      ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
+      r += 4;
+    }
+#if LJ_64
+    else if (tp == SER_TAG_LIGHTUD64) {
+      if (LJ_UNLIKELY(r + 8 > e)) goto eob;
+      memcpy(&ud, r, 8); r += 8;
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+    }
+    setrawlightudV(o, lj_lightud_intern(sbufL(sbuf->sb), (void *)ud));
+#else
+    setrawlightudV(o, (void *)ud);
+#endif
+  } else {
+    lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADDEC, tp);
+  }
+  return r;
+eob:
+  lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_EOB);
+  return NULL;
+}
+
+StrBuf * LJ_FASTCALL lj_serialize_put(StrBuf *sbuf, cTValue *o)
+{
+  sbuf->depth = LJ_SERIALIZE_DEPTH;
+  setsbufP(sbuf->sb, serialize_put(sbufP(sbuf->sb), sbuf, o));
+  return sbuf;
+}
+
+StrBuf * LJ_FASTCALL lj_serialize_get(StrBuf *sbuf, TValue *o)
+{
+  char *r = serialize_get(sbuf->r, sbuf, o);
+  if (r != sbufP(sbuf->sb))
+    lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_LEFTOV);
+  sbuf->r = r;
+  return sbuf;
+}
+

+ 21 - 0
src/lj_serialize.h

@@ -0,0 +1,21 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_SERIALIZE_H
+#define _LJ_SERIALIZE_H
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+
+#if LJ_HASBUFFER
+
+#define LJ_SERIALIZE_DEPTH	100	/* Default depth. */
+
+LJ_FUNC StrBuf * LJ_FASTCALL lj_serialize_put(StrBuf *sb, cTValue *o);
+LJ_FUNC StrBuf * LJ_FASTCALL lj_serialize_get(StrBuf *sb, TValue *o);
+
+#endif
+
+#endif

+ 2 - 0
src/ljamalg.c

@@ -39,6 +39,7 @@
 #include "lj_strscan.c"
 #include "lj_strfmt.c"
 #include "lj_strfmt_num.c"
+#include "lj_serialize.c"
 #include "lj_api.c"
 #include "lj_profile.c"
 #include "lj_lex.c"
@@ -85,5 +86,6 @@
 #include "lib_bit.c"
 #include "lib_jit.c"
 #include "lib_ffi.c"
+#include "lib_buffer.c"
 #include "lib_init.c"
 

+ 1 - 0
src/lualib.h

@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L);
 LUALIB_API int luaopen_bit(lua_State *L);
 LUALIB_API int luaopen_jit(lua_State *L);
 LUALIB_API int luaopen_ffi(lua_State *L);
+LUALIB_API int luaopen_string_buffer(lua_State *L);
 
 LUALIB_API void luaL_openlibs(lua_State *L);