Переглянути джерело

Merge branch 'main' into 12.x

Alex Szpakowski 4 роки тому
батько
коміт
d1c1d96165
100 змінених файлів з 4713 додано та 1185 видалено
  1. 2 2
      CMakeLists.txt
  2. 4 2
      libs/LuaJIT/doc/contact.html
  3. 693 0
      libs/LuaJIT/doc/ext_buffer.html
  4. 4 2
      libs/LuaJIT/doc/ext_c_api.html
  5. 4 2
      libs/LuaJIT/doc/ext_ffi.html
  6. 4 2
      libs/LuaJIT/doc/ext_ffi_api.html
  7. 4 2
      libs/LuaJIT/doc/ext_ffi_semantics.html
  8. 4 2
      libs/LuaJIT/doc/ext_ffi_tutorial.html
  9. 4 2
      libs/LuaJIT/doc/ext_jit.html
  10. 4 2
      libs/LuaJIT/doc/ext_profiler.html
  11. 13 19
      libs/LuaJIT/doc/extensions.html
  12. 19 5
      libs/LuaJIT/doc/faq.html
  13. 18 10
      libs/LuaJIT/doc/install.html
  14. 4 2
      libs/LuaJIT/doc/luajit.html
  15. 6 4
      libs/LuaJIT/doc/running.html
  16. 4 8
      libs/LuaJIT/doc/status.html
  17. 5 2
      libs/LuaJIT/dynasm/dasm_arm.h
  18. 3 3
      libs/LuaJIT/dynasm/dasm_arm.lua
  19. 50 8
      libs/LuaJIT/dynasm/dasm_arm64.h
  20. 94 41
      libs/LuaJIT/dynasm/dasm_arm64.lua
  21. 5 2
      libs/LuaJIT/dynasm/dasm_mips.h
  22. 3 3
      libs/LuaJIT/dynasm/dasm_mips.lua
  23. 5 2
      libs/LuaJIT/dynasm/dasm_ppc.h
  24. 3 3
      libs/LuaJIT/dynasm/dasm_ppc.lua
  25. 2 2
      libs/LuaJIT/dynasm/dasm_proto.h
  26. 20 4
      libs/LuaJIT/dynasm/dasm_x86.h
  27. 39 11
      libs/LuaJIT/dynasm/dasm_x86.lua
  28. 3 3
      libs/LuaJIT/dynasm/dynasm.lua
  29. 14 7
      libs/LuaJIT/src/Makefile
  30. 41 33
      libs/LuaJIT/src/Makefile.dep
  31. 2 0
      libs/LuaJIT/src/host/buildvm_lib.c
  32. 1 1
      libs/LuaJIT/src/host/minilua.c
  33. 1 1
      libs/LuaJIT/src/jit/dis_arm64.lua
  34. 19 9
      libs/LuaJIT/src/jit/dump.lua
  35. 1 0
      libs/LuaJIT/src/jit/p.lua
  36. 18 4
      libs/LuaJIT/src/lib_base.c
  37. 356 0
      libs/LuaJIT/src/lib_buffer.c
  38. 1 0
      libs/LuaJIT/src/lib_ffi.c
  39. 19 13
      libs/LuaJIT/src/lib_io.c
  40. 1 5
      libs/LuaJIT/src/lib_jit.c
  41. 8 80
      libs/LuaJIT/src/lib_string.c
  42. 1 1
      libs/LuaJIT/src/lib_table.c
  43. 1 1
      libs/LuaJIT/src/lj_alloc.c
  44. 7 31
      libs/LuaJIT/src/lj_api.c
  45. 35 17
      libs/LuaJIT/src/lj_arch.h
  46. 154 62
      libs/LuaJIT/src/lj_asm.c
  47. 89 38
      libs/LuaJIT/src/lj_asm_arm.h
  48. 78 35
      libs/LuaJIT/src/lj_asm_arm64.h
  49. 141 91
      libs/LuaJIT/src/lj_asm_mips.h
  50. 90 40
      libs/LuaJIT/src/lj_asm_ppc.h
  51. 93 55
      libs/LuaJIT/src/lj_asm_x86.h
  52. 6 10
      libs/LuaJIT/src/lj_bcread.c
  53. 11 11
      libs/LuaJIT/src/lj_bcwrite.c
  54. 132 59
      libs/LuaJIT/src/lj_buf.c
  55. 114 19
      libs/LuaJIT/src/lj_buf.h
  56. 4 4
      libs/LuaJIT/src/lj_ccall.c
  57. 7 2
      libs/LuaJIT/src/lj_ccallback.c
  58. 5 0
      libs/LuaJIT/src/lj_cconv.c
  59. 4 4
      libs/LuaJIT/src/lj_cparse.c
  60. 49 5
      libs/LuaJIT/src/lj_crecord.c
  61. 5 0
      libs/LuaJIT/src/lj_crecord.h
  62. 1 1
      libs/LuaJIT/src/lj_ctype.c
  63. 11 0
      libs/LuaJIT/src/lj_ctype.h
  64. 1 1
      libs/LuaJIT/src/lj_debug.c
  65. 7 1
      libs/LuaJIT/src/lj_dispatch.c
  66. 2 2
      libs/LuaJIT/src/lj_dispatch.h
  67. 34 32
      libs/LuaJIT/src/lj_emit_arm64.h
  68. 1 1
      libs/LuaJIT/src/lj_emit_mips.h
  69. 408 164
      libs/LuaJIT/src/lj_err.c
  70. 18 1
      libs/LuaJIT/src/lj_err.h
  71. 14 0
      libs/LuaJIT/src/lj_errmsg.h
  72. 399 25
      libs/LuaJIT/src/lj_ffrecord.c
  73. 6 6
      libs/LuaJIT/src/lj_frame.h
  74. 9 0
      libs/LuaJIT/src/lj_gc.c
  75. 2 1
      libs/LuaJIT/src/lj_ir.c
  76. 27 11
      libs/LuaJIT/src/lj_ir.h
  77. 49 27
      libs/LuaJIT/src/lj_ircall.h
  78. 1 0
      libs/LuaJIT/src/lj_iropt.h
  79. 7 1
      libs/LuaJIT/src/lj_jit.h
  80. 6 10
      libs/LuaJIT/src/lj_lex.c
  81. 56 0
      libs/LuaJIT/src/lj_lib.c
  82. 6 0
      libs/LuaJIT/src/lj_lib.h
  83. 17 8
      libs/LuaJIT/src/lj_mcode.c
  84. 9 4
      libs/LuaJIT/src/lj_meta.c
  85. 11 5
      libs/LuaJIT/src/lj_obj.h
  86. 70 16
      libs/LuaJIT/src/lj_opt_fold.c
  87. 1 0
      libs/LuaJIT/src/lj_opt_loop.c
  88. 30 3
      libs/LuaJIT/src/lj_opt_mem.c
  89. 1 1
      libs/LuaJIT/src/lj_opt_split.c
  90. 3 3
      libs/LuaJIT/src/lj_parse.c
  91. 11 5
      libs/LuaJIT/src/lj_prng.c
  92. 2 3
      libs/LuaJIT/src/lj_profile.c
  93. 162 18
      libs/LuaJIT/src/lj_record.c
  94. 2 0
      libs/LuaJIT/src/lj_record.h
  95. 538 0
      libs/LuaJIT/src/lj_serialize.c
  96. 28 0
      libs/LuaJIT/src/lj_serialize.h
  97. 54 9
      libs/LuaJIT/src/lj_snap.c
  98. 1 0
      libs/LuaJIT/src/lj_state.c
  99. 172 38
      libs/LuaJIT/src/lj_strfmt.c
  100. 5 0
      libs/LuaJIT/src/lj_strfmt.h

+ 2 - 2
CMakeLists.txt

@@ -201,12 +201,12 @@ endif()
 
 set(MEGA_ZLIB_VER "1.2.11")
 set(MEGA_LUA51_VER "5.1.5")
-set(MEGA_LUAJIT_VER "2.1.0-ec6edc5")
+set(MEGA_LUAJIT_VER "2.1.0-f3c8569")
 set(MEGA_LIBOGG_VER "1.3.2")
 set(MEGA_LIBVORBIS_VER "1.3.5")
 set(MEGA_LIBTHEORA_VER "1.1.1")
 set(MEGA_FREETYPE_VER "2.8.1")
-set(MEGA_SDL2_VER "2.0.14")
+set(MEGA_SDL2_VER "2.0.18")
 set(MEGA_OPENAL_VER "1.21.1")
 set(MEGA_MODPLUG_VER "0.8.8.4")
 

+ 4 - 2
libs/LuaJIT/doc/contact.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Contact</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 693 - 0
libs/LuaJIT/doc/ext_buffer.html

@@ -0,0 +1,693 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>String Buffer Library</title>
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+.lib {
+  vertical-align: middle;
+  margin-left: 5px;
+  padding: 0 5px;
+  font-size: 60%;
+  border-radius: 5px;
+  background: #c5d5ff;
+  color: #000;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>String Buffer Library</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a class="current" href="ext_buffer.html">String Buffers</a>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+The string buffer library allows <b>high-performance manipulation of
+string-like data</b>.
+</p>
+<p>
+Unlike Lua strings, which are constants, string buffers are
+<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
+can be stored, formatted and encoded into a string buffer and later
+converted, extracted or decoded.
+</p>
+<p>
+The convenient string buffer API simplifies common string manipulation
+tasks that would otherwise require creating many intermediate strings.
+String buffers improve performance by eliminating redundant memory
+copies, object creation, string interning and garbage collection
+overhead. In conjunction with the FFI library, they allow zero-copy
+operations.
+</p>
+<p>
+The string buffer library also includes a high-performance
+<a href="#serialize">serializer</a> for Lua objects.
+</p>
+
+<h2 id="wip" style="color:#ff0000">Work in Progress</h2>
+<p>
+<b style="color:#ff0000">This library is a work in progress. More
+functionality will be added soon.</b>
+</p>
+
+<h2 id="use">Using the String Buffer Library</h2>
+<p>
+The string buffer library is built into LuaJIT by default, but it's not
+loaded by default. Add this to the start of every Lua file that needs
+one of its functions:
+</p>
+<pre class="code">
+local buffer = require("string.buffer")
+</pre>
+<p>
+The convention for the syntax shown on this page is that <tt>buffer</tt>
+refers to the buffer library and <tt>buf</tt> refers to an individual
+buffer object.
+</p>
+<p>
+Please note the difference between a Lua function call, e.g.
+<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
+<tt>buf:reset()</tt> (with a colon).
+</p>
+
+<h3 id="buffer_object">Buffer Objects</h3>
+<p>
+A buffer object is a garbage-collected Lua object. After creation with
+<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
+When the last reference to a buffer object is gone, it will eventually
+be freed by the garbage collector, along with the allocated buffer
+space.
+</p>
+<p>
+Buffers operate like a FIFO (first-in first-out) data structure. Data
+can be appended (written) to the end of the buffer and consumed (read)
+from the front of the buffer. These operations may be freely mixed.
+</p>
+<p>
+The buffer space that holds the characters is managed automatically
+&mdash; it grows as needed and already consumed space is recycled. Use
+<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more
+control.
+</p>
+<p>
+The maximum size of a single buffer is the same as the maximum size of a
+Lua string, which is slightly below two gigabytes. For huge data sizes,
+neither strings nor buffers are the right data structure &mdash; use the
+FFI library to directly map memory or files up to the virtual memory
+limit of your OS.
+</p>
+
+<h3 id="buffer_overview">Buffer Method Overview</h3>
+<ul>
+<li>
+The <tt>buf:put*()</tt>-like methods append (write) characters to the
+end of the buffer.
+</li>
+<li>
+The <tt>buf:get*()</tt>-like methods consume (read) characters from the
+front of the buffer.
+</li>
+<li>
+Other methods, like <tt>buf:tostring()</tt> only read the buffer
+contents, but don't change the buffer.
+</li>
+<li>
+The <tt>buf:set()</tt> method allows zero-copy consumption of a string
+or an FFI cdata object as a buffer.
+</li>
+<li>
+The FFI-specific methods allow zero-copy read/write-style operations or
+modifying the buffer contents in-place. Please check the
+<a href="#ffi_caveats">FFI caveats</a> below, too.
+</li>
+<li>
+Methods that don't need to return anything specific, return the buffer
+object itself as a convenience. This allows method chaining, e.g.:
+<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt>
+</li>
+</ul>
+
+<h2 id="create">Buffer Creation and Management</h2>
+
+<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br>
+local buf = buffer.new([options])</tt></h3>
+<p>
+Creates a new buffer object.
+</p>
+<p>
+The optional <tt>size</tt> argument ensures a minimum initial buffer
+size. This is strictly an optimization when the required buffer size is
+known beforehand. The buffer space will grow as needed, in any case.
+</p>
+<p>
+The optional table <tt>options</tt> sets various
+<a href="#serialize_options">serialization options</a>.
+</p>
+
+<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
+<p>
+Reset (empty) the buffer. The allocated buffer space is not freed and
+may be reused.
+</p>
+
+<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
+<p>
+The buffer space of the buffer object is freed. The object itself
+remains intact, empty and may be reused.
+</p>
+<p>
+Note: you normally don't need to use this method. The garbage collector
+automatically frees the buffer space, when the buffer object is
+collected. Use this method, if you need to free the associated memory
+immediately.
+</p>
+
+<h2 id="write">Buffer Writers</h2>
+
+<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3>
+<p>
+Appends a string <tt>str</tt>, a number <tt>num</tt> or any object
+<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer.
+Multiple arguments are appended in the given order.
+</p>
+<p>
+Appending a buffer to a buffer is possible and short-circuited
+internally. But it still involves a copy. Better combine the buffer
+writes to use a single buffer.
+</p>
+
+<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3>
+<p>
+Appends the formatted arguments to the buffer. The <tt>format</tt>
+string supports the same options as <tt>string.format()</tt>.
+</p>
+
+<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+Appends the given <tt>len</tt> number of bytes from the memory pointed
+to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
+be convertible to a (constant) pointer.
+</p>
+
+<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
+buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+This method allows zero-copy consumption of a string or an FFI cdata
+object as a buffer. It stores a reference to the passed string
+<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer
+space originally allocated is freed. This is <i>not</i> an append
+operation, unlike the <tt>buf:put*()</tt> methods.
+</p>
+<p>
+After calling this method, the buffer behaves as if
+<tt>buf:free():put(str)</tt> or <tt>buf:free():putcdata(cdata,&nbsp;len)</tt>
+had been called. However, the data is only referenced and not copied, as
+long as the buffer is only consumed.
+</p>
+<p>
+In case the buffer is written to later on, the referenced data is copied
+and the object reference is removed (copy-on-write semantics).
+</p>
+<p>
+The stored reference is an anchor for the garbage collector and keeps the
+originally passed string or FFI cdata object alive.
+</p>
+
+<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br>
+<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3>
+<p>
+The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of
+write space in the buffer. It returns an <tt>uint8_t&nbsp;*</tt> FFI
+cdata pointer <tt>ptr</tt> that points to this space.
+</p>
+<p>
+The available length in bytes is returned in <tt>len</tt>. This is at
+least <tt>size</tt> bytes, but may be more to facilitate efficient
+buffer growth. You can either make use of the additional space or ignore
+<tt>len</tt> and only use <tt>size</tt> bytes.
+</p>
+<p>
+The <tt>commit</tt> method appends the <tt>used</tt> bytes of the
+previously returned write space to the buffer data.
+</p>
+<p>
+This pair of methods allows zero-copy use of C read-style APIs:
+</p>
+<pre class="code">
+local MIN_SIZE = 65536
+repeat
+  local ptr, len = buf:reserve(MIN_SIZE)
+  local n = C.read(fd, ptr, len)
+  if n == 0 then break end -- EOF.
+  if n &lt; 0 then error("read error") end
+  buf:commit(n)
+until false
+</pre>
+<p>
+The reserved write space is <i>not</i> initialized. At least the
+<tt>used</tt> bytes <b>must</b> be written to before calling the
+<tt>commit</tt> method. There's no need to call the <tt>commit</tt>
+method, if nothing is added to the buffer (e.g. on error).
+</p>
+
+<h2 id="read">Buffer Readers</h2>
+
+<h3 id="buffer_length"><tt>len = #buf</tt></h3>
+<p>
+Returns the current length of the buffer data in bytes.
+</p>
+
+<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3>
+<p>
+The Lua concatenation operator <tt>..</tt> also accepts buffers, just
+like strings or numbers. It always returns a string and not a buffer.
+</p>
+<p>
+Note that although this is supported for convenience, this thwarts one
+of the main reasons to use buffers, which is to avoid string
+allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>.
+</p>
+<p>
+Mixing this with unrelated objects that have a <tt>__concat</tt>
+metamethod may not work, since these probably only expect strings.
+</p>
+
+<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3>
+<p>
+Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
+length of the buffer data.
+</p>
+
+<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3>
+<p>
+Consumes the buffer data and returns one or more strings. If called
+without arguments, the whole buffer data is consumed. If called with a
+number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument
+consumes the remaining buffer data (this only makes sense as the last
+argument). Multiple arguments consume the buffer data in the given
+order.
+</p>
+<p>
+Note: a zero length or no remaining buffer data returns an empty string
+and not <tt>nil</tt>.
+</p>
+
+<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
+str = tostring(buf)</tt></h3>
+<p>
+Creates a string from the buffer data, but doesn't consume it. The
+buffer remains unchanged.
+</p>
+<p>
+Buffer objects also define a <tt>__tostring</tt> metamethod. This means
+buffers can be passed to the global <tt>tostring()</tt> function and
+many other functions that accept this in place of strings. The important
+internal uses in functions like <tt>io.write()</tt> are short-circuited
+to avoid the creation of an intermediate string object.
+</p>
+
+<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3>
+<p>
+Returns an <tt>uint8_t&nbsp;*</tt> FFI cdata pointer <tt>ptr</tt> that
+points to the buffer data. The length of the buffer data in bytes is
+returned in <tt>len</tt>.
+</p>
+<p>
+The returned pointer can be directly passed to C functions that expect a
+buffer and a length. You can also do bytewise reads
+(<tt>local&nbsp;x&nbsp;=&nbsp;ptr[i]</tt>) or writes
+(<tt>ptr[i]&nbsp;=&nbsp;0x40</tt>) of the buffer data.
+</p>
+<p>
+In conjunction with the <tt>skip</tt> method, this allows zero-copy use
+of C write-style APIs:
+</p>
+<pre class="code">
+repeat
+  local ptr, len = buf:ref()
+  if len == 0 then break end
+  local n = C.write(fd, ptr, len)
+  if n &lt; 0 then error("write error") end
+  buf:skip(n)
+until n >= len
+</pre>
+<p>
+Unlike Lua strings, buffer data is <i>not</i> implicitly
+zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
+expect zero-terminated strings. If you're not using <tt>len</tt>, then
+you're doing something wrong.
+</p>
+
+<h2 id="serialize">Serialization of Lua Objects</h2>
+<p>
+The following functions and methods allow <b>high-speed serialization</b>
+(encoding) of a Lua object into a string and decoding it back to a Lua
+object. This allows convenient storage and transport of <b>structured
+data</b>.
+</p>
+<p>
+The encoded data is in an <a href="#serialize_format">internal binary
+format</a>. The data can be stored in files, binary-transparent
+databases or transmitted to other LuaJIT instances across threads,
+processes or networks.
+</p>
+<p>
+Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
+server-class system, even when serializing many small objects. Decoding
+speed is mostly constrained by object creation cost.
+</p>
+<p>
+The serializer handles most Lua types, common FFI number types and
+nested structures. Functions, thread objects, other FFI cdata and full
+userdata cannot be serialized (yet).
+</p>
+<p>
+The encoder serializes nested structures as trees. Multiple references
+to a single object will be stored separately and create distinct objects
+after decoding. Circular references cause an error.
+</p>
+
+<h3 id="serialize_methods">Serialization Functions and Methods</h3>
+
+<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
+buf = buf:encode(obj)</tt></h3>
+<p>
+Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
+function returns a string <tt>str</tt>. The buffer method appends the
+encoding to the buffer.
+</p>
+<p>
+<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
+need to be a Lua table.
+</p>
+<p>
+This function may throw an error when attempting to serialize
+unsupported object types, circular references or deeply nested tables.
+</p>
+
+<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
+obj = buf:decode()</tt></h3>
+<p>
+The stand-alone function de-serializes (decodes) the string
+<tt>str</tt>, the buffer method de-serializes one object from the
+buffer. Both return a Lua object <tt>obj</tt>.
+</p>
+<p>
+The returned object may be any of the supported Lua types &mdash;
+even <tt>nil</tt>.
+</p>
+<p>
+This function may throw an error when fed with malformed or incomplete
+encoded data. The stand-alone function throws when there's left-over
+data after decoding a single top-level object. The buffer method leaves
+any left-over data in the buffer.
+</p>
+
+<h3 id="serialize_options">Serialization Options</h3>
+<p>
+The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain
+the following members (all optional):
+</p>
+<ul>
+<li>
+<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that
+commonly occur as table keys of objects you are serializing. These keys
+are compactly encoded as indexes during serialization. A well chosen
+dictionary saves space and improves serialization performance.
+</li>
+<li>
+<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
+for the table objects you are serializing.
+</li>
+</ul>
+<p>
+<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
+to be an array of tables. Both must start at index 1 and have no holes (no
+<tt>nil</tt> in between). The tables are anchored in the buffer object and
+internally modified into a two-way index (don't do this yourself, just pass
+a plain array). The tables must not be modified after they have been passed
+to <tt>buffer.new()</tt>.
+</p>
+<p>
+The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
+decoder must be the same. Put the most common entries at the front. Extend
+at the end to ensure backwards-compatibility &mdash; older encodings can
+then still be read. You may also set some indexes to <tt>false</tt> to
+explicitly drop backwards-compatibility. Old encodings that use these
+indexes will throw an error when decoded.
+</p>
+<p>
+Metatables that are not found in the <tt>metatable</tt> dictionary are
+ignored when encoding. Decoding returns a table with a <tt>nil</tt>
+metatable.
+</p>
+<p>
+Note: parsing and preparation of the options table is somewhat
+expensive. Create a buffer object only once and recycle it for multiple
+uses. Avoid mixing encoder and decoder buffers, since the
+<tt>buf:set()</tt> method frees the already allocated buffer space:
+</p>
+<pre class="code">
+local options = {
+  dict = { "commonly", "used", "string", "keys" },
+}
+local buf_enc = buffer.new(options)
+local buf_dec = buffer.new(options)
+
+local function encode(obj)
+  return buf_enc:reset():encode(obj):get()
+end
+
+local function decode(str)
+  return buf_dec:set(str):decode()
+end
+</pre>
+
+<h3 id="serialize_stream">Streaming Serialization</h3>
+<p>
+In some contexts, it's desirable to do piecewise serialization of large
+datasets, also known as <i>streaming</i>.
+</p>
+<p>
+This serialization format can be safely concatenated and supports streaming.
+Multiple encodings can simply be appended to a buffer and later decoded
+individually:
+</p>
+<pre class="code">
+local buf = buffer.new()
+buf:encode(obj1)
+buf:encode(obj2)
+local copy1 = buf:decode()
+local copy2 = buf:decode()
+</pre>
+<p>
+Here's how to iterate over a stream:
+</p>
+<pre class="code">
+while #buf ~= 0 do
+  local obj = buf:decode()
+  -- Do something with obj.
+end
+</pre>
+<p>
+Since the serialization format doesn't prepend a length to its encoding,
+network applications may need to transmit the length, too.
+</p>
+
+<h3 id="serialize_format">Serialization Format Specification</h3>
+<p>
+This serialization format is designed for <b>internal use</b> by LuaJIT
+applications. Serialized data is upwards-compatible and portable across
+all supported LuaJIT platforms.
+</p>
+<p>
+It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
+embedded zeroes and stores embedded Lua string objects unmodified, which
+are 8-bit-clean, too. Encoded data can be safely concatenated for
+streaming and later decoded one top-level object at a time.
+</p>
+<p>
+The encoding is reasonably compact, but tuned for maximum performance,
+not for minimum space usage. It compresses well with any of the common
+byte-oriented data compression algorithms.
+</p>
+<p>
+Although documented here for reference, this format is explicitly
+<b>not</b> intended to be a 'public standard' for structured data
+interchange across computer languages (like JSON or MessagePack). Please
+do not use it as such.
+</p>
+<p>
+The specification is given below as a context-free grammar with a
+top-level <tt>object</tt> as the starting point. Alternatives are
+separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
+Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
+either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
+suffix.
+</p>
+<pre>
+object    → nil | false | true
+          | null | lightud32 | lightud64
+          | int | num | tab | tab_mt
+          | int64 | uint64 | complex
+          | string
+
+nil       → 0x00
+false     → 0x01
+true      → 0x02
+
+null      → 0x03                            // NULL lightuserdata
+lightud32 → 0x04 data.I                   // 32 bit lightuserdata
+lightud64 → 0x05 data.L                   // 64 bit lightuserdata
+
+int       → 0x06 int.I                                 // int32_t
+num       → 0x07 double.L
+
+tab       → 0x08                                   // Empty table
+          | 0x09 h.U h*{object object}          // Key/value hash
+          | 0x0a a.U a*object                    // 0-based array
+          | 0x0b a.U a*object h.U h*{object object}      // Mixed
+          | 0x0c a.U (a-1)*object                // 1-based array
+          | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
+tab_mt    → 0x0e (index-1).U tab          // Metatable dict entry
+
+int64     → 0x10 int.L                             // FFI int64_t
+uint64    → 0x11 uint.L                           // FFI uint64_t
+complex   → 0x12 re.L im.L                         // FFI complex
+
+string    → (0x20+len).U len*char.B
+          | 0x0f (index-1).U                 // String dict entry
+
+.B = 8 bit
+.I = 32 bit little-endian
+.L = 64 bit little-endian
+.U = prefix-encoded 32 bit unsigned number n:
+     0x00..0xdf   → n.B
+     0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
+   0x1fe0..       → 0xff n.I
+</pre>
+
+<h2 id="error">Error handling</h2>
+<p>
+Many of the buffer methods can throw an error. Out-of-memory or usage
+errors are best caught with an outer wrapper for larger parts of code.
+There's not much one can do after that, anyway.
+</p>
+<p>
+OTOH you may want to catch some errors individually. Buffer methods need
+to receive the buffer object as the first argument. The Lua colon-syntax
+<tt>obj:method()</tt> does that implicitly. But to wrap a method with
+<tt>pcall()</tt>, the arguments need to be passed like this:
+</p>
+<pre class="code">
+local ok, err = pcall(buf.encode, buf, obj)
+if not ok then
+  -- Handle error in err.
+end
+</pre>
+
+<h2 id="ffi_caveats">FFI caveats</h2>
+<p>
+The string buffer library has been designed to work well together with
+the FFI library. But due to the low-level nature of the FFI library,
+some care needs to be taken:
+</p>
+<p>
+First, please remember that FFI pointers are zero-indexed. The space
+returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the
+returned pointer and ends before <tt>len</tt> bytes after that.
+</p>
+<p>
+I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
+is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid
+index at all. The returned pointer may even be <tt>NULL</tt>.
+</p>
+<p>
+The space pointed to by the returned pointer is only valid as long as
+the buffer is not modified in any way (neither append, nor consume, nor
+reset, etc.). The pointer is also not a GC anchor for the buffer object
+itself.
+</p>
+<p>
+Buffer data is only guaranteed to be byte-aligned. Casting the returned
+pointer to a data type with higher alignment may cause unaligned
+accesses. It depends on the CPU architecture whether this is allowed or
+not (it's always OK on x86/x64 and mostly OK on other modern
+architectures).
+</p>
+<p>
+FFI pointers or references do not count as GC anchors for an underlying
+object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is
+anchored by <tt>buf:set(array,&nbsp;len)</tt>, but not by
+<tt>buf:set(array+offset,&nbsp;len)</tt>. The addition of the offset
+creates a new pointer, even when the offset is zero. In this case, you
+need to make sure there's still a reference to the original array as
+long as its contents are in use by the buffer.
+</p>
+<p>
+Even though each LuaJIT VM instance is single-threaded (but you can
+create multiple VMs), FFI data structures can be accessed concurrently.
+Be careful when reading/writing FFI cdata from/to buffers to avoid
+concurrent accesses or modifications. In particular, the memory
+referenced by <tt>buf:set(cdata,&nbsp;len)</tt> must not be modified
+while buffer readers are working on it. Shared, but read-only memory
+mappings of files are OK, but only if the file does not change.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2021
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 4 - 2
libs/LuaJIT/doc/ext_c_api.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Lua/C API Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_ffi.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_ffi_api.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>ffi.* API Functions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_ffi_semantics.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Semantics</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,6 +42,8 @@ td.convop { font-style: italic; width: 40%; }
 <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_ffi_tutorial.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Tutorial</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -44,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_jit.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>jit.* Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 4 - 2
libs/LuaJIT/doc/ext_profiler.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Profiler</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 13 - 19
libs/LuaJIT/doc/extensions.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -54,6 +54,8 @@ td.excinterop {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -392,29 +394,19 @@ the toolchain used to compile LuaJIT:
 <td class="excinterop">Interoperability</td>
 </tr>
 <tr class="odd separate">
-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+, Clang</td>
+<td class="excplatform">External frame unwinding</td>
+<td class="exccompiler">GCC, Clang, MSVC</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 <tr class="even">
-<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
-<td class="exccompiler">GCC, Clang</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
-<tr class="odd">
-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
+<td class="excplatform">Internal frame unwinding + DWARF2</td>
 <td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
-<tr class="even">
-<td class="excplatform">Windows/x64</td>
-<td class="exccompiler">MSVC</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
 <tr class="odd">
-<td class="excplatform">Windows/x86</td>
-<td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+<td class="excplatform">Windows 64 bit</td>
+<td class="exccompiler">non-MSVC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
 <tr class="even">
 <td class="excplatform">Other platforms</td>
@@ -435,7 +427,9 @@ the toolchain used to compile LuaJIT:
 on the C&nbsp;stack. The contents of the C++&nbsp;exception object
 pass through unmodified.</li>
 <li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
-The corresponding Lua error message can be retrieved from the Lua stack.</li>
+The corresponding Lua error message can be retrieved from the Lua stack.<br>
+For MSVC for Windows 64 bit this requires compilation of your C++ code
+with <tt>/EHa</tt>.</li>
 <li>Throwing Lua errors across C++ frames is safe. C++ destructors
 will be called.</li>
 </ul>

+ 19 - 5
libs/LuaJIT/doc/faq.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Frequently Asked Questions (FAQ)</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,6 +40,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -74,7 +76,7 @@ about LuaJIT.</li>
 and links to books and papers about Lua.</li>
 <li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a>
 has information about diverse topics.</li>
-</ul>
+</ul></dd>
 </dl>
 
 <dl id="tech">
@@ -119,8 +121,7 @@ Consider testing your application with older versions, too.<br>
 
 Similarly, the Borland/Delphi runtime modifies the FPU control word and
 enables FP exceptions. Of course this violates the Windows ABI, too.
-Please check the Delphi docs for the Set8087CW method.
-
+Please check the Delphi docs for the Set8087CW method.</dd>
 </dl>
 
 <dl id="ctrlc">
@@ -133,6 +134,19 @@ You have to press Ctrl-C twice to stop your program. That's similar
 to when it's stuck running inside a C function under the Lua interpreter.</dd>
 </dl>
 
+<dl id="order">
+<dt>Q: Table iteration with <tt>pairs()</tt> does not result in the same order?</dt>
+<dd>The order of table iteration is explicitly <b>undefined</b> by
+the Lua language standard.<br>
+Different Lua implementations or versions may use different orders for
+otherwise identical tables. Different ways of constructing a table may
+result in different orders, too.<br>
+Due to improved VM security, LuaJIT 2.1 may even use a different order
+on separate VM invocations or when string keys are newly interned.<br><br>
+If your program relies on a deterministic order, it has a bug. Rewrite it
+so it doesn't rely on the key order, or sort the table keys if you must.</dd>
+</dl>
+
 <dl id="sandbox">
 <dt>Q: Can Lua code be safely sandboxed?</dt>
 <dd>

+ 18 - 10
libs/LuaJIT/doc/install.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Installation</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -65,6 +65,8 @@ td.compatno {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -186,14 +188,18 @@ Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager.
 </p>
 <p>
 The recommended way to fetch the latest version is to do a pull from
-the git repository. Alternatively download the latest source package of
-LuaJIT (pick the .tar.gz). Move it to a directory of your choice,
-open a terminal window and change to this directory. Now unpack the archive
-and change to the newly created directory:
+the git repository.
+</p>
+<p>
+Alternatively download the latest source package of LuaJIT (pick the .tar.gz).
+Move it to a directory of your choice, open a terminal window and change
+to this directory. Now unpack the archive and change to the newly created
+directory (replace XX.YY.ZZ with the version you downloaded):
 </p>
 <pre class="code">
-tar zxf LuaJIT-2.1.0-beta3.tar.gz
-cd LuaJIT-2.1.0-beta3</pre>
+tar zxf LuaJIT-XX.YY.ZZ.tar.gz
+cd LuaJIT-XX.YY.ZZ
+</pre>
 <h3>Building LuaJIT</h3>
 <p>
 The supplied Makefiles try to auto-detect the settings needed for your
@@ -405,7 +411,8 @@ NDKCROSS=$NDKBIN/aarch64-linux-android-
 NDKCC=$NDKBIN/aarch64-linux-android21-clang
 make CROSS=$NDKCROSS \
      STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
-     TARGET_LD=$NDKCC
+     TARGET_LD=$NDKCC TARGET_AR=$NDKBIN/llvm-ar \
+     TARGET_STRIP=$NDKBIN/llvm-strip
 
 # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
 NDKDIR=/opt/android/ndk
@@ -414,7 +421,8 @@ NDKCROSS=$NDKBIN/arm-linux-androideabi-
 NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang
 make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
      STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
-     TARGET_LD=$NDKCC
+     TARGET_LD=$NDKCC TARGET_AR=$NDKBIN/llvm-ar \
+     TARGET_STRIP=$NDKBIN/llvm-strip
 </pre>
 <p>
 You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:

+ 4 - 2
libs/LuaJIT/doc/luajit.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -122,6 +122,8 @@ table.feature small {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>

+ 6 - 4
libs/LuaJIT/doc/running.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Running LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -59,6 +59,8 @@ td.param_default {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -183,8 +185,8 @@ itself. For a description of their options and output format, please
 read the comment block at the start of their source.
 They can be found in the <tt>lib</tt> directory of the source
 distribution or installed under the <tt>jit</tt> directory. By default
-this is <tt>/usr/local/share/luajit-2.1.0-beta3/jit</tt> on POSIX
-systems.
+this is <tt>/usr/local/share/luajit-XX.YY.ZZ/jit</tt> on POSIX
+systems (replace XX.YY.ZZ with the installed version).
 </p>
 
 <h3 id="opt_O"><tt>-O[level]</tt><br>

+ 4 - 8
libs/LuaJIT/doc/status.html

@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Status</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
 <meta name="Copyright" content="Copyright (C) 2005-2021">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,6 +40,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -90,12 +92,6 @@ The Lua <b>debug API</b> is missing a couple of features (return
 hooks for non-Lua functions) and shows slightly different behavior
 in LuaJIT (no per-coroutine hooks, no tail call counting).
 </li>
-<li>
-Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
-handled correctly. The error may fall through an on-trace
-<tt>pcall</tt> or it may be passed on to the function set with
-<tt>lua_atpanic</tt> on x64.
-</li>
 </ul>
 <br class="flush">
 </div>

+ 5 - 2
libs/LuaJIT/dynasm/dasm_arm.h

@@ -294,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +371,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);

+ 3 - 3
libs/LuaJIT/dynasm/dasm_arm.lua

@@ -9,9 +9,9 @@
 local _info = {
   arch =	"arm",
   description =	"DynASM ARM module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }

+ 50 - 8
libs/LuaJIT/dynasm/dasm_arm64.h

@@ -21,8 +21,9 @@ enum {
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
   DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM_IMMV, DASM_VREG,
   DASM__MAX
 };
 
@@ -39,6 +40,7 @@ enum {
 #define DASM_S_RANGE_LG		0x13000000
 #define DASM_S_RANGE_PC		0x14000000
 #define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_RANGE_VREG	0x16000000
 #define DASM_S_UNDEF_LG		0x21000000
 #define DASM_S_UNDEF_PC		0x22000000
 
@@ -247,7 +249,7 @@ void dasm_put(Dst_DECL, int start, ...)
 	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
 	D->section = &D->sections[n]; goto stop;
       case DASM_ESC: p++; ofs += 4; break;
-      case DASM_REL_EXT: break;
+      case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
@@ -268,6 +270,11 @@ void dasm_put(Dst_DECL, int start, ...)
 	  *pl = pos;
 	}
 	pos++;
+	if ((ins & 0x8000)) ofs += 8;
+	break;
+      case DASM_REL_A:
+	b[pos++] = n;
+	b[pos++] = va_arg(ap, int);
 	break;
       case DASM_LABEL_LG:
 	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
@@ -312,13 +319,21 @@ void dasm_put(Dst_DECL, int start, ...)
 	}
       case DASM_IMML: {
 #ifdef DASM_CHECKS
-	int scale = (p[-2] >> 30);
+	int scale = (ins & 3);
 	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
 	   (unsigned int)(n+256) < 512, RANGE_I);
 #endif
 	b[pos++] = n;
 	break;
 	}
+      case DASM_IMMV:
+	ofs += 4;
+	b[pos++] = n;
+	break;
+      case DASM_VREG:
+	CK(n < 32, RANGE_VREG);
+	b[pos++] = n;
+	break;
       }
     }
   }
@@ -348,7 +363,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -375,8 +390,8 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
-	case DASM_IMML: pos++; break;
-	case DASM_IMM13X: pos += 2; break;
+	case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
+	case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
 	}
       }
       stop: (void)0;
@@ -426,7 +441,12 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
+	    n = (int)na;
+	    CK((ptrdiff_t)n == na, RANGE_REL);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
@@ -446,8 +466,24 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
 	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
 	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  } else if ((ins & 0x8000)) {  /* absolute */
+	    cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
+	    cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
+	    cp += 2;
 	  }
 	  break;
+	case DASM_REL_A: {
+	  ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
+	  if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    ins &= ~0x1000;
+	    na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
+	  } else {
+	    na = na - (ptrdiff_t)cp + 4;
+	  }
+	  n = (int)na;
+	  CK((ptrdiff_t)n == na, RANGE_REL);
+	  goto patchrel;
+	}
 	case DASM_LABEL_LG:
 	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
 	  break;
@@ -468,11 +504,17 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  cp[-1] |= (dasm_imm13(n, *b++) << 10);
 	  break;
 	case DASM_IMML: {
-	  int scale = (p[-2] >> 30);
+	  int scale = (ins & 3);
 	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
 	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
 	  break;
 	  }
+	case DASM_IMMV:
+	  *cp++ = n;
+	  break;
+	case DASM_VREG:
+	  cp[-1] |= (n & 0x1f) << (ins & 0x1f);
+	  break;
 	default: *cp++ = ins; break;
 	}
       }

+ 94 - 41
libs/LuaJIT/dynasm/dasm_arm64.lua

@@ -9,9 +9,9 @@
 local _info = {
   arch =	"arm",
   description =	"DynASM ARM64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -23,12 +23,12 @@ local _M = { _info = _info }
 local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
 local assert, setmetatable, rawget = assert, setmetatable, rawget
 local _s = string
-local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local format, byte, char = _s.format, _s.byte, _s.char
 local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
 local concat, sort, insert = table.concat, table.sort, table.insert
 local bit = bit or require("bit")
 local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
-local ror, tohex = bit.ror, bit.tohex
+local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
 
 -- Inherited tables and callbacks.
 local g_opt, g_arch
@@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+  "REL_PC", "LABEL_PC", "REL_A",
+  "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
+  "VREG",
 }
 
 -- Maximum number of section buffer positions for dasm_put().
@@ -246,9 +248,12 @@ local map_cond = {
 
 local parse_reg_type
 
-local function parse_reg(expr)
+local function parse_reg(expr, shift)
   if not expr then werror("expected register name") end
   local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  if not tname then
+    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
+  end
   local tp = map_type[tname or expr]
   if tp then
     local reg = ovreg or tp.reg
@@ -266,18 +271,28 @@ local function parse_reg(expr)
       elseif parse_reg_type ~= rt then
 	werror("register size mismatch")
       end
-      return r, tp
+      return shl(r, shift), tp
+    end
+  end
+  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
+  if vreg then
+    if not parse_reg_type then
+      parse_reg_type = vrt
+    elseif parse_reg_type ~= vrt then
+      werror("register size mismatch")
     end
+    if shift then waction("VREG", shift, vreg) end
+    return 0
   end
   werror("bad register name `"..expr.."'")
 end
 
 local function parse_reg_base(expr)
   if expr == "sp" then return 0x3e0 end
-  local base, tp = parse_reg(expr)
+  local base, tp = parse_reg(expr, 5)
   if parse_reg_type ~= "x" then werror("bad register type") end
   parse_reg_type = false
-  return shl(base, 5), tp
+  return base, tp
 end
 
 local parse_ctx = {}
@@ -297,7 +312,7 @@ local function parse_number(n)
   local code = loadenv("return "..n)
   if code then
     local ok, y = pcall(code)
-    if ok then return y end
+    if ok and type(y) == "number" then return y end
   end
   return nil
 end
@@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale)
     end
     werror("out of range immediate `"..imm.."'")
   else
-    waction("IMML", 0, imm)
+    waction("IMML", scale, imm)
     return 0
   end
 end
@@ -462,6 +477,7 @@ end
 
 local function parse_load(params, nparams, n, op)
   if params[n+2] then werror("too many operands") end
+  local scale = shr(op, 30)
   local pn, p2 = params[n], params[n+1]
   local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
   if not p1 then
@@ -470,14 +486,13 @@ local function parse_load(params, nparams, n, op)
       if reg and tailr ~= "" then
 	local base, tp = parse_reg_base(reg)
 	if tp then
-	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  waction("IMML", scale, format(tp.ctypefmt, tailr))
 	  return op + base
 	end
       end
     end
     werror("expected address operand")
   end
-  local scale = shr(op, 30)
   if p2 then
     if wb == "!" then werror("bad use of '!'") end
     op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
@@ -494,7 +509,7 @@ local function parse_load(params, nparams, n, op)
 	op = op + parse_imm_load(imm, scale)
       else
 	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	op = op + parse_reg(p2b, 16) + 0x00200800
 	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
 	  werror("bad index register type")
 	end
@@ -561,14 +576,14 @@ local function parse_load_pair(params, nparams, n, op)
 end
 
 local function parse_label(label, def)
-  local prefix = sub(label, 1, 2)
+  local prefix = label:sub(1, 2)
   -- =>label (pc label reference)
   if prefix == "=>" then
-    return "PC", 0, sub(label, 3)
+    return "PC", 0, label:sub(3)
   end
   -- ->name (global label reference)
   if prefix == "->" then
-    return "LG", map_global[sub(label, 3)]
+    return "LG", map_global[label:sub(3)]
   end
   if def then
     -- [1-9] (local label definition)
@@ -586,8 +601,11 @@ local function parse_label(label, def)
     if extname then
       return "EXT", map_extern[extname]
     end
+    -- &expr (pointer)
+    if label:sub(1, 1) == "&" then
+      return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
+    end
   end
-  werror("bad label `"..label.."'")
 end
 
 local function branch_type(op)
@@ -620,24 +638,24 @@ local function alias_bfx(p)
 end
 
 local function alias_bfiz(p)
-  parse_reg(p[1])
+  parse_reg(p[1], 0)
   if parse_reg_type == "w" then
-    p[3] = "#-("..p[3]:sub(2)..")%32"
+    p[3] = "#(32-("..p[3]:sub(2).."))%32"
     p[4] = "#("..p[4]:sub(2)..")-1"
   else
-    p[3] = "#-("..p[3]:sub(2)..")%64"
+    p[3] = "#(64-("..p[3]:sub(2).."))%64"
     p[4] = "#("..p[4]:sub(2)..")-1"
   end
 end
 
 local alias_lslimm = op_alias("ubfm_4", function(p)
-  parse_reg(p[1])
+  parse_reg(p[1], 0)
   local sh = p[3]:sub(2)
   if parse_reg_type == "w" then
-    p[3] = "#-("..sh..")%32"
+    p[3] = "#(32-("..sh.."))%32"
     p[4] = "#31-("..sh..")"
   else
-    p[3] = "#-("..sh..")%64"
+    p[3] = "#(64-("..sh.."))%64"
     p[4] = "#63-("..sh..")"
   end
 end)
@@ -881,25 +899,25 @@ end
 
 -- Handle opcodes defined with template strings.
 local function parse_template(params, template, nparams, pos)
-  local op = tonumber(sub(template, 1, 8), 16)
+  local op = tonumber(template:sub(1, 8), 16)
   local n = 1
   local rtt = {}
 
   parse_reg_type = false
 
   -- Process each character.
-  for p in gmatch(sub(template, 9), ".") do
+  for p in gmatch(template:sub(9), ".") do
     local q = params[n]
     if p == "D" then
-      op = op + parse_reg(q); n = n + 1
+      op = op + parse_reg(q, 0); n = n + 1
     elseif p == "N" then
-      op = op + shl(parse_reg(q), 5); n = n + 1
+      op = op + parse_reg(q, 5); n = n + 1
     elseif p == "M" then
-      op = op + shl(parse_reg(q), 16); n = n + 1
+      op = op + parse_reg(q, 16); n = n + 1
     elseif p == "A" then
-      op = op + shl(parse_reg(q), 10); n = n + 1
+      op = op + parse_reg(q, 10); n = n + 1
     elseif p == "m" then
-      op = op + shl(parse_reg(params[n-1]), 16)
+      op = op + parse_reg(params[n-1], 16)
 
     elseif p == "p" then
       if q == "sp" then params[n] = "@x31" end
@@ -930,8 +948,14 @@ local function parse_template(params, template, nparams, pos)
 
     elseif p == "B" then
       local mode, v, s = parse_label(q, false); n = n + 1
+      if not mode then werror("bad label `"..q.."'") end
       local m = branch_type(op)
-      waction("REL_"..mode, v+m, s, 1)
+      if mode == "A" then
+	waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
+	actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
+      else
+	waction("REL_"..mode, v+m, s, 1)
+      end
 
     elseif p == "I" then
       op = op + parse_imm12(q); n = n + 1
@@ -977,8 +1001,8 @@ function op_template(params, template, nparams)
   if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
 
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 3 positions.
-  if secpos+3 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 4 positions.
+  if secpos+4 > maxsecpos then wflush() end
   local pos = wpos()
   local lpos, apos, spos = #actlist, #actargs, secpos
 
@@ -990,9 +1014,11 @@ function op_template(params, template, nparams)
     actlist[lpos+1] = nil
     actlist[lpos+2] = nil
     actlist[lpos+3] = nil
+    actlist[lpos+4] = nil
     actargs[apos+1] = nil
     actargs[apos+2] = nil
     actargs[apos+3] = nil
+    actargs[apos+4] = nil
   end
   error(err, 0)
 end
@@ -1036,23 +1062,50 @@ map_op[".label_1"] = function(params)
   if not params then return "[1-9] | ->global | =>pcexpr" end
   if secpos+1 > maxsecpos then wflush() end
   local mode, n, s = parse_label(params[1], true)
-  if mode == "EXT" then werror("bad label definition") end
+  if not mode or mode == "EXT" then werror("bad label definition") end
   waction("LABEL_"..mode, n, s, 1)
 end
 
 ------------------------------------------------------------------------------
 
 -- Pseudo-opcodes for data storage.
-map_op[".long_*"] = function(params)
+local function op_data(params)
   if not params then return "imm..." end
+  local sz = params.op == ".long" and 4 or 8
   for _,p in ipairs(params) do
-    local n = tonumber(p)
-    if not n then werror("bad immediate `"..p.."'") end
-    if n < 0 then n = n + 2^32 end
-    wputw(n)
+    local imm = parse_number(p)
+    if imm then
+      local n = tobit(imm)
+      if n == imm or (n < 0 and n + 2^32 == imm) then
+	wputw(n < 0 and n + 2^32 or n)
+	if sz == 8 then
+	  wputw(imm < 0 and 0xffffffff or 0)
+	end
+      elseif sz == 4 then
+	werror("bad immediate `"..p.."'")
+      else
+	imm = nil
+      end
+    end
+    if not imm then
+      local mode, v, s = parse_label(p, false)
+      if sz == 4 then
+	if mode then werror("label does not fit into .long") end
+	waction("IMMV", 0, p)
+      elseif mode and mode ~= "A" then
+	waction("REL_"..mode, v+0x8000, s, 1)
+      else
+	if mode == "A" then p = s end
+	waction("IMMV", 0, format("(unsigned int)(%s)", p))
+	waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p))
+      end
+    end
     if secpos+2 > maxsecpos then wflush() end
   end
 end
+map_op[".long_*"] = op_data
+map_op[".quad_*"] = op_data
+map_op[".addr_*"] = op_data
 
 -- Alignment pseudo-opcode.
 map_op[".align_1"] = function(params)

+ 5 - 2
libs/LuaJIT/dynasm/dasm_mips.h

@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -349,7 +349,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);

+ 3 - 3
libs/LuaJIT/dynasm/dasm_mips.lua

@@ -12,9 +12,9 @@ local mipsr6 = _map_def.MIPSR6
 local _info = {
   arch =	mips64 and "mips64" or "mips",
   description =	"DynASM MIPS32/MIPS64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2020-01-20",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }

+ 5 - 2
libs/LuaJIT/dynasm/dasm_ppc.h

@@ -277,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -353,7 +353,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);

+ 3 - 3
libs/LuaJIT/dynasm/dasm_ppc.lua

@@ -11,9 +11,9 @@
 local _info = {
   arch =	"ppc",
   description =	"DynASM PPC module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }

+ 2 - 2
libs/LuaJIT/dynasm/dasm_proto.h

@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stdarg.h>
 
-#define DASM_IDENT	"DynASM 1.4.0"
-#define DASM_VERSION	10400	/* 1.4.0 */
+#define DASM_IDENT	"DynASM 1.5.0"
+#define DASM_VERSION	10500	/* 1.5.0 */
 
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst

+ 20 - 4
libs/LuaJIT/dynasm/dasm_x86.h

@@ -239,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...)
 	}
 	pos++;
 	ofs += 4;  /* Maximum offset needed. */
-	if (action == DASM_REL_LG || action == DASM_REL_PC)
+	if (action == DASM_REL_LG || action == DASM_REL_PC) {
 	  b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	} else if (sizeof(ptrdiff_t) == 8) {
+	  ofs += 4;
+	}
 	break;
       case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
       case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -365,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp)
   do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
 #define dasmd(x) \
   do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#define dasmq(x) \
+  do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
 #else
 #define dasmw(x)	do { dasmb(x); dasmb((x)>>8); } while (0)
 #define dasmd(x)	do { dasmw(x); dasmw((x)>>16); } while (0)
+#define dasmq(x)	do { dasmd(x); dasmd((x)>>32); } while (0)
 #endif
+static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
+{  /* Store x at cp using the width of ptrdiff_t; returns the advanced cp. */
+  if (sizeof(ptrdiff_t) == 8)  /* 8 byte ptrdiff_t: emit x as a qword. */
+    dasmq((unsigned long long)x);
+  else  /* Otherwise emit x truncated to a dword. */
+    dasmd((unsigned int)x);
+  return cp;  /* The emit macros moved the local cp; hand it back. */
+}
+#define dasma(x)	(cp = dasma_(cp, (x)))  /* Emit x and update the caller's cp. */
 
 /* Pass 3: Encode sections. */
 int dasm_encode(Dst_DECL, void *buffer)
@@ -443,12 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  goto wb;
 	}
 	case DASM_IMM_LG:
-	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+	  p++;
+	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
 	  /* fallthrough */
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
-	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
-	  goto wd;
+	  dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
+	  break;
 	}
 	case DASM_LABEL_LG: {
 	  int idx = *p++;

+ 39 - 11
libs/LuaJIT/dynasm/dasm_x86.lua

@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -484,6 +484,22 @@ local function wputdarg(n)
   end
 end
 
+-- Put signed or unsigned qword or arg. Numbers are written as 8 bytes, low byte first; anything else is emitted as two IMM_D actions.
+local function wputqarg(n)
+  local tn = type(n)
+  if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
+    wputb(band(n, 255)) -- Low dword, one byte at a time.
+    wputb(band(shr(n, 8), 255))
+    wputb(band(shr(n, 16), 255))
+    wputb(shr(n, 24))
+    local sign = n < 0 and 255 or 0 -- Fill byte for the high dword: 0xff for negative numbers, else 0.
+    wputb(sign); wputb(sign); wputb(sign); wputb(sign)
+  else
+    waction("IMM_D", format("(unsigned int)(%s)", n)) -- C expression for the low 32 bits.
+    waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) -- C expression for the high 32 bits.
+  end
+end
+
 -- Put operand-size dependent number or arg (defaults to dword).
 local function wputszarg(sz, n)
   if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -663,10 +679,16 @@ local function opmodestr(op, args)
 end
 
 -- Convert number to valid integer or nil.
-local function toint(expr)
+local function toint(expr, isqword)
   local n = tonumber(expr)
   if n then
-    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
+    if n % 1 ~= 0 then
+      werror("not an integer number `"..expr.."'")
+    elseif isqword then
+      if n < -2147483648 or n > 2147483647 then
+	n = nil -- Handle it as an expression to avoid precision loss.
+      end
+    elseif n < -2147483648 or n > 4294967295 then
       werror("bad integer number `"..expr.."'")
     end
     return n
@@ -749,7 +771,7 @@ local function rtexpr(expr)
 end
 
 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-local function parseoperand(param)
+local function parseoperand(param, isqword)
   local t = {}
 
   local expr = param
@@ -810,7 +832,7 @@ local function parseoperand(param)
       if t.disp then break end
 
       -- [reg+xreg...]
-      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
+      local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
       xreg, t.xreg, tp = rtexpr(xreg)
       if not t.xreg then
 	-- [reg+-expr]
@@ -837,7 +859,7 @@ local function parseoperand(param)
       t.disp = dispexpr(tailx)
     else
       -- imm or opsize*imm
-      local imm = toint(expr)
+      local imm = toint(expr, isqword)
       if not imm and sub(expr, 1, 1) == "*" and t.opsize then
 	imm = toint(sub(expr, 2))
 	if imm then
@@ -1952,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
 	local a = args[narg]
 	narg = narg + 1
 	local mode, imm = a.mode, a.imm
-	if mode == "iJ" and not match("iIJ", c) then
+	if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
 	  werror("bad operand size for label")
 	end
 	if c == "S" then
@@ -2144,14 +2166,16 @@ end
 local function op_data(params)
   if not params then return "imm..." end
   local sz = sub(params.op, 2, 2)
-  if sz == "a" then sz = addrsize end
+  if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
   for _,p in ipairs(params) do
-    local a = parseoperand(p)
+    local a = parseoperand(p, sz == "q")
     if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
       werror("bad mode or size in `"..p.."'")
     end
     if a.mode == "iJ" then
       wputlabel("IMM_", a.imm, 1)
+    elseif sz == "q" then
+      wputqarg(a.imm)
     else
       wputszarg(sz, a.imm)
     end
@@ -2163,7 +2187,11 @@ map_op[".byte_*"] = op_data
 map_op[".sbyte_*"] = op_data
 map_op[".word_*"] = op_data
 map_op[".dword_*"] = op_data
+map_op[".qword_*"] = op_data
 map_op[".aword_*"] = op_data
+map_op[".long_*"] = op_data
+map_op[".quad_*"] = op_data
+map_op[".addr_*"] = op_data
 
 ------------------------------------------------------------------------------
 

+ 3 - 3
libs/LuaJIT/dynasm/dynasm.lua

@@ -10,9 +10,9 @@
 local _info = {
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   url =		"https://luajit.org/dynasm.html",
   license =	"MIT",

+ 14 - 7
libs/LuaJIT/src/Makefile

@@ -211,7 +211,7 @@ TARGET_CC= $(STATIC_CC)
 TARGET_STCC= $(STATIC_CC)
 TARGET_DYNCC= $(DYNAMIC_CC)
 TARGET_LD= $(CROSS)$(CC)
-TARGET_AR= $(CROSS)ar rcus 2>/dev/null
+TARGET_AR= $(CROSS)ar rcus
 TARGET_STRIP= $(CROSS)strip
 
 TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
@@ -311,6 +311,7 @@ ifeq (Windows,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
   TARGET_DYNXLDOPTS=
 else
+  TARGET_AR+= 2>/dev/null
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
@@ -319,13 +320,10 @@ ifeq (Darwin,$(TARGET_SYS))
     $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
   endif
   TARGET_STRIP+= -x
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-  ifeq (x64,$(TARGET_LJARCH))
-    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
-    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
-  endif
 else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
@@ -336,6 +334,13 @@ ifeq (iOS,$(TARGET_SYS))
     TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
 else
+  ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+    # Find out whether the target toolchain always generates unwind tables.
+    TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
+    ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+      TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+    endif
+  endif
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
@@ -479,13 +484,15 @@ LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 
 LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
-	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
+	 lib_buffer.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
-	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
+	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
+	  lj_api.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \

+ 41 - 33
libs/LuaJIT/src/Makefile.dep

@@ -2,14 +2,18 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
- lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
- lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_strfmt.h lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
+ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
+ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
  lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
  lj_ffdef.h lj_lib.h lj_libdef.h
+lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
+ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
@@ -48,10 +52,10 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
  lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
- lj_asm_*.h
+ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
+ lj_emit_*.h lj_asm_*.h
 lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
@@ -77,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h
 lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
- lj_ccallback.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
+ lj_cdata.h lj_cconv.h lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
@@ -110,10 +114,10 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
+ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
+ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
@@ -127,15 +131,15 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_prng.h
+ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
  lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
  lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
- lj_bcdump.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
+ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
  lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
@@ -170,15 +174,18 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
  lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
  lj_vm.h lj_vmevent.h
+lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
  lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
-lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
+lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
@@ -189,9 +196,10 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
  lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
 lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
+ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
 lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -204,7 +212,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
  lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
 lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_udata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
 lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
  lj_vm.h lj_vmevent.h
@@ -216,23 +224,23 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
  lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
  lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
- lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
- lj_prng.c lj_prng.h lj_state.c lj_lex.h lj_alloc.h luajit.h \
+ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
+ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
  lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
- lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c \
- lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \
- lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \
- lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
- lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
- lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
+ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
  lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lib_buffer.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

+ 2 - 0
libs/LuaJIT/src/host/buildvm_lib.c

@@ -385,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
 	  ok = LJ_HASJIT;
 	else if (!strcmp(buf, "#if LJ_HASFFI\n"))
 	  ok = LJ_HASFFI;
+	else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
+	  ok = LJ_HASBUFFER;
 	if (!ok) {
 	  int lvl = 1;
 	  while (fgets(buf, sizeof(buf), fp) != NULL) {

+ 1 - 1
libs/LuaJIT/src/host/minilua.c

@@ -1134,7 +1134,7 @@ if(!cl->isC){
 CallInfo*ci;
 StkId st,base;
 Proto*p=cl->p;
-luaD_checkstack(L,p->maxstacksize);
+luaD_checkstack(L,p->maxstacksize+p->numparams);
 func=restorestack(L,funcr);
 if(!p->is_vararg){
 base=func+1;

+ 1 - 1
libs/LuaJIT/src/jit/dis_arm64.lua

@@ -1089,7 +1089,7 @@ local function disass_ins(ctx)
 	  last = "#"..(sf+32 - immr)
 	  operands[#operands] = last
 	  x = x + 1
-	elseif x >= immr then
+	else
 	  name = a2
 	  x = x - immr + 1
 	end

+ 19 - 9
libs/LuaJIT/src/jit/dump.lua

@@ -102,10 +102,12 @@ end
 local function fillsymtab(tr, nexit)
   local t = symtab
   if nexitsym == 0 then
+    local maskaddr = jit.arch == "arm" and -2
     local ircall = vmdef.ircall
     for i=0,#ircall do
       local addr = ircalladdr(i)
       if addr ~= 0 then
+	if maskaddr then addr = band(addr, maskaddr) end
 	if addr < 0 then addr = addr + 2^32 end
 	t[addr] = ircall[i]
       end
@@ -217,8 +219,10 @@ local function colorize_text(s)
   return s
 end
 
-local function colorize_ansi(s, t)
-  return format(colortype_ansi[t], s)
+local function colorize_ansi(s, t, extra)
+  local out = format(colortype_ansi[t], s)
+  if extra then out = "\027[3m"..out end
+  return out
 end
 
 local irtype_ansi = setmetatable({},
@@ -227,9 +231,10 @@ local irtype_ansi = setmetatable({},
 
 local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
 
-local function colorize_html(s, t)
+local function colorize_html(s, t, extra)
   s = gsub(s, "[<>&]", html_escape)
-  return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+  return format('<span class="irt_%s%s">%s</span>',
+		irtype_text[t], extra and " irt_extra" or "", s)
 end
 
 local irtype_html = setmetatable({},
@@ -254,6 +259,7 @@ span.irt_tab { color: #c00000; }
 span.irt_udt, span.irt_lud { color: #00c0c0; }
 span.irt_num { color: #4040c0; }
 span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
+span.irt_extra { font-style: italic; }
 </style>
 ]]
 
@@ -269,6 +275,7 @@ local litname = {
     if band(mode, 8) ~= 0 then s = s.."C" end
     if band(mode, 16) ~= 0 then s = s.."R" end
     if band(mode, 32) ~= 0 then s = s.."I" end
+    if band(mode, 64) ~= 0 then s = s.."K" end
     t[mode] = s
     return s
   end}),
@@ -277,15 +284,18 @@ local litname = {
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
     if band(mode, 0x800) ~= 0 then s = s.." sext" end
-    local c = shr(mode, 14)
-    if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
+    local c = shr(mode, 12)
+    if c == 1 then s = s.." none"
+    elseif c == 2 then s = s.." index"
+    elseif c == 3 then s = s.." check" end
     t[mode] = s
     return s
   end}),
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
-  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
+  ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
   ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 
@@ -345,7 +355,7 @@ local function formatk(tr, idx, sn)
   else
     s = tostring(k) -- For primitives.
   end
-  s = colorize(format("%-4s", s), t)
+  s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
   if slot then
     s = format("%s @%d", s, slot)
   end
@@ -365,7 +375,7 @@ local function printsnap(tr, snap)
 	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
       else
 	local m, ot, op1, op2 = traceir(tr, ref)
-	out:write(colorize(format("%04d", ref), band(ot, 31)))
+	out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
       end
       out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
     else

+ 1 - 0
libs/LuaJIT/src/jit/p.lua

@@ -238,6 +238,7 @@ local function prof_finish()
     prof_count1 = nil
     prof_count2 = nil
     prof_ud = nil
+    if out ~= stdout then out:close() end
   end
 end
 

+ 18 - 4
libs/LuaJIT/src/lib_base.c

@@ -19,6 +19,7 @@
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_debug.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
@@ -75,9 +76,10 @@ LJLIB_ASM_(type)		LJLIB_REC(.)
 /* This solves a circular dependency problem -- change FF_next_N as needed. */
 LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
 
-LJLIB_ASM(next)
+LJLIB_ASM(next)			LJLIB_REC(.)
 {
   lj_lib_checktab(L, 1);
+  lj_err_msg(L, LJ_ERR_NEXTIDX);
   return FFH_UNREACHABLE;
 }
 
@@ -406,10 +408,22 @@ LJLIB_CF(load)
   GCstr *name = lj_lib_optstr(L, 2);
   GCstr *mode = lj_lib_optstr(L, 3);
   int status;
-  if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
-    GCstr *s = lj_lib_checkstr(L, 1);
+  if (L->base < L->top &&
+      (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
+    const char *s;
+    MSize len;
+    if (tvisbuf(L->base)) {
+      SBufExt *sbx = bufV(L->base);
+      s = sbx->r;
+      len = sbufxlen(sbx);
+      if (!name) name = &G(L)->strempty;  /* Buffers are not NUL-terminated. */
+    } else {
+      GCstr *str = lj_lib_checkstr(L, 1);
+      s = strdata(str);
+      len = str->len;
+    }
     lua_settop(L, 4);  /* Ensure env arg exists. */
-    status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
+    status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
 			      mode ? strdata(mode) : NULL);
   } else {
     lj_lib_checkfunc(L, 1);

+ 356 - 0
libs/LuaJIT/src/lib_buffer.c

@@ -0,0 +1,356 @@
+/*
+** Buffer library.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_buffer_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#endif
+#include "lj_strfmt.h"
+#include "lj_serialize.h"
+#include "lj_lib.h"
+
+/* -- Helper functions ---------------------------------------------------- */
+
+/* Check that the first argument is a string buffer.
+**
+** Calls lj_err_argtype() for argument 1 (expected "buffer") when no argument
+** was passed or the first stack slot does not hold a buffer. Otherwise the
+** SBufExt is fetched from that slot with bufV() and returned.
+*/
+static SBufExt *buffer_tobuf(lua_State *L)
+{
+  if (!(L->base < L->top && tvisbuf(L->base)))
+    lj_err_argtype(L, 1, "buffer");
+  return bufV(L->base);
+}
+
+/* Ditto, but for writers.
+**
+** Performs the same check as buffer_tobuf() and then applies
+** setsbufXL_(sbx, L) before returning the buffer.
+** NOTE(review): presumably this records the calling lua_State in the buffer
+** for the write helpers -- confirm against lj_buf.h.
+*/
+static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  setsbufXL_(sbx, L);
+  return sbx;
+}
+
+#define buffer_toudata(sbx)	((GCudata *)(sbx)-1)  /* Steps back one GCudata from sbx; assumes sbx directly follows its udata header -- TODO confirm. */
+
+/* -- Buffer methods ------------------------------------------------------ */
+
+#define LJLIB_MODULE_buffer_method
+/*
+** Method free: hands the buffer to lj_bufx_free() and returns the buffer
+** object itself. The receiver is validated by buffer_tobuf().
+*/
+LJLIB_CF(buffer_method_free)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  lj_bufx_free(L, sbx);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+/*
+** Method reset: applies lj_bufx_reset() to the buffer and returns the
+** buffer object itself. The receiver is validated by buffer_tobuf().
+*/
+LJLIB_CF(buffer_method_reset)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  lj_bufx_reset(sbx);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+/*
+** Method skip: advances the read pointer by n bytes and returns the buffer
+** object itself.
+**
+** n is argument 2 and must be an integer in 0..LJ_MAX_BUF. If n is below the
+** current length reported by sbufxlen(), only sbx->r moves. Otherwise both
+** sbx->r and sbx->w are rewound to sbx->b.
+** NOTE(review): confirm that this rewind is also intended for a buffer whose
+** data was installed by the set method (lj_bufx_set_cow).
+*/
+LJLIB_CF(buffer_method_skip)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+  MSize len = sbufxlen(sbx);
+  if (n < len) {
+    sbx->r += n;  /* Partial skip: move the read pointer only. */
+  } else {
+    sbx->r = sbx->w = sbx->b;  /* Skipped everything: rewind both pointers. */
+  }
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+/*
+** Method set: replaces the buffer contents with a reference to the data of
+** argument 2 and returns the buffer object itself.
+**
+** With FFI enabled and a cdata as argument 2, the cdata is converted to a
+** const void pointer and the length is taken from argument 3
+** (0..LJ_MAX_BUF). Otherwise argument 2 is checked with lj_lib_checkstrx()
+** and the string data and length are used.
+**
+** The buffer is passed to lj_bufx_free() before lj_bufx_set_cow() installs
+** the new pointer and length. The GC object of argument 2 is then stored in
+** sbx->cowref, followed by a GC barrier on the udata of the buffer.
+** NOTE(review): presumably cowref keeps the source object alive while the
+** buffer points into it -- confirm against lj_buf.h.
+*/
+LJLIB_CF(buffer_method_set)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  GCobj *ref;
+  const char *p;
+  MSize len;
+#if LJ_HASFFI
+  if (tviscdata(L->base+1)) {
+    CTState *cts = ctype_cts(L);
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+		   L->base+1, CCF_ARG(2));  /* Result is written into p. */
+    len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+  } else
+#endif
+  {
+    GCstr *str = lj_lib_checkstrx(L, 2);
+    p = strdata(str);
+    len = str->len;
+  }
+  lj_bufx_free(L, sbx);
+  lj_bufx_set_cow(L, sbx, p, len);
+  ref = gcV(L->base+1);  /* GC object of argument 2 (string or cdata). */
+  setgcref(sbx->cowref, ref);
+  lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+/*
+** Method put: appends every argument after the receiver to the buffer and
+** returns the buffer object itself.
+**
+** Strings are appended with lj_buf_putstr(), integers with
+** lj_strfmt_putint() and other numbers with lj_strfmt_putfnum() using
+** STRFMT_G14. Another buffer contributes sbufxlen() bytes starting at its
+** read pointer; passing the receiver itself raises LJ_ERR_BUFFER_SELF.
+**
+** For any other value the __tostring metamethod is looked up once per
+** argument. If present, it is called, its result overwrites the argument
+** slot and the type dispatch is retried. Since mo is non-NULL on the retry,
+** a result of an unsupported type ends in the argument type error.
+*/
+LJLIB_CF(buffer_method_put)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  ptrdiff_t arg, narg = L->top - L->base;
+  for (arg = 1; arg < narg; arg++) {
+    cTValue *o = &L->base[arg], *mo = NULL;
+  retry:
+    if (tvisstr(o)) {
+      lj_buf_putstr((SBuf *)sbx, strV(o));
+    } else if (tvisint(o)) {
+      lj_strfmt_putint((SBuf *)sbx, intV(o));
+    } else if (tvisnum(o)) {
+      lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
+    } else if (tvisbuf(o)) {
+      SBufExt *sbx2 = bufV(o);
+      if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
+      lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
+    } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+      /* Call __tostring metamethod inline. */
+      copyTV(L, L->top++, mo);
+      copyTV(L, L->top++, o);
+      lua_call(L, 1, 1);
+      o = &L->base[arg];  /* The stack may have been reallocated. */
+      copyTV(L, &L->base[arg], L->top-1);  /* Result replaces the argument. */
+      L->top = L->base + narg;  /* Drop the call result from the stack top. */
+      goto retry;  /* Retry with the result. */
+    } else {
+      lj_err_argtype(L, arg+1, "string/number/__tostring");
+    }
+    /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
+  }
+  L->top = L->base+1;  /* Chain buffer object. */
+  lj_gc_check(L);
+  return 1;
+}
+/*
+** Method putf: passes the buffer to lj_strfmt_putarg(L, sb, 2, 2) and
+** returns the buffer object itself, followed by a GC check.
+** NOTE(review): presumably argument 2 is the format string and the remaining
+** arguments are its values -- confirm against lj_strfmt.c.
+*/
+LJLIB_CF(buffer_method_putf)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
+  L->top = L->base+1;  /* Chain buffer object. */
+  lj_gc_check(L);
+  return 1;
+}
+/*
+** Method get: consumes data from the read side of the buffer and returns it
+** as one string per length argument.
+**
+** Each argument after the receiver is either nil (take up to LJ_MAX_BUF
+** bytes) or an integer in 0..LJ_MAX_BUF. The count is clamped to the current
+** length from sbufxlen(). The new string overwrites the argument slot, so
+** the argument slots become the results, and sbx->r advances by that count.
+** A call without arguments behaves like get(nil).
+**
+** When the read pointer has caught up with the write pointer, both are
+** rewound to sbx->b.
+** NOTE(review): confirm that this rewind is also intended for a buffer whose
+** data was installed by the set method (lj_bufx_set_cow).
+*/
+LJLIB_CF(buffer_method_get)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  ptrdiff_t arg, narg = L->top - L->base;
+  if (narg == 1) {
+    narg++;
+    setnilV(L->top++);  /* get() is the same as get(nil). */
+  }
+  for (arg = 1; arg < narg; arg++) {
+    TValue *o = &L->base[arg];
+    MSize n = tvisnil(o) ? LJ_MAX_BUF :
+	      (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF);
+    MSize len = sbufxlen(sbx);
+    if (n > len) n = len;  /* Never read more than is available. */
+    setstrV(L, o, lj_str_new(L, sbx->r, n));
+    sbx->r += n;
+  }
+  if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b;
+  lj_gc_check(L);
+  return narg-1;  /* One result per length argument. */
+}
+/*
+** Method putcdata (defined below, FFI builds only): appends len bytes from
+** the memory referenced by the cdata in argument 2 and returns the buffer
+** object itself.
+**
+** Argument 2 must be a cdata, otherwise an argument type error is raised.
+** It is converted to a const void pointer. len is argument 3 and must be an
+** integer in 0..LJ_MAX_BUF.
+*/
+#if LJ_HASFFI
+LJLIB_CF(buffer_method_putcdata)	LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  const char *p;
+  MSize len;
+  if (tviscdata(L->base+1)) {
+    CTState *cts = ctype_cts(L);
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+		   L->base+1, CCF_ARG(2));  /* Result is written into p. */
+  } else {
+    lj_err_argtype(L, 2, "cdata");
+  }
+  len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+  lj_buf_putmem((SBuf *)sbx, p, len);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_reserve)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);  /* Arg #2: requested size. */
+  GCcdata *cd;
+  lj_buf_more((SBuf *)sbx, sz);  /* Presumably ensures at least sz bytes of write space -- confirm in lj_buf. */
+  ctype_loadffi(L);
+  cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);  /* Pointer-sized cdata of type CTID_P_UINT8. */
+  *(void **)cdataptr(cd) = sbx->w;  /* Points at the current write position. */
+  setcdataV(L, L->top++, cd);  /* Result 1: the pointer. */
+  setintV(L->top++, sbufleft(sbx));  /* Result 2: sbufleft(sbx), not the requested sz. */
+  return 2;
+}
+
+LJLIB_CF(buffer_method_commit)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);  /* Arg #2: number of bytes to commit. */
+  if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);  /* Reject more than sbufleft(sbx). */
+  sbx->w += len;  /* Advance the write pointer; no data is copied here. */
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_ref)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  GCcdata *cd;
+  ctype_loadffi(L);
+  cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);  /* Pointer-sized cdata of type CTID_P_UINT8. */
+  *(void **)cdataptr(cd) = sbx->r;  /* Points at the current read position; r is not advanced. */
+  setcdataV(L, L->top++, cd);  /* Result 1: the pointer. */
+  setintV(L->top++, sbufxlen(sbx));  /* Result 2: sbufxlen(sbx). */
+  return 2;
+}
+#endif
+
+LJLIB_CF(buffer_method_encode)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  cTValue *o = lj_lib_checkany(L, 2);  /* Arg #2: the value handed to the serializer. */
+  lj_serialize_put(sbx, o);  /* Presumably appends the serialized form of o to sbx -- confirm in lj_serialize. */
+  lj_gc_check(L);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_decode)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  setnilV(L->top++);  /* Push a nil slot to receive the result. */
+  sbx->r = lj_serialize_get(sbx, L->top-1);  /* Fills the slot; the returned pointer becomes the new read position. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_method___gc)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  lj_bufx_free(L, sbx);  /* Presumably releases the buffer storage -- see lj_bufx_free. */
+  return 0;  /* No results. */
+}
+
+LJLIB_CF(buffer_method___tostring)	LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));  /* New string from r with sbufxlen bytes; r is left unchanged. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_method___len)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  setintV(L->top-1, (int32_t)sbufxlen(sbx));  /* Overwrite the top slot with sbufxlen(sbx) as an int32. */
+  return 1;
+}
+
+LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+/* -- Buffer library functions -------------------------------------------- */
+
+#define LJLIB_MODULE_buffer
+
+LJLIB_PUSH(top-2) LJLIB_SET(!)  /* Set environment. */
+
+LJLIB_CF(buffer_new)
+{
+  MSize sz = 0;
+  int targ = 1;  /* Argument index of the optional options table. */
+  GCtab *env, *dict_str = NULL, *dict_mt = NULL;
+  GCudata *ud;
+  SBufExt *sbx;
+  if (L->base < L->top && !tvistab(L->base)) {  /* First arg is not a table: treat it as the size. */
+    targ = 2;  /* Options table, if any, follows the size. */
+    if (!tvisnil(L->base))  /* A nil size keeps sz = 0. */
+      sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
+  }
+  if (L->base+targ-1 < L->top) {  /* An options argument was passed. */
+    GCtab *options = lj_lib_checktab(L, targ);
+    cTValue *opt_dict, *opt_mt;
+    opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
+    if (opt_dict && tvistab(opt_dict)) {  /* A missing or non-table "dict" is silently ignored. */
+      dict_str = tabV(opt_dict);
+      lj_serialize_dict_prep_str(L, dict_str);
+    }
+    opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
+    if (opt_mt && tvistab(opt_mt)) {  /* A missing or non-table "metatable" is silently ignored. */
+      dict_mt = tabV(opt_mt);
+      lj_serialize_dict_prep_mt(L, dict_mt);
+    }
+  }
+  env = tabref(curr_func(L)->c.env);  /* The function environment is also used as the metatable below. */
+  ud = lj_udata_new(L, sizeof(SBufExt), env);  /* The SBufExt lives in the userdata payload. */
+  ud->udtype = UDTYPE_BUFFER;
+  /* NOBARRIER: The GCudata is new (marked white). */
+  setgcref(ud->metatable, obj2gco(env));
+  setudataV(L, L->top++, ud);  /* Push the result before initializing the buffer. */
+  sbx = (SBufExt *)uddata(ud);
+  lj_bufx_init(L, sbx);
+  setgcref(sbx->dict_str, obj2gco(dict_str));  /* dict_str is NULL when no "dict" option was given. */
+  setgcref(sbx->dict_mt, obj2gco(dict_mt));  /* dict_mt is NULL when no "metatable" option was given. */
+  if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);  /* Only called for a non-zero size; presumably preallocates. */
+  return 1;
+}
+
+LJLIB_CF(buffer_encode)			LJLIB_REC(.)
+{
+  cTValue *o = lj_lib_checkany(L, 1);  /* Arg #1: the value handed to the serializer. */
+  setstrV(L, L->top++, lj_serialize_encode(L, o));  /* Push the GCstr returned by the encoder. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_decode)			LJLIB_REC(.)
+{
+  GCstr *str = lj_lib_checkstrx(L, 1);  /* Arg #1: the string to decode. */
+  setnilV(L->top++);  /* Push a nil slot to receive the result. */
+  lj_serialize_decode(L, L->top-1, str);  /* Decodes str into the slot just pushed. */
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+int luaopen_string_buffer(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, buffer_method);  /* Register the buffer_method functions first. */
+  lua_getfield(L, -1, "__tostring");  /* Fetch __tostring from the table on top of the stack ... */
+  lua_setfield(L, -2, "tostring");  /* ... and store it in the same table under "tostring". */
+  LJ_LIB_REG(L, NULL, buffer);  /* Then register the buffer library functions. */
+  return 1;
+}
+
+#endif

+ 1 - 0
libs/LuaJIT/src/lib_ffi.c

@@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo)
       setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
     if (gcref(ct->name)) {
       GCstr *s = gco2str(gcref(ct->name));
+      if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
       setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
     }
     lj_gc_check(L);

+ 19 - 13
libs/LuaJIT/src/lib_io.c

@@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L)
   return iof;
 }
 
-static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
+static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id)
 {
   IOFileUD *iof = IOSTDF_IOF(L, id);
   if (iof->fp == NULL)
     lj_err_caller(L, LJ_ERR_IOSTDCL);
-  return iof->fp;
+  return iof;
 }
 
 static IOFileUD *io_file_new(lua_State *L)
@@ -178,7 +178,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     lj_gc_check(L);
-    return (n > 0 || m == 0);
+    return n > 0;
   } else {
     int c = getc(fp);
     ungetc(c, fp);
@@ -187,8 +187,9 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
   }
 }
 
-static int io_file_read(lua_State *L, FILE *fp, int start)
+static int io_file_read(lua_State *L, IOFileUD *iof, int start)
 {
+  FILE *fp = iof->fp;
   int ok, n, nargs = (int)(L->top - L->base) - start;
   clearerr(fp);
   if (nargs == 0) {
@@ -224,8 +225,9 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
   return n - start;
 }
 
-static int io_file_write(lua_State *L, FILE *fp, int start)
+static int io_file_write(lua_State *L, IOFileUD *iof, int start)
 {
+  FILE *fp = iof->fp;
   cTValue *tv;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
@@ -253,13 +255,11 @@ static int io_file_iter(lua_State *L)
     lj_err_caller(L, LJ_ERR_IOCLFL);
   L->top = L->base;
   if (n) {  /* Copy upvalues with options to stack. */
-    if (n > LUAI_MAXCSTACK)
-      lj_err_caller(L, LJ_ERR_STKOV);
     lj_state_checkstack(L, (MSize)n);
     memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue));
     L->top += n;
   }
-  n = io_file_read(L, iof->fp, 0);
+  n = io_file_read(L, iof, 0);
   if (ferror(iof->fp))
     lj_err_callermsg(L, strVdata(L->top-2));
   if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) {
@@ -284,19 +284,25 @@ static int io_file_lines(lua_State *L)
 
 LJLIB_CF(io_method_close)
 {
-  IOFileUD *iof = L->base < L->top ? io_tofile(L) :
-		  IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+  IOFileUD *iof;
+  if (L->base < L->top) {
+    iof = io_tofile(L);
+  } else {
+    iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+    if (iof->fp == NULL)
+      lj_err_caller(L, LJ_ERR_IOCLFL);
+  }
   return io_file_close(L, iof);
 }
 
 LJLIB_CF(io_method_read)
 {
-  return io_file_read(L, io_tofile(L)->fp, 1);
+  return io_file_read(L, io_tofile(L), 1);
 }
 
 LJLIB_CF(io_method_write)		LJLIB_REC(io_write 0)
 {
-  return io_file_write(L, io_tofile(L)->fp, 1);
+  return io_file_write(L, io_tofile(L), 1);
 }
 
 LJLIB_CF(io_method_flush)		LJLIB_REC(io_flush 0)
@@ -458,7 +464,7 @@ LJLIB_CF(io_write)		LJLIB_REC(io_write GCROOT_IO_OUTPUT)
 
 LJLIB_CF(io_flush)		LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
 {
-  return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
+  return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL);
 }
 
 static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)

+ 1 - 5
libs/LuaJIT/src/lib_jit.c

@@ -346,11 +346,7 @@ LJLIB_CF(jit_util_tracek)
       ir = &T->ir[ir->op1];
     }
 #if LJ_HASFFI
-    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    if (ir->o == IR_KINT64) ctype_loadffi(L);
 #endif
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));

+ 8 - 80
libs/LuaJIT/src/lib_string.c

@@ -640,89 +640,14 @@ LJLIB_CF(string_gsub)
 
 /* ------------------------------------------------------------------------ */
 
-/* Emulate tostring() inline. */
-static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
-{
-  TValue *o = L->base+arg-1;
-  cTValue *mo;
-  lj_assertL(o < L->top, "bad usage");  /* Caller already checks for existence. */
-  if (LJ_LIKELY(tvisstr(o)))
-    return strV(o);
-  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->top++, mo);
-    copyTV(L, L->top++, o);
-    lua_call(L, 1, 1);
-    copyTV(L, L->base+arg-1, --L->top);
-    return NULL;  /* Buffer may be overwritten, retry. */
-  }
-  return lj_strfmt_obj(L, o);
-}
-
 LJLIB_CF(string_format)		LJLIB_REC(.)
 {
-  int arg, top = (int)(L->top - L->base);
-  GCstr *fmt;
-  SBuf *sb;
-  FormatState fs;
-  SFormat sf;
   int retry = 0;
-again:
-  arg = 1;
-  sb = lj_buf_tmp_(L);
-  fmt = lj_lib_checkstr(L, arg);
-  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
-  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
-    if (sf == STRFMT_LIT) {
-      lj_buf_putmem(sb, fs.str, fs.len);
-    } else if (sf == STRFMT_ERR) {
-      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
-    } else {
-      if (++arg > top)
-	luaL_argerror(L, arg, lj_obj_typename[0]);
-      switch (STRFMT_TYPE(sf)) {
-      case STRFMT_INT:
-	if (tvisint(L->base+arg-1)) {
-	  int32_t k = intV(L->base+arg-1);
-	  if (sf == STRFMT_INT)
-	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
-	  else
-	    lj_strfmt_putfxint(sb, sf, k);
-	} else {
-	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
-	}
-	break;
-      case STRFMT_UINT:
-	if (tvisint(L->base+arg-1))
-	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
-	else
-	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
-	break;
-      case STRFMT_NUM:
-	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
-	break;
-      case STRFMT_STR: {
-	GCstr *str = string_fmt_tostring(L, arg, retry);
-	if (str == NULL)
-	  retry = 1;
-	else if ((sf & STRFMT_T_QUOTED))
-	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
-	else
-	  lj_strfmt_putfstr(sb, sf, str);
-	break;
-	}
-      case STRFMT_CHAR:
-	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
-	break;
-      case STRFMT_PTR:  /* No formatting. */
-	lj_strfmt_putptr(sb, lj_obj_ptr(G(L), L->base+arg-1));
-	break;
-      default:
-	lj_assertL(0, "bad string format type");
-	break;
-      }
-    }
-  }
-  if (retry++ == 1) goto again;
+  SBuf *sb;
+  do {
+    sb = lj_buf_tmp_(L);
+    retry = lj_strfmt_putarg(L, sb, 1, -retry);
+  } while (retry > 0);
   setstrV(L, L->top-1, lj_buf_str(L, sb));
   lj_gc_check(L);
   return 1;
@@ -743,6 +668,9 @@ LUALIB_API int luaopen_string(lua_State *L)
   setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
   settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
   mt->nomm = (uint8_t)(~(1u<<MM_index));
+#if LJ_HASBUFFER
+  lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
+#endif
   return 1;
 }
 

+ 1 - 1
libs/LuaJIT/src/lib_table.c

@@ -159,7 +159,7 @@ LJLIB_CF(table_concat)		LJLIB_REC(.)
   SBuf *sb = lj_buf_tmp_(L);
   SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
   if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
-    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    int32_t idx = (int32_t)(intptr_t)sb->w;
     cTValue *o = lj_tab_getint(t, idx);
     lj_err_callerv(L, LJ_ERR_TABCAT,
 		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);

+ 1 - 1
libs/LuaJIT/src/lj_alloc.c

@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64 && !LJ_GC64
+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE

+ 7 - 31
libs/LuaJIT/src/lj_api.c

@@ -707,36 +707,10 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
   incr_top(L);
 }
 
-#if LJ_64
-static void *lightud_intern(lua_State *L, void *p)
-{
-  global_State *g = G(L);
-  uint64_t u = (uint64_t)p;
-  uint32_t up = lightudup(u);
-  uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
-  MSize segnum = g->gc.lightudnum;
-  if (segmap) {
-    MSize seg;
-    for (seg = 0; seg <= segnum; seg++)
-      if (segmap[seg] == up)  /* Fast path. */
-	return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
-    segnum++;
-  }
-  if (!((segnum-1) & segnum) && segnum != 1) {
-    if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)) lj_err_msg(L, LJ_ERR_BADLU);
-    lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
-    setmref(g->gc.lightudseg, segmap);
-  }
-  g->gc.lightudnum = segnum;
-  segmap[segnum] = up;
-  return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
-}
-#endif
-
 LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 {
 #if LJ_64
-  p = lightud_intern(L, p);
+  p = lj_lightud_intern(L, p);
 #endif
   setrawlightudV(L->top, p);
   incr_top(L);
@@ -919,11 +893,13 @@ LUA_API int lua_next(lua_State *L, int idx)
   cTValue *t = index2adr(L, idx);
   int more;
   lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
-  more = lj_tab_next(L, tabV(t), L->top-1);
-  if (more) {
+  more = lj_tab_next(tabV(t), L->top-1, L->top-1);
+  if (more > 0) {
     incr_top(L);  /* Return new key and value slot. */
-  } else {  /* End of traversal. */
+  } else if (!more) {  /* End of traversal. */
     L->top--;  /* Remove key slot. */
+  } else {
+    lj_err_msg(L, LJ_ERR_NEXTIDX);
   }
   return more;
 }
@@ -1179,7 +1155,7 @@ static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
   setfuncV(L, top++, fn);
   if (LJ_FR2) setnilV(top++);
 #if LJ_64
-  ud = lightud_intern(L, ud);
+  ud = lj_lightud_intern(L, ud);
 #endif
   setrawlightudV(top++, ud);
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */

+ 35 - 17
libs/LuaJIT/src/lj_arch.h

@@ -170,14 +170,10 @@
 #define LJ_ARCH_NAME		"x86"
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X86		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	8
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
@@ -188,14 +184,10 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	16
 #define LJ_TARGET_JUMPRANGE	31	/* +-2^31 = +-2GB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -203,6 +195,8 @@
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #ifndef LUAJIT_DISABLE_GC64
 #define LJ_TARGET_GC64		1
+#elif LJ_TARGET_OSX
+#error "macOS requires GC64 -- don't disable it"
 #endif
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
@@ -219,6 +213,7 @@
 #define LJ_ABI_EABI		1
 #define LJ_TARGET_ARM		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	14
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
@@ -249,6 +244,7 @@
 #endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	30
 #define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -304,6 +300,7 @@
 
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
+#define LJ_TARGET_EHRAREG	65
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
@@ -406,6 +403,7 @@
 #endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4
+#define LJ_TARGET_EHRAREG	31
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -551,6 +549,13 @@
 #define LJ_HASFFI		1
 #endif
 
+/* Disable or enable the string buffer extension. */
+#if defined(LUAJIT_DISABLE_BUFFER)
+#define LJ_HASBUFFER		0
+#else
+#define LJ_HASBUFFER		1
+#endif
+
 #if defined(LUAJIT_DISABLE_PROFILE)
 #define LJ_HASPROFILE		0
 #elif LJ_TARGET_POSIX
@@ -611,13 +616,10 @@
 #define LJ_NO_SYSTEM		1
 #endif
 
-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
-/* NYI: no support for compact unwind specification, yet. */
-#define LUAJIT_NO_UNWIND	1
-#endif
-
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
-#define LJ_NO_UNWIND		1
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
 #endif
 
 #if LJ_TARGET_WINDOWS
@@ -632,6 +634,22 @@ extern void *LJ_WIN_LOADLIBA(const char *path);
 #endif
 #endif
 
+#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+#define LJ_NO_UNWIND		1
+#endif
+
+#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
+#define LJ_UNWIND_EXT		1
+#else
+#define LJ_UNWIND_EXT		0
+#endif
+
+#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
+#define LJ_UNWIND_JIT		1
+#else
+#define LJ_UNWIND_JIT		0
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1

+ 154 - 62
libs/LuaJIT/src/lj_asm.c

@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_gc.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -71,6 +72,8 @@ typedef struct ASMState {
   IRRef snaprename;	/* Rename highwater mark for snapshot check. */
   SnapNo snapno;	/* Current snapshot number. */
   SnapNo loopsnapno;	/* Loop snapshot number. */
+  int snapalloc;	/* Current snapshot needs allocation. */
+  BloomFilter snapfilt1, snapfilt2;	/* Filled with snapshot refs. */
 
   IRRef fuseref;	/* Fusion limit (loopref, 0 or FUSE_DISABLED). */
   IRRef sectref;	/* Section base reference (loopref or 0). */
@@ -84,6 +87,7 @@ typedef struct ASMState {
 
   MCode *mcbot;		/* Bottom of reserved MCode. */
   MCode *mctop;		/* Top of generated MCode. */
+  MCode *mctoporig;	/* Original top of generated MCode. */
   MCode *mcloop;	/* Pointer to loop MCode (or NULL). */
   MCode *invmcp;	/* Points to invertible loop branch (or NULL). */
   MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
@@ -694,7 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    ra_addrename(as, down, ref, as->snapno);
+    /*
+    ** The rename is effective at the subsequent (already emitted) exit
+    ** branch. This is for the current snapshot (as->snapno). Except if we
+    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
+    ** then it belongs to the next snapshot.
+    ** See also the discussion at asm_snap_checkrename().
+    */
+    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
   }
 }
 
@@ -807,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
 }
 #endif
 
-#if !LJ_64
 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
 static void ra_destpair(ASMState *as, IRIns *ir)
 {
   Reg destlo = ir->r, desthi = (ir+1)->r;
+  IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
   /* First spill unrelated refs blocking the destination registers. */
   if (!rset_test(as->freeset, RID_RETLO) &&
       destlo != RID_RETLO && desthi != RID_RETLO)
@@ -835,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir)
   /* Check for conflicts and shuffle the registers as needed. */
   if (destlo == RID_RETHI) {
     if (desthi == RID_RETLO) {
-#if LJ_TARGET_X86
-      *--as->mcp = XI_XCHGa + RID_RETHI;
+#if LJ_TARGET_X86ORX64
+      *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI);
 #else
-      emit_movrr(as, ir, RID_RETHI, RID_TMP);
-      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-      emit_movrr(as, ir, RID_TMP, RID_RETLO);
+      emit_movrr(as, irx, RID_RETHI, RID_TMP);
+      emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+      emit_movrr(as, irx, RID_TMP, RID_RETLO);
 #endif
     } else {
-      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
-      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+      emit_movrr(as, irx, RID_RETHI, RID_RETLO);
+      if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
     }
   } else if (desthi == RID_RETLO) {
-    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   } else {
-    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   }
   /* Restore spill slots (if any). */
   if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
   if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
 }
-#endif
 
 /* -- Snapshot handling --------- ----------------------------------------- */
 
@@ -892,7 +902,10 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
 static void asm_snap_alloc1(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
-  if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
+  if (!irref_isk(ref) && ir->r != RID_SUNK) {
+    bloomset(as->snapfilt1, ref);
+    bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
+    if (ra_used(ir)) return;
     if (ir->r == RID_SINK) {
       ir->r = RID_SUNK;
 #if LJ_HASFFI
@@ -947,11 +960,12 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 }
 
 /* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapShot *snap = &as->T->snap[snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
+  as->snapfilt1 = as->snapfilt2 = 0;
   for (n = 0; n < nent; n++) {
     SnapEntry sn = map[n];
     IRRef ref = snap_ref(sn);
@@ -960,7 +974,7 @@ static void asm_snap_alloc(ASMState *as)
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
 	lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
 		   "snap %d[%d] points to bad SOFTFP IR %04d",
-		   as->snapno, n, ref - REF_BIAS);
+		   snapno, n, ref - REF_BIAS);
 	asm_snap_alloc1(as, ref+1);
       }
     }
@@ -976,35 +990,26 @@ static void asm_snap_alloc(ASMState *as)
 */
 static int asm_snap_checkrename(ASMState *as, IRRef ren)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
-  SnapEntry *map = &as->T->snapmap[snap->mapofs];
-  MSize n, nent = snap->nent;
-  for (n = 0; n < nent; n++) {
-    SnapEntry sn = map[n];
-    IRRef ref = snap_ref(sn);
-    if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
-      IRIns *ir = IR(ref);
-      ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
-      RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
-      return 1;  /* Found. */
-    }
+  if (bloomtest(as->snapfilt1, ren) &&
+      bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
+    IRIns *ir = IR(ren);
+    ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
+    RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
+    return 1;  /* Found. */
   }
   return 0;  /* Not found. */
 }
 
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
 static void asm_snap_prep(ASMState *as)
 {
-  if (as->curins < as->snapref) {
-    do {
-      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
-      as->snapno--;
-      as->snapref = as->T->snap[as->snapno].ref;
-    } while (as->curins < as->snapref);
-    asm_snap_alloc(as);
+  if (as->snapalloc) {
+    /* Alloc on first invocation for each snapshot. */
+    as->snapalloc = 0;
+    asm_snap_alloc(as, as->snapno);
     as->snaprename = as->T->nins;
   } else {
-    /* Process any renames above the highwater mark. */
+    /* Check any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
       IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
@@ -1013,6 +1018,35 @@ static void asm_snap_prep(ASMState *as)
   }
 }
 
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
+{
+  if (as->curins < as->snapref) {
+    uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);  /* Distance from the original mcode top to the current mcp. */
+    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);  /* Must fit the uint16_t store below. */
+    do {
+      if (as->snapno == 0) return;  /* No earlier snapshot; snapalloc is left untouched. */
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+      as->T->snap[as->snapno].mcofs = (uint16_t)ofs;  /* Remember mcode ofs. */
+    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
+    as->snapalloc = 1;  /* Makes the next asm_snap_prep() call allocate for the new snapshot. */
+  }
+}
+
+/* Fixup snapshot mcode offsets. */
+static void asm_snap_fixup_mcofs(ASMState *as)
+{
+  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);  /* mctoporig - mcp at the time of the call. */
+  SnapShot *snap = as->T->snap;
+  SnapNo i;
+  for (i = as->T->nsnap-1; i > 0; i--) {  /* Descending, so snap[i-1].mcofs is read before it is rewritten. */
+    /* Compute offset from mcode start and store in correct snapshot. */
+    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
+  }
+  snap[0].mcofs = 0;  /* The first snapshot always gets offset 0. */
+}
+
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
 /* Calculate stack adjustment. */
@@ -1057,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
   IRRef args[3];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const char *str */
   args[2] = ir->op2;   /* size_t len      */
@@ -1069,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L    */
   args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
   as->gcsteps++;
@@ -1081,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const GCtab *kt */
   as->gcsteps++;
@@ -1106,28 +1143,43 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
 
 /* -- Buffer operations --------------------------------------------------- */
 
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb);
+#endif
 
 static void asm_bufhdr(ASMState *as, IRIns *ir)
 {
   Reg sb = ra_dest(as, ir, RSET_GPR);
-  if ((ir->op2 & IRBUFHDR_APPEND)) {
+  switch (ir->op2) {
+  case IRBUFHDR_RESET: {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    IRIns irbp;
+    irbp.ot = IRT(0, IRT_PTR);  /* Buffer data pointer type. */
+    emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
+    emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
+    break;
+    }
+  case IRBUFHDR_APPEND: {
     /* Rematerialize const buffer pointer instead of likely spill. */
     IRIns *irp = IR(ir->op1);
     if (!(ra_hasreg(irp->r) || irp == ir-1 ||
 	  (irp == ir-2 && !ra_used(ir-1)))) {
-      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+      while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
 	irp = IR(irp->op1);
       if (irref_isk(irp->op1)) {
 	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
 	ir = irp;
       }
     }
-  } else {
-    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
-    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
-    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
-    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+    break;
+    }
+#if LJ_HASBUFFER
+  case IRBUFHDR_WRITE:
+    asm_bufhdr_write(as, sb);
+    break;
+#endif
+  default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
   }
 #if LJ_TARGET_X86ORX64
   ra_left(as, sb, ir->op1);
@@ -1179,7 +1231,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
   if (args[1] == ASMREF_TMP1) {
     Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
     if (kchar == -129)
-      asm_tvptr(as, tmp, irs->op1);
+      asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
     else
       ra_allockreg(as, kchar, tmp);
   }
@@ -1201,6 +1253,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci;
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;
   as->gcsteps++;
   if (ir->op2 == IRTOSTR_NUM) {
@@ -1216,7 +1269,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
   asm_setupresult(as, ir, ci);  /* GCstr * */
   asm_gencall(as, ci, args);
   if (ir->op2 == IRTOSTR_NUM)
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
 }
 
 #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
@@ -1257,12 +1310,19 @@ static void asm_newref(ASMState *as, IRIns *ir)
   IRRef args[3];
   if (ir->r == RID_SINK)
     return;
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L */
   args[1] = ir->op1;      /* GCtab *t     */
   args[2] = ASMREF_TMP1;  /* cTValue *key */
   asm_setupresult(as, ir, ci);  /* TValue * */
   asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);  /* Destination register is chosen from RSET_GPR. */
+  asm_tvptr(as, r, ir->op1, ir->op2);  /* op2 is passed through as the asm_tvptr mode argument. */
+}
 
 static void asm_lref(ASMState *as, IRIns *ir)
@@ -1744,6 +1804,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_NEWREF: asm_newref(as, ir); break;
   case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
   case IR_FREF: asm_fref(as, ir); break;
+  case IR_TMPREF: asm_tmpref(as, ir); break;
   case IR_STRREF: asm_strref(as, ir); break;
   case IR_LREF: asm_lref(as, ir); break;
 
@@ -1838,8 +1899,7 @@ static void asm_head_side(ASMState *as)
 
   if (as->snapno && as->topslot > as->parent->topslot) {
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
-    as->snapno = 0;
-    asm_snap_alloc(as);
+    asm_snap_alloc(as, 0);
   }
   allow = asm_head_side_base(as, irp, allow);
 
@@ -2078,6 +2138,9 @@ static void asm_setup_regsp(ASMState *as)
 #endif
 
   ra_setup(as);
+#if LJ_TARGET_ARM64
+  ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
 
   /* Clear reg/sp for constants. */
   for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
@@ -2100,6 +2163,7 @@ static void asm_setup_regsp(ASMState *as)
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
+  as->snapalloc = 0;
 
   as->stopins = REF_BASE;
   as->orignins = nins;
@@ -2148,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as)
       ir->prev = (uint16_t)REGSP_HINT((rload & 15));
       rload = lj_ror(rload, 4);
       continue;
+    case IR_TMPREF:
+      if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
+	as->evenspill = 4;  /* TMPREF OUT2 needs two TValues on the stack. */
+      break;
 #endif
     case IR_CALLXS: {
       CCallInfo ci;
@@ -2157,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLL:
+      /* lj_vm_next needs two TValues on the stack. */
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+      if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
+	as->evenspill = SPS_FIRST + 4;
+#else
+      if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
+	as->evenspill = 4;
+#endif
+      /* fallthrough */
+    case IR_CALLN: case IR_CALLA: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -2165,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as)
 		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
       }
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
     case IR_HIOP:
       switch ((ir-1)->o) {
 #if LJ_SOFTFP && LJ_TARGET_ARM
@@ -2176,7 +2253,7 @@ static void asm_setup_regsp(ASMState *as)
 	}
 	break;
 #endif
-#if !LJ_SOFTFP && LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
       case IR_CONV:
 	if (irt_isfp((ir-1)->t)) {
 	  ir->prev = REGSP_HINT(RID_FPRET);
@@ -2184,7 +2261,7 @@ static void asm_setup_regsp(ASMState *as)
 	}
 #endif
       /* fallthrough */
-      case IR_CALLN: case IR_CALLXS:
+      case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
 #if LJ_SOFTFP
       case IR_MIN: case IR_MAX:
 #endif
@@ -2195,7 +2272,6 @@ static void asm_setup_regsp(ASMState *as)
 	break;
       }
       break;
-#endif
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
       if ((ir+1)->o != IR_HIOP) break;
@@ -2250,13 +2326,23 @@ static void asm_setup_regsp(ASMState *as)
       }
       /* fallthrough */ /* for integer POW */
     case IR_DIV: case IR_MOD:
-      if (!irt_isnum(ir->t)) {
+      if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
 	ir->prev = REGSP_HINT(RID_RET);
 	if (inloop)
 	  as->modset |= (RSET_SCRATCH & RSET_GPR);
 	continue;
       }
       break;
+#if LJ_64 && LJ_SOFTFP
+    case IR_ADD: case IR_SUB: case IR_MUL:
+      if (irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+      break;
+#endif
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
       if (ir->op2 <= IRFPM_TRUNC) {
@@ -2327,7 +2413,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
 {
   ASMState as_;
   ASMState *as = &as_;
-  MCode *origtop;
 
   /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
   {
@@ -2355,7 +2440,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->parent = J->parent ? traceref(J, J->parent) : NULL;
 
   /* Reserve MCode memory. */
-  as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
+  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
   as->mcp = as->mctop;
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
@@ -2417,6 +2502,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
 		 "IR %04d has unsplit 64 bit type",
 		 (int)(ir - as->ir) - REF_BIAS);
+      asm_snap_prev(as);
       if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
 	continue;  /* Dead-code elimination can be soooo easy. */
       if (irt_isguard(ir->t))
@@ -2450,6 +2536,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
 	     (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
       T->nins = J->curfinal->nins;
+      /* Fill mcofs of any unprocessed snapshots. */
+      as->curins = REF_FIRST;
+      asm_snap_prev(as);
       break;  /* Done. */
     }
 
@@ -2468,13 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   /* Set trace entry point before fixing up tail to allow link to self. */
   T->mcode = as->mcp;
   T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
-  if (!as->loopref)
+  if (as->loopref)
+    asm_loop_tail_fixup(as);
+  else
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+  asm_snap_fixup_mcofs(as);
 #if LJ_TARGET_MCODE_FIXUP
   asm_mcode_fixup(T->mcode, T->szmcode);
 #endif
-  lj_mcode_sync(T->mcode, origtop);
+  lj_mcode_sync(T->mcode, as->mctoporig);
 }
 
 #undef IR

+ 89 - 38
libs/LuaJIT/src/lj_asm_arm.h

@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
 	*ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
 	return ra_allock(as, (ofs & ~255), allow);
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = 0;
+      return RID_SP;
     }
   }
   *ofsp = 0;
@@ -498,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* Emit code that rewrites the SBuf L field addressed via reg sb. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* GPR other than sb. */
+  IRIns irgc;  /* Only .ot is set; passed to emit_loadofs/emit_storeofs. */
+  int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);  /* Address of g->cur_L. */
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  /* NOTE(review): emit order looks reversed vs. execution order -- confirm. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  if ((as->flags & JIT_F_ARMV6T2)) {  /* ARMv6T2 flag set: use a single BFI. */
+    emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
+  } else {  /* Else: AND tmp with SBUF_MASK_FLAG, then ORR it into RID_TMP. */
+    emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+    emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
+  }
+  emit_lso(as, ARMI_LDR, RID_TMP,  /* cur_L: base (addr & ~4095) + low 12 bits. */
+	   ra_allock(as, (addr & ~4095),
+		     rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+	   (addr & 4095));
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* tmp = old L field. */
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -666,35 +693,55 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    } else {
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_dm(as, ARMI_MOV, dest, RID_SP);
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		 RID_SP, 0);
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		 RID_SP, 4);
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_dm(as, ARMI_MOV, dest, RID_SP);
+	emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
 #if LJ_SOFTFP
-      lj_assertA(0, "unsplit FP op");
+	lj_assertA(0, "unsplit FP op");
 #else
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+	/* Otherwise force a spill and use the spill slot. */
+	emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
 #endif
+      }
+    } else {
+      /* Otherwise use [sp] and [sp+4] to hold the TValue.
+      ** This assumes the following call has max. 4 args.
+      */
+      Reg type;
+      emit_dm(as, ARMI_MOV, dest, RID_SP);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_lso(as, ARMI_STR, src, RID_SP, 0);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_lso(as, ARMI_STR, type, RID_SP, 4);
     }
   } else {
-    /* Otherwise use [sp] and [sp+4] to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
     emit_dm(as, ARMI_MOV, dest, RID_SP);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_lso(as, ARMI_STR, src, RID_SP, 0);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_lso(as, ARMI_STR, type, RID_SP, 4);
   }
 }
 
@@ -1086,6 +1133,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
 		       (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   if (!hiop || type == RID_NONE) {
     rset_clear(allow, idx);
     if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1837,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
 #if LJ_SOFTFP
@@ -1876,6 +1924,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
       asm_xstore_(as, ir, 4);
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
 #if LJ_HASFFI
@@ -1894,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     asm_intneg(as, ir, ARMI_RSC);
     asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
     break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
 #if LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1901,25 +1953,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2021,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
       }
@@ -2082,6 +2127,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. Called by lj_asm_trace() if as->loopref is set. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do in the ARM backend. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Reload L register from g->cur_L. */

+ 78 - 35
libs/LuaJIT/src/lj_asm_arm64.h

@@ -198,6 +198,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
 	  return RID_GL;
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
+      return RID_GL;
     }
   }
   *ofsp = 0;
@@ -457,8 +460,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r); /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
   ra_evictset(as, drop); /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lj_assertA(!irt_ispri(ir->t), "PRI dest");
@@ -470,6 +476,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
+    } else if (hiop) {
+      ra_destpair(as, ir);
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -519,6 +527,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* Emit code that rewrites the SBuf L field addressed via reg sb. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* GPR other than sb. */
+  IRIns irgc;  /* Only .ot is set; passed to emit_loadofs/emit_storeofs. */
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  /* NOTE(review): emit order looks reversed vs. execution order -- confirm. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
+  emit_getgl(as, RID_TMP, cur_L);  /* RID_TMP = g->cur_L. */
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* tmp = old L field. */
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
@@ -602,7 +625,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
 	emit_dn(as, A64I_SXTW, dest, left);
       }
     } else {
-      if (st64) {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	/* This is either a 32 bit reg/reg mov which zeroes the hiword
 	** or a load of the loword from a 64 bit address.
 	*/
@@ -675,22 +698,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
 }
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+	return;
+      }
+      emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
     } else {
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+      asm_tvstore64(as, dest, 0, ref);
     }
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    asm_tvstore64(as, dest, 0, ref);
-    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
   }
+  /* g->tmptv holds the TValue(s). */
+  emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)
@@ -767,7 +791,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     }
   } else if (irt_isaddr(kt)) {
     if (isk) {
-      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
       scr = ra_allock(as, kk, allow);
     } else {
       scr = ra_scratch(as, allow);
@@ -775,7 +799,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     rset_clear(allow, scr);
   } else {
     lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
-    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+    type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
     scr = ra_scratch(as, rset_clear(allow, type));
     rset_clear(allow, scr);
   }
@@ -824,7 +848,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
     }
   } else {
-    emit_nm(as, A64I_CMPw, scr, type);
+    emit_nm(as, A64I_CMPx, scr, type);
     emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
   }
 
@@ -1059,6 +1083,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   }
   type = ra_scratch(as, rset_clear(gpr, tmp));
   idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   /* Always do the type check, even if the load result is unused. */
   asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
   if (irt_type(ir->t) >= IRT_NUM) {
@@ -1261,17 +1286,13 @@ static void asm_tbar(ASMState *as, IRIns *ir)
 {
   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
-  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
-		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
   Reg mark = RID_TMP;
   MCLabel l_end = emit_label(as);
   emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
   emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
-  emit_lso(as, A64I_STRx, tab, gr,
-	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_setgl(as, tab, gc.grayagain);
   emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
-  emit_lso(as, A64I_LDRx, link, gr,
-	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_getgl(as, link, gc.grayagain);
   emit_cond_branch(as, CC_EQ, l_end);
   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
   emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
@@ -1291,7 +1312,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
   args[0] = ASMREF_TMP1;  /* global_State *g */
   args[1] = ir->op1;      /* TValue *tv      */
   asm_gencall(as, ci, args);
-  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
+  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
   obj = IR(ir->op1)->r;
   tmp = ra_scratch(as, rset_exclude(allow, obj));
   emit_cond_branch(as, CC_EQ, l_end);
@@ -1704,13 +1725,25 @@ static void asm_comp(ASMState *as, IRIns *ir)
 
 #define asm_equal(as, ir)	asm_comp(as, ir)
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-  UNUSED(as); UNUSED(ir);
-  lj_assertA(0, "unexpected HIOP");  /* Unused on 64 bit. */
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_CALLN:
+  case IR_CALLL:
+  case IR_CALLS:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+  }
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -1781,7 +1814,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
     IRIns *ir = IR(ref);
     if ((sn & SNAP_NORESTORE))
       continue;
-    if (irt_isnum(ir->t)) {
+    if ((sn & SNAP_KEYINDEX)) {
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
+			       ra_alloc1(as, ref, allow);
+      rset_clear(allow, r);
+      emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
+      emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
+    } else if (irt_isnum(ir->t)) {
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
     } else {
@@ -1804,7 +1844,7 @@ static void asm_gc_check(ASMState *as)
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
   IRRef args[2];
   MCLabel l_end;
-  Reg tmp1, tmp2;
+  Reg tmp2;
   ra_evictset(as, RSET_SCRATCH);
   l_end = emit_label(as);
   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
@@ -1813,17 +1853,14 @@ static void asm_gc_check(ASMState *as)
   args[0] = ASMREF_TMP1;  /* global_State *g */
   args[1] = ASMREF_TMP2;  /* MSize steps     */
   asm_gencall(as, ci, args);
-  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
   tmp2 = ra_releasetmp(as, ASMREF_TMP2);
   emit_loadi(as, tmp2, as->gcsteps);
   /* Jump around GC step if GC total < GC threshold. */
   emit_cond_branch(as, CC_LS, l_end);
   emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
-  emit_lso(as, A64I_LDRx, tmp2, tmp1,
-	   (int32_t)offsetof(global_State, gc.threshold));
-  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
-	   (int32_t)offsetof(global_State, gc.total));
-  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
+  emit_getgl(as, tmp2, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
   as->gcsteps = 0;
   checkmclim(as);
 }
@@ -1846,6 +1883,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. Called by lj_asm_trace() if as->loopref is set. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do in the ARM64 backend. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Reload L register from g->cur_L. */

+ 141 - 91
libs/LuaJIT/src/lj_asm_mips.h

@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
 /* Setup spare long-range jump slots per mcarea. */
 static void asm_sparejump_setup(ASMState *as)
 {
-  MCode *mxp = as->mcbot;
-  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
+  MCode *mxp = as->mctop;
+  if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
+    mxp -= MIPS_SPAREJUMP*2;
     lj_assertA(MIPSI_NOP == 0, "bad NOP");
     memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
-    mxp += MIPS_SPAREJUMP*2;
-    lj_assertA(mxp < as->mctop, "MIPS_SPAREJUMP too big");
-    lj_mcode_sync(as->mcbot, mxp);
-    lj_mcode_commitbot(as->J, mxp);
-    as->mcbot = mxp;
-    as->mclim = as->mcbot + MCLIM_REDZONE;
+    as->mctop = mxp;
+  }
+}
+
+static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
+{  /* Find or claim a spare jump slot for tjump; NULL if none is usable. */
+  MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
+  int slot = MIPS_SPAREJUMP;  /* Max. number of slots to scan. */
+  while (slot--) {  /* Scan downwards from mcarea + MCLink size. */
+    mxp -= 2;  /* Step back one slot (two MCode words). */
+    if (*mxp == tjump) {  /* Slot already holds this jump: reuse it. */
+      return mxp;
+    } else if (*mxp == MIPSI_NOP) {  /* Slot still holds NOP: claim it. */
+      *mxp = tjump;
+      return mxp;
+    }
   }
+  return NULL;  /* Every scanned slot holds a different jump. */
 }
 
 /* Setup exit stub after the end of each trace. */
@@ -181,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -336,19 +351,15 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-#if LJ_32
   int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
-#endif
 #if !LJ_SOFTFP
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
 #endif
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
-#if LJ_32
   if (hiop && ra_hasreg((ir+1)->r))
     rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
-#endif
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lj_assertA(!irt_ispri(ir->t), "PRI dest");
@@ -377,10 +388,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -450,6 +459,27 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* Emit code that rewrites the SBuf L field addressed via reg sb. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* GPR other than sb. */
+  IRIns irgc;  /* Only .ot is set; passed to emit_loadofs/emit_storeofs. */
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  /* NOTE(review): emit order looks reversed vs. execution order -- confirm. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  if ((as->flags & JIT_F_MIPSXXR2)) {  /* R2 flag set: single INS/DINS. */
+    emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
+	      lj_fls(SBUF_MASK_FLAG), 0);
+  } else {  /* Else: ANDI tmp with SBUF_MASK_FLAG, then OR it into RID_TMP. */
+    emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
+    emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
+  }
+  emit_getgl(as, RID_TMP, cur_L);  /* RID_TMP = g->cur_L. */
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* tmp = old L field. */
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -739,7 +769,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
 	  }
 	}
       } else {
-	if (st64) {
+	if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	  /* This is either a 32 bit reg/reg mov which zeroes the hiword
 	  ** or a load of the loword from a 64 bit address.
 	  */
@@ -827,34 +857,63 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
 #endif
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, igcptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+#if LJ_64
+	emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
+#else
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#endif
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+	emit_tsi(as, MIPSI_SDC1, (src & 31),  RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP32
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
 #if LJ_32
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+      Reg type;
+      emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
 #else
-    asm_tvstore64(as, dest, 0, ref);
-    emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
-	     (int32_t)(offsetof(global_State, tmptv)-32768));
+      asm_tvstore64(as, dest, 0, ref);
+      emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
 #endif
+    }
+  } else {
+    emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -909,11 +968,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   MCLabel l_end, l_loop, l_next;
 
   rset_clear(allow, tab);
-#if LJ_SOFTFP32
-  if (!isk) {
-    key = ra_alloc1(as, refkey, allow);
-    rset_clear(allow, key);
-    if (irkey[1].o == IR_HIOP) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+  } else {
+    if (!irt_ispri(kt)) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+#if LJ_32
+    if (LJ_SOFTFP && irkey[1].o == IR_HIOP) {
       if (ra_hasreg((irkey+1)->r)) {
 	type = tmpnum = (irkey+1)->r;
 	tmp1 = ra_scratch(as, allow);
@@ -924,23 +988,11 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       }
       rset_clear(allow, tmpnum);
     } else {
-      type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
+      type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
       rset_clear(allow, type);
     }
-  }
-#else
-  if (!LJ_SOFTFP && irt_isnum(kt)) {
-    key = ra_alloc1(as, refkey, RSET_FPR);
-    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
-  } else if (!irt_ispri(kt)) {
-    key = ra_alloc1(as, refkey, allow);
-    rset_clear(allow, key);
-#if LJ_32
-    type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
-    rset_clear(allow, type);
 #endif
   }
-#endif
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 #if LJ_64
@@ -953,10 +1005,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     } else {
       int64_t k;
       if (isk && irt_isaddr(kt)) {
-	k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+	k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
       } else {
 	lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
-	k = ~((int64_t)~irt_toitype(ir->t) << 47);
+	k = ~((int64_t)~irt_toitype(kt) << 47);
       }
       cmp64 = ra_allock(as, k, allow);
       rset_clear(allow, cmp64);
@@ -1352,6 +1404,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
 #endif
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   rset_clear(allow, idx);
   if (irt_isnum(t)) {
     asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -2327,15 +2380,15 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
 #if LJ_HASFFI && !LJ_SOFTFP
@@ -2382,38 +2435,33 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     }
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
-#if LJ_HASFFI
+#if LJ_32 && LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-#if LJ_SOFTFP
+#if LJ_32 && LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
   case IR_STRTO:
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused on MIPS64 or without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2513,6 +2561,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
@@ -2575,6 +2625,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  if (as->loopinv) as->mctop--;
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */
@@ -2582,7 +2638,6 @@ static void asm_head_root_base(ASMState *as)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -2597,7 +2652,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -2704,21 +2758,17 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
 	patchbranch:
 	  p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
 	  *p = MIPSI_NOP;  /* Replace the load of the exit number. */
-	  cstop = p;
+	  cstop = p+1;
 	  if (!cstart) cstart = p-1;
 	} else {  /* Branch out of range. Use spare jump slot in mcarea. */
-	  int i;
-	  for (i = (int)(sizeof(MCLink)/sizeof(MCode));
-	       i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
-	       i += 2) {
-	    if (mcarea[i] == tjump) {
-	      delta = mcarea+i - p;
-	      goto patchbranch;
-	    } else if (mcarea[i] == MIPSI_NOP) {
-	      mcarea[i] = tjump;
-	      cstart = mcarea+i;
-	      delta = mcarea+i - p;
+	  MCode *mcjump = asm_sparejump_use(mcarea, tjump);
+	  if (mcjump) {
+	    lj_mcode_sync(mcjump, mcjump+1);
+	    delta = mcjump - p;
+	    if (((delta + 0x8000) >> 16) == 0) {
 	      goto patchbranch;
+	    } else {
+	      lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
 	    }
 	  }
 	  /* Ignore jump slot overflow. Child trace is simply not attached. */

+ 90 - 40
libs/LuaJIT/src/lj_asm_ppc.h

@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -337,10 +340,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -389,6 +390,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -567,28 +583,54 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+      Reg type;
+      emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
     }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+  } else {
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -894,7 +936,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
   int32_t ofs;
   if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
     idx = RID_JGL;
-    ofs = (ir->op2 << 2) - 32768;
+    ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
   } else {
     idx = ra_alloc1(as, ir->op1, RSET_GPR);
     if (ir->op2 == IRFL_TAB_ARRAY) {
@@ -975,6 +1017,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) {
+    ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
+	  ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
+  }
   if (irt_isnum(t)) {
     Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
     asm_guardcc(as, CC_GE);
@@ -1057,7 +1103,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
   lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
 	     "inconsistent SLOAD variant");
   lj_assertA(LJ_DUALNUM ||
-	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
+	     !irt_isint(t) ||
+	     (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
 	     "bad SLOAD type");
 #if LJ_SOFTFP
   lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
@@ -1894,15 +1941,15 @@ static void asm_comp64(ASMState *as, IRIns *ir)
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
 #if LJ_HASFFI && !LJ_SOFTFP
@@ -1937,12 +1984,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     }
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
 #if LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
 #if LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1950,25 +2001,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2055,6 +2097,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
@@ -2113,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */

+ 93 - 55
libs/LuaJIT/src/lj_asm_x86.h

@@ -216,10 +216,17 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
 #endif
       }
       break;
+    case IR_TMPREF:
+#if LJ_GC64
+      as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
+      as->mrm.base = RID_DISPATCH;
+      as->mrm.idx = RID_NONE;
+#else
+      as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
+      as->mrm.base = as->mrm.idx = RID_NONE;
+#endif
+      return;
     default:
-      lj_assertA(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
-		 ir->o == IR_KKPTR,
-		 "bad IR op %d", ir->o);
       break;
     }
   }
@@ -480,6 +487,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       }
     } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
+      as->mrm.ofs += 8 * ir->op2;
       return RID_MRM;
     }
   }
@@ -651,7 +659,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-  int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
   if (ra_hasreg(ir->r))
@@ -691,10 +699,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 		  irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
       }
 #endif
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       lj_assertA(!irt_ispri(ir->t), "PRI dest");
       ra_destreg(as, ir, RID_RET);
@@ -781,6 +787,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+  emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
@@ -924,7 +945,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       }
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      if (st64) {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	Reg left = asm_fuseload(as, lref, RSET_GPR);
 	/* This is either a 32 bit reg/reg mov which zeroes the hiword
 	** or a load of the loword from a 64 bit address.
@@ -1050,47 +1071,48 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ref))
-      emit_loada(as, dest, ir_knum(ir));
-    else
-      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-#if LJ_GC64
-    if (irref_isk(ref)) {
-      TValue k;
-      lj_ir_kvalue(as->J->L, &k, ir);
-      emit_movmroi(as, dest, 4, k.u32.hi);
-      emit_movmroi(as, dest, 0, k.u32.lo);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+	/* Use the number constant itself as a TValue. */
+	emit_loada(as, dest, ir_knum(ir));
+	return;
+      }
+      emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
     } else {
-      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      if (irt_is64(ir->t)) {
-	emit_u32(as, irt_toitype(ir->t) << 15);
-	emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+#if LJ_GC64
+      if (irref_isk(ref)) {
+	TValue k;
+	lj_ir_kvalue(as->J->L, &k, ir);
+	emit_movmroi(as, dest, 4, k.u32.hi);
+	emit_movmroi(as, dest, 0, k.u32.lo);
       } else {
-	/* Currently, no caller passes integers that might end up here. */
-	emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	/* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	if (irt_is64(ir->t)) {
+	  emit_u32(as, irt_toitype(ir->t) << 15);
+	  emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+	} else {
+	  emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	}
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
       }
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    }
 #else
-    if (!irref_isk(ref)) {
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    } else if (!irt_ispri(ir->t)) {
-      emit_movmroi(as, dest, 0, ir->i);
-    }
-    if (!(LJ_64 && irt_islightud(ir->t)))
-      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+      if (!irref_isk(ref)) {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+      } else if (!irt_ispri(ir->t)) {
+	emit_movmroi(as, dest, 0, ir->i);
+      }
+      if (!(LJ_64 && irt_islightud(ir->t)))
+	emit_movmroi(as, dest, 4, irt_toitype(ir->t));
 #endif
-    emit_loada(as, dest, &J2G(as->J)->tmptv);
+    }
   }
+  emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)
@@ -1524,6 +1546,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
       asm_fuseahuref(as, ir->op1, RSET_GPR);
+      if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
       emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
     }
     return;
@@ -1533,6 +1556,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
 #if LJ_GC64
     if (irt_isaddr(ir->t)) {
       emit_shifti(as, XOg_SHR|REX_64, dest, 17);
@@ -1560,6 +1584,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     }
 #endif
     asm_fuseahuref(as, ir->op1, gpr);
+    if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
   }
   /* Always do the type check, even if the load result is unused. */
   as->mrm.ofs += 4;
@@ -1675,7 +1700,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
   lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
 	     "inconsistent SLOAD variant");
   lj_assertA(LJ_DUALNUM ||
-	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
+	     !irt_isint(t) ||
+	     (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
 	     "bad SLOAD type");
   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
     Reg left = ra_scratch(as, RSET_FPR);
@@ -2584,15 +2610,15 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_32 && LJ_HASFFI
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && LJ_HASFFI
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
     if (usehi || uselo)
@@ -2606,8 +2632,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
       asm_fxstore(as, ir);
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
+#if LJ_32 && LJ_HASFFI
   case IR_ADD:
     as->flagmcp = NULL;
     as->curins--;
@@ -2630,20 +2658,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     asm_neg_not(as, ir-1, XOg_NEG);
     break;
     }
-  case IR_CALLN:
-  case IR_CALLXS:
-    if (!uselo)
-      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
-    break;
   case IR_CNEWI:
     /* Nothing to do here. Handled by CNEWI itself. */
     break;
+#endif
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused on x64 or without FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2704,7 +2728,15 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
     IRIns *ir = IR(ref);
     if ((sn & SNAP_NORESTORE))
       continue;
-    if (irt_isnum(ir->t)) {
+    if ((sn & SNAP_KEYINDEX)) {
+      emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX);
+      if (irref_isk(ref)) {
+	emit_movmroi(as, RID_BASE, ofs, ir->i);
+      } else {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+	emit_movtomro(as, src, RID_BASE, ofs);
+      }
+    } else if (irt_isnum(ir->t)) {
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
     } else {
@@ -2837,6 +2869,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */

+ 6 - 10
libs/LuaJIT/src/lj_bcread.c

@@ -53,11 +53,11 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
   do {
     const char *buf;
     size_t sz;
-    char *p = sbufB(&ls->sb);
+    char *p = ls->sb.b;
     MSize n = (MSize)(ls->pe - ls->p);
     if (n) {  /* Copy remainder to buffer. */
       if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
-	lj_assertLS(ls->pe == sbufP(&ls->sb), "bad buffer pointer");
+	lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
 	if (ls->p != p) memmove(p, ls->p, n);
       } else {  /* Copy from buffer provided by reader. */
 	p = lj_buf_need(&ls->sb, len);
@@ -66,7 +66,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
       ls->p = p;
       ls->pe = p + n;
     }
-    setsbufP(&ls->sb, p + n);
+    ls->sb.w = p + n;
     buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
     if (buf == NULL || sz == 0) {  /* EOF? */
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
@@ -77,8 +77,8 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
     if (n) {  /* Append to buffer. */
       n += (MSize)sz;
       p = lj_buf_need(&ls->sb, n < len ? len : n);
-      memcpy(sbufP(&ls->sb), buf, sz);
-      setsbufP(&ls->sb, p + n);
+      memcpy(ls->sb.w, buf, sz);
+      ls->sb.w = p + n;
       ls->p = p;
       ls->pe = p + n;
     } else {  /* Return buffer provided by reader. */
@@ -399,11 +399,7 @@ static int bcread_header(LexState *ls)
   if ((flags & BCDUMP_F_FFI)) {
 #if LJ_HASFFI
     lua_State *L = ls->L;
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    ctype_loadffi(L);
 #else
     return 0;
 #endif

+ 11 - 11
libs/LuaJIT/src/lj_bcwrite.c

@@ -62,7 +62,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	*p++ = BCDUMP_KTAB_INT;
 	p = lj_strfmt_wuleb128(p, k);
-	setsbufP(&ctx->sb, p);
+	ctx->sb.w = p;
 	return;
       }
     }
@@ -73,7 +73,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
     lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
     *p++ = BCDUMP_KTAB_NIL+~itype(o);
   }
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 }
 
 /* Write a template table. */
@@ -97,7 +97,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
   /* Write number of array slots and hash slots. */
   p = lj_strfmt_wuleb128(p, narray);
   p = lj_strfmt_wuleb128(p, nhash);
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
   if (narray) {  /* Write array entries (may contain nil). */
     MSize i;
     TValue *o = tvref(t->array);
@@ -172,7 +172,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
       }
 #endif
     }
-    setsbufP(&ctx->sb, p);
+    ctx->sb.w = p;
   }
 }
 
@@ -206,7 +206,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
       p = lj_strfmt_wuleb128(p, o->u32.hi);
     }
   }
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 }
 
 /* Write bytecode instructions. */
@@ -281,7 +281,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
   /* Write bytecode instructions and upvalue refs. */
   p = bcwrite_bytecode(ctx, p, pt);
   p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 
   /* Write constants. */
   bcwrite_kgc(ctx, pt);
@@ -291,16 +291,16 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
   if (sizedbg) {
     p = lj_buf_more(&ctx->sb, sizedbg);
     p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
-    setsbufP(&ctx->sb, p);
+    ctx->sb.w = p;
   }
 
   /* Pass buffer to writer function. */
   if (ctx->status == 0) {
     MSize n = sbuflen(&ctx->sb) - 5;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
-    char *q = sbufB(&ctx->sb) + (5 - nn);
+    char *q = ctx->sb.b + (5 - nn);
     p = lj_strfmt_wuleb128(q, n);  /* Fill in final size. */
-    lj_assertBCW(p == sbufB(&ctx->sb) + 5, "bad ULEB128 write");
+    lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
     ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
   }
 }
@@ -324,8 +324,8 @@ static void bcwrite_header(BCWriteCtx *ctx)
     p = lj_strfmt_wuleb128(p, len);
     p = lj_buf_wmem(p, name, len);
   }
-  ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
-			   (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
+  ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
+			   (MSize)(p - ctx->sb.b), ctx->wdata);
 }
 
 /* Write footer of bytecode dump. */

+ 132 - 59
libs/LuaJIT/src/lj_buf.c

@@ -20,12 +20,32 @@ static void buf_grow(SBuf *sb, MSize sz)
 {
   MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
   char *b;
+  GCSize flag;
   if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
   while (nsz < sz) nsz += nsz;
-  b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
-  setmref(sb->b, b);
-  setmref(sb->p, b + len);
-  setmref(sb->e, b + nsz);
+  flag = sbufflag(sb);
+  if ((flag & SBUF_FLAG_COW)) {  /* Copy-on-write semantics. */
+    lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
+    b = (char *)lj_mem_new(sbufL(sb), nsz);
+    setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
+    setgcrefnull(sbufX(sb)->cowref);
+    memcpy(b, sb->b, osz);
+  } else {
+    b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
+  }
+  if ((flag & SBUF_FLAG_EXT)) {
+    sbufX(sb)->r = sbufX(sb)->r - sb->b + b;  /* Adjust read pointer, too. */
+  }
+  /* Adjust buffer pointers. */
+  sb->b = b;
+  sb->w = b + len;
+  sb->e = b + nsz;
+  if ((flag & SBUF_FLAG_BORROW)) {  /* Adjust borrowed buffer pointers. */
+    SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
+    bsb->b = b;
+    bsb->w = b + len;
+    bsb->e = b + nsz;
+  }
 }
 
 LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
@@ -34,30 +54,51 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
   if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
     lj_err_mem(sbufL(sb));
   buf_grow(sb, sz);
-  return sbufB(sb);
+  return sb->b;
 }
 
 LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
 {
-  MSize len = sbuflen(sb);
-  lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
-  if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
-    lj_err_mem(sbufL(sb));
-  buf_grow(sb, len + sz);
-  return sbufP(sb);
+  if (sbufisext(sb)) {
+    SBufExt *sbx = (SBufExt *)sb;
+    MSize len = sbufxlen(sbx);
+    if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+      lj_err_mem(sbufL(sbx));
+    if (len + sz > sbufsz(sbx)) {  /* Must grow. */
+      buf_grow((SBuf *)sbx, len + sz);
+    } else if (sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
+      /* Also grow to avoid excessive compactions, if slack < size/8. */
+      buf_grow((SBuf *)sbx, sbuflen(sbx) + sz);  /* Not sbufxlen! */
+      return sbx->w;
+    }
+    if (sbx->r != sbx->b) {  /* Compact by moving down. */
+      memmove(sbx->b, sbx->r, len);
+      sbx->r = sbx->b;
+      sbx->w = sbx->b + len;
+      lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
+    }
+  } else {
+    MSize len = sbuflen(sb);
+    lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
+    if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+      lj_err_mem(sbufL(sb));
+    buf_grow(sb, len + sz);
+  }
+  return sb->w;
 }
 
 void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
 {
-  char *b = sbufB(sb);
-  MSize osz = (MSize)(sbufE(sb) - b);
+  char *b = sb->b;
+  MSize osz = (MSize)(sb->e - b);
   if (osz > 2*LJ_MIN_SBUF) {
-    MSize n = (MSize)(sbufP(sb) - b);
+    MSize n = (MSize)(sb->w - b);
     b = lj_mem_realloc(L, b, osz, (osz >> 1));
-    setmref(sb->b, b);
-    setmref(sb->p, b + n);
-    setmref(sb->e, b + (osz >> 1));
+    sb->b = b;
+    sb->w = b + n;
+    sb->e = b + (osz >> 1);
   }
+  lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
 }
 
 char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
@@ -67,30 +108,62 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
   return lj_buf_need(sb, sz);
 }
 
+#if LJ_HASBUFFER && LJ_HASJIT
+void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
+{
+  lua_State *L = sbufL(sbx);
+  lj_bufx_free(L, sbx);
+  lj_bufx_set_cow(L, sbx, p, len);
+  setgcref(sbx->cowref, ref);
+  lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
+}
+
+#if LJ_HASFFI
+MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
+{
+  lj_buf_more((SBuf *)sbx, sz);
+  return sbufleft(sbx);
+}
+#endif
+#endif
+
 /* -- Low-level buffer put operations ------------------------------------- */
 
 SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
 {
-  char *p = lj_buf_more(sb, len);
-  p = lj_buf_wmem(p, q, len);
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, len);
+  w = lj_buf_wmem(w, q, len);
+  sb->w = w;
   return sb;
 }
 
-SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+#if LJ_HASJIT || LJ_HASFFI
+static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
 {
-  char *p = lj_buf_more(sb, 1);
-  *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more2(sb, 1);
+  *w++ = (char)c;
+  sb->w = w;
   return sb;
 }
 
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+  char *w = sb->w;
+  if (LJ_LIKELY(w < sb->e)) {
+    *w++ = (char)c;
+    sb->w = w;
+    return sb;
+  }
+  return lj_buf_putchar2(sb, c);
+}
+#endif
+
 SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len);
-  p = lj_buf_wmem(p, strdata(s), len);
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, len);
+  w = lj_buf_wmem(w, strdata(s), len);
+  sb->w = w;
   return sb;
 }
 
@@ -99,47 +172,47 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
 SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s)+len-1;
-  while (p < e)
-    *p++ = *q--;
-  setsbufP(sb, p);
+  while (w < e)
+    *w++ = *q--;
+  sb->w = w;
   return sb;
 }
 
 SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s);
-  for (; p < e; p++, q++) {
+  for (; w < e; w++, q++) {
     uint32_t c = *(unsigned char *)q;
 #if LJ_TARGET_PPC
-    *p = c + ((c >= 'A' && c <= 'Z') << 5);
+    *w = c + ((c >= 'A' && c <= 'Z') << 5);
 #else
     if (c >= 'A' && c <= 'Z') c += 0x20;
-    *p = c;
+    *w = c;
 #endif
   }
-  setsbufP(sb, p);
+  sb->w = w;
   return sb;
 }
 
 SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s);
-  for (; p < e; p++, q++) {
+  for (; w < e; w++, q++) {
     uint32_t c = *(unsigned char *)q;
 #if LJ_TARGET_PPC
-    *p = c - ((c >= 'a' && c <= 'z') << 5);
+    *w = c - ((c >= 'a' && c <= 'z') << 5);
 #else
     if (c >= 'a' && c <= 'z') c -= 0x20;
-    *p = c;
+    *w = c;
 #endif
   }
-  setsbufP(sb, p);
+  sb->w = w;
   return sb;
 }
 
@@ -148,21 +221,21 @@ SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
   MSize len = s->len;
   if (rep > 0 && len) {
     uint64_t tlen = (uint64_t)rep * len;
-    char *p;
+    char *w;
     if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
       lj_err_mem(sbufL(sb));
-    p = lj_buf_more(sb, (MSize)tlen);
+    w = lj_buf_more(sb, (MSize)tlen);
     if (len == 1) {  /* Optimize a common case. */
       uint32_t c = strdata(s)[0];
-      do { *p++ = c; } while (--rep > 0);
+      do { *w++ = c; } while (--rep > 0);
     } else {
       const char *e = strdata(s) + len;
       do {
 	const char *q = strdata(s);
-	do { *p++ = *q++; } while (q < e);
+	do { *w++ = *q++; } while (q < e);
       } while (--rep > 0);
     }
-    setsbufP(sb, p);
+    sb->w = w;
   }
   return sb;
 }
@@ -173,27 +246,27 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
   if (i <= e) {
     for (;;) {
       cTValue *o = lj_tab_getint(t, i);
-      char *p;
+      char *w;
       if (!o) {
       badtype:  /* Error: bad element type. */
-	setsbufP(sb, (void *)(intptr_t)i);  /* Store failing index. */
+	sb->w = (char *)(intptr_t)i;  /* Store failing index. */
 	return NULL;
       } else if (tvisstr(o)) {
 	MSize len = strV(o)->len;
-	p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+	w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
       } else if (tvisint(o)) {
-	p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+	w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
       } else if (tvisnum(o)) {
-	p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
+	w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
       } else {
 	goto badtype;
       }
       if (i++ == e) {
-	setsbufP(sb, p);
+	sb->w = w;
 	break;
       }
-      if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
-      setsbufP(sb, p);
+      if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
+      sb->w = w;
     }
   }
   return sb;
@@ -203,7 +276,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
 
 GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
 {
-  return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
+  return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
 }
 
 /* Concatenate two strings. */
@@ -219,14 +292,14 @@ GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
 /* Read ULEB128 from buffer. */
 uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
 {
-  const uint8_t *p = (const uint8_t *)*pp;
-  uint32_t v = *p++;
+  const uint8_t *w = (const uint8_t *)*pp;
+  uint32_t v = *w++;
   if (LJ_UNLIKELY(v >= 0x80)) {
     int sh = 0;
     v &= 0x7f;
-    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+    do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
   }
-  *pp = (const char *)p;
+  *pp = (const char *)w;
   return v;
 }
 

+ 114 - 19
libs/LuaJIT/src/lj_buf.h

@@ -10,16 +10,60 @@
 #include "lj_gc.h"
 #include "lj_str.h"
 
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-#define sbufB(sb)	(mref((sb)->b, char))
-#define sbufP(sb)	(mref((sb)->p, char))
-#define sbufE(sb)	(mref((sb)->e, char))
-#define sbufL(sb)	(mref((sb)->L, lua_State))
-#define sbufsz(sb)	((MSize)(sbufE((sb)) - sbufB((sb))))
-#define sbuflen(sb)	((MSize)(sbufP((sb)) - sbufB((sb))))
-#define sbufleft(sb)	((MSize)(sbufE((sb)) - sbufP((sb))))
-#define setsbufP(sb, q)	(setmref((sb)->p, (q)))
-#define setsbufL(sb, l)	(setmref((sb)->L, (l)))
+/* Resizable string buffers. */
+
+/* The SBuf struct definition is in lj_obj.h:
+**   char *w;	Write pointer.
+**   char *e;	End pointer.
+**   char *b;	Base pointer.
+**   MRef L;	lua_State, used for buffer resizing. SBUF_FLAG_* bits in 3 LSBs.
+*/
+
+/* Extended string buffer. */
+typedef struct SBufExt {
+  SBufHeader;
+  union {
+    GCRef cowref;	/* Copy-on-write object reference. */
+    MRef bsb;		/* Borrowed string buffer. */
+  };
+  char *r;		/* Read pointer. */
+  GCRef dict_str;	/* Serialization string dictionary table. */
+  GCRef dict_mt;	/* Serialization metatable dictionary table. */
+  int depth;		/* Remaining recursion depth. */
+} SBufExt;
+
+#define sbufsz(sb)		((MSize)((sb)->e - (sb)->b))
+#define sbuflen(sb)		((MSize)((sb)->w - (sb)->b))
+#define sbufleft(sb)		((MSize)((sb)->e - (sb)->w))
+#define sbufxlen(sbx)		((MSize)((sbx)->w - (sbx)->r))
+#define sbufxslack(sbx)		((MSize)((sbx)->r - (sbx)->b))
+
+#define SBUF_MASK_FLAG		(7)
+#define SBUF_MASK_L		(~(GCSize)SBUF_MASK_FLAG)
+#define SBUF_FLAG_EXT		1	/* Extended string buffer. */
+#define SBUF_FLAG_COW		2	/* Copy-on-write buffer. */
+#define SBUF_FLAG_BORROW	4	/* Borrowed string buffer. */
+
+#define sbufL(sb) \
+  ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
+#define setsbufL(sb, l)		(setmref((sb)->L, (l)))
+#define setsbufXL(sb, l, flag) \
+  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
+#define setsbufXL_(sb, l) \
+  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
+
+#define sbufflag(sb)		(mrefu((sb)->L))
+#define sbufisext(sb)		(sbufflag((sb)) & SBUF_FLAG_EXT)
+#define sbufiscow(sb)		(sbufflag((sb)) & SBUF_FLAG_COW)
+#define sbufisborrow(sb)	(sbufflag((sb)) & SBUF_FLAG_BORROW)
+#define sbufiscoworborrow(sb)	(sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
+#define sbufX(sb) \
+  (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
+#define setsbufflag(sb, flag)	(setmrefu((sb)->L, (flag)))
+
+#define tvisbuf(o) \
+  (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
+#define bufV(o)		check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
 
 /* Buffer management */
 LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
@@ -30,12 +74,12 @@ LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
 static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
 {
   setsbufL(sb, L);
-  setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
+  sb->w = sb->e = sb->b = NULL;
 }
 
 static LJ_AINLINE void lj_buf_reset(SBuf *sb)
 {
-  setmrefr(sb->p, sb->b);
+  sb->w = sb->b;
 }
 
 static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
@@ -48,26 +92,77 @@ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
 
 static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
 {
-  lj_mem_free(g, sbufB(sb), sbufsz(sb));
+  lj_assertG(!sbufisext(sb), "bad free of SBufExt");
+  lj_mem_free(g, sb->b, sbufsz(sb));
 }
 
 static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
 {
   if (LJ_UNLIKELY(sz > sbufsz(sb)))
     return lj_buf_need2(sb, sz);
-  return sbufB(sb);
+  return sb->b;
 }
 
 static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
 {
   if (LJ_UNLIKELY(sz > sbufleft(sb)))
     return lj_buf_more2(sb, sz);
-  return sbufP(sb);
+  return sb->w;
+}
+
+/* Extended buffer management */
+static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
+{
+  memset(sbx, 0, sizeof(SBufExt));
+  setsbufXL(sbx, L, SBUF_FLAG_EXT);
+}
+
+static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
+{
+  setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
+  setmref(sbx->bsb, sb);
+  sbx->r = sbx->w = sbx->b = sb->b;
+  sbx->e = sb->e;
+}
+
+static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
+				       const char *p, MSize len)
+{
+  setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
+  sbx->r = sbx->b = (char *)p;
+  sbx->w = sbx->e = (char *)p + len;
+}
+
+static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
+{
+  if (sbufiscow(sbx)) {
+    setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));
+    setgcrefnull(sbx->cowref);
+    sbx->b = sbx->e = NULL;
+  }
+  sbx->r = sbx->w = sbx->b;
 }
 
+static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
+{
+  if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
+  setsbufXL(sbx, L, SBUF_FLAG_EXT);
+  setgcrefnull(sbx->cowref);
+  sbx->r = sbx->w = sbx->b = sbx->e = NULL;
+}
+
+#if LJ_HASBUFFER && LJ_HASJIT
+LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
+#if LJ_HASFFI
+LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
+#endif
+#endif
+
 /* Low-level buffer put operations */
 LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT || LJ_HASFFI
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
 
 static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
@@ -77,9 +172,9 @@ static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
 
 static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
 {
-  char *p = lj_buf_more(sb, 1);
-  *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, 1);
+  *w++ = (char)c;
+  sb->w = w;
 }
 
 /* High-level buffer put operations */
@@ -97,7 +192,7 @@ LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
 
 static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
 {
-  return lj_str_new(L, sbufB(sb), sbuflen(sb));
+  return lj_str_new(L, sb->b, sbuflen(sb));
 }
 
 #endif

+ 4 - 4
libs/LuaJIT/src/lj_ccall.c

@@ -334,7 +334,7 @@
   isfp = sz == 2*sizeof(float) ? 2 : 1;
 
 #define CCALL_HANDLE_REGARG \
-  if (LJ_TARGET_IOS && isva) { \
+  if (LJ_TARGET_OSX && isva) { \
     /* OSX/IOS: All variadic arguments are on the stack. */ \
   } else if (isfp) {  /* Try to pass argument in FPRs. */ \
     int n2 = ctype_isvector(d->info) ? 1 : \
@@ -345,10 +345,10 @@
       goto done; \
     } else { \
       nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
-      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+      if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
     } \
   } else {  /* Try to pass argument in GPRs. */ \
-    if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+    if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
       ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
       dp = &cc->gpr[ngpr]; \
@@ -356,7 +356,7 @@
       goto done; \
     } else { \
       ngpr = maxgpr;  /* Prevent reordering. */ \
-      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+      if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
     } \
   }
 

+ 7 - 2
libs/LuaJIT/src/lj_ccallback.c

@@ -256,6 +256,11 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS   MAP_ANON
 #endif
+#ifdef PROT_MPROTECT
+#define CCPROT_CREATE	(PROT_MPROTECT(PROT_EXEC))
+#else
+#define CCPROT_CREATE	0
+#endif
 
 #endif
 
@@ -271,7 +276,7 @@ static void callback_mcode_new(CTState *cts)
   if (!p)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
 #elif LJ_TARGET_POSIX
-  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS,
+  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
 	   -1, 0);
   if (p == MAP_FAILED)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
@@ -409,7 +414,7 @@ void lj_ccallback_mcode_free(CTState *cts)
       nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
     } \
   } else { \
-    if (!LJ_TARGET_IOS && n > 1) \
+    if (!LJ_TARGET_OSX && n > 1) \
       ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
       sp = &cts->cb.gpr[ngpr]; \

+ 5 - 0
libs/LuaJIT/src/lj_cconv.c

@@ -8,6 +8,7 @@
 #if LJ_HASFFI
 
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_cdata.h"
@@ -568,7 +569,9 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
     }
     s = ctype_raw(cts, sid);
     if (ctype_isfunc(s->info)) {
+      CTypeID did = ctype_typeid(cts, d);
       sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
+      d = ctype_get(cts, did);  /* cts->tab may have been reallocated. */
     } else {
       if (ctype_isenum(s->info)) s = ctype_child(cts, s);
       goto doconv;
@@ -619,6 +622,8 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
     tmpptr = uddata(ud);
     if (ud->udtype == UDTYPE_IO_FILE)
       tmpptr = *(void **)tmpptr;
+    else if (ud->udtype == UDTYPE_BUFFER)
+      tmpptr = ((SBufExt *)tmpptr)->r;
   } else if (tvislightud(o)) {
     tmpptr = lightudV(cts->g, o);
   } else if (tvisfunc(o)) {

+ 4 - 4
libs/LuaJIT/src/lj_cparse.c

@@ -133,9 +133,9 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
     tokstr = NULL;
   } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
 	     tok >= CTOK_FIRSTDECL) {
-    if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
+    if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
     cp_save(cp, '\0');
-    tokstr = sbufB(&cp->sb);
+    tokstr = cp->sb.b;
   } else {
     tokstr = cp_tok2str(cp, tok);
   }
@@ -175,7 +175,7 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
+  fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
 			&o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
@@ -279,7 +279,7 @@ static CPToken cp_string(CPState *cp)
     return CTOK_STRING;
   } else {
     if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
-    cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
+    cp->val.i32 = (int32_t)(char)*cp->sb.b;
     cp->val.id = CTID_INT32;
     return CTOK_INTEGER;
   }

+ 49 - 5
libs/LuaJIT/src/lj_crecord.c

@@ -78,7 +78,7 @@ static CTypeID argv2ctype(jit_State *J, TRef tr, cTValue *o)
     /* Specialize to the string containing the C type declaration. */
     emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s));
     cp.L = J->L;
-    cp.cts = ctype_ctsG(J2G(J));
+    cp.cts = ctype_cts(J->L);
     oldtop = cp.cts->top;
     cp.srcname = strdata(s);
     cp.p = strdata(s);
@@ -616,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
     sp = lj_ir_kptr(J, NULL);
   } else if (tref_isudata(sp)) {
     GCudata *ud = udataV(sval);
-    if (ud->udtype == UDTYPE_IO_FILE) {
+    if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
       TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
-      emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
-      sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE);
+      emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
+      sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
+		  ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
+						 IRFL_SBUF_R);
     } else {
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
     }
@@ -1024,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
 	crec_ct_tv(J, dc, dp, sp, sval);
       }
     } else if (ctype_isstruct(d->info)) {
-      CTypeID fid = d->sib;
+      CTypeID fid;
       MSize i = 1;
+      if (!J->base[1]) {  /* Handle zero-fill of struct-of-NYI. */
+	fid = d->sib;
+	while (fid) {
+	  CType *df = ctype_get(cts, fid);
+	  fid = df->sib;
+	  if (ctype_isfield(df->info)) {
+	    CType *dc;
+	    if (!gcref(df->name)) continue;  /* Ignore unnamed fields. */
+	    dc = ctype_rawchild(cts, df);  /* Field type. */
+	    if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
+		  ctype_isenum(dc->info)))
+	      goto special;
+	  } else if (!ctype_isconstval(df->info)) {
+	    goto special;
+	  }
+	}
+      }
+      fid = d->sib;
       while (fid) {
 	CType *df = ctype_get(cts, fid);
 	fid = df->sib;
@@ -1893,6 +1913,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
   }
 }
 
+TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
+{
+  CTypeID id = argv2cdata(J, tr, o)->ctypeid;
+  if (!(id == CTID_INT64 || id == CTID_UINT64))
+    lj_trace_err(J, LJ_TRERR_BADTYPE);
+  lj_needsplit(J);
+  return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
+		IRFL_CDATA_INT64);
+}
+
+#if LJ_HASBUFFER
+TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
+  return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
+}
+
+TRef lj_crecord_topuint8(jit_State *J, TRef tr)
+{
+  return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
+}
+#endif
+
 #undef IR
 #undef emitir
 #undef emitconv

+ 5 - 0
libs/LuaJIT/src/lj_crecord.h

@@ -33,6 +33,11 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
 LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
 
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
+#if LJ_HASBUFFER
+LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
+LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
+#endif
 #endif
 
 #endif

+ 1 - 1
libs/LuaJIT/src/lj_ctype.c

@@ -583,7 +583,7 @@ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
   lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
   if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
   lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
-  lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i');
+  lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
   return lj_buf_str(L, sb);
 }
 

+ 11 - 0
libs/LuaJIT/src/lj_ctype.h

@@ -298,6 +298,7 @@ typedef struct CTState {
   _(P_VOID,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_VOID) \
   _(P_CVOID,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_CVOID) \
   _(P_CCHAR,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
+  _(P_UINT8,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_UINT8) \
   _(A_CCHAR,		-1,	CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
   _(CTYPEID,		4,	CT_ENUM, CTALIGN(2)|CTID_INT32) \
   CTTYDEFP(_) \
@@ -389,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
   return cts;
 }
 
+/* Load FFI library on-demand. */
+#define ctype_loadffi(L) \
+  do { \
+    if (!ctype_ctsG(G(L))) { \
+      ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
+      luaopen_ffi(L); \
+      L->top = (TValue *)(mref(L->stack, char) + oldtop); \
+    } \
+  } while (0)
+
 /* Save and restore state of C type table. */
 #define LJ_CTYPE_SAVE(cts)	CTState savects_ = *(cts)
 #define LJ_CTYPE_RESTORE(cts) \

+ 1 - 1
libs/LuaJIT/src/lj_debug.c

@@ -648,7 +648,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
     level += dir;
   }
   if (lastlen)
-    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+    sb->w = sb->b + lastlen;  /* Zap trailing separator. */
 }
 #endif
 

+ 7 - 1
libs/LuaJIT/src/lj_dispatch.c

@@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG)
   /* The JIT engine is off by default. luaopen_jit() turns it on. */
   disp[BC_FORL] = disp[BC_IFORL];
   disp[BC_ITERL] = disp[BC_IITERL];
+  /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */
+  disp[BC_ITERN] = &lj_vm_IITERN;
   disp[BC_LOOP] = disp[BC_ILOOP];
   disp[BC_FUNCF] = disp[BC_IFUNCF];
   disp[BC_FUNCV] = disp[BC_IFUNCV];
@@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g)
   mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
   if (oldmode != mode) {  /* Mode changed? */
     ASMFunction *disp = G2GG(g)->dispatch;
-    ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv;
+    ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv;
     g->dispatchmode = mode;
 
     /* Hotcount if JIT is on, but not while recording. */
     if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) {
       f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
       f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
+      f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]);
       f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
       f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
       f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
     } else {  /* Otherwise use the non-hotcounting instructions. */
       f_forl = disp[GG_LEN_DDISP+BC_IFORL];
       f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
+      f_itern = &lj_vm_IITERN;
       f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
       f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]);
       f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]);
@@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g)
     /* Init static counting instruction dispatch first (may be copied below). */
     disp[GG_LEN_DDISP+BC_FORL] = f_forl;
     disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
+    disp[GG_LEN_DDISP+BC_ITERN] = f_itern;
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
 
     /* Set dynamic instruction dispatch. */
@@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g)
       /* Otherwise set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
+      disp[BC_ITERN] = f_itern;
       disp[BC_LOOP] = f_loop;
       /* Set dynamic return dispatch. */
       if ((mode & DISPMODE_RET)) {

+ 2 - 2
libs/LuaJIT/src/lj_dispatch.h

@@ -31,7 +31,7 @@ extern double __divdf3(double a, double b);
 #define SFGOTDEF(_)
 #endif
 #if LJ_HASJIT
-#define JITGOTDEF(_)	_(lj_trace_exit) _(lj_trace_hot)
+#define JITGOTDEF(_)	_(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
 #else
 #define JITGOTDEF(_)
 #endif
@@ -46,7 +46,7 @@ extern double __divdf3(double a, double b);
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
   _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
-  _(lj_dispatch_profile) _(lj_err_throw) _(lj_err_run) \
+  _(lj_dispatch_profile) _(lj_err_throw) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \

+ 34 - 32
libs/LuaJIT/src/lj_emit_arm64.h

@@ -163,7 +163,7 @@ nopair:
 /* Try to find an N-step delta relative to other consts with N < lim. */
 static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
 {
-  RegSet work = ~as->freeset & RSET_GPR;
+  RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
   if (lim <= 1) return 0;  /* Can't beat that. */
   while (work) {
     Reg r = rset_picktop(work);
@@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
 
 static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
 {
-  uint32_t k13 = emit_isk13(u64, is64);
-  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
-    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
-  } else {
-    int i, zeros = 0, ones = 0, neg;
-    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
-    /* Count homogeneous 16 bit fragments. */
-    for (i = 0; i < 4; i++) {
-      uint64_t frag = (u64 >> i*16) & 0xffff;
-      zeros += (frag == 0);
-      ones += (frag == 0xffff);
+  int i, zeros = 0, ones = 0, neg;
+  if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
+  /* Count homogeneous 16 bit fragments. */
+  for (i = 0; i < 4; i++) {
+    uint64_t frag = (u64 >> i*16) & 0xffff;
+    zeros += (frag == 0);
+    ones += (frag == 0xffff);
+  }
+  neg = ones > zeros;  /* Use MOVN if it pays off. */
+  if ((neg ? ones : zeros) < 3) {  /* Need 2+ ins. Try shorter K13 encoding. */
+    uint32_t k13 = emit_isk13(u64, is64);
+    if (k13) {
+      emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
+      return;
     }
-    neg = ones > zeros;  /* Use MOVN if it pays off. */
-    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
-      int shift = 0, lshift = 0;
-      uint64_t n64 = neg ? ~u64 : u64;
-      if (n64 != 0) {
-	/* Find first/last fragment to be filled. */
-	shift = (63-emit_clz64(n64)) & ~15;
-	lshift = emit_ctz64(n64) & ~15;
-      }
-      /* MOVK requires the original value (u64). */
-      while (shift > lshift) {
-	uint32_t u16 = (u64 >> shift) & 0xffff;
-	/* Skip fragments that are correctly filled by MOVN/MOVZ. */
-	if (u16 != (neg ? 0xffff : 0))
-	  emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
-	shift -= 16;
-      }
-      /* But MOVN needs an inverted value (n64). */
-      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
-		 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
+  }
+  if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
+    int shift = 0, lshift = 0;
+    uint64_t n64 = neg ? ~u64 : u64;
+    if (n64 != 0) {
+      /* Find first/last fragment to be filled. */
+      shift = (63-emit_clz64(n64)) & ~15;
+      lshift = emit_ctz64(n64) & ~15;
+    }
+    /* MOVK requires the original value (u64). */
+    while (shift > lshift) {
+      uint32_t u16 = (u64 >> shift) & 0xffff;
+      /* Skip fragments that are correctly filled by MOVN/MOVZ. */
+      if (u16 != (neg ? 0xffff : 0))
+	emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
+      shift -= 16;
     }
+    /* But MOVN needs an inverted value (n64). */
+    emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
+	       A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
   }
 }
 

+ 1 - 1
libs/LuaJIT/src/lj_emit_mips.h

@@ -70,7 +70,7 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
   }
 }
 
-#if LJ_64
+#if LJ_64 || LJ_HASBUFFER
 static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
 		      uint32_t lsb)
 {

+ 408 - 164
libs/LuaJIT/src/lj_err.c

@@ -29,12 +29,18 @@
 ** Pros and Cons:
 **
 ** - EXT requires unwind tables for *all* functions on the C stack between
-**   the pcall/catch and the error/throw. This is the default on x64,
-**   but needs to be manually enabled on x86/PPC for non-C++ code.
+**   the pcall/catch and the error/throw. C modules used by Lua code can
+**   throw errors, so these need to have unwind tables, too. Transitively
+**   this applies to all system libraries used by C modules -- at least
+**   when they have callbacks which may throw an error.
 **
-** - INT is faster when actually throwing errors (but this happens rarely).
+** - INT is faster when actually throwing errors, but this happens rarely.
 **   Setting up error handlers is zero-cost in any case.
 **
+** - INT needs to save *all* callee-saved registers when entering the
+**   interpreter. EXT only needs to save those actually used inside the
+**   interpreter. JIT-compiled code may need to save some more.
+**
 ** - EXT provides full interoperability with C++ exceptions. You can throw
 **   Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
 **   C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -46,27 +52,38 @@
 **   the wrapper function feature. Lua errors thrown through C++ frames
 **   cannot be caught by C++ code and C++ destructors are not run.
 **
-** EXT is the default on x64 systems and on Windows, INT is the default on all
-** other systems.
+** - EXT can handle errors from internal helper functions that are called
+**   from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
+**   INT has no choice but to call the panic handler, if this happens.
+**   Note: this is mainly relevant for out-of-memory errors.
+**
+** EXT is the default on all systems where the toolchain produces unwind
+** tables by default (*). This is hard-coded and/or detected in src/Makefile.
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
+**
+** INT is the default on all other systems.
+**
+** EXT can be manually enabled for toolchains that are able to produce
+** conforming unwind tables:
+**   "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
+** As explained above, *all* C code used directly or indirectly by LuaJIT
+** must be compiled with -funwind-tables (or -fexceptions). C++ code must
+** *not* be compiled with -fno-exceptions.
+**
+** If you're unsure whether error handling inside the VM works correctly,
+** try running this and check whether it prints "OK":
 **
-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
-** and all C libraries that have callbacks which may be used to call back
-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
+**   luajit -e "print(select(2, load('OK')):match('OK'))"
 **
-** EXT is mandatory on WIN64 since the calling convention has an abundance
-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
+** (*) Originally, toolchains only generated unwind tables for C++ code. For
+** interoperability reasons, this can be manually enabled for plain C code,
+** too (with -funwind-tables). With the introduction of the x64 architecture,
+** the corresponding POSIX and Windows ABIs mandated unwind tables for all
+** code. Over the following years most desktop and server platforms have
+** enabled unwind tables by default on all architectures. OTOH mobile and
+** embedded platforms do not consistently mandate unwind tables.
 */
 
-#if (defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
-#define LJ_UNWIND_EXT	1
-#elif LJ_TARGET_WINDOWS
-#define LJ_UNWIND_EXT	1
-#endif
-
 /* -- Error messages ------------------------------------------------------ */
 
 /* Error message strings. */
@@ -184,7 +201,172 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 
 /* -- External frame unwinding -------------------------------------------- */
 
-#if (defined(__GNUC__) || defined(__clang__)) && !LJ_NO_UNWIND && !LJ_ABI_WIN
+#if LJ_ABI_WIN
+
+/*
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+**
+** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+** unless you compile your C++ code with /EHa. Unfortunately this means
+** catch (...) also catches things like access violations. The use of
+** _set_se_translator doesn't really help, because it requires /EHa, too.
+*/
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_X86
+typedef void *UndocumentedDispatcherContext;  /* Unused on x86. */
+#else
+/* Taken from: http://www.nynaeve.net/?p=99 */
+typedef struct UndocumentedDispatcherContext {
+  ULONG64 ControlPc;
+  ULONG64 ImageBase;
+  PRUNTIME_FUNCTION FunctionEntry;
+  ULONG64 EstablisherFrame;
+  ULONG64 TargetIp;
+  PCONTEXT ContextRecord;
+  void (*LanguageHandler)(void);
+  PVOID HandlerData;
+  PUNWIND_HISTORY_TABLE HistoryTable;
+  ULONG ScopeIndex;
+  ULONG Fill0;
+} UndocumentedDispatcherContext;
+#endif
+
+/* Another wild guess. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+
+#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+/* Workaround for broken MinGW64 declaration. */
+VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+#define RtlUnwindEx RtlUnwindEx_FIXED
+#endif
+
+#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
+#define LJ_GCC_EXCODE		((DWORD)0x20474343)
+
+#define LJ_EXCODE		((DWORD)0xe24c4a00)
+#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
+#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
+#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+
+/* Windows exception handler for interpreter frame.
+**
+** The C frame is taken from f (offset by CFRAME_OFS_SEH on x86). The error
+** code is decoded from the exception code if it carries the LJ_EXCODE tag,
+** otherwise LUA_ERRRUN is used.
+**
+** If the record is flagged as unwinding, the internal frames are unwound
+** via err_unwind(). Otherwise err_unwind() is only probed (errcode 0) for
+** a catching frame. If one is found, the stack unwind towards it is
+** started, except for exceptions that are neither MSVC/GCC C++ exceptions
+** nor tagged with LJ_EXCODE, which are passed on. Every path that returns
+** yields 1.
+*/
+LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+{
+#if LJ_TARGET_X86
+  void *cf = (char *)f - CFRAME_OFS_SEH;
+#else
+  void *cf = f;
+#endif
+  lua_State *L = cframe_L(cf);
+  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    /* Unwind internal frames. */
+    err_unwind(L, cf, errcode);
+  } else {
+    void *cf2 = err_unwind(L, cf, 0);
+    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+	  rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if !LJ_TARGET_CYGWIN
+	__DestructExceptionObject(rec, 1);
+#endif
+	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* Don't catch access violations etc. */
+	return 1;  /* ExceptionContinueSearch */
+      }
+#if LJ_TARGET_X86
+      UNUSED(ctx);
+      UNUSED(dispatch);
+      /* Call all handlers for all lower C frames (including ourselves) again
+      ** with EH_UNWINDING set. Then call the specified function, passing cf
+      ** and errcode.
+      */
+      lj_vm_rtlunwind(cf, (void *)rec,
+	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+      /* lj_vm_rtlunwind does not return. */
+#else
+      /* Unwind the stack and call all handlers for all lower C frames
+      ** (including ourselves) again with EH_UNWINDING set. Then set
+      ** stack pointer = cf, result = errcode and jump to the specified target.
+      */
+      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+			       lj_vm_unwind_ff_eh :
+			       lj_vm_unwind_c_eh),
+		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+      /* RtlUnwindEx should never return. */
+#endif
+    }
+  }
+  return 1;  /* ExceptionContinueSearch */
+}
+
+#if LJ_UNWIND_JIT
+
+#if LJ_TARGET_X64
+#define CONTEXT_REG_PC	Rip
+#elif LJ_TARGET_ARM64
+#define CONTEXT_REG_PC	Pc
+#else
+#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
+#endif
+
+/* Windows unwinder for JIT-compiled code.
+**
+** Captures the current context and walks up the stack frame by frame.
+** The first frame for which RtlLookupFunctionEntry() finds no function
+** entry is treated as JIT-compiled code: lj_trace_unwind() is asked for an
+** exit stub and, if it returns one, the PC of the context is redirected to
+** the stub, errcode is stored in the JIT state and the context is restored.
+** The function only returns if unwinding failed.
+*/
+static void err_unwind_win_jit(global_State *g, int errcode)
+{
+  CONTEXT ctx;
+  UNWIND_HISTORY_TABLE hist;
+
+  memset(&hist, 0, sizeof(hist));
+  RtlCaptureContext(&ctx);
+  while (1) {
+    uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
+    void *hdata;
+    PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
+    if (!func) {  /* Found frame without .pdata: must be JIT-compiled code. */
+      ExitNo exitno;
+      uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+      if (stub) {  /* Jump to side exit to unwind the trace. */
+	ctx.CONTEXT_REG_PC = stub;
+	G2J(g)->exitcode = errcode;
+	RtlRestoreContext(&ctx, NULL);  /* Does not return. */
+      }
+      break;
+    }
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
+		     &ctx, &hdata, &frame, NULL);
+    if (!addr) break;  /* NOTE(review): addr is not reassigned after the unwind step; confirm intent. */
+  }
+  /* Unwinding failed, if we end up here. */
+}
+#endif
+
+/* Raise Windows exception.
+**
+** With LJ_UNWIND_JIT: if g->jit_base is set, the JIT unwinder is tried and
+** a return from it means failure, which is passed on by returning here.
+** Without LJ_UNWIND_JIT, but with LJ_HASJIT: g->jit_base is cleared first.
+** In all remaining cases a non-continuable exception tagged with errcode
+** is raised.
+*/
+static void err_raise_ext(global_State *g, int errcode)
+{
+#if LJ_UNWIND_JIT
+  if (tvref(g->jit_base)) {
+    err_unwind_win_jit(g, errcode);
+    return;  /* Unwinding failed. */
+  }
+#elif LJ_HASJIT
+  /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
+  setmref(g->jit_base, NULL);
+#endif
+  UNUSED(g);
+  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+}
+
+#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
 
 /*
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
@@ -194,6 +376,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 typedef struct _Unwind_Context _Unwind_Context;
 
 #define _URC_OK			0
+#define _URC_FATAL_PHASE2_ERROR	2
 #define _URC_FATAL_PHASE1_ERROR	3
 #define _URC_HANDLER_FOUND	6
 #define _URC_INSTALL_CONTEXT	7
@@ -213,9 +396,11 @@ typedef struct _Unwind_Exception
   void (*excleanup)(int, struct _Unwind_Exception *);
   uintptr_t p1, p2;
 } __attribute__((__aligned__)) _Unwind_Exception;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Exception
 
 extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
 extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
+extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
 extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
 extern void _Unwind_DeleteException(_Unwind_Exception *);
 extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -233,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
   lua_State *L;
   if (version != 1)
     return _URC_FATAL_PHASE1_ERROR;
-  UNUSED(uexclass);
   cf = (void *)_Unwind_GetCFA(ctx);
   L = cframe_L(cf);
   if ((actions & _UA_SEARCH_PHASE)) {
@@ -281,20 +465,139 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
     ** it on non-x64 because the interpreter restores all callee-saved regs.
     */
     lj_err_throw(L, errcode);
+#if LJ_TARGET_X64
+#error "Broken build system -- only use the provided Makefiles!"
+#endif
 #endif
   }
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Exception static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+struct dwarf_eh_bases { void *tbase, *dbase, *func; };
+extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
 
-/* Raise DWARF2 exception. */
-static void err_raise_ext(int errcode)
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
 {
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  static_uex.excleanup = NULL;
-  _Unwind_RaiseException(&static_uex);
+  struct dwarf_eh_bases ehb;
+  lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
+  /* Check disabled, because of broken Fedora/ARM64. See #722.
+  lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
+  */
+}
+#endif
+
+#if LJ_UNWIND_JIT
+/* DWARF2 personality handler for JIT-compiled code.
+**
+** Rejects unknown versions and foreign exception classes with
+** _URC_FATAL_PHASE1_ERROR. In the search phase the handler is always
+** claimed. In the cleanup phase the exit stub for the return address is
+** looked up with lj_trace_unwind(): if there is one, the error code is
+** stored in the JIT state, the context is redirected to the stub and
+** _URC_INSTALL_CONTEXT is returned, else _URC_FATAL_PHASE2_ERROR.
+*/
+static int err_unwind_jit(int version, int actions,
+  uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
+{
+  /* NYI: FFI C++ exception interoperability. */
+  if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
+    return _URC_FATAL_PHASE1_ERROR;
+  if ((actions & _UA_SEARCH_PHASE)) {
+    return _URC_HANDLER_FOUND;
+  }
+  if ((actions & _UA_CLEANUP_PHASE)) {
+    global_State *g = *(global_State **)(uex+1);  /* Pointer located right behind *uex. */
+    ExitNo exitno;
+    uintptr_t addr = _Unwind_GetIP(ctx);  /* Return address _after_ call. */
+    uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+    lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
+    if (stub) {  /* Jump to side exit to unwind the trace. */
+      G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
+#ifdef LJ_TARGET_MIPS  /* NOTE(review): #ifdef here, but #if LJ_TARGET_* elsewhere; confirm. */
+      _Unwind_SetGR(ctx, 4, stub);
+      _Unwind_SetGR(ctx, 5, exitno);
+      _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
+#else
+      _Unwind_SetIP(ctx, stub);
+#endif
+      return _URC_INSTALL_CONTEXT;
+    }
+    return _URC_FATAL_PHASE2_ERROR;
+  }
+  return _URC_FATAL_PHASE1_ERROR;
+}
+
+/* DWARF2 template frame info for JIT-compiled code.
+**
+** After copying the template to the start of the mcode segment,
+** the frame handler function and the code size are patched.
+** The frame handler always installs a new context to jump to the exit,
+** so don't bother to add any unwind opcodes.
+*/
+static const uint8_t err_frame_jit_template[] = {
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x1c : 0x14,  /* CIE length. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 1, 'z','P','R',0,  /* CIE mark, CIE version, augmentation. */
+  1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG,  /* Code/data align, RA. */
+#if LJ_64
+  10, 0, 0,0,0,0,0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,0,0,0,0,  /* Alignment. */
+#else
+  6, 0, 0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,  /* Alignment. */
+#endif
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x14 : 0x10,  /* FDE length. */
+  0,0,0,
+  LJ_64 ? 0x24 : 0x1c,  /* CIE offset. */
+  0,0,0,
+  LJ_64 ? 0x14 : 0x10,  /* Code offset. After Final FDE. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
+#if LJ_64
+  0,0,0,0,  /* Alignment. */
+#endif
+  0,0,0,0  /* Final FDE. */
+};
+
+#define ERR_FRAME_JIT_OFS_HANDLER	0x12
+#define ERR_FRAME_JIT_OFS_FDE		(LJ_64 ? 0x20 : 0x18)
+#define ERR_FRAME_JIT_OFS_CODE_SIZE	(LJ_64 ? 0x2c : 0x24)
+#if LJ_TARGET_OSX
+#define ERR_FRAME_JIT_OFS_REGISTER	ERR_FRAME_JIT_OFS_FDE
+#else
+#define ERR_FRAME_JIT_OFS_REGISTER	0
+#endif
+
+extern void __register_frame(const void *);
+extern void __deregister_frame(const void *);
+
+uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
+{  /* Set up and register DWARF2 frame info for an mcode area.
+  **
+  ** Copies err_frame_jit_template to info, patches in the address of
+  ** err_unwind_jit and the code size (sz minus everything from base up to
+  ** the end of the copied template) and registers the result via
+  ** __register_frame(). Returns the address right behind the copied template.
+  */
+  void **handler;
+  memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
+  handler = (void *)err_unwind_jit;
+  memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
+  *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
+    (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
+  __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+#ifdef LUA_USE_ASSERT
+  {
+    struct dwarf_eh_bases ehb;
+    lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
+	       "bad JIT unwind table registration");
+  }
+#endif
+  return info + sizeof(err_frame_jit_template);
+}
+
+void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
+{  /* Deregister the frame info at info via __deregister_frame(), using the same offset as the registration. */
+  UNUSED(base); UNUSED(sz);
+  __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
 }
 #endif
 
@@ -306,6 +609,7 @@ static void err_raise_ext(int errcode)
 #define _US_FORCE_UNWIND		8
 
 typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Control_Block
 
 struct _Unwind_Control_Block {
   uint64_t exclass;
@@ -364,136 +668,63 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
   }
   if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
     return _URC_FAILURE;
+#ifdef LUA_USE_ASSERT
+  /* We should never get here unless this is a forced unwind aka backtrace. */
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
+    _Unwind_SetGR(ctx, 0, 0xff33aa88);
+  }
+#endif
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Control_Block static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
+extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
 
-static void err_raise_ext(int errcode)
+static int err_verify_bt(_Unwind_Context *ctx, int *got)
 {
-  memset(&static_uex, 0, sizeof(static_uex));
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  _Unwind_RaiseException(&static_uex);
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
+  else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
+  return _URC_OK;
 }
-#endif
 
-#endif /* LJ_TARGET_ARM */
-
-#elif LJ_ABI_WIN
+/* Verify that external error handling actually has a chance to work.
+**
+** Runs a backtrace with err_verify_bt() as the callback. The callback
+** plants the marker 0xff33aa77 in register 0 and lj_err_unwind_arm()
+** replaces it with 0xff33aa88 when it is run for a frame. got only reaches
+** 2 if the callback later sees the replaced marker.
+*/
+void lj_err_verify(void)
+{
+  int got = 0;
+  _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
+  lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
+}
+#endif
 
 /*
-** Someone in Redmond owes me several days of my life. A lot of this is
-** undocumented or just plain wrong on MSDN. Some of it can be gathered
-** from 3rd party docs or must be found by trial-and-error. They really
-** don't want you to write your own language-specific exception handler
-** or to interact gracefully with MSVC. :-(
+** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
 **
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
+** The quirky ARM unwind API doesn't have __register_frame().
+** A potential workaround might involve _Unwind_Backtrace.
+** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
+** since they are built without unwind tables by default.
 */
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-#if LJ_TARGET_X64
-/* Taken from: http://www.nynaeve.net/?p=99 */
-typedef struct UndocumentedDispatcherContext {
-  ULONG64 ControlPc;
-  ULONG64 ImageBase;
-  PRUNTIME_FUNCTION FunctionEntry;
-  ULONG64 EstablisherFrame;
-  ULONG64 TargetIp;
-  PCONTEXT ContextRecord;
-  void (*LanguageHandler)(void);
-  PVOID HandlerData;
-  PUNWIND_HISTORY_TABLE HistoryTable;
-  ULONG ScopeIndex;
-  ULONG Fill0;
-} UndocumentedDispatcherContext;
-#else
-typedef void *UndocumentedDispatcherContext;
-#endif
-
-/* Another wild guess. */
-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
-
-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
-/* Workaround for broken MinGW64 declaration. */
-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
-#define RtlUnwindEx RtlUnwindEx_FIXED
-#endif
+#endif /* LJ_TARGET_ARM */
 
-#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
-#define LJ_GCC_EXCODE		((DWORD)0x20474343)
 
-#define LJ_EXCODE		((DWORD)0xe24c4a00)
-#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
-#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
-#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+#if LJ_UNWIND_EXT
+static __thread struct {
+  UNWIND_EXCEPTION_TYPE ex;
+  global_State *g;
+} static_uex;
 
-/* Windows exception handler for interpreter frame. */
-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
-  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+/* Raise external exception. */
+static void err_raise_ext(global_State *g, int errcode)
 {
-#if LJ_TARGET_X64
-  void *cf = f;
-#else
-  void *cf = (char *)f - CFRAME_OFS_SEH;
-#endif
-  lua_State *L = cframe_L(cf);
-  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
-		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
-  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
-    /* Unwind internal frames. */
-    err_unwind(L, cf, errcode);
-  } else {
-    void *cf2 = err_unwind(L, cf, 0);
-    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
-      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
-	  rec->ExceptionCode == LJ_GCC_EXCODE) {
-#if LJ_TARGET_WINDOWS
-	__DestructExceptionObject(rec, 1);
-#endif
-	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
-      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
-	/* Don't catch access violations etc. */
-	return 1;  /* ExceptionContinueSearch */
-      }
-#if LJ_TARGET_X64
-      /* Unwind the stack and call all handlers for all lower C frames
-      ** (including ourselves) again with EH_UNWINDING set. Then set
-      ** rsp = cf, rax = errcode and jump to the specified target.
-      */
-      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-			       lj_vm_unwind_ff_eh :
-			       lj_vm_unwind_c_eh),
-		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
-      /* RtlUnwindEx should never return. */
-#else
-      UNUSED(ctx);
-      UNUSED(dispatch);
-      /* Call all handlers for all lower C frames (including ourselves) again
-      ** with EH_UNWINDING set. Then call the specified function, passing cf
-      ** and errcode.
-      */
-      lj_vm_rtlunwind(cf, (void *)rec,
-	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
-      /* lj_vm_rtlunwind does not return. */
-#endif
-    }
-  }
-  return 1;  /* ExceptionContinueSearch */
+  memset(&static_uex, 0, sizeof(static_uex));
+  static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
+  static_uex.g = g;
+  _Unwind_RaiseException(&static_uex.ex);
 }
 
-/* Raise Windows exception. */
-static void err_raise_ext(int errcode)
-{
-  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
-}
+#endif
 
 #endif
 
@@ -504,22 +735,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
 {
   global_State *g = G(L);
   lj_trace_abort(g);
-  setmref(g->jit_base, NULL);
   L->status = LUA_OK;
 #if LJ_UNWIND_EXT
-  err_raise_ext(errcode);
+  err_raise_ext(g, errcode);
   /*
   ** A return from this function signals a corrupt C stack that cannot be
   ** unwound. We have no choice but to call the panic function and exit.
   **
   ** Usually this is caused by a C function without unwind information.
-  ** This should never happen on x64, but may happen if you've manually
-  ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
-  ** non-C++ file with -funwind-tables.
+  ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
+  ** and forgot to recompile *every* non-C++ file with -funwind-tables.
   */
   if (G(L)->panic)
     G(L)->panic(L);
 #else
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   {
     void *cf = err_unwind(L, NULL, errcode);
     if (cframe_unwind_ff(cf))
@@ -600,7 +832,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
 /* Runtime error. */
 LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
 {
-  ptrdiff_t ef = finderrfunc(L);
+  ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
   if (ef) {
     TValue *errfunc = restorestack(L, ef);
     TValue *top = L->top;
@@ -619,6 +851,16 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
   lj_err_throw(L, LUA_ERRRUN);
 }
 
+#if LJ_HASJIT
+LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
+{  /* Dispatch on errcode: LUA_ERRRUN goes through lj_err_run(), any other code is thrown directly. */
+  if (errcode == LUA_ERRRUN)
+    lj_err_run(L);
+  else
+    lj_err_throw(L, errcode);
+}
+#endif
+
 /* Formatted runtime error message. */
 LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
 {
@@ -699,25 +941,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
 /* Error in context of caller. */
 LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
 {
-  TValue *frame = L->base-1;
-  TValue *pframe = NULL;
-  if (frame_islua(frame)) {
-    pframe = frame_prevl(frame);
-  } else if (frame_iscont(frame)) {
-    if (frame_iscont_fficb(frame)) {
-      pframe = frame;
-      frame = NULL;
-    } else {
-      pframe = frame_prevd(frame);
+  TValue *frame = NULL, *pframe = NULL;
+  if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
+    frame = L->base-1;
+    if (frame_islua(frame)) {
+      pframe = frame_prevl(frame);
+    } else if (frame_iscont(frame)) {
+      if (frame_iscont_fficb(frame)) {
+	pframe = frame;
+	frame = NULL;
+      } else {
+	pframe = frame_prevd(frame);
 #if LJ_HASFFI
-      /* Remove frame for FFI metamethods. */
-      if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
-	  frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
-	L->base = pframe+1;
-	L->top = frame;
-	setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
-      }
+	/* Remove frame for FFI metamethods. */
+	if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+	    frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+	  L->base = pframe+1;
+	  L->top = frame;
+	  setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
+	}
 #endif
+      }
     }
   }
   lj_debug_addloc(L, msg, pframe, frame);

+ 18 - 1
libs/LuaJIT/src/lj_err.h

@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
 LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
 LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
 LJ_FUNC_NORET void lj_err_mem(lua_State *L);
-LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+#if LJ_HASJIT
+LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
+#endif
 LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
 LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
 			      BCLine line, ErrMsg em, va_list argp);
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
 LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
 LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
 
+#if LJ_UNWIND_JIT && !LJ_ABI_WIN
+LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
+LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
+#else
+#define lj_err_register_mcode(base, sz, info)	(info)
+#define lj_err_deregister_mcode(base, sz, info)	UNUSED(base)
+#endif
+
+#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
+LJ_FUNC void lj_err_verify(void);
+#else
+#define lj_err_verify()		((void)0)
+#endif
+
 #endif

+ 14 - 0
libs/LuaJIT/src/lj_errmsg.h

@@ -67,6 +67,7 @@ ERRDEF(PROTMT,	"cannot change a protected metatable")
 ERRDEF(UNPACK,	"too many results to unpack")
 ERRDEF(RDRSTR,	"reader function must return a string")
 ERRDEF(PRTOSTR,	LUA_QL("tostring") " must return a string to " LUA_QL("print"))
+ERRDEF(NUMRNG,	"number out of range")
 ERRDEF(IDXRNG,	"index out of range")
 ERRDEF(BASERNG,	"base out of range")
 ERRDEF(LVLRNG,	"level out of range")
@@ -179,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT,	"NYI: packed bit fields")
 ERRDEF(FFI_NYICALL,	"NYI: cannot call this C function (yet)")
 #endif
 
+#if LJ_HASBUFFER
+/* String buffer errors. */
+ERRDEF(BUFFER_SELF,	"cannot put buffer into itself")
+ERRDEF(BUFFER_BADOPT,	"bad options table")
+ERRDEF(BUFFER_BADENC,	"cannot serialize " LUA_QS)
+ERRDEF(BUFFER_BADDEC,	"cannot deserialize tag 0x%02x")
+ERRDEF(BUFFER_BADDICTX,	"cannot deserialize dictionary index %d")
+ERRDEF(BUFFER_DEPTH,	"too deep to serialize")
+ERRDEF(BUFFER_DUPKEY,	"duplicate table key")
+ERRDEF(BUFFER_EOB,	"unexpected end of buffer")
+ERRDEF(BUFFER_LEFTOV,	"left-over data in buffer")
+#endif
+
 #undef ERRDEF
 
 /* Detecting unused error messages:

+ 399 - 25
libs/LuaJIT/src/lj_ffrecord.c

@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -28,6 +29,7 @@
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strfmt.h"
+#include "lj_serialize.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -107,6 +109,10 @@ static void recff_stitch(jit_State *J)
   const BCIns *pc = frame_pc(base-1);
   TValue *pframe = frame_prevl(base-1);
 
+  /* Check for this now. Throwing in lj_record_stop messes up the stack. */
+  if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
+    lj_trace_err(J, LJ_TRERR_SNAPOV);
+
   /* Move func + args up in Lua stack and insert continuation. */
   memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
   setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
@@ -182,6 +188,14 @@ static TRef recff_bufhdr(jit_State *J)
 		lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
 }
 
+/* Emit TMPREF.
+**
+** Unless LJ_DUALNUM is set, an integer-typed tr is first converted to a
+** number with IR_CONV. Returns the ref of the emitted IR_TMPREF (typed
+** IRT_PGC) with tr and mode as its operands.
+*/
+static TRef recff_tmpref(jit_State *J, TRef tr, int mode)
+{
+  if (!LJ_DUALNUM && tref_isinteger(tr))
+    tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+  return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode);
+}
+
 /* -- Base library fast functions ----------------------------------------- */
 
 static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd)
@@ -296,7 +310,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
     } else {
       TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
       TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
-      emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
+      emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#'));
     }
     return 0;
   } else {  /* select(n, ...) */
@@ -317,9 +331,9 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
       ptrdiff_t n = (ptrdiff_t)J->maxslot;
       if (start < 0) start += n;
       else if (start > n) start = n;
-      rd->nres = n - start;
       if (start >= 1) {
 	ptrdiff_t i;
+	rd->nres = n - start;
 	for (i = 0; i < n - start; i++)
 	  J->base[i] = J->base[start+i];
       }  /* else: Interpreter will throw. */
@@ -455,6 +469,7 @@ static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
 #endif
     lj_record_call(J, 0, J->maxslot - 1);
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 
@@ -490,6 +505,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
     if (errcode)
       lj_err_throw(J->L, errcode);  /* Propagate errors. */
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 
@@ -505,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
   recff_nyiu(J, rd);
 }
 
+static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd)
+{  /* Record a table traversal step (see lj_record_next).
+  **
+  ** Falls back to recff_nyi() on big-endian builds. Otherwise, if the first
+  ** argument is a table, the key is turned into a key index: constant 0 for
+  ** a nil key, else a recorded call to lj_tab_keyindex on a TMPREF of the
+  ** key. The same index is computed right away for the runtime values and
+  ** both are handed to lj_record_next(), which supplies the result count.
+  */
+#if LJ_BE
+  /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+  ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+  */
+  recff_nyi(J, rd);
+#else
+  TRef tab = J->base[0];
+  if (tref_istab(tab)) {
+    RecordIndex ix;
+    cTValue *keyv;
+    ix.tab = tab;
+    if (tref_isnil(J->base[1])) {  /* Shortcut for start of traversal. */
+      ix.key = lj_ir_kint(J, 0);
+      keyv = niltvg(J2G(J));
+    } else {
+      TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
+      ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp);
+      keyv = &rd->argv[1];
+    }
+    copyTV(J->L, &ix.tabv, &rd->argv[0]);
+    ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv);
+    /* Omit the value, if not used by the caller. */
+    ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) &&
+		   bc_b(frame_pc(J->L->base-1)[-1])-1 < 2);
+    ix.mobj = 0;  /* We don't need the next index. */
+    rd->nres = lj_record_next(J, &ix);
+    J->base[0] = ix.key;
+    J->base[1] = ix.val;
+  }  /* else: Interpreter will throw. */
+#endif
+}
+
 /* -- Math library fast functions ----------------------------------------- */
 
 static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -707,7 +757,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
 #if LJ_HASFFI
   TRef hdr = recff_bufhdr(J);
   TRef tr = recff_bit64_tohex(J, rd, hdr);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 #else
   recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
 #endif
@@ -833,8 +883,8 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
   if (i > 1) {  /* Concatenate the strings, if there's more than one. */
     TRef hdr = recff_bufhdr(J), tr = hdr;
     for (i = 0; J->base[i] != 0; i++)
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
-    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
   } else if (i == 0) {
     J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
   }
@@ -852,19 +902,19 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
     emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
     if (vrep > 1) {
       TRef hdr2 = recff_bufhdr(J);
-      TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
-      tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
-      str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
+      TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
+      tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
+      str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
     }
   }
   tr = hdr = recff_bufhdr(J);
   if (str2) {
-    tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
+    tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
     str = str2;
     rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
   }
   tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 
 static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
@@ -872,7 +922,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
   TRef str = lj_ir_tostr(J, J->base[0]);
   TRef hdr = recff_bufhdr(J);
   TRef tr = lj_ir_call(J, rd->data, hdr, str);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 
 static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
@@ -935,34 +985,40 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
   }
 }
 
-static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
 {
-  TRef trfmt = lj_ir_tostr(J, J->base[0]);
-  GCstr *fmt = argv2str(J, &rd->argv[0]);
-  int arg = 1;
-  TRef hdr, tr;
+  ptrdiff_t arg = sbufx;
+  TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
+  GCstr *fmt = argv2str(J, &rd->argv[arg]);
   FormatState fs;
   SFormat sf;
   /* Specialize to the format string. */
   emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
-  tr = hdr = recff_bufhdr(J);
   lj_strfmt_init(&fs, strdata(fmt), fmt->len);
   while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {  /* Parse format. */
-    TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
+    TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
     TRef trsf = lj_ir_kint(J, (int32_t)sf);
     IRCallID id;
     switch (STRFMT_TYPE(sf)) {
     case STRFMT_LIT:
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		  lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
       break;
     case STRFMT_INT:
       id = IRCALL_lj_strfmt_putfnum_int;
     handle_int:
-      if (!tref_isinteger(tra))
+      if (!tref_isinteger(tra)) {
+#if LJ_HASFFI
+	if (tref_iscdata(tra)) {
+	  tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
+	  tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+	  break;
+	}
+#endif
 	goto handle_num;
+      }
       if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
       } else {
 #if LJ_HASFFI
@@ -989,10 +1045,11 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
     case STRFMT_STR:
       if (!tref_isstr(tra)) {
 	recff_nyiu(J, rd);  /* NYI: __tostring and non-string types for %s. */
+	/* NYI: also buffers. */
 	return;
       }
       if (sf == STRFMT_STR)  /* Shortcut for plain %s. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
       else if ((sf & STRFMT_T_QUOTED))
 	tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
       else
@@ -1001,7 +1058,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
     case STRFMT_CHAR:
       tra = lj_opt_narrow_toint(J, tra);
       if (sf == STRFMT_CHAR)  /* Shortcut for plain %c. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
       else
 	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
@@ -1013,9 +1070,326 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
       return;
     }
   }
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  if (sbufx) {
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+  } else {
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+  }
 }
 
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{
+  recff_format(J, rd, recff_bufhdr(J), 0);  /* sbufx=0: format string is arg 0, result string goes to J->base[0]. */
+}
+
+/* -- Buffer library fast functions --------------------------------------- */
+
+#if LJ_HASBUFFER
+
+static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
+{
+  return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);  /* Load the SBufExt L field of buffer userdata ud. */
+}
+
+static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
+{
+  TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);  /* Reference to the SBufExt L field of ud. */
+  emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);  /* Store val through that field reference. */
+}
+
+static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
+{
+  return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);  /* Load field fl of ud as a plain pointer; callers pass IRFL_SBUF_* ids. */
+}
+
+static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
+{
+  TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);  /* Reference to field fl of ud. */
+  emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);  /* Store pointer-typed val into that field. */
+}
+
+static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
+{
+  TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);  /* trw - trr; callers pass the SBUF_R and SBUF_W field loads. */
+  if (LJ_64)  /* On 64 bit targets retype the pointer-sized difference as INT (IRCONV_NONE). */
+    len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  return len;
+}
+
+/* Emit typecheck for string buffer in slot arg. Returns the userdata TRef. */
+static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg)
+{
+  TRef trtype, ud = J->base[arg];
+  if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Runtime value is not a buffer: raise a trace error. */
+  trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);  /* Load the userdata's udtype byte. */
+  emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));  /* Guard: udtype == UDTYPE_BUFFER. */
+  J->needsnap = 1;
+  return ud;
+}
+
+/* Emit BUFHDR for write to extended string buffer. Returns the BUFHDR ref. */
+static TRef recff_sbufx_write(jit_State *J, TRef ud)
+{
+  TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));  /* Address of the payload following the GCudata header. */
+  return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);  /* WRITE mode, as opposed to RESET/APPEND. */
+}
+
+/* Check for integer in range for the buffer API. Returns an INT-typed ref. */
+static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg)
+{
+  TRef tr = J->base[arg];
+  TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);  /* Upper limit as a 32 bit constant. */
+  if (tref_isinteger(tr)) {
+    emitir(IRTGI(IR_ULE), tr, trlim);  /* Guard: tr <= LJ_MAX_BUF (ULE compare). */
+  } else if (tref_isnum(tr)) {
+    tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);  /* Number -> int, any FP number accepted. */
+    emitir(IRTGI(IR_ULE), tr, trlim);  /* Same range guard on the converted value. */
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {
+    tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);  /* Load the cdata's 64 bit integer value. */
+    emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));  /* Range guard done in 64 bits before narrowing. */
+    tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);  /* Retype as INT. */
+#else
+    UNUSED(rd);
+#endif
+  } else {
+    lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Any other argument type: raise a trace error. */
+  }
+  return tr;
+}
+
+static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  SBufExt *sbx = bufV(&rd->argv[0]);  /* Runtime buffer seen while recording. */
+  int iscow = (int)sbufiscow(sbx);  /* COW state at recording time; the trace specializes on it. */
+  TRef trl = recff_sbufx_get_L(J, ud);
+  TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));  /* Isolate the COW flag bit from the L field. */
+  TRef zero = lj_ir_kint(J, 0);
+  emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);  /* Guard: flag bit matches the recorded state. */
+  if (iscow) {
+    trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
+		 LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
+			   lj_ir_kint(J, SBUF_FLAG_COW));  /* XOR flips the COW bit, which the guard established as set. */
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);  /* Zero the W, E and B fields. */
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
+    recff_sbufx_set_L(J, ud, trl);  /* Write back L with the flag flipped. */
+    emitir(IRT(IR_FSTORE, IRT_PGC),
+	   emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);  /* Zero the cowref field. */
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
+  } else {
+    TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);  /* Non-COW: set both W and R to B. */
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
+  }
+}
+
+static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef len = recff_sbufx_len(J, trr, trw);  /* W - R. */
+  TRef trn = recff_sbufx_checkint(J, rd, 1);  /* Range-checked count from arg 1. */
+  len = emitir(IRTI(IR_MIN), len, trn);  /* Clamp the count to W - R. */
+  trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);  /* Store the advanced R pointer. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = J->base[1];
+  if (tref_isstr(tr)) {
+    TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));  /* Pointer to string data at offset 0. */
+    TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);  /* String length. */
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);  /* The string object itself is passed as 4th arg. */
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {
+    TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);  /* Pointer derived from the cdata argument. */
+    TRef len = recff_sbufx_checkint(J, rd, 2);  /* Explicit length comes from arg 2. */
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#endif
+  }  /* else: Interpreter will throw. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) return;  /* No arguments after the buffer: nothing more to record. */
+  for (arg = 1; (tr = J->base[arg]); arg++) {  /* Walk slots until the first zero TRef. */
+    if (tref_isstr(tr)) {
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
+    } else if (tref_isnumber(tr)) {
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
+		     emitir(IRT(IR_TOSTR, IRT_STR), tr,
+			    tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));  /* Number/int -> string, then append. */
+    } else if (tref_isudata(tr)) {
+      TRef ud2 = recff_sbufx_check(J, rd, arg);  /* Userdata argument must itself be a buffer. */
+      TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R);
+      TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W);
+      TRef len = recff_sbufx_len(J, trr, trw);
+      emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);  /* Guard: source buffer differs from the destination. */
+      trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);  /* Append the source's R..W bytes. */
+    } else {
+      recff_nyiu(J, rd);  /* Other argument types are not handled here. */
+    }
+  }
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);  /* Explicit use of the last buffer op (no BUFSTR consumes it). */
+}
+
+static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  recff_format(J, rd, trbuf, 1);  /* sbufx=1: format string is arg 1; output is appended, no result string is built. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }  /* No length args: treat as a single nil argument. */
+  for (arg = 0; (tr = J->base[arg+1]); arg++) {  /* Argument arg+1 produces result slot arg. */
+    TRef len = recff_sbufx_len(J, trr, trw);  /* W - R for the current trr. */
+    if (tref_isnil(tr)) {
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);  /* nil: new string from all of R..W. */
+      trr = trw;
+    } else {
+      TRef trn = recff_sbufx_checkint(J, rd, arg+1);
+      TRef tru;
+      len = emitir(IRTI(IR_MIN), len, trn);  /* Clamp the requested length to W - R. */
+      tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+      trr = tru;  /* Doing the ADD before the SNEW generates better code. */
+    }
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);  /* Store the updated R pointer after each result. */
+  }
+  rd->nres = arg;  /* One result per processed argument. */
+}
+
+static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));  /* New string from R..W; no buffer field is stored to. */
+}
+
+static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = recff_sbufx_len(J, trr, trw);  /* Result is W - R as an INT. */
+}
+
+#if LJ_HASFFI
+static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);  /* Source pointer derived from arg 1. */
+  TRef len = recff_sbufx_checkint(J, rd, 2);  /* Range-checked byte count from arg 2. */
+  trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);  /* Explicit use of the call result. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef trsz = recff_sbufx_checkint(J, rd, 1);  /* Range-checked size from arg 1. */
+  J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);  /* Second result: INT returned by lj_bufx_more. */
+  J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));  /* First result: W pointer, loaded after the call. */
+  rd->nres = 2;
+}
+
+static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef len = recff_sbufx_checkint(J, rd, 1);  /* Range-checked byte count from arg 1. */
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
+  TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);  /* E - W. */
+  if (LJ_64)  /* Retype the pointer-sized difference as INT. */
+    left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  emitir(IRTGI(IR_ULE), len, left);  /* Guard: len <= E - W. */
+  trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);  /* Store W advanced by len. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = lj_crecord_topuint8(J, trr);  /* First result: derived from the R pointer. */
+  J->base[1] = recff_sbufx_len(J, trr, trw);  /* Second result: W - R. */
+  rd->nres = 2;
+}
+#endif
+
+static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);  /* Arg 1 passed via a temp ref in input mode. */
+  lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
+  /* No IR_USE needed, since the call is a store. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);  /* Temp ref in output mode for the decoded value. */
+  TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);  /* Call result is stored to the R field below. */
+  IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));  /* Result type taken from the runtime buffer at recording time. */
+  /* No IR_USE needed, since the call is a store. */
+  J->base[0] = lj_record_vload(J, tmp, 0, t);
+  /* The sbx->r store must be after the VLOAD type check, in case it fails. */
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
+{
+  TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1);  /* Arg 0 passed via a temp ref in input mode. */
+  J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);  /* Call result (STR) becomes the return value. */
+  /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
+  emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
+{
+  if (tvisstr(&rd->argv[0])) {
+    GCstr *str = strV(&rd->argv[0]);  /* Runtime string seen while recording. */
+    SBufExt sbx;  /* Scratch buffer, used only at recording time to peek the type. */
+    IRType t;
+    TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);  /* Temp ref in output mode for the decoded value. */
+    TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
+    /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
+    ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
+    */
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+    memset(&sbx, 0, sizeof(SBufExt));
+    lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);  /* Set the scratch buffer up over the string's bytes. */
+    t = (IRType)lj_serialize_peektype(&sbx);  /* Result type for the VLOAD below. */
+    J->base[0] = lj_record_vload(J, tmp, 0, t);
+  }  /* else: Interpreter will throw. */
+}
+
+#endif
+
 /* -- Table library fast functions ---------------------------------------- */
 
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
@@ -1054,7 +1428,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
     TRef hdr = recff_bufhdr(J);
     TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
     emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
-    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
   }  /* else: Interpreter will throw. */
   UNUSED(rd);
 }

+ 6 - 6
libs/LuaJIT/src/lj_frame.h

@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_ARM64
-#define CFRAME_OFS_ERRF		196
-#define CFRAME_OFS_NRES		200
-#define CFRAME_OFS_PREV		160
-#define CFRAME_OFS_L		176
-#define CFRAME_OFS_PC		168
-#define CFRAME_OFS_MULTRES	192
+#define CFRAME_OFS_ERRF		36
+#define CFRAME_OFS_NRES		40
+#define CFRAME_OFS_PREV		0
+#define CFRAME_OFS_L		16
+#define CFRAME_OFS_PC		8
+#define CFRAME_OFS_MULTRES	32
 #define CFRAME_SIZE		208
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC

+ 9 - 0
libs/LuaJIT/src/lj_gc.c

@@ -65,6 +65,15 @@ static void gc_mark(global_State *g, GCobj *o)
     gray2black(o);  /* Userdata are never gray. */
     if (mt) gc_markobj(g, mt);
     gc_markobj(g, tabref(gco2ud(o)->env));
+    if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
+      SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
+      if (sbufiscow(sbx) && gcref(sbx->cowref))
+	gc_markobj(g, gcref(sbx->cowref));
+      if (gcref(sbx->dict_str))
+	gc_markobj(g, gcref(sbx->dict_str));
+      if (gcref(sbx->dict_mt))
+	gc_markobj(g, gcref(sbx->dict_mt));
+    }
   } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
     GCupval *uv = gco2uv(o);
     gc_marktv(g, uvval(uv));

+ 2 - 1
libs/LuaJIT/src/lj_ir.c

@@ -30,6 +30,7 @@
 #endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_serialize.h"
 #include "lj_strfmt.h"
 #include "lj_prng.h"
 
@@ -147,7 +148,7 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
 }
 
 /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
-LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
+TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
 {
   lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
   ofs >>= 2;

+ 27 - 11
libs/LuaJIT/src/lj_ir.h

@@ -95,6 +95,7 @@
   _(UREFO,	LW, ref, lit) \
   _(UREFC,	LW, ref, lit) \
   _(FREF,	R , ref, lit) \
+  _(TMPREF,	S , ref, lit) \
   _(STRREF,	N , ref, ref) \
   _(LREF,	L , ___, ___) \
   \
@@ -105,7 +106,7 @@
   _(FLOAD,	L , ref, lit) \
   _(XLOAD,	L , ref, lit) \
   _(SLOAD,	L , lit, lit) \
-  _(VLOAD,	L , ref, ___) \
+  _(VLOAD,	L , ref, lit) \
   _(ALEN,	L , ref, ref) \
   \
   _(ASTORE,	S , ref, ref) \
@@ -124,8 +125,8 @@
   \
   /* Buffer operations. */ \
   _(BUFHDR,	L , ref, lit) \
-  _(BUFPUT,	L , ref, ref) \
-  _(BUFSTR,	A , ref, ref) \
+  _(BUFPUT,	LW, ref, ref) \
+  _(BUFSTR,	AW, ref, ref) \
   \
   /* Barriers. */ \
   _(TBAR,	S , ref, ___) \
@@ -139,9 +140,9 @@
   _(STRTO,	N , ref, ___) \
   \
   /* Calls. */ \
-  _(CALLN,	N , ref, lit) \
-  _(CALLA,	A , ref, lit) \
-  _(CALLL,	L , ref, lit) \
+  _(CALLN,	NW, ref, lit) \
+  _(CALLA,	AW, ref, lit) \
+  _(CALLL,	LW, ref, lit) \
   _(CALLS,	S , ref, lit) \
   _(CALLXS,	S , ref, ref) \
   _(CARG,	N , ref, ref) \
@@ -204,9 +205,15 @@ IRFPMDEF(FPMENUM)
   _(UDATA_META,	offsetof(GCudata, metatable)) \
   _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
   _(UDATA_FILE,	sizeof(GCudata)) \
+  _(SBUF_W,	sizeof(GCudata) + offsetof(SBufExt, w)) \
+  _(SBUF_E,	sizeof(GCudata) + offsetof(SBufExt, e)) \
+  _(SBUF_B,	sizeof(GCudata) + offsetof(SBufExt, b)) \
+  _(SBUF_L,	sizeof(GCudata) + offsetof(SBufExt, L)) \
+  _(SBUF_REF,	sizeof(GCudata) + offsetof(SBufExt, cowref)) \
+  _(SBUF_R,	sizeof(GCudata) + offsetof(SBufExt, r)) \
   _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
   _(CDATA_PTR,	sizeof(GCcdata)) \
-  _(CDATA_INT, sizeof(GCcdata)) \
+  _(CDATA_INT,	sizeof(GCcdata)) \
   _(CDATA_INT64, sizeof(GCcdata)) \
   _(CDATA_INT64_4, sizeof(GCcdata) + 4)
 
@@ -217,6 +224,11 @@ IRFLDEF(FLENUM)
   IRFL__MAX
 } IRFieldID;
 
+/* TMPREF mode bits, stored in op2. */
+#define IRTMPREF_IN1		0x01	/* First input value. */
+#define IRTMPREF_OUT1		0x02	/* First output value. */
+#define IRTMPREF_OUT2		0x04	/* Second output value. */
+
 /* SLOAD mode bits, stored in op2. */
 #define IRSLOAD_PARENT		0x01	/* Coalesce with parent trace. */
 #define IRSLOAD_FRAME		0x02	/* Load 32 bits of ftsz. */
@@ -224,15 +236,17 @@ IRFLDEF(FLENUM)
 #define IRSLOAD_CONVERT		0x08	/* Number to integer conversion. */
 #define IRSLOAD_READONLY	0x10	/* Read-only, omit slot store. */
 #define IRSLOAD_INHERIT		0x20	/* Inherited by exits/side traces. */
+#define IRSLOAD_KEYINDEX	0x40	/* Table traversal key index. */
 
-/* XLOAD mode, stored in op2. */
-#define IRXLOAD_READONLY	1	/* Load from read-only data. */
-#define IRXLOAD_VOLATILE	2	/* Load from volatile data. */
-#define IRXLOAD_UNALIGNED	4	/* Unaligned load. */
+/* XLOAD mode bits, stored in op2. */
+#define IRXLOAD_READONLY	0x01	/* Load from read-only data. */
+#define IRXLOAD_VOLATILE	0x02	/* Load from volatile data. */
+#define IRXLOAD_UNALIGNED	0x04	/* Unaligned load. */
 
 /* BUFHDR mode, stored in op2. */
 #define IRBUFHDR_RESET		0	/* Reset buffer. */
 #define IRBUFHDR_APPEND		1	/* Append to buffer. */
+#define IRBUFHDR_WRITE		2	/* Write to string buffer. */
 
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK		0x001f	/* Source IRType. */
@@ -249,6 +263,7 @@ IRFLDEF(FLENUM)
 #define IRCONV_ANY    (1<<IRCONV_CSH)	/* Any FP number is ok. */
 #define IRCONV_INDEX  (2<<IRCONV_CSH)	/* Check + special backprop rules. */
 #define IRCONV_CHECK  (3<<IRCONV_CSH)	/* Number checked for integerness. */
+#define IRCONV_NONE   IRCONV_ANY	/* INT|*64 no conv, but change type. */
 
 /* TOSTR mode, stored in op2. */
 #define IRTOSTR_INT		0	/* Convert integer to string. */
@@ -481,6 +496,7 @@ typedef uint32_t TRef;
 #define TREF_REFMASK		0x0000ffff
 #define TREF_FRAME		0x00010000
 #define TREF_CONT		0x00020000
+#define TREF_KEYINDEX		0x00100000
 
 #define TREF(ref, t)		((TRef)((ref) + ((t)<<24)))
 

+ 49 - 27
libs/LuaJIT/src/lj_ircall.h

@@ -30,10 +30,12 @@ typedef struct CCallInfo {
 #define CCI_CALL_L		(IR_CALLL << CCI_OPSHIFT)
 #define CCI_CALL_S		(IR_CALLS << CCI_OPSHIFT)
 #define CCI_CALL_FN		(CCI_CALL_N|CCI_CC_FASTCALL)
+#define CCI_CALL_FA		(CCI_CALL_A|CCI_CC_FASTCALL)
 #define CCI_CALL_FL		(CCI_CALL_L|CCI_CC_FASTCALL)
 #define CCI_CALL_FS		(CCI_CALL_S|CCI_CC_FASTCALL)
 
 /* C call info flags. */
+#define CCI_T			(IRT_GUARD << CCI_OTSHIFT)  /* May throw. */
 #define CCI_L			0x0100	/* Implicit L arg. */
 #define CCI_CASTU64		0x0200	/* Cast u64 result to number. */
 #define CCI_NOFPRCLOBBER	0x0400	/* Does not clobber any FPRs. */
@@ -111,6 +113,18 @@ typedef struct CCallInfo {
 #define IRCALLCOND_FFI32(x)		NULL
 #endif
 
+#if LJ_HASBUFFER
+#define IRCALLCOND_BUFFER(x)		x
+#else
+#define IRCALLCOND_BUFFER(x)		NULL
+#endif
+
+#if LJ_HASBUFFER && LJ_HASFFI
+#define IRCALLCOND_BUFFFI(x)		x
+#else
+#define IRCALLCOND_BUFFFI(x)		NULL
+#endif
+
 #if LJ_SOFTFP
 #define XA_FP		CCI_XA
 #define XA2_FP		(CCI_XA+CCI_XA)
@@ -139,39 +153,47 @@ typedef struct CCallInfo {
 #define IRCALLDEF(_) \
   _(ANY,	lj_str_cmp,		2,  FN, INT, CCI_NOFPRCLOBBER) \
   _(ANY,	lj_str_find,		4,   N, PGC, 0) \
-  _(ANY,	lj_str_new,		3,   S, STR, CCI_L) \
+  _(ANY,	lj_str_new,		3,   S, STR, CCI_L|CCI_T) \
   _(ANY,	lj_strscan_num,		2,  FN, INT, 0) \
-  _(ANY,	lj_strfmt_int,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_num,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_char,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_putint,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putnum,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putquoted,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putfxint,	3,   L, PGC, XA_64) \
-  _(ANY,	lj_strfmt_putfnum_int,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfnum_uint,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfnum,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfstr,	3,   L, PGC, 0) \
-  _(ANY,	lj_strfmt_putfchar,	3,   L, PGC, 0) \
-  _(ANY,	lj_buf_putmem,		3,   S, PGC, 0) \
-  _(ANY,	lj_buf_putstr,		2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putchar,		2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_reverse,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_lower,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_upper,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_rep,	3,   L, PGC, 0) \
-  _(ANY,	lj_buf_puttab,		5,   L, PGC, 0) \
-  _(ANY,	lj_buf_tostr,		1,  FL, STR, 0) \
-  _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L) \
-  _(ANY,	lj_tab_new1,		2,  FS, TAB, CCI_L) \
-  _(ANY,	lj_tab_dup,		2,  FS, TAB, CCI_L) \
+  _(ANY,	lj_strfmt_int,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_num,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_char,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_putint,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putnum,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putquoted,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putfxint,	3,   L, PGC, XA_64|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum_int,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum_uint,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfstr,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putfchar,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_buf_putmem,		3,   S, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr,		2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putchar,		2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_reverse,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_lower,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_upper,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_rep,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_buf_puttab,		5,   L, PGC, CCI_T) \
+  _(BUFFER,	lj_bufx_set,		4,   S, NIL, 0) \
+  _(BUFFFI,	lj_bufx_more,		2,  FS, INT, CCI_T) \
+  _(BUFFER,	lj_serialize_put,	2,  FS, PGC, CCI_T) \
+  _(BUFFER,	lj_serialize_get,	2,  FS, PTR, CCI_T) \
+  _(BUFFER,	lj_serialize_encode,	2,  FA, STR, CCI_L|CCI_T) \
+  _(BUFFER,	lj_serialize_decode,	3,   A, INT, CCI_L|CCI_T) \
+  _(ANY,	lj_buf_tostr,		1,  FL, STR, CCI_T) \
+  _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_new1,		2,  FA, TAB, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_dup,		2,  FA, TAB, CCI_L|CCI_T) \
   _(ANY,	lj_tab_clear,		1,  FS, NIL, 0) \
-  _(ANY,	lj_tab_newkey,		3,   S, PGC, CCI_L) \
+  _(ANY,	lj_tab_newkey,		3,   S, PGC, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_keyindex,	2,  FL, INT, 0) \
+  _(ANY,	lj_vm_next,		2,  FL, PTR, 0) \
   _(ANY,	lj_tab_len,		1,  FL, INT, 0) \
   _(ANY,	lj_tab_len_hint,	2,  FL, INT, 0) \
   _(ANY,	lj_gc_step_jit,		2,  FS, NIL, CCI_L) \
   _(ANY,	lj_gc_barrieruv,	2,  FS, NIL, 0) \
-  _(ANY,	lj_mem_newgco,		2,  FS, PGC, CCI_L) \
+  _(ANY,	lj_mem_newgco,		2,  FA, PGC, CCI_L|CCI_T) \
   _(ANY,	lj_prng_u64d,		1,  FS, NUM, CCI_CASTU64) \
   _(ANY,	lj_vm_modi,		2,  FN, INT, 0) \
   _(ANY,	log10,			1,   N, NUM, XA_FP) \

+ 1 - 0
libs/LuaJIT/src/lj_iropt.h

@@ -124,6 +124,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
+LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
 LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
 
 /* Dead-store elimination. */

+ 7 - 1
libs/LuaJIT/src/lj_jit.h

@@ -150,6 +150,7 @@ typedef enum {
   LJ_TRACE_IDLE,	/* Trace compiler idle. */
   LJ_TRACE_ACTIVE = 0x10,
   LJ_TRACE_RECORD,	/* Bytecode recording active. */
+  LJ_TRACE_RECORD_1ST,	/* Record 1st instruction, too. */
   LJ_TRACE_START,	/* New trace started. */
   LJ_TRACE_END,		/* End of trace. */
   LJ_TRACE_ASM,		/* Assemble trace. */
@@ -184,6 +185,7 @@ typedef struct MCLink {
 typedef struct SnapShot {
   uint32_t mapofs;	/* Offset into snapshot map. */
   IRRef1 ref;		/* First IR ref for this snapshot. */
+  uint16_t mcofs;	/* Offset into machine code in MCode units. */
   uint8_t nslots;	/* Number of valid slots. */
   uint8_t topslot;	/* Maximum frame extent. */
   uint8_t nent;		/* Number of compressed entries. */
@@ -199,12 +201,15 @@ typedef uint32_t SnapEntry;
 #define SNAP_CONT		0x020000	/* Continuation slot. */
 #define SNAP_NORESTORE		0x040000	/* No need to restore slot. */
 #define SNAP_SOFTFPNUM		0x080000	/* Soft-float number. */
+#define SNAP_KEYINDEX		0x100000	/* Traversal key index. */
 LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
 LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
+LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX);
 
 #define SNAP(slot, flags, ref)	(((SnapEntry)(slot) << 24) + (flags) + (ref))
 #define SNAP_TR(slot, tr) \
-  (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
+  (((SnapEntry)(slot) << 24) + \
+   ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK)))
 #if !LJ_FR2
 #define SNAP_MKPC(pc)		((SnapEntry)u32ptr(pc))
 #endif
@@ -485,6 +490,7 @@ typedef struct jit_State {
   const BCIns *startpc;	/* Bytecode PC of starting instruction. */
   TraceNo parent;	/* Parent of current side trace (0 for root traces). */
   ExitNo exitno;	/* Exit number in parent of current side trace. */
+  int exitcode;		/* Exit code from unwound trace. */
 
   BCIns *patchpc;	/* PC for pending re-patch. */
   BCIns patchins;	/* Instruction for pending re-patch. */

+ 6 - 10
libs/LuaJIT/src/lj_lex.c

@@ -105,7 +105,7 @@ static void lex_number(LexState *ls, TValue *tv)
     lex_savenext(ls);
   }
   lex_save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
+  fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
 	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
 	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
   if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -118,11 +118,7 @@ static void lex_number(LexState *ls, TValue *tv)
     GCcdata *cd;
     lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
 		"unexpected number format %d", fmt);
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    ctype_loadffi(L);
     if (fmt == STRSCAN_IMAG) {
       cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
       ((double *)cdataptr(cd))[0] = 0;
@@ -180,7 +176,7 @@ static void lex_longstring(LexState *ls, TValue *tv, int sep)
     }
   } endloop:
   if (tv) {
-    GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
+    GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
 				      sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
     setstrV(ls->L, tv, str);
   }
@@ -286,7 +282,7 @@ static void lex_string(LexState *ls, TValue *tv)
   }
   lex_savenext(ls);  /* Skip trailing delimiter. */
   setstrV(ls->L, tv,
-	  lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
+	  lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
 }
 
 /* -- Main lexical scanner ------------------------------------------------ */
@@ -306,7 +302,7 @@ static LexToken lex_scan(LexState *ls, TValue *tv)
       do {
 	lex_savenext(ls);
       } while (lj_char_isident(ls->c));
-      s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
+      s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
       setstrV(ls->L, tv, s);
       if (s->reserved > 0)  /* Reserved word? */
 	return TK_OFS + s->reserved;
@@ -496,7 +492,7 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
     tokstr = NULL;
   } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
     lex_save(ls, '\0');
-    tokstr = sbufB(&ls->sb);
+    tokstr = ls->sb.b;
   } else {
     tokstr = lj_lex_token2str(ls, tok);
   }

+ 56 - 0
libs/LuaJIT/src/lj_lib.c

@@ -16,6 +16,9 @@
 #include "lj_func.h"
 #include "lj_bc.h"
 #include "lj_dispatch.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strfmt.h"
@@ -301,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
   return def;
 }
 
+/* -- Strict type checks -------------------------------------------------- */
+
+/* The following type checks do not coerce between strings and numbers.
+** And they handle plain int64_t/uint64_t FFI numbers, too.
+*/
+
+#if LJ_HASBUFFER
+GCstr *lj_lib_checkstrx(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;  /* narg is 1-based. */
+  if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);  /* Missing or non-string argument: report a type error. */
+  return strV(o);
+}
+
+int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
+{
+  TValue *o = L->base + narg-1;  /* narg is 1-based. */
+  lj_assertL(b >= 0, "expected range must be non-negative");
+  if (o < L->top) {  /* Argument present; otherwise falls through to badtype. */
+    if (LJ_LIKELY(tvisint(o))) {
+      int32_t i = intV(o);
+      if (i >= a && i <= b) return i;  /* In [a, b]: accept. */
+    } else if (LJ_LIKELY(tvisnum(o))) {
+      /* For performance reasons, this doesn't check for integerness or
+      ** integer overflow. Overflow detection still works, since all FPUs
+      ** return either MININT or MAXINT, which is then out of range.
+      */
+      int32_t i = (int32_t)numV(o);
+      if (i >= a && i <= b) return i;
+#if LJ_HASFFI
+    } else if (tviscdata(o)) {
+      GCcdata *cd = cdataV(o);
+      if (cd->ctypeid == CTID_INT64) {  /* Only plain int64_t/uint64_t cdata are accepted. */
+	int64_t i = *(int64_t *)cdataptr(cd);
+	if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
+      } else if (cd->ctypeid == CTID_UINT64) {
+	uint64_t i = *(uint64_t *)cdataptr(cd);
+	if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;  /* Lower bound skipped when a < 0. */
+      } else {
+	goto badtype;
+      }
+#endif
+    } else {
+      goto badtype;
+    }
+    lj_err_arg(L, narg, LJ_ERR_NUMRNG);  /* Accepted type, but value outside [a, b]. */
+  }
+badtype:
+  lj_err_argt(L, narg, LUA_TNUMBER);  /* Missing argument or unsupported type. */
+  return 0;  /* unreachable */
+}
+#endif
+

+ 6 - 0
libs/LuaJIT/src/lj_lib.h

@@ -46,6 +46,12 @@ LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
 LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
 LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
 
+#if LJ_HASBUFFER
+LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
+LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
+				     int32_t a, int32_t b);
+#endif
+
 /* Avoid including lj_frame.h. */
 #if LJ_GC64
 #define lj_lib_upvalue(L, n) \

+ 17 - 8
libs/LuaJIT/src/lj_mcode.c

@@ -97,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
 #define MCPROT_RW	(PROT_READ|PROT_WRITE)
 #define MCPROT_RX	(PROT_READ|PROT_EXEC)
 #define MCPROT_RWX	(PROT_READ|PROT_WRITE|PROT_EXEC)
+#ifdef PROT_MPROTECT
+#define MCPROT_CREATE	(PROT_MPROTECT(MCPROT_RWX))
+#else
+#define MCPROT_CREATE	0
+#endif
 
 static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
 {
-  void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED) {
     if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
     p = NULL;
@@ -238,7 +243,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
 /* All memory addresses are reachable by relative jumps. */
 static void *mcode_alloc(jit_State *J, size_t sz)
 {
-#if defined(__OpenBSD__) || LJ_TARGET_UWP
+#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
   /* Allow better executable memory allocation for OpenBSD W^X mode. */
   void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
   if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
@@ -269,6 +274,7 @@ static void mcode_allocarea(jit_State *J)
   ((MCLink *)J->mcarea)->next = oldarea;
   ((MCLink *)J->mcarea)->size = sz;
   J->szallmcarea += sz;
+  J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
 }
 
 /* Free all MCode areas. */
@@ -279,7 +285,9 @@ void lj_mcode_free(jit_State *J)
   J->szallmcarea = 0;
   while (mc) {
     MCode *next = ((MCLink *)mc)->next;
-    mcode_free(J, mc, ((MCLink *)mc)->size);
+    size_t sz = ((MCLink *)mc)->size;
+    lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
+    mcode_free(J, mc, sz);
     mc = next;
   }
 }
@@ -314,21 +322,21 @@ void lj_mcode_abort(jit_State *J)
 /* Set/reset protection to allow patching of MCode areas. */
 MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
 {
-#if LUAJIT_SECURITY_MCODE == 0
-  UNUSED(J); UNUSED(ptr); UNUSED(finish);
-  return NULL;
-#else
   if (finish) {
+#if LUAJIT_SECURITY_MCODE
     if (J->mcarea == ptr)
       mcode_protect(J, MCPROT_RUN);
     else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
       mcode_protfail(J);
+#endif
     return NULL;
   } else {
     MCode *mc = J->mcarea;
     /* Try current area first to use the protection cache. */
     if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
+#if LUAJIT_SECURITY_MCODE
       mcode_protect(J, MCPROT_GEN);
+#endif
       return mc;
     }
     /* Otherwise search through the list of MCode areas. */
@@ -336,13 +344,14 @@ MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
       mc = ((MCLink *)mc)->next;
       lj_assertJ(mc != NULL, "broken MCode area chain");
       if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
+#if LUAJIT_SECURITY_MCODE
 	if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
 	  mcode_protfail(J);
+#endif
 	return mc;
       }
     }
   }
-#endif
 }
 
 /* Limit of MCode reservation reached. */

+ 9 - 4
libs/LuaJIT/src/lj_meta.c

@@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
   int fromc = 0;
   if (left < 0) { left = -left; fromc = 1; }
   do {
-    if (!(tvisstr(top) || tvisnumber(top)) ||
-	!(tvisstr(top-1) || tvisnumber(top-1))) {
+    if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
+	!(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
       cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
       if (tvisnil(mo)) {
 	mo = lj_meta_lookup(L, top, MM_concat);
@@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
       ** next step: [...][CAT stack ............]
       */
       TValue *e, *o = top;
-      uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+      uint64_t tlen = tvisstr(o) ? strV(o)->len :
+		      tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
       SBuf *sb;
       do {
-	o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+	o--; tlen += tvisstr(o) ? strV(o)->len :
+		     tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
       } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
       if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
       sb = lj_buf_tmp_(L);
@@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
 	  GCstr *s = strV(o);
 	  MSize len = s->len;
 	  lj_buf_putmem(sb, strdata(s), len);
+	} else if (tvisbuf(o)) {
+	  SBufExt *sbx = bufV(o);
+	  lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
 	} else if (tvisint(o)) {
 	  lj_strfmt_putint(sb, intV(o));
 	} else {

+ 11 - 5
libs/LuaJIT/src/lj_obj.h

@@ -34,13 +34,17 @@ typedef struct MRef {
 
 #if LJ_GC64
 #define mref(r, t)	((t *)(void *)(r).ptr64)
+#define mrefu(r)	((r).ptr64)
 
 #define setmref(r, p)	((r).ptr64 = (uint64_t)(void *)(p))
+#define setmrefu(r, u)	((r).ptr64 = (uint64_t)(u))
 #define setmrefr(r, v)	((r).ptr64 = (v).ptr64)
 #else
 #define mref(r, t)	((t *)(void *)(uintptr_t)(r).ptr32)
+#define mrefu(r)	((r).ptr32)
 
 #define setmref(r, p)	((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
+#define setmrefu(r, u)	((r).ptr32 = (uint32_t)(u))
 #define setmrefr(r, v)	((r).ptr32 = (v).ptr32)
 #endif
 
@@ -153,11 +157,9 @@ typedef int32_t BCLine;  /* Bytecode line number. */
 typedef void (*ASMFunction)(void);
 
 /* Resizable string buffer. Need this here, details in lj_buf.h. */
+#define SBufHeader	char *w, *e, *b; MRef L
 typedef struct SBuf {
-  MRef p;		/* String buffer pointer. */
-  MRef e;		/* String buffer end pointer. */
-  MRef b;		/* String buffer base. */
-  MRef L;		/* lua_State, used for buffer resizing. */
+  SBufHeader;
 } SBuf;
 
 /* -- Tags and values ----------------------------------------------------- */
@@ -282,6 +284,9 @@ typedef const TValue cTValue;
 #define LJ_TISGCV		(LJ_TSTR+1)
 #define LJ_TISTABUD		LJ_TTAB
 
+/* Type marker for slot holding a traversal index. Must be lightuserdata. */
+#define LJ_KEYINDEX		0xfffe7fffu
+
 #if LJ_GC64
 #define LJ_GCVMASK		(((uint64_t)1 << 47) - 1)
 #endif
@@ -330,6 +335,7 @@ enum {
   UDTYPE_USERDATA,	/* Regular userdata. */
   UDTYPE_IO_FILE,	/* I/O library FILE. */
   UDTYPE_FFI_CLIB,	/* FFI C library namespace. */
+  UDTYPE_BUFFER,	/* String buffer. */
   UDTYPE__MAX
 };
 
@@ -920,7 +926,7 @@ static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
 }
 
 #define define_setV(name, type, tag) \
-static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \
+static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
 { \
   setgcV(L, o, obj2gco(v), tag); \
 }

+ 70 - 16
libs/LuaJIT/src/lj_opt_fold.c

@@ -514,6 +514,7 @@ LJFOLDF(kfold_snew_kptr)
 }
 
 LJFOLD(SNEW any KINT)
+LJFOLD(XSNEW any KINT)
 LJFOLDF(kfold_snew_empty)
 {
   if (fright->i == 0)
@@ -577,22 +578,49 @@ LJFOLDF(kfold_strcmp)
 ** The compromise is to declare them as loads, emit them like stores and
 ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
 ** fragments left over from CSE are eliminated by DCE.
+**
+** The string buffer methods emit a USE instead of a BUFSTR to keep the
+** chain alive.
 */
 
-/* BUFHDR is emitted like a store, see below. */
+LJFOLD(BUFHDR any any)
+LJFOLDF(bufhdr_merge)
+{  /* Fold rule for BUFHDR: only a header with op2 == IRBUFHDR_WRITE
+   ** returns CSEFOLD; every other header mode returns EMITFOLD. */
+  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
+}
 
-LJFOLD(BUFPUT BUFHDR BUFSTR)
-LJFOLDF(bufput_append)
+LJFOLD(BUFPUT any BUFSTR)
+LJFOLDF(bufput_bufstr)
 {
-  /* New buffer, no other buffer op inbetween and same buffer? */
-  if ((J->flags & JIT_F_OPT_FWD) &&
-      !(fleft->op2 & IRBUFHDR_APPEND) &&
-      fleft->prev == fright->op2 &&
-      fleft->op1 == IR(fright->op2)->op1) {
-    IRRef ref = fins->op1;
-    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
-    IR(ref)->op1 = fright->op1;
-    return ref;
+  if ((J->flags & JIT_F_OPT_FWD)) {
+    IRRef hdr = fright->op2;
+    /* New buffer, no other buffer op inbetween and same buffer? */
+    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
+	fleft->prev == hdr &&
+	fleft->op1 == IR(hdr)->op1) {
+      IRRef ref = fins->op1;
+      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
+      IR(ref)->op1 = fright->op1;
+      return ref;
+    }
+    /* Replay puts to global temporary buffer. */
+    if (IR(hdr)->op2 == IRBUFHDR_RESET) {
+      IRIns *ir = IR(fright->op1);
+      /* For now only handle single string.reverse .lower .upper .rep. */
+      if (ir->o == IR_CALLL &&
+	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
+	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
+	IRIns *carg1 = IR(ir->op1);
+	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
+	  IRIns *carg2 = IR(carg1->op1);
+	  if (carg2->op1 == hdr) {
+	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
+	  }
+	} else if (carg1->op1 == hdr) {
+	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
+	}
+      }
+    }
   }
   return EMITFOLD;  /* Always emit, CSE later. */
 }
@@ -626,14 +654,14 @@ LJFOLDF(bufstr_kfold_cse)
 	     "bad buffer constructor IR op %d", fleft->o);
   if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
     if (fleft->o == IR_BUFHDR) {  /* No put operations? */
-      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
+      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
 	return lj_ir_kstr(J, &J2G(J)->strempty);
       fins->op1 = fleft->op1;
       fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
       return CSEFOLD;
     } else if (fleft->o == IR_BUFPUT) {
       IRIns *irb = IR(fleft->op1);
-      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
+      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
 	return fleft->op2;  /* Shortcut for a single put operation. */
     }
   }
@@ -646,7 +674,7 @@ LJFOLDF(bufstr_kfold_cse)
 	lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
 		   ira->o == IR_CALLL || ira->o == IR_CARG,
 		   "bad buffer constructor IR op %d", ira->o);
-	if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
+	if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
 	  return ref;  /* CSE succeeded. */
 	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
 	  break;
@@ -1297,6 +1325,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
 LJFOLD(CONV MUL IRCONV_U32_U64)
 LJFOLDF(simplify_conv_narrow)
 {
+#if LJ_64
+  UNUSED(J);
+  return NEXTFOLD;
+#else
   IROp op = (IROp)fleft->o;
   IRType t = irt_type(fins->t);
   IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1307,6 +1339,7 @@ LJFOLDF(simplify_conv_narrow)
   fins->op1 = op1;
   fins->op2 = op2;
   return RETRYFOLD;
+#endif
 }
 
 /* Special CSE rule for CONV. */
@@ -2275,6 +2308,27 @@ LJFOLDF(fload_str_len_tostr)
   return NEXTFOLD;
 }
 
+LJFOLD(FLOAD any IRFL_SBUF_W)
+LJFOLD(FLOAD any IRFL_SBUF_E)
+LJFOLD(FLOAD any IRFL_SBUF_B)
+LJFOLD(FLOAD any IRFL_SBUF_L)
+LJFOLD(FLOAD any IRFL_SBUF_REF)
+LJFOLD(FLOAD any IRFL_SBUF_R)
+LJFOLDF(fload_sbuf)
+{  /* Fold rule for FLOAD of the IRFL_SBUF_* fields listed above. */
+  TRef tr = lj_opt_fwd_fload(J);  /* Candidate result of FLOAD forwarding. */
+  /* Keep the candidate only if lj_opt_fwd_sbuf() finds no conflicting
+  ** buffer op above its reference; otherwise the load is emitted. */
+  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
+}
+
+/* The fast function ID of function objects is immutable.
+** With JIT_F_OPT_FOLD set, an FLOAD of IRFL_FUNC_FFID from a constant
+** (KGC) function is replaced by the integer constant
+** ir_kfunc(fleft)->c.ffid. Without that flag NEXTFOLD is returned.
+*/
+LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
+LJFOLDF(fload_func_ffid_kgc)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
+    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
+  return NEXTFOLD;
+}
+
 /* The C type ID of cdata objects is immutable. */
 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
 LJFOLDF(fload_cdata_typeid_kgc)
@@ -2421,6 +2475,7 @@ LJFOLD(XSTORE any any)
 LJFOLDX(lj_opt_dse_xstore)
 
 LJFOLD(NEWREF any any)  /* Treated like a store. */
+LJFOLD(TMPREF any any)
 LJFOLD(CALLA any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
 LJFOLD(CALLS any any)
@@ -2431,7 +2486,6 @@ LJFOLD(TNEW any any)
 LJFOLD(TDUP any)
 LJFOLD(CNEW any any)
 LJFOLD(XSNEW any any)
-LJFOLD(BUFHDR any any)
 LJFOLDX(lj_ir_emit)
 
 /* ------------------------------------------------------------------------ */

+ 1 - 0
libs/LuaJIT/src/lj_opt_loop.c

@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
   /* Setup new snapshot. */
   snap->mapofs = (uint32_t)nmapofs;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = nslots;
   snap->topslot = osnap->topslot;
   snap->count = 0;

+ 30 - 3
libs/LuaJIT/src/lj_opt_mem.c

@@ -364,7 +364,10 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
       /* Different value: try to eliminate the redundant store. */
       if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
 	IRIns *ir;
-	/* Check for any intervening guards (includes conflicting loads). */
+	/* Check for any intervening guards (includes conflicting loads).
+	** Note that lj_tab_keyindex and lj_vm_next don't need guards,
+	** since they are followed by at least one guarded VLOAD.
+	*/
 	for (ir = IR(J->cur.nins-1); ir > store; ir--)
 	  if (irt_isguard(ir->t) || ir->o == IR_ALEN)
 	    goto doemit;  /* No elimination possible. */
@@ -620,8 +623,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
 	goto doemit;
       break;  /* Otherwise continue searching. */
     case ALIAS_MUST:
-      if (store->op2 == val)  /* Same value: drop the new store. */
-	return DROPFOLD;
+      if (store->op2 == val &&
+	  !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
+	return DROPFOLD;  /* Same value: drop the new store. */
       /* Different value: try to eliminate the redundant store. */
       if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
 	IRIns *ir;
@@ -642,6 +646,29 @@ doemit:
   return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
 }
 
+/* Check whether there's no aliasing buffer op between IRFL_SBUF_*.
+**
+** 'lim' is the IR reference that bounds the search: only instructions
+** with a reference above 'lim' are inspected.
+** Returns 0 (conflict) if J->chain[IR_BUFPUT] is above 'lim', or if a
+** CALLS or CALLL above 'lim' has a call ID (op2) in the range
+** [IRCALL_lj_strfmt_putint, IRCALL_lj_buf_tostr). Returns 1 otherwise.
+*/
+int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
+{
+  IRRef ref;
+  if (J->chain[IR_BUFPUT] > lim)
+    return 0;  /* Conflict. */
+  ref = J->chain[IR_CALLS];  /* Walk the CALLS chain via ir->prev. */
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict. */
+    ref = ir->prev;
+  }
+  ref = J->chain[IR_CALLL];  /* Same check for the CALLL chain. */
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict. */
+    ref = ir->prev;
+  }
+  return 1;  /* No conflict. Can safely FOLD/CSE. */
+}
+
 /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
 
 /* Find cdata allocation for a reference (if any). */

+ 1 - 1
libs/LuaJIT/src/lj_opt_split.c

@@ -645,7 +645,7 @@ static void split_ir(jit_State *J)
       tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
 #endif
       ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
-    } else if (ir->o == IR_TOSTR) {
+    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
       if (hisubst[ir->op1]) {
 	if (irref_isk(ir->op1))
 	  nir->op1 = ir->op1;

+ 3 - 3
libs/LuaJIT/src/lj_parse.c

@@ -1465,7 +1465,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
     MSize len = s->len+1;
     char *p = lj_buf_more(&ls->sb, len);
     p = lj_buf_wmem(p, strdata(s), len);
-    setsbufP(&ls->sb, p);
+    ls->sb.w = p;
   }
   *ofsvar = sbuflen(&ls->sb);
   lastpc = 0;
@@ -1486,7 +1486,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
       startpc = vs->startpc;
       p = lj_strfmt_wuleb128(p, startpc-lastpc);
       p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
-      setsbufP(&ls->sb, p);
+      ls->sb.w = p;
       lastpc = startpc;
     }
   }
@@ -1499,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
 {
   setmref(pt->uvinfo, p);
   setmref(pt->varinfo, (char *)p + ofsvar);
-  memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb));  /* Copy from temp. buffer. */
+  memcpy(p, ls->sb.b, sbuflen(&ls->sb));  /* Copy from temp. buffer. */
 }
 #else
 

+ 11 - 5
libs/LuaJIT/src/lj_prng.c

@@ -109,13 +109,19 @@ static PRGR libfunc_rgr;
 #include <sys/syscall.h>
 #else
 
-#if LJ_TARGET_OSX
+#if LJ_TARGET_OSX && !LJ_TARGET_IOS
+/*
+** In their infinite wisdom Apple decided to disallow getentropy() in the
+** iOS App Store. Even though the call is common to all BSD-ish OS, it's
+** recommended by Apple in their own security-related docs, and, to top
+** off the foolery, /dev/urandom is handled by the same kernel code,
+** yet accessing it is actually permitted (but less efficient).
+*/
 #include <Availability.h>
-#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 || \
-    __IPHONE_OS_VERSION_MIN_REQUIRED >= 100000
+#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
 #define LJ_TARGET_HAS_GETENTROPY	1
 #endif
-#elif LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
+#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
 #define LJ_TARGET_HAS_GETENTROPY	1
 #endif
 
@@ -167,7 +173,7 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
 
 #elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
 
-  if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u) == 0)
+  if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
     goto ok;
 
 #elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE

+ 2 - 3
libs/LuaJIT/src/lj_profile.c

@@ -346,8 +346,7 @@ LUA_API void luaJIT_profile_stop(lua_State *L)
     lj_trace_flushall(L);
 #endif
     lj_buf_free(g, &ps->sb);
-    setmref(ps->sb.b, NULL);
-    setmref(ps->sb.e, NULL);
+    ps->sb.w = ps->sb.e = NULL;
     ps->g = NULL;
   }
 }
@@ -362,7 +361,7 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
   lj_buf_reset(sb);
   lj_debug_dumpstack(L, sb, fmt, depth);
   *len = (size_t)sbuflen(sb);
-  return sbufB(sb);
+  return sb->b;
 }
 
 #endif

+ 162 - 18
libs/LuaJIT/src/lj_record.c

@@ -156,6 +156,9 @@ static void rec_check_slots(jit_State *J)
 	lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
 		   "cont slot %d not followed by frame", s);
 	depth++;
+      } else if ((tr & TREF_KEYINDEX)) {
+	lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d",
+				   s, tref_type(tr));
       } else {
 	/* Number repr. may differ, but other types must be the same. */
 	lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
@@ -259,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
     return 0;  /* Can't represent lightuserdata (pointless). */
 }
 
+/* Emit a VLOAD with the correct type.
+**
+** Emits IRTG(IR_VLOAD, t) with operands 'ref' and 'idx' and returns the
+** resulting TRef. If 't' is a primitive type according to irtype_ispri(),
+** the instruction is still emitted, but TREF_PRI(t) is returned instead
+** of the reference to it.
+*/
+TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t)
+{
+  TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx);
+  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+  return tr;
+}
+
 /* -- Record loop ops ----------------------------------------------------- */
 
 /* Loop event. */
@@ -275,9 +286,9 @@ static void canonicalize_slots(jit_State *J)
   if (LJ_DUALNUM) return;
   for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
     TRef tr = J->slot[s];
-    if (tref_isinteger(tr)) {
+    if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) {
       IRIns *ir = IR(tref_ref(tr));
-      if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
+      if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY))))
 	J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
     }
   }
@@ -598,6 +609,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
 {
   if (J->parent == 0 && J->exitno == 0) {
     if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
+      if (bc_op(J->cur.startins) == BC_ITERN) return;  /* See rec_itern(). */
       /* Same loop? */
       if (ev == LOOPEV_LEAVE)  /* Must loop back to form a root trace. */
 	lj_trace_err(J, LJ_TRERR_LLEAVE);
@@ -638,6 +650,70 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
   }  /* Side trace continues across a loop that's left or not entered. */
 }
 
+/* Record ITERN.
+**
+** 'ra' and 'rb' are operands of the ITERN instruction (one caller passes
+** bc_a(startins) and bc_b(startins)).
+** Reads the table from slot ra-2 and the traversal index from slot ra-1,
+** then records one traversal step with lj_record_next(). If the resulting
+** key is not nil, slot ra-1 receives the next index tagged with
+** TREF_KEYINDEX, slot ra the key and slot ra+1 the value, and
+** LOOPEV_ENTER is returned. For a nil key LOOPEV_LEAVE is returned.
+** LOOPEV_ENTER is also returned after stopping the recording with
+** LJ_TRLINK_LOOP, when J->pc is back at J->startpc with instructions
+** already recorded (see the loop detection below).
+** With LJ_BE nothing is recorded: BC_ITERN is stored in J->errinfo and
+** lj_trace_err_info() is called with LJ_TRERR_NYIBC.
+*/
+static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
+{
+#if LJ_BE
+  /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+  ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+  */
+  UNUSED(ra); UNUSED(rb);
+  setintV(&J->errinfo, (int32_t)BC_ITERN);
+  lj_trace_err_info(J, LJ_TRERR_NYIBC);
+#else
+  RecordIndex ix;
+  /* Since ITERN is recorded at the start, we need our own loop detection. */
+  if (J->pc == J->startpc &&
+      (J->cur.nins > REF_FIRST+1 ||
+       (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) &&
+      J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
+    lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping trace. */
+    return LOOPEV_ENTER;
+  }
+  J->maxslot = ra;
+  lj_snap_add(J);  /* Required to make JLOOP the first ins in a side-trace. */
+  ix.tab = getslot(J, ra-2);
+  ix.key = J->base[ra-1] ? J->base[ra-1] :
+	   sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX);
+  copyTV(J->L, &ix.tabv, &J->L->base[ra-2]);
+  copyTV(J->L, &ix.keyv, &J->L->base[ra-1]);
+  ix.idxchain = (rb < 3);  /* Omit value type check, if unused. */
+  ix.mobj = 1;  /* We need the next index, too. */
+  J->maxslot = ra + lj_record_next(J, &ix);  /* Adds 1 or 2 result slots. */
+  J->needsnap = 1;
+  if (!tref_isnil(ix.key)) {  /* Looping back? */
+    J->base[ra-1] = ix.mobj | TREF_KEYINDEX;  /* Control var has next index. */
+    J->base[ra] = ix.key;
+    J->base[ra+1] = ix.val;
+    J->pc += bc_j(J->pc[1])+2;  /* Apply the jump offset of pc[1]. */
+    return LOOPEV_ENTER;
+  } else {
+    J->maxslot = ra-3;
+    J->pc += 2;  /* Continue after ITERN and the instruction following it. */
+    return LOOPEV_LEAVE;
+  }
+#endif
+}
+
+/* Record ISNEXT.
+**
+** Inspects the three interpreter stack values at base[ra-3..ra-1]. If they
+** are the next() fast function (ffid == FF_next), a table and nil, the
+** recorder emits an EQ check of the function's ffid against FF_next,
+** loads slots ra-2 and ra-1 for their type checks, replaces slot ra-1 with
+** the integer constant 0 tagged with TREF_KEYINDEX and sets J->maxslot
+** to ra. In any other case lj_trace_err() is called with LJ_TRERR_RECERR.
+*/
+static void rec_isnext(jit_State *J, BCReg ra)
+{
+  cTValue *b = &J->L->base[ra-3];
+  if (tvisfunc(b) && funcV(b)->c.ffid == FF_next &&
+      tvistab(b+1) && tvisnil(b+2)) {
+    /* These checks are folded away for a compiled pairs(). */
+    TRef func = getslot(J, ra-3);
+    TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID);
+    emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next));
+    (void)getslot(J, ra-2); /* Type check for table. */
+    (void)getslot(J, ra-1); /* Type check for nil key. */
+    J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX;
+    J->maxslot = ra;
+  } else {  /* Abort trace. Interpreter will despecialize bytecode. */
+    lj_trace_err(J, LJ_TRERR_RECERR);
+  }
+}
+
 /* -- Record profiler hook checks ----------------------------------------- */
 
 #if LJ_HASPROFILE
@@ -708,7 +784,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
       /* NYI: io_file_iter doesn't have an ffid, yet. */
       {  /* Specialize to the ffid. */
 	TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
-	emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
+	emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid));
       }
       return tr;
     default:
@@ -832,6 +908,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
     J->base -= cbase;
     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
     frame = frame_prevd(frame);
+    J->needsnap = 1;  /* Stop catching on-trace errors. */
   }
   /* Return to lower frame via interpreter for unhandled cases. */
   if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
@@ -918,6 +995,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
       TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
       if (bslot != J->maxslot) {  /* Concatenate the remainder. */
 	TValue *b = J->L->base, save;  /* Simulate lower frame and result. */
+	/* Can't handle MM_concat + CALLT + fast func side-effects. */
+	if (J->postproc != LJ_POST_NONE)
+	  lj_trace_err(J, LJ_TRERR_NYIRETL);
 	J->base[J->maxslot] = tr;
 	copyTV(J->L, &save, b-(2<<LJ_FR2));
 	if (gotresults)
@@ -1433,6 +1513,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
 	return 0;  /* No result yet. */
       }
     }
+#if LJ_HASBUFFER
+    /* The index table of buffer objects is treated as immutable. */
+    if (ix->mt == TREF_NIL && !ix->val &&
+	tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
+	tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
+      cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
+      TRef tr = lj_record_constify(J, val);
+      if (tr) return tr;  /* Specialize to the value, i.e. a method. */
+    }
+#endif
     /* Otherwise retry lookup with metaobject. */
     ix->tab = ix->mobj;
     copyTV(J->L, &ix->tabv, &ix->mobjv);
@@ -1543,6 +1633,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
   }
 }
 
+/* Determine result type of table traversal.
+**
+** Scans table 't' forward from traversal index 'idx' for the first slot
+** with a non-nil value: indexes below t->asize address the array part,
+** the remaining indexes (after subtracting t->asize) address hash nodes
+** 0..t->hmask.
+** Returns the key type in the low 8 bits and the value type shifted left
+** by 8. Keys from the array part are reported as IRT_INT if LJ_DUALNUM is
+** set and as IRT_NUM otherwise. If nothing is found, both types are
+** IRT_NIL.
+*/
+static IRType rec_next_types(GCtab *t, uint32_t idx)
+{
+  for (; idx < t->asize; idx++) {
+    cTValue *a = arrayslot(t, idx);
+    if (LJ_LIKELY(!tvisnil(a)))
+      return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8);
+  }
+  idx -= t->asize;  /* Convert to an index into the hash part. */
+  for (; idx <= t->hmask; idx++) {
+    Node *n = &noderef(t->node)[idx];
+    if (!tvisnil(&n->val))
+      return itype2irt(&n->key) + (itype2irt(&n->val) << 8);
+  }
+  return IRT_NIL + (IRT_NIL << 8);
+}
+
+/* Record a table traversal step aka next().
+**
+** Inputs in 'ix': tab/key (TRefs of table and current key or index),
+** tabv/keyv (their runtime values; the low 32 bits of keyv are used as
+** the traversal index for the type prediction), mobj (non-zero: caller
+** wants the next index), idxchain (non-zero: do not load the value).
+** Outputs in 'ix': key, val and, if requested or if the predicted key
+** type is nil, mobj is overwritten with the TRef of the HIOP result.
+** Returns the number of results: 1 (key only, val is TREF_NIL) or
+** 2 (key and value).
+*/
+int lj_record_next(jit_State *J, RecordIndex *ix)
+{
+  IRType t, tkey, tval;
+  TRef trvk;
+  t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo);
+  tkey = (t & 0xff); tval = (t >> 8);  /* Unpack key and value types. */
+  trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key);
+  if (ix->mobj || tkey == IRT_NIL) {
+    TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk);
+    /* Always check for invalid key from next() for nil result. */
+    if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1));
+    ix->mobj = idx;
+  }
+  ix->key = lj_record_vload(J, trvk, 1, tkey);
+  if (tkey == IRT_NIL || ix->idxchain) {  /* Omit value type check. */
+    ix->val = TREF_NIL;
+    return 1;
+  } else {  /* Need value. */
+    ix->val = lj_record_vload(J, trvk, 0, tval);
+    return 2;
+  }
+}
+
 static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
 {
   RecordIndex ix;
@@ -1826,11 +1957,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
 	for (i = 0; i < nload; i++) {
 	  IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
-	  TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
-			     vbase, lj_ir_kint(J, (int32_t)i));
-	  TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
-	  J->base[dst+i] = tr;
+	  J->base[dst+i] = lj_record_vload(J, vbase, i, t);
 	}
       } else {
 	emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1877,8 +2004,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 		       lj_ir_kint(J, frofs-(8<<LJ_FR2)));
 	t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
 	aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
-	tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+	tr = lj_record_vload(J, aref, 0, t);
       }
       J->base[dst-2-LJ_FR2] = tr;
       J->maxslot = dst-1-LJ_FR2;
@@ -1935,9 +2061,9 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
     tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
 		      lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
     do {
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
     } while (trp <= top);
-    tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+    tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
     J->maxslot = (BCReg)(xbase - J->base);
     if (xbase == base) return tr;  /* Return simple concatenation result. */
     /* Pass partial result. */
@@ -2050,7 +2176,7 @@ void lj_record_ins(jit_State *J)
   /* Need snapshot before recording next bytecode (e.g. after a store). */
   if (J->needsnap) {
     J->needsnap = 0;
-    lj_snap_purge(J);
+    if (J->pt) lj_snap_purge(J);
     lj_snap_add(J);
     J->mergesnap = 1;
   }
@@ -2423,6 +2549,9 @@ void lj_record_ins(jit_State *J)
   case BC_ITERL:
     rec_loop_interp(J, pc, rec_iterl(J, *pc));
     break;
+  case BC_ITERN:
+    rec_loop_interp(J, pc, rec_itern(J, ra, rb));
+    break;
   case BC_LOOP:
     rec_loop_interp(J, pc, rec_loop(J, ra, 1));
     break;
@@ -2451,6 +2580,10 @@ void lj_record_ins(jit_State *J)
       J->maxslot = ra;  /* Shrink used slots. */
     break;
 
+  case BC_ISNEXT:
+    rec_isnext(J, ra);
+    break;
+
   /* -- Function headers -------------------------------------------------- */
 
   case BC_FUNCF:
@@ -2480,8 +2613,6 @@ void lj_record_ins(jit_State *J)
       break;
     }
     /* fallthrough */
-  case BC_ITERN:
-  case BC_ISNEXT:
   case BC_UCLO:
   case BC_FNEW:
     setintV(&J->errinfo, (int32_t)op);
@@ -2533,6 +2664,13 @@ static const BCIns *rec_setup_root(jit_State *J)
     lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
     J->bc_min = pc;
     break;
+  case BC_ITERN:
+    lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN");
+    J->maxslot = ra;
+    J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns);
+    J->bc_min = pc+2 + bc_j(pc[1]);
+    J->state = LJ_TRACE_RECORD_1ST;  /* Record the first ITERN, too. */
+    break;
   case BC_LOOP:
     /* Only check BC range for real loops, but not for "repeat until true". */
     pcj = pc + bc_j(ins);
@@ -2629,9 +2767,14 @@ void lj_record_setup(jit_State *J)
     }
     lj_snap_replay(J, T);
   sidecheck:
-    if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
-	T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
-				    J->param[JIT_P_tryside]) {
+    if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
+	 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
+				     J->param[JIT_P_tryside])) {
+      if (bc_op(*J->pc) == BC_JLOOP) {
+	BCIns startins = traceref(J, bc_d(*J->pc))->startins;
+	if (bc_op(startins) == BC_ITERN)
+	  rec_itern(J, bc_a(startins), bc_b(startins));
+      }
       lj_record_stop(J, LJ_TRLINK_INTERP, 0);
     }
   } else {  /* Root trace. */
@@ -2640,6 +2783,7 @@ void lj_record_setup(jit_State *J)
     J->pc = rec_setup_root(J);
     /* Note: the loop instruction itself is recorded at the end and not
     ** at the start! So snapshot #0 needs to point to the *next* instruction.
+    ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST.
     */
     lj_snap_add(J);
     if (bc_op(J->cur.startins) == BC_FORL)

+ 2 - 0
libs/LuaJIT/src/lj_record.h

@@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
 			     cTValue *av, cTValue *bv);
 LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
 LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
+LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t);
 
 LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
 LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
@@ -37,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults);
 
 LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm);
 LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix);
+LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix);
 
 LJ_FUNC void lj_record_ins(jit_State *J);
 LJ_FUNC void lj_record_setup(jit_State *J);

+ 538 - 0
libs/LuaJIT/src/lj_serialize.c

@@ -0,0 +1,538 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_serialize_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#endif
+#if LJ_HASJIT
+#include "lj_ir.h"
+#endif
+#include "lj_serialize.h"
+
+/* Tags for internal serialization format.
+**
+** Tags are written with the U124 encoding. Tags below SER_TAG_STR select
+** a type; tags >= SER_TAG_STR denote a string whose length is
+** (tag - SER_TAG_STR). SER_TAG_TAB+0..5 are table headers: bit 0 is set
+** when a hash entry count follows, +2 means the array entries start at
+** slot 0 and +4 means they start at slot 1 (slot 0 is nil).
+** SER_TAG_DICT_MT prefixes a table header with a metatable dictionary
+** index, SER_TAG_DICT_STR replaces a string with a string dictionary index.
+** The SER_TAG_0x13..SER_TAG_0x1f names are placeholders; the decoder
+** rejects them.
+*/
+enum {
+  SER_TAG_NIL,		/* 0x00 */
+  SER_TAG_FALSE,
+  SER_TAG_TRUE,
+  SER_TAG_NULL,
+  SER_TAG_LIGHTUD32,
+  SER_TAG_LIGHTUD64,
+  SER_TAG_INT,
+  SER_TAG_NUM,
+  SER_TAG_TAB,		/* 0x08 */
+  SER_TAG_DICT_MT = SER_TAG_TAB+6,
+  SER_TAG_DICT_STR,
+  SER_TAG_INT64,	/* 0x10 */
+  SER_TAG_UINT64,
+  SER_TAG_COMPLEX,
+  SER_TAG_0x13,
+  SER_TAG_0x14,
+  SER_TAG_0x15,
+  SER_TAG_0x16,
+  SER_TAG_0x17,
+  SER_TAG_0x18,		/* 0x18 */
+  SER_TAG_0x19,
+  SER_TAG_0x1a,
+  SER_TAG_0x1b,
+  SER_TAG_0x1c,
+  SER_TAG_0x1d,
+  SER_TAG_0x1e,
+  SER_TAG_0x1f,
+  SER_TAG_STR,		/* 0x20 + str->len */
+};
+LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);  /* Decoder tests (tp & 1) for the hash flag. */
+
+/* -- Helper functions ---------------------------------------------------- */
+
+static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
+{  /* Make sure sz bytes can be written at w. Returns the write pointer
+  ** to use from now on, which differs from w if lj_buf_more2 was called.
+  */
+  if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {  /* Less than sz bytes before sbx->e. */
+    sbx->w = w;  /* Store the current write position before calling out. */
+    w = lj_buf_more2((SBuf *)sbx, sz);  /* Presumably grows the buffer; confirm in lj_buf.c. */
+  }
+  return w;
+}
+
+/* Write U124 to buffer.
+**
+** Slow path for values >= 0xe0 (the inline wrapper handles smaller ones).
+** Values below 0x1fe0 take two bytes: 0xe0 | high bits, then the low byte,
+** both taken after subtracting 0xe0. Everything else takes five bytes:
+** 0xff followed by the 32 bit value in little-endian byte order.
+** The space at w is not checked here. Returns the advanced write pointer.
+*/
+static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
+{
+  if (v < 0x1fe0) {
+    v -= 0xe0;
+    *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
+  } else {
+    *w++ = (char)0xff;
+#if LJ_BE
+    v = lj_bswap(v);  /* Stored little-endian regardless of host order. */
+#endif
+    memcpy(w, &v, 4); w += 4;
+  }
+  return w;
+}
+
+static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
+{  /* Write v in U124 encoding at w and return the advanced pointer.
+  ** Values below 0xe0 are a single byte; larger ones go to the
+  ** out-of-line helper. The space at w is not checked here.
+  */
+  if (LJ_LIKELY(v < 0xe0)) {
+    *w++ = (char)v;
+    return w;
+  } else {
+    return serialize_wu124_(w, v);
+  }
+}
+
+static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
+{  /* Read the rest of a multi-byte U124 value. On entry *pv holds the
+  ** first byte, r points behind it and w is the end of the readable data.
+  ** Returns the advanced read pointer and stores the decoded value in
+  ** *pv, or returns NULL (leaving *pv untouched) if the data is truncated.
+  */
+  uint32_t v = *pv;
+  if (v != 0xff) {  /* Two-byte form: 5 high bits in the first byte. */
+    if (r >= w) return NULL;
+    v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
+  } else {  /* Five-byte form: 0xff followed by a 32 bit value. */
+    if (r + 4 > w) return NULL;
+    v = lj_getu32(r); r += 4;
+#if LJ_BE
+    v = lj_bswap(v);  /* The stored value is little-endian. */
+#endif
+  }
+  *pv = v;
+  return r;
+}
+
+static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
+{  /* Read a U124 value from r, where w is the end of the readable data.
+  ** Stores the value in *pv and returns the advanced read pointer.
+  ** Returns NULL if no byte is left or a multi-byte value is truncated;
+  ** in the latter case *pv still holds the first byte.
+  */
+  if (LJ_LIKELY(r < w)) {
+    uint32_t v = *(uint8_t *)r; r++;
+    *pv = v;
+    if (LJ_UNLIKELY(v >= 0xe0)) {  /* First bytes >= 0xe0 start a longer form. */
+      r = serialize_ru124_(r, w, pv);
+    }
+    return r;
+  }
+  return NULL;
+}
+
+/* Prepare string dictionary for use (once).
+**
+** Does nothing if the table already has a hash part or lj_tab_len()
+** reports zero. Otherwise the hash part is sized for len entries and each
+** string in array slots 1..len (bounded by asize) is added as a hash key
+** whose value holds its zero-based index (i-1) as a raw 64 bit integer.
+** Repeated strings keep their first index. Slots holding false are
+** skipped; any other non-string raises LJ_ERR_BUFFER_BADOPT.
+*/
+void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
+{
+  if (!dict->hmask) {  /* No hash part means not prepared, yet. */
+    MSize i, len = lj_tab_len(dict);
+    if (!len) return;
+    lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
+    for (i = 1; i <= len && i < dict->asize; i++) {
+      cTValue *o = arrayslot(dict, i);
+      if (tvisstr(o)) {
+	if (!lj_tab_getstr(dict, strV(o))) {  /* Ignore dups. */
+	  lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
+	}
+      } else if (!tvisfalse(o)) {
+	lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+      }
+    }
+  }
+}
+
+/* Prepare metatable dictionary for use (once).
+**
+** Same procedure as for the string dictionary, but the entries must be
+** tables: each table in array slots 1..len (bounded by asize) is added as
+** a hash key whose value holds its zero-based index (i-1) as a raw 64 bit
+** integer. Repeated tables keep their first index. Slots holding false
+** are skipped; any other non-table raises LJ_ERR_BUFFER_BADOPT.
+*/
+void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
+{
+  if (!dict->hmask) {  /* No hash part means not prepared, yet. */
+    MSize i, len = lj_tab_len(dict);
+    if (!len) return;
+    lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
+    for (i = 1; i <= len && i < dict->asize; i++) {
+      cTValue *o = arrayslot(dict, i);
+      if (tvistab(o)) {
+	if (tvisnil(lj_tab_get(L, dict, o))) {  /* Ignore dups. */
+	  lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
+	}
+      } else if (!tvisfalse(o)) {
+	lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+      }
+    }
+  }
+}
+
+/* -- Internal serializer ------------------------------------------------- */
+
+/* Put serialized object into buffer.
+**
+** w is the current write position inside sbx; the advanced write position
+** is returned (sbx->w is only updated when more space has to be requested).
+** Handles strings, integers, numbers, nil/false/true, tables (recursively,
+** limited by sbx->depth), light userdata and, with the FFI, 64 bit integer
+** and 16 byte complex cdata. Any other type raises LJ_ERR_BUFFER_BADENC.
+** Multi-byte values are written in little-endian byte order.
+*/
+static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
+{
+  if (LJ_LIKELY(tvisstr(o))) {
+    const GCstr *str = strV(o);
+    MSize len = str->len;
+    w = serialize_more(w, sbx, 5+len);  /* Up to 5 bytes for the U124 tag. */
+    w = serialize_wu124(w, SER_TAG_STR + len);
+    w = lj_buf_wmem(w, strdata(str), len);
+  } else if (tvisint(o)) {
+    uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
+    w = serialize_more(w, sbx, 1+4);
+    *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
+  } else if (tvisnum(o)) {
+    uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
+    w = serialize_more(w, sbx, 1+sizeof(lua_Number));
+    *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
+  } else if (tvispri(o)) {
+    w = serialize_more(w, sbx, 1);
+    *w++ = (char)(SER_TAG_NIL + ~itype(o));  /* Decoder inverts this with setpriV(o, ~tp). */
+  } else if (tvistab(o)) {
+    const GCtab *t = tabV(o);
+    uint32_t narray = 0, nhash = 0, one = 2;  /* one: tag offset 2 or 4 for the array part. */
+    if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+    sbx->depth--;
+    if (t->asize > 0) {  /* Determine max. length of array part. */
+      ptrdiff_t i;
+      TValue *array = tvref(t->array);
+      for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
+	if (!tvisnil(&array[i]))
+	  break;
+      narray = (uint32_t)(i+1);
+      if (narray && tvisnil(&array[0])) one = 4;  /* Slot 0 is nil: entries start at slot 1. */
+    }
+    if (t->hmask > 0) {  /* Count number of used hash slots. */
+      uint32_t i, hmask = t->hmask;
+      Node *node = noderef(t->node);
+      for (i = 0; i <= hmask; i++)
+	nhash += !tvisnil(&node[i].val);
+    }
+    /* Write metatable index. Nothing is written if the metatable is not
+    ** found in the metatable dictionary.
+    */
+    if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
+      TValue mto;
+      Node *n;
+      settabV(sbufL(sbx), &mto, tabref(t->metatable));
+      n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
+      do {
+	if (n->key.u64 == mto.u64) {
+	  uint32_t idx = n->val.u32.lo;  /* Zero-based index stored by the dict prep. */
+	  w = serialize_more(w, sbx, 1+5);
+	  *w++ = SER_TAG_DICT_MT;
+	  w = serialize_wu124(w, idx);
+	  break;
+	}
+      } while ((n = nextnode(n)));
+    }
+    /* Write number of array slots and hash slots. */
+    w = serialize_more(w, sbx, 1+2*5);
+    *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
+    if (narray) w = serialize_wu124(w, narray);
+    if (nhash) w = serialize_wu124(w, nhash);
+    if (narray) {  /* Write array entries. */
+      cTValue *oa = tvref(t->array) + (one >> 2);  /* Skip slot 0 if one == 4. */
+      cTValue *oe = tvref(t->array) + narray;
+      while (oa < oe) w = serialize_put(w, sbx, oa++);
+    }
+    if (nhash) {  /* Write hash entries. */
+      const Node *node = noderef(t->node) + t->hmask;  /* Start at the last node, walk down. */
+      GCtab *dict_str = tabref(sbx->dict_str);
+      if (LJ_UNLIKELY(dict_str)) {
+	for (;; node--)
+	  if (!tvisnil(&node->val)) {
+	    if (LJ_LIKELY(tvisstr(&node->key))) {
+	      /* Inlined lj_tab_getstr is 30% faster. */
+	      const GCstr *str = strV(&node->key);
+	      Node *n = hashstr(dict_str, str);
+	      do {
+		if (tvisstr(&n->key) && strV(&n->key) == str) {
+		  uint32_t idx = n->val.u32.lo;
+		  w = serialize_more(w, sbx, 1+5);
+		  *w++ = SER_TAG_DICT_STR;
+		  w = serialize_wu124(w, idx);
+		  break;
+		}
+		n = nextnode(n);
+		if (!n) {  /* Not in the dictionary: write as a plain string. */
+		  MSize len = str->len;
+		  w = serialize_more(w, sbx, 5+len);
+		  w = serialize_wu124(w, SER_TAG_STR + len);
+		  w = lj_buf_wmem(w, strdata(str), len);
+		  break;
+		}
+	      } while (1);
+	    } else {
+	      w = serialize_put(w, sbx, &node->key);
+	    }
+	    w = serialize_put(w, sbx, &node->val);
+	    if (--nhash == 0) break;  /* Stop after the counted number of entries. */
+	  }
+      } else {
+	for (;; node--)
+	  if (!tvisnil(&node->val)) {
+	    w = serialize_put(w, sbx, &node->key);
+	    w = serialize_put(w, sbx, &node->val);
+	    if (--nhash == 0) break;  /* Stop after the counted number of entries. */
+	  }
+      }
+    }
+    sbx->depth++;
+#if LJ_HASFFI
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(sbufL(sbx));
+    CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
+    uint8_t *sp = cdataptr(cdataV(o));
+    if (ctype_isinteger(s->info) && s->size == 8) {
+      w = serialize_more(w, sbx, 1+8);
+      *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
+#if LJ_BE
+      { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
+#else
+      memcpy(w, sp, 8);
+#endif
+      w += 8;
+    } else if (ctype_iscomplex(s->info) && s->size == 16) {
+      w = serialize_more(w, sbx, 1+16);
+      *w++ = SER_TAG_COMPLEX;
+#if LJ_BE
+      {  /* Only swap the doubles. The re/im order stays the same. */
+	uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
+	u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
+      }
+#else
+      memcpy(w, sp, 16);
+#endif
+      w += 16;
+    } else {
+      goto badenc;  /* NYI other cdata */
+    }
+#endif
+  } else if (tvislightud(o)) {
+    uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
+    w = serialize_more(w, sbx, 1+sizeof(ud));
+    if (ud == 0) {
+      *w++ = SER_TAG_NULL;  /* A NULL pointer is a tag without payload. */
+    } else if (LJ_32 || checku32(ud)) {
+#if LJ_BE && LJ_64
+      ud = lj_bswap64(ud);
+#elif LJ_BE
+      ud = lj_bswap(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
+#if LJ_64
+    } else {
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
+#endif
+    }
+  } else {
+    /* NYI userdata */
+#if LJ_HASFFI
+  badenc:
+#endif
+    lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
+  }
+  return w;
+}
+
+/* Get serialized object from buffer.
+**
+** Decodes one object starting at r into *o and returns the read position
+** behind it. Readable data ends at sbx->w. Tables are decoded recursively,
+** limited by sbx->depth. Raises LJ_ERR_BUFFER_EOB on truncated input,
+** LJ_ERR_BUFFER_BADDEC on an unknown tag, LJ_ERR_BUFFER_BADDICTX on an
+** invalid dictionary index and LJ_ERR_BUFFER_DUPKEY on a repeated hash key.
+*/
+static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
+{
+  char *w = sbx->w;
+  uint32_t tp;
+  r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+  if (LJ_LIKELY(tp >= SER_TAG_STR)) {
+    uint32_t len = tp - SER_TAG_STR;
+    if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
+    setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
+    r += len;
+  } else if (tp == SER_TAG_INT) {
+    if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+    setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
+    r += 4;
+  } else if (tp == SER_TAG_NUM) {
+    if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+    memcpy(o, r, 8); r += 8;
+#if LJ_BE
+    o->u64 = lj_bswap64(o->u64);
+#endif
+    if (!tvisnum(o)) setnanV(o);  /* Fix non-canonical NaNs. */
+  } else if (tp <= SER_TAG_TRUE) {
+    setpriV(o, ~tp);  /* Tags 0..2 are nil, false, true. */
+  } else if (tp == SER_TAG_DICT_STR) {
+    GCtab *dict_str;
+    uint32_t idx;
+    r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+    idx++;  /* Stored index is zero-based, array slots start at 1. */
+    dict_str = tabref(sbx->dict_str);
+    if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
+      copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
+    else
+      lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+  } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
+    uint32_t narray = 0, nhash = 0;
+    GCtab *t, *mt = NULL;
+    if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+    sbx->depth--;
+    if (tp == SER_TAG_DICT_MT) {  /* Metatable index precedes the table header. */
+      GCtab *dict_mt;
+      uint32_t idx;
+      r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+      idx++;  /* Stored index is zero-based, array slots start at 1. */
+      dict_mt = tabref(sbx->dict_mt);
+      if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
+	mt = tabV(arrayslot(dict_mt, idx));
+      else
+	lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+      r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+      if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
+    }
+    if (tp >= SER_TAG_TAB+2) {  /* Array count present. */
+      r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    if ((tp & 1)) {  /* Hash count present. */
+      r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
+    /* NOBARRIER: The table is new (marked white). */
+    setgcref(t->metatable, obj2gco(mt));
+    settabV(sbufL(sbx), o, t);
+    if (narray) {
+      TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);  /* Skip slot 0 for the +4 variants. */
+      TValue *oe = tvref(t->array) + narray;
+      while (oa < oe) r = serialize_get(r, sbx, oa++);
+    }
+    if (nhash) {
+      do {
+	TValue k, *v;
+	r = serialize_get(r, sbx, &k);
+	v = lj_tab_set(sbufL(sbx), t, &k);
+	if (LJ_UNLIKELY(!tvisnil(v)))  /* Slot already holds a value. */
+	  lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
+	r = serialize_get(r, sbx, v);
+      } while (--nhash);
+    }
+    sbx->depth++;
+#if LJ_HASFFI
+  } else if (tp >= SER_TAG_INT64 &&  tp <= SER_TAG_COMPLEX) {
+    uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
+    GCcdata *cd;
+    if (LJ_UNLIKELY(r + sz > w)) goto eob;
+    cd = lj_cdata_new_(sbufL(sbx),
+	   tp == SER_TAG_INT64 ? CTID_INT64 :
+	   tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
+	   sz);
+    memcpy(cdataptr(cd), r, sz); r += sz;
+#if LJ_BE
+    *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
+    if (sz == 16)
+      ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
+#endif
+    if (sz == 16) {  /* Fix non-canonical NaNs. */
+      TValue *cdo = (TValue *)cdataptr(cd);
+      if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
+      if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
+    }
+    setcdataV(sbufL(sbx), o, cd);
+#endif
+  } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
+    uintptr_t ud = 0;  /* Stays 0 for SER_TAG_NULL, which has no payload. */
+    if (tp == SER_TAG_LIGHTUD32) {
+      if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+      ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
+      r += 4;
+    }
+#if LJ_64
+    else if (tp == SER_TAG_LIGHTUD64) {
+      if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+      memcpy(&ud, r, 8); r += 8;
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+    }
+    setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
+#else
+    setrawlightudV(o, (void *)ud);
+#endif
+  } else {
+badtag:
+    lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
+  }
+  return r;
+eob:
+  lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
+  return NULL;
+}
+
+/* -- External serialization API ------------------------------------------ */
+
+/* Encode to buffer.
+**
+** Resets the nesting depth limit, appends the serialized form of o at
+** sbx->w and stores the new write position in sbx->w. Returns sbx.
+*/
+SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
+{
+  sbx->depth = LJ_SERIALIZE_DEPTH;
+  sbx->w = serialize_put(sbx->w, sbx, o);
+  return sbx;
+}
+
+/* Decode from buffer.
+**
+** Resets the nesting depth limit and decodes one object starting at
+** sbx->r into *o. Returns the read position behind the decoded object;
+** sbx->r itself is not updated here.
+*/
+char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
+{
+  sbx->depth = LJ_SERIALIZE_DEPTH;
+  return serialize_get(sbx->r, sbx, o);
+}
+
+/* Stand-alone encoding, borrowing from global temporary buffer.
+**
+** Serializes o through a zero-initialized local SBufExt set up to borrow
+** G(L)->tmpbuf and returns the encoded bytes as a new string.
+*/
+GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
+{
+  SBufExt sbx;
+  char *w;
+  memset(&sbx, 0, sizeof(SBufExt));  /* No dictionaries are set (fields stay zero). */
+  lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
+  sbx.depth = LJ_SERIALIZE_DEPTH;
+  w = serialize_put(sbx.w, &sbx, o);
+  return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));  /* Bytes from buffer start to final w. */
+}
+
+/* Stand-alone decoding, copy-on-write from string.
+**
+** Decodes exactly one object from the contents of str into *o. Raises
+** LJ_ERR_BUFFER_LEFTOV if the decoder does not stop exactly at the end
+** of the data (i.e. bytes are left over).
+*/
+void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
+{
+  SBufExt sbx;
+  char *r;
+  memset(&sbx, 0, sizeof(SBufExt));  /* No dictionaries are set (fields stay zero). */
+  lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
+  /* No need to set sbx.cowref here. */
+  sbx.depth = LJ_SERIALIZE_DEPTH;
+  r = serialize_get(sbx.r, &sbx, o);
+  if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
+}
+
+#if LJ_HASJIT
+/* Peek into buffer to find the result IRType for specialization purposes.
+**
+** Reads the tag at sbx->r without changing the buffer's read position
+** and maps it to an IRT_* value. Returns IRT_NIL if the tag itself cannot
+** be read. Unassigned tags fall into the default case and yield IRT_STR,
+** just like the string tags (>= SER_TAG_STR).
+*/
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
+{
+  uint32_t tp;
+  if (serialize_ru124(sbx->r, sbx->w, &tp)) {
+    /* This must match the handling of all tags in the decoder above. */
+    switch (tp) {
+    case SER_TAG_NIL: return IRT_NIL;
+    case SER_TAG_FALSE: return IRT_FALSE;
+    case SER_TAG_TRUE: return IRT_TRUE;
+    case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
+      return IRT_LIGHTUD;
+    case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
+    case SER_TAG_NUM: return IRT_NUM;
+    case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
+    case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
+    case SER_TAG_DICT_MT:
+      return IRT_TAB;
+    case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
+      return IRT_CDATA;
+    case SER_TAG_DICT_STR:
+    default:
+      return IRT_STR;
+    }
+  }
+  return IRT_NIL;  /* Will fail on actual decode. */
+}
+#endif
+
+#endif

+ 28 - 0
libs/LuaJIT/src/lj_serialize.h

@@ -0,0 +1,28 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_SERIALIZE_H
+#define _LJ_SERIALIZE_H
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+
+#if LJ_HASBUFFER
+
+#define LJ_SERIALIZE_DEPTH	100	/* Default depth. */
+
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
+LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
+LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
+LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
+LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
+#if LJ_HASJIT
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
+#endif
+
+#endif
+
+#endif

+ 54 - 9
libs/LuaJIT/src/lj_snap.c

@@ -171,6 +171,7 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
   nent += snapshot_framelinks(J, p + nent, &snap->topslot);
   snap->mapofs = (uint32_t)nsnapmap;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = (uint8_t)nslots;
   snap->count = 0;
   J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
@@ -251,7 +252,12 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
       BCReg minslot = bc_a(ins);
       if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
       else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
-      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
+      else if (op == BC_UCLO) {
+	ptrdiff_t delta = bc_j(ins);
+	if (delta < 0) return maxslot;  /* Prevent loop. */
+	pc += delta;
+	break;
+      }
       for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
       return minslot < maxslot ? minslot : maxslot;
       }
@@ -275,7 +281,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
        if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
        break;
     case BCMbase:
-      if (op >= BC_CALLM && op <= BC_VARG) {
+      if (op >= BC_CALLM && op <= BC_ITERN) {
 	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
 		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
 	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
@@ -286,6 +292,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
 	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
 	  return 0;
 	}
+      } else if (op == BC_VARG) {
+	return maxslot;  /* NYI: punt. */
       } else if (op == BC_KNIL) {
 	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
       } else if (op == BC_TSETM) {
@@ -304,15 +312,45 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
   return 0;  /* unreachable */
 }
 
+/* Mark slots used by upvalues of child prototypes as used.
+**
+** For a prototype with the PROTO_CHILD flag, walks its sizekgc GC
+** constants and, for every constant that is itself a prototype, clears
+** udf[] for each upvalue descriptor that has PROTO_UV_LOCAL set. The slot
+** number is the low 8 bits of the descriptor. A zero udf[] entry keeps
+** the slot from being purged by the callers.
+** NOTE(review): defined without 'static' and no prototype is visible in
+** this diff -- confirm whether external linkage is intended.
+*/
+void snap_useuv(GCproto *pt, uint8_t *udf)
+{
+  /* This is a coarse check, because it's difficult to correlate the lifetime
+  ** of slots and closures. But the number of false positives is quite low.
+  ** A false positive may cause a slot not to be purged, which is just
+  ** a missed optimization.
+  */
+  if ((pt->flags & PROTO_CHILD)) {
+    ptrdiff_t i, j, n = pt->sizekgc;
+    GCRef *kr = mref(pt->k, GCRef) - 1;  /* First GC constant; the loop walks downwards. */
+    for (i = 0; i < n; i++, kr--) {
+      GCobj *o = gcref(*kr);
+      if (o->gch.gct == ~LJ_TPROTO) {  /* Only prototype constants matter. */
+	for (j = 0; j < gco2pt(o)->sizeuv; j++) {
+	  uint32_t v = proto_uv(gco2pt(o))[j];
+	  if ((v & PROTO_UV_LOCAL)) {
+	    udf[(v & 0xff)] = 0;  /* Mark the slot as used. */
+	  }
+	}
+      }
+    }
+  }
+}
+
 /* Purge dead slots before the next snapshot. */
 void lj_snap_purge(jit_State *J)
 {
   uint8_t udf[SNAP_USEDEF_SLOTS];
-  BCReg maxslot = J->maxslot;
-  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
-  for (; s < maxslot; s++)
-    if (udf[s] != 0)
-      J->base[s] = 0;  /* Purge dead slots. */
+  BCReg s, maxslot = J->maxslot;
+  if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams)
+    maxslot = J->pt->numparams;
+  s = snap_usedef(J, udf, J->pc, maxslot);
+  if (s < maxslot) {
+    snap_useuv(J->pt, udf);
+    for (; s < maxslot; s++)
+      if (udf[s] != 0)
+	J->base[s] = 0;  /* Purge dead slots. */
+  }
 }
 
 /* Shrink last snapshot. */
@@ -325,6 +363,7 @@ void lj_snap_shrink(jit_State *J)
   BCReg maxslot = J->maxslot;
   BCReg baseslot = J->baseslot;
   BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
+  if (minslot < maxslot) snap_useuv(J->pt, udf);
   maxslot += baseslot;
   minslot += baseslot;
   snap->nslots = (uint8_t)maxslot;
@@ -424,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
   MSize j;
   for (j = 0; j < nmax; j++)
     if (snap_ref(map[j]) == ref)
-      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
+      return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
   return 0;
 }
 
@@ -499,10 +538,12 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
       uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
       if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
       if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
+      if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
       tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
     }
   setslot:
-    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
+    /* Same as TREF_* flags. */
+    J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
     J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
     if ((sn & SNAP_FRAME))
       J->baseslot = s+1;
@@ -922,6 +963,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
 	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
 	L->base = o+1;
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	/* A IRT_INT key index slot is restored as a number. Undo this. */
+	o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
+	o->u32.hi = LJ_KEYINDEX;
       }
     }
   }

+ 1 - 0
libs/LuaJIT/src/lj_state.c

@@ -156,6 +156,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
   fixstring(lj_err_str(L, LJ_ERR_ERRMEM));  /* Preallocate memory error msg. */
   g->gc.threshold = 4*g->gc.total;
   lj_trace_initstate(g);
+  lj_err_verify();
   return NULL;
 }
 

+ 172 - 38
libs/LuaJIT/src/lj_strfmt.c

@@ -9,11 +9,17 @@
 #define LUA_CORE
 
 #include "lj_obj.h"
+#include "lj_err.h"
 #include "lj_buf.h"
 #include "lj_str.h"
+#include "lj_meta.h"
 #include "lj_state.h"
 #include "lj_char.h"
 #include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
+#include "lj_lib.h"
 
 /* -- Format parser ------------------------------------------------------- */
 
@@ -161,6 +167,10 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
   if (tvisstr(o)) {
     *lenp = strV(o)->len;
     return strVdata(o);
+  } else if (tvisbuf(o)) {
+    SBufExt *sbx = bufV(o);
+    *lenp = sbufxlen(sbx);
+    return sbx->r;
   } else if (tvisint(o)) {
     sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
   } else if (tvisnum(o)) {
@@ -169,7 +179,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
     return NULL;
   }
   *lenp = sbuflen(sb);
-  return sbufB(sb);
+  return sb->b;
 }
 
 /* -- Unformatted conversions to buffer ----------------------------------- */
@@ -177,7 +187,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
 /* Add integer to buffer. */
 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
 {
-  setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
+  sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
   return sb;
 }
 
@@ -191,73 +201,86 @@ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
 
 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
 {
-  setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
+  sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
   return sb;
 }
 
 /* Add quoted string to buffer. */
-SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
 {
-  const char *s = strdata(str);
-  MSize len = str->len;
   lj_buf_putb(sb, '"');
   while (len--) {
     uint32_t c = (uint32_t)(uint8_t)*s++;
-    char *p = lj_buf_more(sb, 4);
+    char *w = lj_buf_more(sb, 4);
     if (c == '"' || c == '\\' || c == '\n') {
-      *p++ = '\\';
+      *w++ = '\\';
     } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
       uint32_t d;
-      *p++ = '\\';
+      *w++ = '\\';
       if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
-	*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+	*w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
 	goto tens;
       } else if (c >= 10) {
       tens:
-	d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
+	d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
       }
       c += '0';
     }
-    *p++ = (char)c;
-    setsbufP(sb, p);
+    *w++ = (char)c;
+    sb->w = w;
   }
   lj_buf_putb(sb, '"');
   return sb;
 }
 
+#if LJ_HASJIT
+SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+{  /* GCstr wrapper: passes the string's data and length to strfmt_putquotedlen. */
+  return strfmt_putquotedlen(sb, strdata(str), str->len);
+}
+#endif
+
 /* -- Formatted conversions to buffer ------------------------------------- */
 
 /* Add formatted char to buffer. */
 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
 {
   MSize width = STRFMT_WIDTH(sf);
-  char *p = lj_buf_more(sb, width > 1 ? width : 1);
-  if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
-  while (width-- > 1) *p++ = ' ';
-  if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, width > 1 ? width : 1);
+  if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
+  while (width-- > 1) *w++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
+  sb->w = w;
   return sb;
 }
 
 /* Add formatted string to buffer. */
-SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
 {
-  MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
   MSize width = STRFMT_WIDTH(sf);
-  char *p = lj_buf_more(sb, width > len ? width : len);
-  if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
-  while (width-- > len) *p++ = ' ';
-  if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
-  setsbufP(sb, p);
+  char *w;
+  if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
+  w = lj_buf_more(sb, width > len ? width : len);
+  if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+  while (width-- > len) *w++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+  sb->w = w;
   return sb;
 }
 
+#if LJ_HASJIT
+SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+{  /* GCstr wrapper: passes the string's data and length to strfmt_putfstrlen. */
+  return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
+}
+#endif
+
 /* Add formatted signed/unsigned integer to buffer. */
 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
 {
-  char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
+  char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
 #ifdef LUA_USE_ASSERT
-  char *ps;
+  char *ws;
 #endif
   MSize prefix = 0, len, prec, pprec, width, need;
 
@@ -301,27 +324,27 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
   width = STRFMT_WIDTH(sf);
   pprec = prec + (prefix >> 8);
   need = width > pprec ? width : pprec;
-  p = lj_buf_more(sb, need);
+  w = lj_buf_more(sb, need);
 #ifdef LUA_USE_ASSERT
-  ps = p;
+  ws = w;
 #endif
 
   /* Format number with leading/trailing whitespace and zeros. */
   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
-    while (width-- > pprec) *p++ = ' ';
+    while (width-- > pprec) *w++ = ' ';
   if (prefix) {
-    if ((char)prefix >= 'X') *p++ = '0';
-    *p++ = (char)prefix;
+    if ((char)prefix >= 'X') *w++ = '0';
+    *w++ = (char)prefix;
   }
   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
-    while (width-- > pprec) *p++ = '0';
-  while (prec-- > len) *p++ = '0';
-  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
+    while (width-- > pprec) *w++ = '0';
+  while (prec-- > len) *w++ = '0';
+  while (q < buf + sizeof(buf)) *w++ = *q++;  /* Add number itself. */
   if ((sf & STRFMT_F_LEFT))
-    while (width-- > pprec) *p++ = ' ';
+    while (width-- > pprec) *w++ = ' ';
 
-  lj_assertX(need == (MSize)(p - ps), "miscalculated format size");
-  setsbufP(sb, p);
+  lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
+  sb->w = w;
   return sb;
 }
 
@@ -346,6 +369,117 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
   return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
 }
 
+/* Format stack arguments to buffer.
+**
+** arg is the 1-based stack index of the format string; the values to be
+** formatted are taken from the following stack slots. Output is appended
+** to sb. A missing value raises LJ_ERR_NOVAL, a bad format raises
+** LJ_ERR_STRFMT.
+**
+** retry controls __tostring handling for string conversions: the
+** metamethod is only looked up if retry >= 0. After calling it, the result
+** replaces the argument on the stack. If retry < 2, retry is then set to 1
+** and the output for this argument is skipped -- presumably so the caller
+** restarts the formatting; confirm against the callers.
+** Returns the (possibly updated) retry value.
+*/
+int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
+{
+  int narg = (int)(L->top - L->base);
+  GCstr *fmt = lj_lib_checkstr(L, arg);
+  FormatState fs;
+  SFormat sf;
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT,
+		     strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
+      TValue *o = &L->base[arg++];  /* Next value; arg now is its 1-based index. */
+      if (arg > narg)
+	lj_err_arg(L, arg, LJ_ERR_NOVAL);
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(o)) {
+	  int32_t k = intV(o);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	  break;
+	}
+#if LJ_HASFFI
+	if (tviscdata(o)) {  /* 64 bit integer cdata is formatted without conversion. */
+	  GCcdata *cd = cdataV(o);
+	  if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+	    lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+	    break;
+	  }
+	}
+#endif
+	lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+	break;
+      case STRFMT_UINT:
+	if (tvisint(o)) {
+	  lj_strfmt_putfxint(sb, sf, intV(o));
+	  break;
+	}
+#if LJ_HASFFI
+	if (tviscdata(o)) {  /* 64 bit integer cdata is formatted without conversion. */
+	  GCcdata *cd = cdataV(o);
+	  if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+	    lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+	    break;
+	  }
+	}
+#endif
+	lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
+	break;
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
+	break;
+      case STRFMT_STR: {
+	MSize len;
+	const char *s;
+	cTValue *mo;
+	if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
+	    !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+	  /* Call __tostring metamethod once. */
+	  copyTV(L, L->top++, mo);
+	  copyTV(L, L->top++, o);
+	  lua_call(L, 1, 1);
+	  o = &L->base[arg-1];  /* Stack may have been reallocated. */
+	  copyTV(L, o, --L->top);  /* Replace inline for retry. */
+	  if (retry < 2) {  /* Global buffer may have been overwritten. */
+	    retry = 1;
+	    break;  /* Leaves the switch; nothing is emitted for this argument. */
+	  }
+	}
+	if (LJ_LIKELY(tvisstr(o))) {
+	  len = strV(o)->len;
+	  s = strVdata(o);
+#if LJ_HASBUFFER
+	} else if (tvisbuf(o)) {
+	  SBufExt *sbx = bufV(o);
+	  if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
+	  len = sbufxlen(sbx);
+	  s = sbx->r;
+#endif
+	} else {
+	  GCstr *str = lj_strfmt_obj(L, o);
+	  len = str->len;
+	  s = strdata(str);
+	}
+	if ((sf & STRFMT_T_QUOTED))
+	  strfmt_putquotedlen(sb, s, len);  /* No formatting. */
+	else
+	  strfmt_putfstrlen(sb, sf, s, len);
+	break;
+	}
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
+	break;
+      default:
+	lj_assertL(0, "bad string format type");
+	break;
+      }
+    }
+  }
+  return retry;
+}
+
 /* -- Conversions to strings ---------------------------------------------- */
 
 /* Convert integer to string. */

+ 5 - 0
libs/LuaJIT/src/lj_strfmt.h

@@ -95,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
 #endif
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
+#if LJ_HASJIT
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
+#endif
 
 /* Formatted conversions to buffer. */
 LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
@@ -103,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
 LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
 LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
 LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
+#if LJ_HASJIT
 LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
+#endif
+LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
 
 /* Conversions to strings. */
 LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);

Деякі файли не було показано, через те що забагато файлів було змінено