Ver código fonte

Updated to LuaJIT 2.1.0-beta3.f0e865d.

woollybah 6 anos atrás
pai
commit
7e185f73be
100 arquivos alterados com 12818 adições e 3030 exclusões
  1. BIN
      luajit.mod/lib/win32/libluajit_x64.a
  2. BIN
      luajit.mod/lib/win32/libluajit_x86.a
  3. 5 1
      luajit.mod/luajit.bmx
  4. 11 0
      luajit.mod/luajit/.gitignore
  5. 17 8
      luajit.mod/luajit/Makefile
  6. 2 2
      luajit.mod/luajit/README
  7. 1 1
      luajit.mod/luajit/doc/bluequad-print.css
  8. 1 1
      luajit.mod/luajit/doc/bluequad.css
  9. 95 233
      luajit.mod/luajit/doc/changes.html
  10. 5 4
      luajit.mod/luajit/doc/contact.html
  11. 6 5
      luajit.mod/luajit/doc/ext_c_api.html
  12. 4 3
      luajit.mod/luajit/doc/ext_ffi.html
  13. 10 5
      luajit.mod/luajit/doc/ext_ffi_api.html
  14. 26 10
      luajit.mod/luajit/doc/ext_ffi_semantics.html
  15. 4 3
      luajit.mod/luajit/doc/ext_ffi_tutorial.html
  16. 5 4
      luajit.mod/luajit/doc/ext_jit.html
  17. 364 0
      luajit.mod/luajit/doc/ext_profiler.html
  18. 92 18
      luajit.mod/luajit/doc/extensions.html
  19. 4 3
      luajit.mod/luajit/doc/faq.html
  20. 99 54
      luajit.mod/luajit/doc/install.html
  21. 8 7
      luajit.mod/luajit/doc/luajit.html
  22. 5 3
      luajit.mod/luajit/doc/running.html
  23. 15 3
      luajit.mod/luajit/doc/status.html
  24. 2 0
      luajit.mod/luajit/dynasm/dasm_arm.h
  25. 3 3
      luajit.mod/luajit/dynasm/dasm_arm.lua
  26. 519 0
      luajit.mod/luajit/dynasm/dasm_arm64.h
  27. 1166 0
      luajit.mod/luajit/dynasm/dasm_arm64.lua
  28. 8 4
      luajit.mod/luajit/dynasm/dasm_mips.h
  29. 70 15
      luajit.mod/luajit/dynasm/dasm_mips.lua
  30. 12 0
      luajit.mod/luajit/dynasm/dasm_mips64.lua
  31. 11 3
      luajit.mod/luajit/dynasm/dasm_ppc.h
  32. 689 19
      luajit.mod/luajit/dynasm/dasm_ppc.lua
  33. 2 2
      luajit.mod/luajit/dynasm/dasm_proto.h
  34. 46 9
      luajit.mod/luajit/dynasm/dasm_x86.h
  35. 510 96
      luajit.mod/luajit/dynasm/dasm_x86.lua
  36. 3 3
      luajit.mod/luajit/dynasm/dynasm.lua
  37. 3 3
      luajit.mod/luajit/etc/luajit.pc
  38. 7 0
      luajit.mod/luajit/src/.gitignore
  39. 64 32
      luajit.mod/luajit/src/Makefile
  40. 118 98
      luajit.mod/luajit/src/Makefile.dep
  41. 3 0
      luajit.mod/luajit/src/host/.gitignore
  42. 14 12
      luajit.mod/luajit/src/host/buildvm.c
  43. 1 0
      luajit.mod/luajit/src/host/buildvm.h
  44. 56 11
      luajit.mod/luajit/src/host/buildvm_asm.c
  45. 60 1
      luajit.mod/luajit/src/host/buildvm_lib.c
  46. 56 0
      luajit.mod/luajit/src/host/buildvm_libbc.h
  47. 26 2
      luajit.mod/luajit/src/host/buildvm_peobj.c
  48. 197 0
      luajit.mod/luajit/src/host/genlibbc.lua
  49. 1 0
      luajit.mod/luajit/src/jit/.gitignore
  50. 9 10
      luajit.mod/luajit/src/jit/bc.lua
  51. 18 16
      luajit.mod/luajit/src/jit/bcsave.lua
  52. 9 9
      luajit.mod/luajit/src/jit/dis_arm.lua
  53. 1216 0
      luajit.mod/luajit/src/jit/dis_arm64.lua
  54. 12 0
      luajit.mod/luajit/src/jit/dis_arm64be.lua
  55. 47 32
      luajit.mod/luajit/src/jit/dis_mips.lua
  56. 17 0
      luajit.mod/luajit/src/jit/dis_mips64.lua
  57. 17 0
      luajit.mod/luajit/src/jit/dis_mips64el.lua
  58. 6 9
      luajit.mod/luajit/src/jit/dis_mipsel.lua
  59. 9 9
      luajit.mod/luajit/src/jit/dis_ppc.lua
  60. 6 9
      luajit.mod/luajit/src/jit/dis_x64.lua
  61. 207 90
      luajit.mod/luajit/src/jit/dis_x86.lua
  62. 28 17
      luajit.mod/luajit/src/jit/dump.lua
  63. 311 0
      luajit.mod/luajit/src/jit/p.lua
  64. 10 7
      luajit.mod/luajit/src/jit/v.lua
  65. 45 0
      luajit.mod/luajit/src/jit/zone.lua
  66. 14 20
      luajit.mod/luajit/src/lauxlib.h
  67. 46 28
      luajit.mod/luajit/src/lib_aux.c
  68. 64 68
      luajit.mod/luajit/src/lib_base.c
  69. 120 14
      luajit.mod/luajit/src/lib_bit.c
  70. 5 5
      luajit.mod/luajit/src/lib_debug.c
  71. 37 16
      luajit.mod/luajit/src/lib_ffi.c
  72. 17 24
      luajit.mod/luajit/src/lib_io.c
  73. 141 28
      luajit.mod/luajit/src/lib_jit.c
  74. 4 11
      luajit.mod/luajit/src/lib_math.c
  75. 21 16
      luajit.mod/luajit/src/lib_os.c
  76. 46 25
      luajit.mod/luajit/src/lib_package.c
  77. 130 322
      luajit.mod/luajit/src/lib_string.c
  78. 107 80
      luajit.mod/luajit/src/lib_table.c
  79. 179 85
      luajit.mod/luajit/src/lj_alloc.c
  80. 169 77
      luajit.mod/luajit/src/lj_api.c
  81. 203 56
      luajit.mod/luajit/src/lj_arch.h
  82. 611 120
      luajit.mod/luajit/src/lj_asm.c
  83. 157 307
      luajit.mod/luajit/src/lj_asm_arm.h
  84. 2031 0
      luajit.mod/luajit/src/lj_asm_arm64.h
  85. 468 147
      luajit.mod/luajit/src/lj_asm_mips.h
  86. 298 192
      luajit.mod/luajit/src/lj_asm_ppc.h
  87. 373 163
      luajit.mod/luajit/src/lj_asm_x86.h
  88. 4 0
      luajit.mod/luajit/src/lj_bc.h
  89. 4 2
      luajit.mod/luajit/src/lj_bcdump.h
  90. 62 81
      luajit.mod/luajit/src/lj_bcread.c
  91. 97 132
      luajit.mod/luajit/src/lj_bcwrite.c
  92. 232 0
      luajit.mod/luajit/src/lj_buf.c
  93. 103 0
      luajit.mod/luajit/src/lj_buf.h
  94. 84 0
      luajit.mod/luajit/src/lj_carith.c
  95. 11 0
      luajit.mod/luajit/src/lj_carith.h
  96. 330 47
      luajit.mod/luajit/src/lj_ccall.c
  97. 36 13
      luajit.mod/luajit/src/lj_ccall.h
  98. 191 48
      luajit.mod/luajit/src/lj_ccallback.c
  99. 3 1
      luajit.mod/luajit/src/lj_cconv.c
  100. 27 13
      luajit.mod/luajit/src/lj_cdata.c

BIN
luajit.mod/lib/win32/libluajit_x64.a


BIN
luajit.mod/lib/win32/libluajit_x86.a


+ 5 - 1
luajit.mod/luajit.bmx

@@ -5,9 +5,13 @@ bbdoc: LuaJIT
 end rem
 end rem
 Module zeke.luajit
 Module zeke.luajit
 
 
-ModuleInfo "Version: 1.14"
+ModuleInfo "Version: 1.16"
 ModuleInfo "Author: Zeke"
 ModuleInfo "Author: Zeke"
 
 
+ModuleInfo "History: 1.16"
+ModuleInfo "History: Updated to LuaJIT 2.1.0-beta3.f0e865d."
+ModuleInfo "History: 1.15"
+ModuleInfo "History: Fixed lua_integer size for 64-bit."
 ModuleInfo "History: 1.14"
 ModuleInfo "History: 1.14"
 ModuleInfo "History: Fixed reflection issues."
 ModuleInfo "History: Fixed reflection issues."
 ModuleInfo "History: 1.13"
 ModuleInfo "History: 1.13"

+ 11 - 0
luajit.mod/luajit/.gitignore

@@ -0,0 +1,11 @@
+*.[oa]
+*.so
+*.obj
+*.lib
+*.exp
+*.dll
+*.exe
+*.manifest
+*.dmp
+*.swp
+.tags

+ 17 - 8
luajit.mod/luajit/Makefile

@@ -14,9 +14,10 @@
 ##############################################################################
 ##############################################################################
 
 
 MAJVER=  2
 MAJVER=  2
-MINVER=  0
-RELVER=  5
-VERSION= $(MAJVER).$(MINVER).$(RELVER)
+MINVER=  1
+RELVER=  0
+PREREL=  -beta3
+VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 ABIVER=  5.1
 
 
 ##############################################################################
 ##############################################################################
@@ -84,8 +85,10 @@ FILE_SO= libluajit.so
 FILE_MAN= luajit.1
 FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
-FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
-	      dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
+	      dis_mips64.lua dis_mips64el.lua vmdef.lua
 
 
 ifeq (,$(findstring Windows,$(OS)))
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)
   HOST_SYS:= $(shell uname -s)
@@ -115,7 +118,7 @@ install: $(INSTALL_DEP)
 	$(MKDIR) $(INSTALL_DIRS)
 	$(MKDIR) $(INSTALL_DIRS)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
-	$(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+	$(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
 	cd src && test -f $(FILE_SO) && \
 	cd src && test -f $(FILE_SO) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
@@ -127,12 +130,18 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
-	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+	@echo ""
+	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
+	@echo "You can do this now by running this command (with sudo):"
+	@echo ""
+	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+	@echo ""
+
 
 
 uninstall:
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
 	for file in $(FILES_JITLIB); do \
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done
 	  done

+ 2 - 2
luajit.mod/luajit/README

@@ -1,5 +1,5 @@
-README for LuaJIT 2.0.5
------------------------
+README for LuaJIT 2.1.0-beta3
+-----------------------------
 
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 

+ 1 - 1
luajit.mod/luajit/doc/bluequad-print.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2017 Mike Pall.
+/* Copyright (C) 2004-2018 Mike Pall.
  *
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 1 - 1
luajit.mod/luajit/doc/bluequad.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2017 Mike Pall.
+/* Copyright (C) 2004-2018 Mike Pall.
  *
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 95 - 233
luajit.mod/luajit/doc/changes.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>LuaJIT Change History</title>
 <title>LuaJIT Change History</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +43,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -72,6 +73,96 @@ to see whether newer versions are available.
 </p>
 </p>
 
 
 <div class="major" style="background: #d0d0ff;">
 <div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 &mdash; 2017-05-01</h2>
+<ul>
+<li>Rewrite memory block allocator.</li>
+<li>Add various extensions from Lua 5.2/5.3.</li>
+<li>Remove old Lua 5.0 compatibility defines.</li>
+<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li>
+<li>Fix soft-float <tt>math.abs()</tt> and negation.</li>
+<li>Fix formatting of some small denormals at low precision.</li>
+<li>LJ_GC64: Add JIT compiler support.</li>
+<li>x64/LJ_GC64: Add JIT compiler backend.</li>
+<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li>
+<li>Windows/x86: Add full exception interoperability.</li>
+<li>ARM64: Add big-endian support.</li>
+<li>ARM64: Add JIT compiler backend.</li>
+<li>MIPS: Fix <tt>TSETR</tt> barrier.</li>
+<li>MIPS: Support MIPS16 interlinking.</li>
+<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li>
+<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li>
+<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li>
+<li>FFI: Compile bitfield loads/stores.</li>
+<li>Various fixes common with the 2.0 branch.</li>
+</ul>
+
+<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 &mdash; 2016-03-03</h2>
+<ul>
+<li>Enable trace stitching.</li>
+<li>Use internal implementation for converting FP numbers to strings.</li>
+<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
+<li>Add MIPS soft-float support.</li>
+<li>Switch MIPS port to dual-number mode.</li>
+<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li>
+<li>Various minor fixes.</li>
+</ul>
+
+<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
+<p>
+This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+Please take a look at the commit history for more details.
+</p>
+<ul>
+<li>Changes to the VM core:
+<ul>
+<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
+<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
+<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
+<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+</ul></li>
+<li>Improvements to the JIT compiler:
+<ul>
+<li>Add trace stitching (disabled for now).</li>
+<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
+<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
+<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
+<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+<li>Compile <tt>__concat</tt> metamethod.</li>
+<li>Various minor optimizations.</li>
+</ul></li>
+<li>Internal Changes:
+<ul>
+<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+<li>Replace various builtins with embedded bytecode.</li>
+<li>Refactor string buffers and string formatting.</li>
+<li>Remove obsolete non-truncating number to integer conversions.</li>
+</ul></li>
+<li>Ports:
+<ul>
+<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li>
+<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
+<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+<li>PPC/e500: Drop support for this architecture.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>FFI: Add 64 bit bitwise operations.</li>
+<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li>
+<li>FFI: Compile conversions from functions to function pointers.</li>
+<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
+<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+</ul></li>
+</ul>
+</div>
+
+<div class="major" style="background: #ffffd0;">
 <h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 &mdash; 2017-05-01</h2>
 <h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 &mdash; 2017-05-01</h2>
 <ul>
 <ul>
 <li>Add workaround for MSVC 2015 stdio changes.</li>
 <li>Add workaround for MSVC 2015 stdio changes.</li>
@@ -81,7 +172,7 @@ to see whether newer versions are available.
 <li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li>
 <li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li>
 <li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li>
 <li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li>
 <li>Fix GC step size calculation.</li>
 <li>Fix GC step size calculation.</li>
-<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li>
+<li>Initialize <tt>uv-&gt;immutable</tt> for upvalues of loaded chunks.</li>
 <li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
 <li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
 <li>Drop leftover regs in 'for' iterator assignment, too.</li>
 <li>Drop leftover regs in 'for' iterator assignment, too.</li>
 <li>Fix PHI remarking in SINK pass.</li>
 <li>Fix PHI remarking in SINK pass.</li>
@@ -777,240 +868,11 @@ This matches the behavior of Lua 5.1, but not the specification.</li>
 no point in listing differences over earlier versions.</li>
 no point in listing differences over earlier versions.</li>
 </ul>
 </ul>
 </div>
 </div>
-
-<div class="major" style="background: #ffff80;">
-<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
-<ul>
-<li>Merged with Lua 5.1.5. Also integrated fixes for all
-<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;<span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
-Thanks to Tim Mensch.</li>
-<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
-<tt>table.remove()</tt>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
-<ul>
-<li>Merged with Lua 5.1.4. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
-<ul>
-<li>Merged with Lua 5.1.3. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
-<li>Fixed possible (but unlikely) stack corruption while compiling
-<tt>k^x</tt> expressions.</li>
-<li>Fixed DynASM template for cmpss instruction.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
-<ul>
-<li>Merged with Lua 5.1.2. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
-<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
-<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
-<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
-<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
-<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
-<li>The loadable debug modules now handle redirection to stdout
-(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
-<ul>
-<li>Fix MSVC inline assembly: use only local variables with
-<tt>lua_number2int()</tt>.</li>
-<li>Fix "attempt to call a thread value" bug on Mac OS X:
-make values of consts used as lightuserdata keys unique
-to avoid joining by the compiler/linker.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
-<ul>
-<li>Merged with Lua 5.1.1. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
-<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
-<li>Minor changes to DynASM: faster pre-processing, smaller encoding
-for some immediates.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.1 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-
-<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
-<ul>
-<li>Merged with Lua 5.1 (final).</li>
-
-<li>New JIT call frame setup:
-<ul>
-<li>The C stack is kept 16 byte aligned (faster).
-Mandatory for Mac OS X on Intel, too.</li>
-<li>Faster calling conventions for internal C helper functions.</li>
-<li>Better instruction scheduling for function prologue, OP_CALL and
-OP_RETURN.</li>
-</ul></li>
-
-<li>Miscellaneous optimizations:
-<ul>
-<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
-forwarding stalls.</li>
-<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
-and FP to integer conversions.</li>
-<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
-<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
-With better accuracy than the C variant, too.</li>
-<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
-use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
-</ul></li>
-
-<li>Changes in the optimizer:
-<ul>
-<li>Improved hinting for table keys derived from table values
-(<tt>t1[t2[x]]</tt>).</li>
-<li>Lookup hinting now works with arbitrary object types and
-supports index chains, too.</li>
-<li>Generate type hints for arithmetic and comparison operators,
-OP_LEN, OP_CONCAT and OP_FORPREP.</li>
-<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
-<li>Complete rewrite of <tt>jit.opt_inline</tt> module
-(ex <tt>jit.opt_lib</tt>).</li>
-</ul></li>
-
-<li>Use adaptive deoptimization:
-<ul>
-<li>If runtime verification of a contract fails, the affected
-instruction is recompiled and patched on-the-fly.
-Regular programs will trigger deoptimization only occasionally.</li>
-<li>This avoids generating code for uncommon fallback cases
-most of the time. Generated code is up to 30% smaller compared to
-LuaJIT&nbsp;1.0.3.</li>
-<li>Deoptimization is used for many opcodes and contracts:
-<ul>
-<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
-<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
-<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
-<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
-OP_FORPREP: operand type and range mismatches.</li>
-</ul></li>
-<li>Complete redesign of the debug and traceback info
-(bytecode &harr; mcode) to support deoptimization.
-Much more flexible and needs only 50% of the space.</li>
-<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
-<tt>jit.dump</tt> handle deoptimization.</li>
-</ul></li>
-
-<li>Inlined many popular library functions
-(for commonly used arguments only):
-<ul>
-<li>Most <tt>math.*</tt> functions (the 18 most used ones)
-[2x-10x faster].</li>
-<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
-[2x-10x faster].</li>
-<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
-[3x-5x faster].</li>
-<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
-[3x-5x faster].</li>
-<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
-[8x-15x faster].</li>
-</ul></li>
-
-<li>Changes in the core and loadable modules and the stand-alone executable:
-<ul>
-<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
-and <tt>jit.arch</tt>.</li>
-<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
-<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
-<li>New x86 disassembler module written in pure Lua. No dependency
-on ndisasm anymore. Flexible API, very compact (500 lines)
-and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
-<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
-on a separate line.</li>
-</ul></li>
-
-<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
-<li>Miscellaneous doc changes. Added a section about
-<a href="install.html#embedding">embedding LuaJIT</a>.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.0 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-</div>
-
-<div class="major" style="background: #ffffd0;">
-<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
-<ul>
-<li>Even more docs.</li>
-<li>Unified closure checks in <tt>jit.*</tt>.</li>
-<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
-<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
-<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
-</ul>
-<p>
-This is the first public release of LuaJIT.
-</p>
-
-<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
-<ul>
-<li>Add support for flushing the Valgrind translation cache <br>
-(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
-<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
-variant for POSIX systems.</li>
-<li>Reorganized the C&nbsp;function signature handling in
-<tt>jit.opt_lib</tt>.</li>
-<li>Changed to index-based hints for inlining C&nbsp;functions.
-Still no support in the backend for inlining.</li>
-<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
-<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
-<li>Misc. changes to the Makefiles.</li>
-<li>Lots of new docs.</li>
-<li>Complete doc reorg.</li>
-</ul>
-<p>
-Not released because Lua 5.1 alpha came out today.
-</p>
-
-<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
-<ul>
-<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
-<li>Fix result handling for C &ndash;> JIT calls.</li>
-<li>Detect CPU feature bits.</li>
-<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
-<li>Add fallback instructions for FP compares.</li>
-<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
-<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
-(David Burgess).</li>
-<li>Misc. doc updates.</li>
-</ul>
-<p>
-Interim non-public release.
-Special thanks to Adam D. Moss for reporting most of the bugs.
-</p>
-
-<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
-<p>
-This is the initial non-public release of LuaJIT.
-</p>
-</div>
 <br class="flush">
 <br class="flush">
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 4
luajit.mod/luajit/doc/contact.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Contact</title>
 <title>Contact</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +40,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -91,7 +92,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 <h2>Copyright</h2>
 <h2>Copyright</h2>
 <p>
 <p>
 All documentation is
 All documentation is
-Copyright &copy; 2005-2017 Mike Pall.
+Copyright &copy; 2005-2018 Mike Pall.
 </p>
 </p>
 
 
 
 
@@ -99,7 +100,7 @@ Copyright &copy; 2005-2017 Mike Pall.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 6 - 5
luajit.mod/luajit/doc/ext_c_api.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Lua/C API Extensions</title>
 <title>Lua/C API Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +40,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -89,8 +90,8 @@ other Lua/C API functions).
 </p>
 </p>
 <p>
 <p>
 The third argument specifies the mode, which is 'or'ed with a flag.
 The third argument specifies the mode, which is 'or'ed with a flag.
-The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
-<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
+<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
 <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
 <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
 </p>
 </p>
 <p>
 <p>
@@ -177,7 +178,7 @@ Also note that this mechanism is not without overhead.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 3
luajit.mod/luajit/doc/ext_ffi.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>FFI Library</title>
 <title>FFI Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +40,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -320,7 +321,7 @@ without undue conversion penalties.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 10 - 5
luajit.mod/luajit/doc/ext_ffi_api.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>ffi.* API Functions</title>
 <title>ffi.* API Functions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +45,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -466,6 +467,10 @@ otherwise. The following parameters are currently defined:
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <tr class="odd">
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="odd">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 </table>
 
 
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -542,8 +547,8 @@ corresponding ctype.
 The parser for Lua source code treats numeric literals with the
 The parser for Lua source code treats numeric literals with the
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 integers. Case doesn't matter, but uppercase is recommended for
 integers. Case doesn't matter, but uppercase is recommended for
-readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
-(<tt>0x2aLL</tt>) literals.
+readability. It handles decimal (<tt>42LL</tt>), hexadecimal
+(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
 </p>
 </p>
 <p>
 <p>
 The imaginary part of complex numbers can be specified by suffixing
 The imaginary part of complex numbers can be specified by suffixing
@@ -556,7 +561,7 @@ named <tt>i</tt>.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 26 - 10
luajit.mod/luajit/doc/ext_ffi_semantics.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>FFI Semantics</title>
 <title>FFI Semantics</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +45,8 @@ td.convop { font-style: italic; width: 40%; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -183,6 +184,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
 <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
 <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
 <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
 <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
 
 
+<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
+
 </ul>
 </ul>
 <p>
 <p>
 You're encouraged to use these types in preference to
 You're encouraged to use these types in preference to
@@ -730,6 +733,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
 number (e.g. for regular floating-point calculations) with
 number (e.g. for regular floating-point calculations) with
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
 
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.</li>
+
 </ul>
 </ul>
 
 
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -844,7 +863,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead.
 <p>
 <p>
 The main use for parameterized types are libraries implementing abstract
 The main use for parameterized types are libraries implementing abstract
 data types
 data types
-(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">&raquo;</span>&nbsp;example</a>),
+(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8">example</a>),
 similar to what can be achieved with C++ template metaprogramming.
 similar to what can be achieved with C++ template metaprogramming.
 Another use case are derived types of anonymous structs, which avoids
 Another use case are derived types of anonymous structs, which avoids
 pollution of the global struct namespace.
 pollution of the global struct namespace.
@@ -1201,14 +1220,12 @@ The following operations are currently not compiled and may exhibit
 suboptimal performance, especially when used in inner loops:
 suboptimal performance, especially when used in inner loops:
 </p>
 </p>
 <ul>
 <ul>
-<li>Bitfield accesses and initializations.</li>
 <li>Vector operations.</li>
 <li>Vector operations.</li>
 <li>Table initializers.</li>
 <li>Table initializers.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
-<li>Allocations of variable-length arrays or structs.</li>
-<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
-alignment &gt; 8&nbsp;bytes.</li>
-<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
+<li>Non-default initialization of VLA/VLS or large C&nbsp;types
+(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
+<li>Bitfield initializations.</li>
 <li>Pointer differences for element sizes that are not a power of
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
 two.</li>
 <li>Calls to C&nbsp;functions with aggregates passed or returned by
 <li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1224,7 +1241,6 @@ value.</li>
 Other missing features:
 Other missing features:
 </p>
 </p>
 <ul>
 <ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
@@ -1235,7 +1251,7 @@ compiled.</li>
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 3
luajit.mod/luajit/doc/ext_ffi_tutorial.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>FFI Tutorial</title>
 <title>FFI Tutorial</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -48,6 +47,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -591,7 +592,7 @@ it to a local variable in the function scope is unnecessary.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 4
luajit.mod/luajit/doc/ext_jit.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>jit.* Library</title>
 <title>jit.* Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +40,8 @@
 <a class="current" href="ext_jit.html">jit.* Library</a>
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -151,7 +152,7 @@ Contains the target OS name:
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <p>
 <p>
 Contains the target architecture name:
 Contains the target architecture name:
-"x86", "x64", "arm", "ppc", "ppcspe", or "mips".
+"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
 </p>
 </p>
 
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
@@ -189,7 +190,7 @@ if you want to know more.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 364 - 0
luajit.mod/luajit/doc/ext_profiler.html

@@ -0,0 +1,364 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Profiler</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Profiler</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a class="current" href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has an integrated statistical profiler with very low overhead. It
+allows sampling the currently executing stack and other parameters at
+regular intervals.
+</p>
+<p>
+The integrated profiler can be accessed from three levels:
+</p>
+<ul>
+<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
+<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
+<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
+<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
+</ul>
+
+<h2 id="hl_profiler">High-Level Profiler</h2>
+<p>
+The bundled high-level profiler offers basic profiling functionality. It
+generates simple textual summaries or source code annotations. It can be
+accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
+or from Lua code by loading the underlying <tt>jit.p</tt> module.
+</p>
+<p>
+To cut to the chase &mdash; run this to get a CPU usage profile by
+function name:
+</p>
+<pre class="code">
+luajit -jp myapp.lua
+</pre>
+<p>
+It's <em>not</em> a stated goal of the bundled profiler to add every
+possible option or to cater for special profiling needs. The low-level
+profiler APIs are documented below. They may be used by third-party
+authors to implement advanced functionality, e.g. IDE integration or
+graphical profilers.
+</p>
+<p>
+Note: Sampling works for both interpreted and JIT-compiled code. The
+results for JIT-compiled code may sometimes be surprising. LuaJIT
+heavily optimizes and inlines Lua code &mdash; there's no simple
+one-to-one correspondence between source code lines and the sampled
+machine code.
+</p>
+
+<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
+<p>
+The <tt>-jp</tt> command line option starts the high-level profiler.
+When the application run by the command line terminates, the profiler
+stops and writes the results to <tt>stdout</tt> or to the specified
+<tt>output</tt> file.
+</p>
+<p>
+The <tt>options</tt> argument specifies how the profiling is to be
+performed:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
+This is the default mode.</li>
+<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
+<li><tt>&lt;number&gt;</tt> &mdash; Stack dump depth (callee &larr;
+caller). Default: 1.</li>
+<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
+&rarr; callee).</li>
+<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
+depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
+<li><tt>p</tt> &mdash; Show full path for module names.</li>
+<li><tt>v</tt> &mdash; Show VM states.</li>
+<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
+<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
+<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
+<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
+<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
+<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
+Default: 3%.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
+Default: 10ms.<br>
+Note: The actual sampling precision is OS-dependent.</li>
+</ul>
+<p>
+The default output for <tt>-jp</tt> is a list of the most CPU consuming
+spots in the application. Increasing the stack dump depth with (say)
+<tt>-jp=2</tt> may help to point out the main callers or callees of
+hotspots. But sample aggregation is still flat per unique stack dump.
+</p>
+<p>
+To get a two-level view (split view) of callers/callees, use
+<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
+level are relative to the first level.
+</p>
+<p>
+To see how much time is spent in each line relative to a function, use
+<tt>-jp=fl</tt>.
+</p>
+<p>
+To see how much time is spent in different VM states or
+<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
+</p>
+<p>
+Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
+views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
+spent in a VM state or zone vs. hotspots. This can be used to answer
+questions like "Which time consuming functions are only interpreted?" or
+"What's the garbage collector overhead for a specific function?".
+</p>
+<p>
+Multiple options can be combined &mdash; but not all combinations make
+sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
+deep in 4ms intervals and shows a split view of the CPU consuming
+functions and their callers with a 1% threshold.
+</p>
+<p>
+Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
+always flat and at the line level. Obviously, the source code files need
+to be readable by the profiler script.
+</p>
+<p>
+The high-level profiler can also be started and stopped from Lua code with:
+</p>
+<pre class="code">
+require("jit.p").start(options, output)
+...
+require("jit.p").stop()
+</pre>
+
+<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
+<p>
+Zones can be used to provide information about different parts of an
+application to the high-level profiler. E.g. a game could make use of an
+<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
+organized as a stack.
+</p>
+<p>
+The <tt>jit.zone</tt> module needs to be loaded explicitly:
+</p>
+<pre class="code">
+local zone = require("jit.zone")
+</pre>
+<ul>
+<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
+<li><tt>zone()</tt> pops the current zone from the zone stack and
+returns its name.</li>
+<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
+<li><tt>zone:flush()</tt> flushes the zone stack.</li>
+</ul>
+<p>
+To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
+spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
+</p>
+
+<h2 id="ll_lua_api">Low-level Lua API</h2>
+<p>
+The <tt>jit.profile</tt> module gives access to the low-level API of the
+profiler from Lua code. This module needs to be loaded explicitly:</p>
+<pre class="code">
+local profile = require("jit.profile")
+</pre>
+<p>
+This module can be used to implement your own higher-level profiler.
+A typical profiling run starts the profiler, captures stack dumps in
+the profiler callback, adds them to a hash table to aggregate the number
+of samples, stops the profiler and then analyzes all of the captured
+stack dumps. Other parameters can be sampled in the profiler callback,
+too. But it's important not to spend too much time in the callback,
+since this may skew the statistics.
+</p>
+
+<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. The <tt>mode</tt> argument is a
+string holding options:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
+<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
+10ms).<br>
+Note: The actual sampling precision is OS-dependent.
+</li>
+</ul>
+<p>
+The <tt>cb</tt> argument is a callback function which is called with
+three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
+called on a separate coroutine; the <tt>thread</tt> argument is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify the stack of that state or call functions on it.
+</p>
+<p>
+<tt>samples</tt> gives the number of accumulated samples since the last
+callback (usually 1).
+</p>
+<p>
+<tt>vmstate</tt> holds the VM state at the time the profiling timer
+triggered. This may or may not correspond to the state of the VM when
+the profiling callback is called. The state is either <tt>'N'</tt>
+native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
+C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
+compiler.
+</p>
+
+<h3 id="profile_stop"><tt>profile.stop()</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner. It
+returns a string with a stack dump for the <tt>thread</tt> (coroutine),
+formatted according to the <tt>fmt</tt> argument:
+</p>
+<ul>
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
+only the file name is used.</li>
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
+use module:line.</li>
+<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Dump module:line.</li>
+<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
+frame.</li>
+<li>All other characters are added verbatim to the output string.</li>
+</ul>
+<p>
+The <tt>depth</tt> argument gives the number of frames to dump, starting
+at the topmost frame of the thread. A negative number dumps the frames in
+inverse order.
+</p>
+<p>
+The first example prints a list of the current module names and line
+numbers of up to 10 frames in separate lines. The second example prints
+semicolon-separated function names for all frames (up to 100) in inverse
+order:
+</p>
+<pre class="code">
+print(profile.dumpstack(thread, "l\n", 10))
+print(profile.dumpstack(thread, "fZ;", -100))
+</pre>
+
+<h2 id="ll_c_api">Low-level C API</h2>
+<p>
+The profiler can be controlled directly from C&nbsp;code, e.g. for
+use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
+<a href="ext_c_api.html">Lua/C API</a> extensions).
+</p>
+
+<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. <a href="#profile_start">See
+above</a> for a description of the <tt>mode</tt> argument.
+</p>
+<p>
+The <tt>cb</tt> argument is a callback function with the following
+declaration:
+</p>
+<pre class="code">
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+                                        int samples, int vmstate);
+</pre>
+<p>
+<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify this stack or call functions on this stack &mdash;
+use a separate coroutine for this purpose. <a href="#profile_start">See
+above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
+</p>
+
+<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner.
+<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
+and <tt>depth</tt>.
+</p>
+<p>
+This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
+private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
+argument returns the length of the output string. The buffer is
+overwritten on the next call and deallocated when the profiler stops.
+You either need to consume the content immediately or copy it for later
+use.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2018
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 92 - 18
luajit.mod/luajit/doc/extensions.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Extensions</title>
 <title>Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -58,6 +57,8 @@ td.excinterop {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -113,6 +114,9 @@ bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
 This module is a LuaJIT built-in &mdash; you don't need to download or
 This module is a LuaJIT built-in &mdash; you don't need to download or
 install Lua BitOp. The Lua BitOp site has full documentation for all
 install Lua BitOp. The Lua BitOp site has full documentation for all
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
 </p>
 </p>
 <p>
 <p>
 Please make sure to <tt>require</tt> the module before using any of
 Please make sure to <tt>require</tt> the module before using any of
@@ -146,6 +150,11 @@ LuaJIT adds some
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 </p>
 </p>
 
 
+<h3 id="profiler">Profiler</h3>
+<p>
+LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
+</p>
+
 <h2 id="library">Enhanced Standard Library Functions</h2>
 <h2 id="library">Enhanced Standard Library Functions</h2>
 
 
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -173,7 +182,7 @@ in <tt>"-inf"</tt>.
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <p>
 <p>
 All string-to-number conversions consistently convert integer and
 All string-to-number conversions consistently convert integer and
-floating-point inputs in decimal and hexadecimal on all platforms.
+floating-point inputs in decimal, hexadecimal and binary on all platforms.
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 problems with poor C library implementations. The builtin conversion
 problems with poor C library implementations. The builtin conversion
 function provides full precision according to the IEEE-754 standard, it
 function provides full precision according to the IEEE-754 standard, it
@@ -197,6 +206,37 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 </p>
 </p>
+<p>
+Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
+a different, incompatible bytecode format for ports that use this mode (e.g.
+ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified
+in the future.
+</p>
+
+<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
+<p>
+An extra library function <tt>table.new()</tt> can be made available via
+<tt>require("table.new")</tt>. This creates a pre-sized table, just like
+the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
+tables if the final table size is known and automatic table resizing is
+too expensive.
+</p>
+
+<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
+<p>
+An extra library function <tt>table.clear()</tt> can be made available
+via <tt>require("table.clear")</tt>. This clears all keys and values
+from a table, but preserves the allocated array/hash sizes. This is
+useful when a table, which is linked from multiple places, needs to be
+cleared and/or when recycling a table for use by the same context. This
+avoids managing backlinks, saves an allocation and the overhead of
+incremental array/hash part growth.
+</p>
+<p>
+Please note this function is meant for very specific situations. In most
+cases it's better to replace the (usually single) link with a new table
+and let the GC do its work.
+</p>
 
 
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <p>
 <p>
@@ -271,6 +311,26 @@ indexes for varargs.</li>
 <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
 <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
 C&nbsp;functions.</li>
 C&nbsp;functions.</li>
 <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
 <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_version()</tt>
+<tt>lua_upvalueid()</tt>
+<tt>lua_upvaluejoin()</tt>
+<tt>lua_loadx()</tt>
+<tt>lua_copy()</tt>
+<tt>lua_tonumberx()</tt>
+<tt>lua_tointegerx()</tt>
+<tt>luaL_fileresult()</tt>
+<tt>luaL_execresult()</tt>
+<tt>luaL_loadfilex()</tt>
+<tt>luaL_loadbufferx()</tt>
+<tt>luaL_traceback()</tt>
+<tt>luaL_setfuncs()</tt>
+<tt>luaL_pushmodule()</tt>
+<tt>luaL_newlibtable()</tt>
+<tt>luaL_newlib()</tt>
+<tt>luaL_testudata()</tt>
+<tt>luaL_setmetatable()</tt>
+</li>
 <li>Command line option <tt>-E</tt>.</li>
 <li>Command line option <tt>-E</tt>.</li>
 <li>Command line checks <tt>__tostring</tt> for errors.</li>
 <li>Command line checks <tt>__tostring</tt> for errors.</li>
 </ul>
 </ul>
@@ -296,6 +356,8 @@ exit status.</li>
 <li><tt>debug.setmetatable()</tt> returns object.</li>
 <li><tt>debug.setmetatable()</tt> returns object.</li>
 <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
 <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
 <li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
 <li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
+<li><tt>package.searchers</tt>.</li>
+<li><tt>module()</tt> returns the module table.</li>
 </ul>
 </ul>
 <p>
 <p>
 Note: this provides only partial compatibility with Lua 5.2 at the
 Note: this provides only partial compatibility with Lua 5.2 at the
@@ -304,6 +366,21 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
 break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 </p>
 </p>
 
 
+<h2 id="lua53">Extensions from Lua 5.3</h2>
+<p>
+LuaJIT supports some extensions from Lua&nbsp;5.3:
+</p>
+<ul>
+<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
+<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
+<li><tt>assert()</tt> accepts any type of error object.</li>
+<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
+<li><tt>coroutine.isyieldable()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_isyieldable()</tt>
+</li>
+</ul>
+
 <h2 id="exceptions">C++ Exception Interoperability</h2>
 <h2 id="exceptions">C++ Exception Interoperability</h2>
 <p>
 <p>
 LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
 LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -318,25 +395,30 @@ the toolchain used to compile LuaJIT:
 </tr>
 </tr>
 <tr class="odd separate">
 <tr class="odd separate">
 <td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
 <td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+</td>
+<td class="exccompiler">GCC 4.3+, Clang</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
+<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
+<td class="exccompiler">GCC, Clang</td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+</tr>
+<tr class="odd">
 <td class="excplatform">Other platforms, DWARF2 unwinding</td>
 <td class="excplatform">Other platforms, DWARF2 unwinding</td>
-<td class="exccompiler">GCC</td>
+<td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td class="excplatform">Windows/x64</td>
 <td class="excplatform">Windows/x64</td>
 <td class="exccompiler">MSVC or WinSDK</td>
 <td class="exccompiler">MSVC or WinSDK</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 </tr>
-<tr class="even">
+<tr class="odd">
 <td class="excplatform">Windows/x86</td>
 <td class="excplatform">Windows/x86</td>
 <td class="exccompiler">Any</td>
 <td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #a00000;">No</b></td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td class="excplatform">Other platforms</td>
 <td class="excplatform">Other platforms</td>
 <td class="exccompiler">Other compilers</td>
 <td class="exccompiler">Other compilers</td>
 <td class="excinterop"><b style="color: #a00000;">No</b></td>
 <td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -385,20 +467,12 @@ C++ destructors.</li>
 <li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
 <li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
 <li>Throwing Lua errors across C++ frames will <b>not</b> call
 <li>Throwing Lua errors across C++ frames will <b>not</b> call
 C++ destructors.</li>
 C++ destructors.</li>
-<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
-it's <b>not</b> safe to throw a Lua error across any frames containing
-a C++ function with any try/catch construct or using variables with
-(implicit) destructors. This also applies to any functions which may be
-inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
-is called inside or outside of a try/catch or whether any object actually
-needs to be destroyed: the SEH chain is corrupted and this will eventually
-lead to the termination of the process.</li>
 </ul>
 </ul>
 <br class="flush">
 <br class="flush">
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 3
luajit.mod/luajit/doc/faq.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Frequently Asked Questions (FAQ)</title>
 <title>Frequently Asked Questions (FAQ)</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +43,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -174,7 +175,7 @@ the development of certain features, if they are important to you.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 99 - 54
luajit.mod/luajit/doc/install.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Installation</title>
 <title>Installation</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -69,6 +68,8 @@ td.compatno {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -112,17 +113,17 @@ operating systems, CPUs and compilers:
 </tr>
 </tr>
 <tr class="odd separate">
 <tr class="odd separate">
 <td class="compatcpu">x86 (32 bit)</td>
 <td class="compatcpu">x86 (32 bit)</td>
-<td class="compatos">GCC 4.x+<br>GCC 3.4</td>
-<td class="compatos">GCC 4.x+<br>GCC 3.4</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
 <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
 <td class="compatcpu">x64 (64 bit)</td>
 <td class="compatcpu">x64 (64 bit)</td>
-<td class="compatos">GCC 4.x+</td>
-<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td>
+<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
 </tr>
 </tr>
 <tr class="odd">
 <tr class="odd">
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
@@ -132,21 +133,21 @@ operating systems, CPUs and compilers:
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatcpu"><a href="#cross2">ARM64</a></td>
+<td class="compatos">GCC 4.8+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 </tr>
 <tr class="odd">
 <tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
-<td class="compatos">GCC 4.3+</td>
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">MIPS</a></td>
+<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
@@ -173,6 +174,14 @@ MSVC or WinSDK.</li>
 Please read the instructions given in these files, before changing
 Please read the instructions given in these files, before changing
 any settings.
 any settings.
 </p>
 </p>
+<p>
+LuaJIT on x64 currently uses 32 bit GC objects by default.
+<tt>LJ_GC64</tt> mode may be explicitly enabled:
+add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run
+<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note
+about the <a href="extensions.html#string_dump">bytecode format</a>
+differences, too.
+</p>
 
 
 <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
 <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
 <h3>Prerequisites</h3>
 <h3>Prerequisites</h3>
@@ -200,7 +209,7 @@ which is probably the default on your system, anyway. Simply run:
 make
 make
 </pre>
 </pre>
 <p>
 <p>
-This always builds a native x86, x64 or PPC binary, depending on the host OS
+This always builds a native binary, depending on the host OS
 you're running this command on. Check the section on
 you're running this command on. Check the section on
 <a href="#cross">cross-compilation</a> for more options.
 <a href="#cross">cross-compilation</a> for more options.
 </p>
 </p>
@@ -331,25 +340,36 @@ directory where <tt>luajit.exe</tt> is installed
 
 
 <h2 id="cross">Cross-compiling LuaJIT</h2>
 <h2 id="cross">Cross-compiling LuaJIT</h2>
 <p>
 <p>
+First, let's clear up some terminology:
+</p>
+<ul>
+<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
+<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
+<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
+<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
+<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
+</ul>
+<p>
 The GNU Makefile-based build system allows cross-compiling on any host
 The GNU Makefile-based build system allows cross-compiling on any host
-for any supported target, as long as both architectures have the same
-pointer size. If you want to cross-compile to any 32 bit target on an
-x64 OS, you need to install the multilib development package (e.g.
-<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
-(<tt>HOST_CC="gcc -m32"</tt>).
+for any supported target:
 </p>
 </p>
+<ul>
+<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
+<li>Both host and target architectures must have the same pointer size.</li>
+<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
+<li>64 bit targets always require compilation on a 64 bit host.</li>
+</ul>
 <p>
 <p>
 You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
 You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
-target OS differ, or you'll get assembler or linker errors. E.g. if
-you're compiling on a Windows or OSX host for embedded Linux or Android,
-you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
-minimal target OS, you may need to disable the built-in allocator in
-<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
-specify the same <tt>TARGET_SYS</tt> for the install step, too.
+target OS differ, or you'll get assembler or linker errors:
 </p>
 </p>
+<ul>
+<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
+<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
+<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
+</ul>
 <p>
 <p>
-The examples below only show some popular targets &mdash; please check
-the comments in <tt>src/Makefile</tt> for more details.
+Here are some examples where host and target have the same CPU:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
 # Cross-compile to a 32 bit binary on a multilib x64 OS
 # Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -367,37 +387,47 @@ use the canonical toolchain triplets for Linux.
 </p>
 </p>
 <p>
 <p>
 Since there's often no easy way to detect CPU features at runtime, it's
 Since there's often no easy way to detect CPU features at runtime, it's
-important to compile with the proper CPU or architecture settings. You
-can specify these when building the toolchain yourself. Or add
-<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
-ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
-too. Otherwise LuaJIT may not run at the full performance of your target
-CPU.
+important to compile with the proper CPU or architecture settings:
+</p>
+<ul>
+<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
+<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
+<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
+<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
+</ul>
+<p>
+Here are some examples for targets with a different CPU than the host:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
 # ARM soft-float
 # ARM soft-float
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
      TARGET_CFLAGS="-mfloat-abi=soft"
      TARGET_CFLAGS="-mfloat-abi=soft"
 
 
-# ARM soft-float ABI with VFP (example for Cortex-A8)
+# ARM soft-float ABI with VFP (example for Cortex-A9)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
-     TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
+     TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
 
 
-# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
+# ARM hard-float ABI with VFP (armhf, most modern toolchains)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
 
+# ARM64
+make CROSS=aarch64-linux-
+
 # PPC
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
 
 
-# MIPS big-endian
+# MIPS32 big-endian
 make HOST_CC="gcc -m32" CROSS=mips-linux-
 make HOST_CC="gcc -m32" CROSS=mips-linux-
-# MIPS little-endian
+# MIPS32 little-endian
 make HOST_CC="gcc -m32" CROSS=mipsel-linux-
 make HOST_CC="gcc -m32" CROSS=mipsel-linux-
+
+# MIPS64 big-endian
+make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
+# MIPS64 little-endian
+make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
 </pre>
 </pre>
 <p>
 <p>
-You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
+You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>.
 The environment variables need to match the install locations and the
 The environment variables need to match the install locations and the
 desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
 desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
 For details check the folder <tt>docs</tt> in the NDK directory.
 For details check the folder <tt>docs</tt> in the NDK directory.
@@ -411,7 +441,7 @@ to build/deploy or which lowest common denominator you want to pick:
 # Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
 # Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
 NDK=/opt/android/ndk
 NDK=/opt/android/ndk
 NDKABI=8
 NDKABI=8
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@@ -419,16 +449,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
 # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
 NDK=/opt/android/ndk
 NDK=/opt/android/ndk
 NDKABI=14
 NDKABI=14
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
 NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
 
 
-# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS)
+# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
 NDK=/opt/android/ndk
 NDK=/opt/android/ndk
 NDKABI=14
 NDKABI=14
-NDKVER=$NDK/toolchains/mipsel-linux-android-4.6
+NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
 NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
 NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@@ -436,7 +466,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 # Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
 # Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
 NDK=/opt/android/ndk
 NDK=/opt/android/ndk
 NDKABI=14
 NDKABI=14
-NDKVER=$NDK/toolchains/x86-4.6
+NDKVER=$NDK/toolchains/x86-4.9
 NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
 NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@@ -452,11 +482,19 @@ much slower than the JIT compiler. Please complain to Apple, not me.
 Or use Android. :-p
 Or use Android. :-p
 </p>
 </p>
 <pre class="code">
 <pre class="code">
+# iOS/ARM (32 bit)
 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
 ICC=$(xcrun --sdk iphoneos --find clang)
 ICC=$(xcrun --sdk iphoneos --find clang)
 ISDKF="-arch armv7 -isysroot $ISDKP"
 ISDKF="-arch armv7 -isysroot $ISDKP"
 make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
 make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
      CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
      CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+
+# iOS/ARM64
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch arm64 -isysroot $ISDKP"
+make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
+     TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
 </pre>
 </pre>
 
 
 <h3 id="consoles">Cross-compiling for consoles</h3>
 <h3 id="consoles">Cross-compiling for consoles</h3>
@@ -513,6 +551,16 @@ the following commands:
 cd src
 cd src
 xedkbuild
 xedkbuild
 </pre>
 </pre>
+<p>
+To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and run
+the following commands:
+</p>
+<pre class="code">
+cd src
+xb1build
+</pre>
 
 
 <h2 id="embed">Embedding LuaJIT</h2>
 <h2 id="embed">Embedding LuaJIT</h2>
 <p>
 <p>
@@ -543,14 +591,11 @@ intend to load Lua/C modules at runtime.
 </li>
 </li>
 <li>
 <li>
 If you're building a 64 bit application on OSX which links directly or
 If you're building a 64 bit application on OSX which links directly or
-indirectly against LuaJIT, you need to link your main executable
-with these flags:
+indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode,
+you need to link your main executable with these flags:
 <pre class="code">
 <pre class="code">
 -pagezero_size 10000 -image_base 100000000
 -pagezero_size 10000 -image_base 100000000
 </pre>
 </pre>
-Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries
-which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua).
-See: <tt>man rebase</tt>
 </li>
 </li>
 </ul>
 </ul>
 <p>Additional hints for initializing LuaJIT using the C API functions:</p>
 <p>Additional hints for initializing LuaJIT using the C API functions:</p>
@@ -636,7 +681,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 8 - 7
luajit.mod/luajit/doc/luajit.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>LuaJIT</title>
 <title>LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -126,6 +125,8 @@ table.feature small {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -150,7 +151,7 @@ Lua is a powerful, dynamic and light-weight programming language.
 It may be embedded or used as a general-purpose, stand-alone language.
 It may be embedded or used as a general-purpose, stand-alone language.
 </p>
 </p>
 <p>
 <p>
-LuaJIT is Copyright &copy; 2005-2017 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2018 Mike Pall, released under the
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 </p>
 </p>
 <p>
 <p>
@@ -164,13 +165,13 @@ LuaJIT is Copyright &copy; 2005-2017 Mike Pall, released under the
 <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
 <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
 </table>
 </table>
 <table class="feature os os3">
 <table class="feature os os3">
-<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr>
+<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
 </table>
 </table>
 <table class="feature compiler">
 <table class="feature compiler">
-<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
+<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
 </table>
 </table>
 <table class="feature cpu">
 <table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
 </table>
 </table>
 <table class="feature fcompat">
 <table class="feature fcompat">
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
@@ -224,7 +225,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 3
luajit.mod/luajit/doc/running.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Running LuaJIT</title>
 <title>Running LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -63,6 +62,8 @@ td.param_default {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -178,6 +179,7 @@ Here are the available LuaJIT control commands:
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
 </ul>
 </ul>
 <p>
 <p>
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
@@ -296,7 +298,7 @@ Here are the parameters and their default settings:
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 15 - 3
luajit.mod/luajit/doc/status.html

@@ -3,8 +3,7 @@
 <head>
 <head>
 <title>Status</title>
 <title>Status</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +43,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a class="current" href="status.html">Status</a>
 <a class="current" href="status.html">Status</a>
@@ -95,12 +96,23 @@ handled correctly. The error may fall through an on-trace
 <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
 <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
 garbage collector.
 garbage collector.
 </li>
 </li>
+<li>
+LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the
+<b>legacy <tt>lightuserdata</tt></b> data type.
+This is only relevant on x64 systems which use the negative part of the
+virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems
+configured with a 48 bit or 52 bit VA.
+Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside
+of that range, e.g. variables on the stack. In general, avoid this data
+type for new code and replace it with (much more performant) FFI bindings.
+FFI cdata pointers can address the full 64 bit range.
+</li>
 </ul>
 </ul>
 <br class="flush">
 <br class="flush">
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2017 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 2 - 0
luajit.mod/luajit/dynasm/dasm_arm.h

@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_IMMV8:
       case DASM_IMMV8:
 	CK((n & 3) == 0, RANGE_I);
 	CK((n & 3) == 0, RANGE_I);
 	n >>= 2;
 	n >>= 2;
+	/* fallthrough */
       case DASM_IMML8:
       case DASM_IMML8:
       case DASM_IMML12:
       case DASM_IMML12:
 	CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
 	CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	  break;
 	case DASM_REL_LG:
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;

+ 3 - 3
luajit.mod/luajit/dynasm/dasm_arm.lua

@@ -9,9 +9,9 @@
 local _info = {
 local _info = {
   arch =	"arm",
   arch =	"arm",
   description =	"DynASM ARM module",
   description =	"DynASM ARM module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }

+ 519 - 0
luajit.mod/luajit/dynasm/dasm_arm64.h

@@ -0,0 +1,519 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. Keep the order in sync with action_names in dasm_arm64.lua. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM__MAX  /* Words whose upper 16 bits are >= this are plain instruction words. */
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure: one growable int buffer of pass 1 records per section. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;		/* Size of lglabels in bytes. */
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;		/* Size of pclabels in bytes. */
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Allocate the DynASM state for maxsection sections and reset every field. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  dasm_Section *sec;
+  size_t psz = 0;
+  int secnum;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (secnum = 0; secnum < maxsection; secnum++) {
+    sec = &D->sections[secnum];
+    sec->buf = NULL;  /* Pass 3 needs this to be initialized. */
+    sec->rbuf = sec->buf - DASM_SEC2POS(secnum);
+    sec->bsize = 0;
+    sec->epos = 0;  /* Placeholder only; recalculated after the first resize. */
+  }
+}
+
+/* Release all section buffers, both label arrays and finally the state itself. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int secnum = 0;
+  while (secnum < D->maxsection) {
+    dasm_Section *sec = &D->sections[secnum++];
+    if (sec->buf != NULL)
+      DASM_M_FREE(Dst, sec->buf, sec->bsize);
+  }
+  if (D->pclabels != NULL) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels != NULL) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Register the caller's global label array. Has to run before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  size_t need = (10+maxgl)*sizeof(int);  /* Slots 0..9 precede the globals. */
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, need);
+}
+
+/* Make room for maxpc PC labels. May also be called after dasm_setup(). */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t oldsz = D->pcsize;
+  size_t need = maxpc*sizeof(int);
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, need);
+  /* Zero only the tail that was added by the grow. */
+  memset((char *)D->pclabels + oldsz, 0, D->pcsize - oldsz);
+}
+
+/*
+** Setup encoder: bind the action list, clear the status, select section 0,
+** zero all label slots and rewind every section to its start position.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  /* lglabels stays NULL if dasm_setupglobal() was never called; memset on a
+  ** null pointer is undefined even for size 0, so guard it like pclabels. */
+  if (D->lglabels) memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+/*
+** Encode n as a 12 bit immediate with optional 12 bit left shift.
+** Returns n itself if it fits in 12 bits, (n>>12)|0x1000 if only bits
+** 12..23 are set, and -1 otherwise.
+*/
+static int dasm_imm12(unsigned int n)
+{
+  unsigned int hi = n >> 12;
+  if (hi == 0) return n;
+  if ((n & 0xff000fff) != 0) return -1;
+  return hi | 0x1000;
+}
+
+/* Return the index of the highest set bit of x, or -1 if x is zero. */
+static int dasm_ffs(unsigned long long x)
+{
+  int idx;
+  for (idx = -1; x != 0; x >>= 1)
+    idx++;
+  return idx;
+}
+
+/*
+** Encode the 64 bit value hi:lo as a 13 bit immediate field.
+** The value must be a contiguous run of ones (possibly rotated/inverted)
+** replicated with an element width of 2, 4, 8, 16, 32 or 64 bits.
+** Returns the field value (pass 3 shifts it left by 10 and ORs it into the
+** instruction) or -1 if the value cannot be represented.
+** NOTE(review): presumably the A64 logical (bitmask) immediate N:immr:imms.
+*/
+static int dasm_imm13(int lo, int hi)
+{
+  int inv = 0, w = 64, s = 0xfff, xa, xb;
+  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+  unsigned long long m = 1ULL, a, b, c;
+  if (n & 1) { n = ~n; inv = 1; }  /* Normalize so that bit 0 is clear. */
+  /* a, b, c isolate the lowest set bit of three successive run boundaries. */
+  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
+  xa = dasm_ffs(a); xb = dasm_ffs(b);
+  if (c) {
+    w = dasm_ffs(c) - xa;  /* Element width = distance between two runs. */
+    /* m replicates one element across all 64 bits. */
+    if (w == 32) m = 0x0000000100000001ULL;
+    else if (w == 16) m = 0x0001000100010001ULL;
+    else if (w == 8) m = 0x0101010101010101ULL;
+    else if (w == 4) m = 0x1111111111111111ULL;
+    else if (w == 2) m = 0x5555555555555555ULL;
+    else return -1;
+    s = (-2*w & 0x3f) - 1;
+  } else if (!a) {
+    return -1;  /* All zeros or all ones. */
+  } else if (xb == -1) {
+    xb = 64;  /* The single run extends up to bit 63. */
+  }
+  if ((b-a) * m != n) return -1;  /* Not a pure replication of one run. */
+  if (inv)
+    return ((w - xb) << 6) | (s+w+xa-xb);
+  return ((w - xa) << 6) | (s+xb-xa);
+}
+
+/*
+** Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** 'start' is the offset into the action list; the varargs supply one int
+** for every action from DASM_REL_PC on (two for DASM_IMM13X).
+** With DASM_CHECKS defined, a failed check stores an error in D->status
+** and returns immediately.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {  /* Reserved room used up: grow the section buffer. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;  /* Every record starts with its action list offset. */
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {  /* Plain instruction word. */
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;  /* Next word is a literal instruction. */
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if ((ins & 0x8000))
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMM6:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13W:
+	CK(dasm_imm13(n, n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13X: {
+	int m = va_arg(ap, int);  /* Second vararg: high 32 bits. */
+	CK(dasm_imm13(n, m) != -1, RANGE_I);
+	b[pos++] = n;
+	b[pos++] = m;
+	break;
+	}
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+	int scale = (p[-2] >> 30);  /* Top two bits of the preceding word. */
+	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+	   (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+	}
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/*
+** Pass 2: Link sections, shrink aligns, fix label offsets.
+** Stores the total code size in *szp and returns DASM_S_OK. With
+** DASM_CHECKS defined, a pending error status or an undefined PC label is
+** returned instead (and *szp is set to 0).
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    /* Global label number N (>= 20) lives in lglabels[N-10], so the global
+    ** slots start at index 10. Starting at 20 skipped the first 10 globals. */
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	/* Drop the padding that turned out to be unnecessary. */
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	/* Turn the pass 1 estimate into the final offset. */
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+	case DASM_IMML: pos++; break;
+	case DASM_IMM13X: pos += 2; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/*
+** Pass 3: Encode sections.
+** Writes the machine code for all sections to 'buffer', which must hold at
+** least the size reported by dasm_link(). Returns DASM_S_OK, or
+** DASM_S_PHASE if the emitted size differs from the linked size. With
+** DASM_CHECKS defined, range/undefined-label errors are returned, too.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	/* Actions from DASM_ALIGN on have a pass 1 buffer entry. */
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  /* Pad with the A64 NOP. The previous filler 0xe1a00000 is the
+	  ** ARM32 'mov r0, r0' and is not a valid A64 instruction. */
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  /* cp already points past the instruction to patch, hence the +4. */
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+	patchrel:
+	  if (!(ins & 0xf800)) {  /* B, BL */
+	    CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x03ffffff);
+	  } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+	    CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0);
+	  } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+	    CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+	  } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  /* Label numbers >= 20 are globals: publish their address. */
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM6:
+	  cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+	  break;
+	case DASM_IMM13W:
+	  cp[-1] |= (dasm_imm13(n, n) << 10);
+	  break;
+	case DASM_IMM13X:
+	  cp[-1] |= (dasm_imm13(n, *b++) << 10);  /* Second entry: high word. */
+	  break;
+	case DASM_IMML: {
+	  int scale = (p[-2] >> 30);
+	  /* Prefer the scaled unsigned 12 bit form, else unscaled 9 bit. */
+	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+	  break;
+	  }
+	default: *cp++ = ins; break;  /* Plain instruction word. */
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/*
+** Get PC label offset.
+** Returns the stored offset of a defined label, -1 for a label that is
+** referenced but undefined and -2 for an unused or out of range label.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  int pos;
+  if (pc*sizeof(int) >= D->pcsize)
+    return -2;  /* Out of range. */
+  pos = D->pclabels[pc];
+  if (pos == 0)
+    return -2;  /* Unused. */
+  return pos < 0 ? *DASM_POS2PTR(D, -pos) : -1;
+}
+
+#ifdef DASM_CHECKS
+/*
+** Optional sanity checker to call between isolated encoding steps.
+** Flags unresolved local labels 1..9 (and resets the resolved ones), then
+** verifies that the active section is 'secmatch' unless that is negative.
+** Returns the resulting status code.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  int lbl;
+  if (D->status != DASM_S_OK)
+    return D->status;
+  for (lbl = 1; lbl < 10; lbl++) {
+    if (D->lglabels[lbl] > 0) {
+      D->status = DASM_S_UNDEF_LG|lbl;
+      return D->status;
+    }
+    D->lglabels[lbl] = 0;
+  }
+  if (secmatch >= 0 && D->section != D->sections + secmatch)
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1166 - 0
luajit.mod/luajit/dynasm/dasm_arm64.lua

@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information. The arch name matches DASM_ARCH in dasm_arm64.h.
+local _info = {
+  arch =	"arm64",
+  description =	"DynASM ARM64 module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Print every action name with its code in hex and decimal.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for _, action in ipairs(action_names) do
+    local code = map_action[action]
+    local line = format("  %-10s %02X  %d\n", action, code, code)
+    out:write(line)
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+-- An empty list is written as a single STOP action, so the array is never
+-- zero-sized.
+local function writeactions(out, name)
+  local nn = #actlist
+  -- The placeholder must go into slot 1 (the list is 1-based); slot 0 was
+  -- never read and left actlist[nn] nil for the final tohex() call.
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP * 0x10000 end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Append a raw 32 bit word (unsigned integer) to the action list.
+local function wputxw(n)
+  local valid = n >= 0 and n <= 0xffffffff and n % 1 == 0
+  assert(valid, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Append an action word (action code in the upper half, val in the lower).
+-- 'a' is an optional C argument for dasm_put(); 'num' overrides the number
+-- of buffer positions consumed (default 1 if an argument is given).
+local function waction(action, val, a, num)
+  local code = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(code * 0x10000 + (val or 0))
+  if a then
+    actargs[#actargs+1] = a
+    secpos = secpos + (num or 1)
+  elseif num then
+    secpos = secpos + num
+  end
+end
+
+-- Emit the pending dasm_put() call (before intervening C code or when the
+-- buffer positions run out). Pass term = true to skip the STOP action.
+local function wflush(term)
+  if actargs[1] == #actlist then return end -- Nothing added since last flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  local args = concat(actargs, ", ")
+  wline(format("dasm_put(Dst, %s);", args), true)
+  actargs = { #actlist } -- Next dasm_put() starts at the current list end.
+  secpos = 1 -- The action list offset itself takes one buffer position.
+end
+
+-- Append a word, preceded by an ESC action if it is small enough to be
+-- mistaken for an action word.
+local function wputw(n)
+  local needs_esc = n <= 0x000fffff
+  if needs_esc then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve the next action list slot with a placeholder and return its index.
+local function wpos()
+  local slot = #actlist + 1
+  actlist[slot] = ""
+  return slot
+end
+
+-- Fill a slot reserved by wpos(). Small words are stored after an ESC
+-- action, which shifts all later list entries up by one.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n > 0x000fffff then
+    actlist[pos] = n
+    return
+  end
+  insert(actlist, pos+1, n)
+  actlist[pos] = map_action.ESC * 0x10000
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use (via __index).
+local next_global = 20 -- Numbers below 20 are not handed out to globals.
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n -- Cache it, so __index only fires once per name.
+  return n
+end})
+
+-- Print the names of all global labels in order of their numbers.
+local function dumpglobals(out, lvl)
+  local names = {}
+  for label, num in pairs(map_global) do names[num] = label end
+  out:write("Global labels:\n")
+  for num = 20, next_global-1 do
+    local line = format("  %s\n", names[num])
+    out:write(line)
+  end
+  out:write("\n")
+end
+
+-- Write a C enum with one prefixed entry per global label plus a final _MAX.
+local function writeglobals(out, prefix)
+  local names = {}
+  for label, num in pairs(map_global) do names[num] = label end
+  out:write("enum {\n")
+  for num = 20, next_global-1 do
+    out:write("  ", prefix, names[num], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write a null-terminated C array of the global label names.
+local function writeglobalnames(out, name)
+  local names = {}
+  for label, num in pairs(map_global) do names[num] = label end
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 20, next_global-1 do
+    out:write("  \"", names[num], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use (via __index).
+local next_extern = 0
+local map_extern_ = {} -- Reverse map: extern label number -> name.
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n -- Cache it, so __index only fires once per name.
+  map_extern_[n] = name
+  return n
+end})
+
+-- Print the names of all extern labels in order of their numbers.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for num = 0, next_extern-1 do
+    local line = format("  %s\n", map_extern_[num])
+    out:write(line)
+  end
+  out:write("\n")
+end
+
+-- Write a null-terminated C array of the extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 0, next_extern-1 do
+    local label = map_extern_[num]
+    out:write("  \"", label, "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Map an internal register name back to its external name, if it has one.
+function _M.revdef(s)
+  local ext = map_reg_rev[s]
+  if ext then return ext end
+  return s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type -- Register class letter seen first; checked against later registers.
+
+local function parse_reg(expr)
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") -- "type:reg" override form.
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then -- x31/w31 need the "@" marker.
+      if not parse_reg_type then
+	parse_reg_type = rt
+      elseif parse_reg_type ~= rt then
+	werror("register size mismatch")
+      end
+      return r, tp -- Register number and type info (nil for plain registers).
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a base register ("sp" or an x register) and return it shifted into
+-- bit position 5, plus the type info from parse_reg(). Resets the register
+-- class tracking afterwards.
+local function parse_reg_base(expr)
+  if expr == "sp" then return 0x3e0 end
+  local regnum, tp = parse_reg(expr)
+  if parse_reg_type ~= "x" then
+    werror("bad register type")
+  end
+  parse_reg_type = false
+  return shl(regnum, 5), tp
+end
+
+local parse_ctx = {}
+
+-- Compile a chunk with parse_ctx as its environment. Uses setfenv where
+-- available and the four-argument load() otherwise.
+local loadenv
+if setfenv then
+  loadenv = function(s)
+    local chunk = loadstring(s, "")
+    if chunk then setfenv(chunk, parse_ctx) end
+    return chunk
+  end
+else
+  loadenv = function(s)
+    return load(s, "", nil, parse_ctx)
+  end
+end
+
+-- Convert n to a number. Falls back to evaluating it as an expression,
+-- since some basic ops are aliases that need simple arithmetic.
+-- Returns nil if neither works.
+local function parse_number(n)
+  local direct = tonumber(n)
+  if direct then return direct end
+  local chunk = loadenv("return "..n)
+  if not chunk then return nil end
+  local ok, value = pcall(chunk)
+  if ok then return value end
+  return nil
+end
+
+local function parse_imm(imm, bits, shift, scale, signed) -- Returns the encoded field, or 0 after emitting an IMM action.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then -- n must be a multiple of 2^scale.
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end -- Negative: wrap into the bits-wide field.
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) -- Not a constant: defer to the C encoder.
+    return 0
+  end
+end
+
+-- Parse a 12 bit immediate with optional 12 bit shift. Returns the encoded
+-- field, or 0 after emitting an IMM12 action for a non-constant operand.
+local function parse_imm12(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if not n then
+    waction("IMM12", 0, imm)
+    return 0
+  end
+  if shr(n, 12) == 0 then return shl(n, 10) end
+  if band(n, 0xff000fff) == 0 then return shr(n, 2) + 0x00400000 end
+  werror("out of range immediate `"..imm.."'")
+end
+
+local function parse_imm13(imm) -- Returns the encoded field, or 0 after emitting an IMM13W/IMM13X action.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  local r64 = parse_reg_type == "x"
+  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+    local inv = false
+    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end -- Normalize so that bit 0 is clear.
+    local t = {}
+    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end -- Bit string, least significant bit first.
+    local b = table.concat(t)
+    b = b..(r64 and (inv and "1" or "0"):rep(32) or b) -- Extend to 64 bits.
+    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+    if p0 then
+      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a -- Element width.
+      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then -- Power of two and purely periodic.
+	local s = band(-2*w, 0x3f) - 1
+	if w == 64 then s = s + 0x1000 end
+	if inv then
+	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+	else
+	  return shl(w-#p0, 16) + shl(s+#p1, 10)
+	end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif r64 then
+    waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) -- Low word first ...
+    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) -- ... then the high word.
+    return 0
+  else
+    waction("IMM13W", 0, imm)
+    return 0
+  end
+end
+
+-- Encode a 6 bit immediate operand (0..63) of the form `#expr'.
+-- The low five bits go to bit position 19, the sixth bit to bit 31.
+-- A non-constant expression emits an IMM6 action and yields 0.
+local function parse_imm6(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if not n then
+    waction("IMM6", 0, imm)
+    return 0
+  end
+  if n >= 0 and n <= 63 then
+    local top = n >= 32 and 0x80000000 or 0
+    return shl(band(n, 0x1f), 19) + top
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Encode the immediate offset of a load/store. `imm' is the expression
+-- without the leading `#'. The scaled unsigned form is preferred, with
+-- the unscaled signed form as a fallback. A non-constant expression emits
+-- an IMML action and yields 0.
+local function parse_imm_load(imm, scale)
+  local n = parse_number(imm)
+  if not n then
+    waction("IMML", 0, imm)
+    return 0
+  end
+  local scaled = sar(n, scale)
+  local aligned = shl(scaled, scale) == n
+  if aligned and scaled >= 0 and scaled < 0x1000 then
+    return shl(scaled, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+  end
+  if n >= -256 and n < 256 then
+    return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Encode a constant floating point immediate operand of the form `#expr'.
+-- The value is only accepted if it decomposes into a sign, a mantissa
+-- with four fractional bits and an exponent fitting into three signed bits.
+-- Non-constant expressions are not implemented and raise an error.
+local function parse_fpimm(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m, e = math.frexp(n)
+    local s, e2 = 0, band(e-2, 7) -- Exponent field: low three bits of e-2.
+    if m < 0 then m = -m; s = 0x00100000 end -- Strip sign, remember sign bit.
+    m = m*32-16 -- Must turn out to be an integer in the range 0..15.
+    -- The last condition sign-extends the 3 bit field to check that
+    -- e-2 was not truncated by the mask above.
+    if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
+      return s + shl(e2, 17) + shl(m, 13)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    werror("NYI fpimm action")
+  end
+end
+
+-- Encode a shift operand: a shift name from map_shift followed by a
+-- 6 bit immediate shift amount.
+local function parse_shift(expr)
+  local name, amount = match(expr, "^(%S+)%s*(.*)$")
+  local kind = map_shift[name]
+  if not kind then werror("expected shift operand") end
+  return parse_imm(amount, 6, 10, 0, false) + shl(kind, 22)
+end
+
+-- Encode an `lsl #n' operand where n is a multiple of 16. Only the bits
+-- for 16 and 32 may be set with an x register, only the bit for 16 otherwise.
+local function parse_lslx16(expr)
+  local digits = match(expr, "^lsl%s*#(%d+)$")
+  local amount = tonumber(digits)
+  if not amount then werror("expected shift operand") end
+  local badbits = parse_reg_type == "x" and 0xffffffcf or 0xffffffef
+  if band(amount, badbits) ~= 0 then
+    werror("bad shift amount")
+  end
+  return shl(amount, 17)
+end
+
+-- Encode an extend operand: an extend name from map_extend (or `lsl',
+-- which maps to 3 for x registers and 2 otherwise), optionally followed
+-- by a 3 bit immediate amount.
+local function parse_extend(expr)
+  local name, amount = match(expr, "^(%S+)%s*(.*)$")
+  local ext
+  if name == "lsl" then
+    ext = parse_reg_type == "x" and 3 or 2
+  else
+    ext = map_extend[name]
+  end
+  if not ext then werror("expected extend operand") end
+  local field = 0
+  if amount ~= "" then field = parse_imm(amount, 3, 10, 0, false) end
+  return field + shl(ext, 13)
+end
+
+-- Encode a condition operand. `inv' is xor'ed into the condition code
+-- before it is moved to bit position 12.
+local function parse_cond(expr, inv)
+  local code = map_cond[expr]
+  if not code then werror("expected condition operand") end
+  local flipped = bit.bxor(code, inv)
+  return shl(flipped, 12)
+end
+
+-- Parse the address operand(s) of a single register load/store and
+-- return the opcode with the addressing mode merged in.
+-- params[n] is the address, params[n+1] an optional post-index immediate.
+-- Accepted forms, as handled below:
+--   type-mapped operand (register/type name followed by a field expression)
+--   [base], #imm          post-index
+--   [base, #imm]!         pre-index
+--   [base]                no offset
+--   [base, #imm]          immediate offset
+--   [base, reg {, extend/shift {#amount}}]   register offset
+local function parse_load(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    -- Not a bracketed address: try a type-mapped operand.
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  -- The offset comes from the type's field expression at runtime.
+	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  return op + base
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  local scale = shr(op, 30) -- Top two opcode bits give the offset scale.
+  if p2 then
+    -- Post-index.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+  elseif wb == "!" then
+    -- Pre-index: needs both a base and an immediate inside the brackets.
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if not p1a then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+  else
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_reg_base(p1a)
+    if p2a ~= "" then
+      local imm = match(p2a, "^,%s*#(.*)$")
+      if imm then
+	-- Immediate offset.
+	op = op + parse_imm_load(imm, scale)
+      else
+	-- Register offset: index register, optional extend/shift and amount.
+	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+	  werror("bad index register type")
+	end
+	if p3b == "" then
+	  -- No extend/shift given: only allowed for an x index register.
+	  if parse_reg_type ~= "x" then werror("bad index register type") end
+	  op = op + 0x6000
+	else
+	  -- The amount must be absent, #0 or equal to the access scale.
+	  if p3s == "" or p3s == "#0" then
+	  elseif p3s == "#"..scale then
+	    op = op + 0x1000
+	  else
+	    werror("bad scale")
+	  end
+	  if parse_reg_type == "x" then
+	    if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+	    elseif p3b == "sxtx" then op = op + 0xe000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  else
+	    if p3b == "uxtw" then op = op + 0x4000
+	    elseif p3b == "sxtw" then op = op + 0xc000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  end
+	end
+      end
+    else
+      -- No offset. NOTE(review): the '!' check below looks unreachable,
+      -- since wb == "!" is already handled by the elseif branch above.
+      if wb == "!" then werror("bad use of '!'") end
+      op = op + 0x01000000
+    end
+  end
+  return op
+end
+
+-- Parse the address operand(s) of a load/store pair and return the
+-- opcode with the addressing mode merged in.
+-- params[n] is the address, params[n+1] an optional post-index immediate.
+-- The offset is a signed 7 bit immediate, scaled by 2^scale.
+local function parse_load_pair(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  -- Scale is 2 if the top two opcode bits are zero, 3 otherwise.
+  local scale = shr(op, 30) == 0 and 2 or 3
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    -- Not a bracketed address: try a type-mapped operand.
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  -- Same field layout as parse_imm(x, 7, 15, scale, true) would emit.
+	  waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+	  return op + base + 0x01000000
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  if p2 then
+    -- Post-index.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + 0x00800000
+  else
+    -- Offset or pre-index form. A missing offset defaults to #0.
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
+    op = op + (wb == "!" and 0x01800000 or 0x01000000)
+  end
+  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+-- Classify a label operand. Returns the action mode suffix ("PC", "LG" or
+-- "EXT"), a number and, for pc labels only, the pc expression.
+-- `def' selects label definition syntax instead of reference syntax.
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  if prefix == "=>" then
+    -- =>label (pc label reference)
+    return "PC", 0, sub(label, 3)
+  elseif prefix == "->" then
+    -- ->name (global label reference)
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then
+      -- Fwd: 1-9, Bkwd: 11-19.
+      local base = 10
+      if dir == ">" then base = 0 end
+      return "LG", lnum + base
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+-- Classify an opcode which takes a label operand. Returns a flag value
+-- that the caller adds to the label number of the REL_* action.
+-- Asserts on opcodes that match none of the known patterns.
+local function branch_type(op)
+  if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
+  elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+	 band(op, 0x3b000000) == 0x18000000 then
+    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
+  elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
+  elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
+  -- NOTE(review): the constant is passed through band() too, presumably to
+  -- normalize it to the same number range band() returns -- confirm.
+  elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
+  else
+    assert(false, "unknown branch type")
+  end
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+-- Create an opcode handler for an alias of map_op[opname].
+-- The handler first lets `f' rewrite the operands in place and then
+-- assembles them with the template of the aliased opcode. Called without
+-- operands it returns a description pointing to the aliased opcode.
+local function op_alias(opname, f)
+  local function handler(params, nparams)
+    if params then
+      f(params, nparams)
+      op_template(params, map_op[opname], nparams)
+      return
+    end
+    return "-> "..opname:sub(1, -3)
+  end
+  return handler
+end
+
+-- Operand rewrite for the sbfx/bfxil/ubfx aliases: replace the fourth
+-- operand by the expression `third + fourth - 1'.
+local function alias_bfx(p)
+  local first, count = p[3]:sub(2), p[4]:sub(2)
+  p[4] = "#("..first..")+("..count..")-1"
+end
+
+-- Operand rewrite for the sbfiz/bfi/ubfiz aliases: the third operand
+-- becomes its negation modulo the register size (32 for w registers,
+-- 64 otherwise), the fourth operand is decremented by one.
+local function alias_bfiz(p)
+  parse_reg(p[1])
+  local regbits = parse_reg_type == "w" and "32" or "64"
+  p[3] = "#-("..p[3]:sub(2)..")%"..regbits
+  p[4] = "#("..p[4]:sub(2)..")-1"
+end
+
+-- Alias handler for lsl with an immediate shift, assembled via ubfm.
+-- The shift amount is turned into the two ubfm immediates, which depend
+-- on the register size (32 for w registers, 64 otherwise).
+local alias_lslimm = op_alias("ubfm_4", function(p)
+  parse_reg(p[1])
+  local sh = p[3]:sub(2)
+  local is32 = parse_reg_type == "w"
+  p[3] = "#-("..sh..")%"..(is32 and "32" or "64")
+  p[4] = "#"..(is32 and "31" or "63").."-("..sh..")"
+end)
+
+-- Template strings for ARM64 instructions.
+--
+-- Each template starts with the base opcode as 8 hex digits, followed by
+-- operand letters. Alternatives are separated by `|' and tried in order.
+-- An entry may also be a handler function instead of a template string.
+-- NOTE(review): the key suffix (_2, _3, ...) presumably is the operand
+-- count, with `_*' for a variable number of operands -- confirm in the
+-- DynASM core.
+--
+-- Operand letters, as interpreted by parse_template():
+--   D N M A  Register, merged at bit position 0, 5, 16 or 10.
+--   m        Previous operand again as a register at bit 16 (no new operand).
+--   p        The next operand may be `sp' (rewritten to `@x31').
+--   g        Preceding registers must be x or w; x sets bit 31.
+--   f        Preceding registers must be d or s; d sets 0x00400000.
+--   x w d s  Preceding registers must have exactly this type.
+--   L P      Load/store address, load/store pair address.
+--   B        Label.
+--   I i      12 bit immediate, 13 bit pattern immediate.
+--   W        16 bit immediate at bit 5.
+--   T        6 bit immediate via parse_imm6().
+--   1 2 5 V  Immediate with 6 bits at bit 16, 6 bits at bit 10,
+--            5 bits at bit 16 or 4 bits at bit 0.
+--   F Z      Floating point immediate, literal #0 or #0.0.
+--   S X R    Shift, extend, `lsl #n' with n a multiple of 16.
+--   C c      Condition, inverted condition.
+map_op = {
+  -- Basic data processing instructions.
+  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+  neg_2  = "4b0003e0DMg",
+  neg_3  = "4b0003e0DMSg",
+  negs_2 = "6b0003e0DMg",
+  negs_3 = "6b0003e0DMSg",
+
+  adc_3  = "1a000000DNMg",
+  adcs_3 = "3a000000DNMg",
+  sbc_3  = "5a000000DNMg",
+  sbcs_3 = "7a000000DNMg",
+  ngc_2  = "5a0003e0DMg",
+  ngcs_2 = "7a0003e0DMg",
+
+  and_3  = "0a000000DNMg|12000000pDNig",
+  and_4  = "0a000000DNMSg",
+  orr_3  = "2a000000DNMg|32000000pDNig",
+  orr_4  = "2a000000DNMSg",
+  eor_3  = "4a000000DNMg|52000000pDNig",
+  eor_4  = "4a000000DNMSg",
+  ands_3 = "6a000000DNMg|72000000DNig",
+  ands_4 = "6a000000DNMSg",
+  tst_2  = "6a00001fNMg|7200001fNig",
+  tst_3  = "6a00001fNMSg",
+
+  bic_3  = "0a200000DNMg",
+  bic_4  = "0a200000DNMSg",
+  orn_3  = "2a200000DNMg",
+  orn_4  = "2a200000DNMSg",
+  eon_3  = "4a200000DNMg",
+  eon_4  = "4a200000DNMSg",
+  bics_3 = "6a200000DNMg",
+  bics_4 = "6a200000DNMSg",
+
+  movn_2 = "12800000DWg",
+  movn_3 = "12800000DWRg",
+  movz_2 = "52800000DWg",
+  movz_3 = "52800000DWRg",
+  movk_2 = "72800000DWg",
+  movk_3 = "72800000DWRg",
+
+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+  mov_3  = "2a0003e0DMSg",
+  mvn_2  = "2a2003e0DMg",
+  mvn_3  = "2a2003e0DMSg",
+
+  adr_2  = "10000000DBx",
+  adrp_2 = "90000000DBx",
+
+  csel_4  = "1a800000DNMCg",
+  csinc_4 = "1a800400DNMCg",
+  csinv_4 = "5a800000DNMCg",
+  csneg_4 = "5a800400DNMCg",
+  cset_2  = "1a9f07e0Dcg",
+  csetm_2 = "5a9f03e0Dcg",
+  cinc_3  = "1a800400DNmcg",
+  cinv_3  = "5a800000DNmcg",
+  cneg_3  = "5a800400DNmcg",
+
+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+  madd_4 = "1b000000DNMAg",
+  msub_4 = "1b008000DNMAg",
+  mul_3  = "1b007c00DNMg",
+  mneg_3 = "1b00fc00DNMg",
+
+  smaddl_4 = "9b200000DxNMwAx",
+  smsubl_4 = "9b208000DxNMwAx",
+  smull_3  = "9b207c00DxNMw",
+  smnegl_3 = "9b20fc00DxNMw",
+  smulh_3  = "9b407c00DNMx",
+  umaddl_4 = "9ba00000DxNMwAx",
+  umsubl_4 = "9ba08000DxNMwAx",
+  umull_3  = "9ba07c00DxNMw",
+  umnegl_3 = "9ba0fc00DxNMw",
+  umulh_3  = "9bc07c00DNMx",
+
+  udiv_3 = "1ac00800DNMg",
+  sdiv_3 = "1ac00c00DNMg",
+
+  -- Bit operations.
+  sbfm_4 = "13000000DN12w|93400000DN12x",
+  bfm_4  = "33000000DN12w|b3400000DN12x",
+  ubfm_4 = "53000000DN12w|d3400000DN12x",
+  extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+  sxtb_2 = "13001c00DNw|93401c00DNx",
+  sxth_2 = "13003c00DNw|93403c00DNx",
+  sxtw_2 = "93407c00DxNw",
+  uxtb_2 = "53001c00DNw",
+  uxth_2 = "53003c00DNw",
+
+  sbfx_4  = op_alias("sbfm_4", alias_bfx),
+  bfxil_4 = op_alias("bfm_4", alias_bfx),
+  ubfx_4  = op_alias("ubfm_4", alias_bfx),
+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+  bfi_4   = op_alias("bfm_4", alias_bfiz),
+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+  -- lsl with an immediate (third operand starts with `#', byte 35) is an
+  -- alias for ubfm. The register form uses a plain template.
+  lsl_3  = function(params, nparams)
+    if params and params[3]:byte() == 35 then
+      return alias_lslimm(params, nparams)
+    else
+      return op_template(params, "1ac02000DNMg", nparams)
+    end
+  end,
+  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+  clz_2   = "5ac01000DNg",
+  cls_2   = "5ac01400DNg",
+  rbit_2  = "5ac00000DNg",
+  rev_2   = "5ac00800DNw|dac00c00DNx",
+  rev16_2 = "5ac00400DNg",
+  rev32_2 = "dac00800DNx",
+
+  -- Loads and stores.
+  ["strb_*"]  = "38000000DwL",
+  ["ldrb_*"]  = "38400000DwL",
+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+  ["strh_*"]  = "78000000DwL",
+  ["ldrh_*"]  = "78400000DwL",
+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+  -- NOTE: ldur etc. are handled by ldr et al.
+
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["ldpsw_*"] = "68400000DAxP",
+
+  -- Branches.
+  b_1    = "14000000B",
+  bl_1   = "94000000B",
+  blr_1  = "d63f0000Nx",
+  br_1   = "d61f0000Nx",
+  ret_0  = "d65f03c0",
+  ret_1  = "d65f0000Nx",
+  -- b.cond is added below.
+  cbz_2  = "34000000DBg",
+  cbnz_2 = "35000000DBg",
+  tbz_3  = "36000000DTBw|36000000DTBx",
+  tbnz_3 = "37000000DTBw|37000000DTBx",
+
+  -- Miscellaneous instructions.
+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+  -- TODO: sys, sysl, ic, dc, at, tlbi
+  -- TODO: hint, yield, wfe, wfi, sev, sevl
+  -- TODO: clrex, dsb, dmb, isb
+  nop_0  = "d503201f",
+  brk_0  = "d4200000",
+  brk_1  = "d4200000W",
+
+  -- Floating point instructions.
+  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+  fabs_2  = "1e20c000DNf",
+  fneg_2  = "1e214000DNf",
+  fsqrt_2 = "1e21c000DNf",
+
+  fcvt_2  = "1e22c000DdNs|1e624000DsNd",
+
+  -- TODO: half-precision and fixed-point conversions.
+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+  frintn_2 = "1e244000DNf",
+  frintp_2 = "1e24c000DNf",
+  frintm_2 = "1e254000DNf",
+  frintz_2 = "1e25c000DNf",
+  frinta_2 = "1e264000DNf",
+  frintx_2 = "1e274000DNf",
+  frinti_2 = "1e27c000DNf",
+
+  fadd_3   = "1e202800DNMf",
+  fsub_3   = "1e203800DNMf",
+  fmul_3   = "1e200800DNMf",
+  fnmul_3  = "1e208800DNMf",
+  fdiv_3   = "1e201800DNMf",
+
+  fmadd_4  = "1f000000DNMAf",
+  fmsub_4  = "1f008000DNMAf",
+  fnmadd_4 = "1f200000DNMAf",
+  fnmsub_4 = "1f208000DNMAf",
+
+  fmax_3   = "1e204800DNMf",
+  fmaxnm_3 = "1e206800DNMf",
+  fmin_3   = "1e205800DNMf",
+  fminnm_3 = "1e207800DNMf",
+
+  fcmp_2   = "1e202000NMf|1e202008NZf",
+  fcmpe_2  = "1e202010NMf|1e202018NZf",
+
+  fccmp_4  = "1e200400NMVCf",
+  fccmpe_4 = "1e200410NMVCf",
+
+  fcsel_4  = "1e200c00DNMCf",
+
+  -- TODO: crc32*, aes*, sha*, pmull
+  -- TODO: SIMD instructions.
+}
+
+-- Add the conditional branches: one `b<cond>' opcode with a single label
+-- operand per entry of map_cond, with the condition code merged into the
+-- base opcode 0x54000000.
+for cond,c in pairs(map_cond) do
+  map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+-- Assembles one template alternative: the first 8 characters are the base
+-- opcode in hex, each following character describes an operand or a check.
+-- Operands are taken from `params' in order, the finished opcode is stored
+-- at section buffer position `pos'. Any mismatch raises an error via
+-- werror(), which op_template() catches to try the next alternative.
+local function parse_template(params, template, nparams, pos)
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n = 1 -- Index of the next operand to consume.
+  local rtt = {}
+
+  parse_reg_type = false
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    -- Register operands, merged at different bit positions.
+    if p == "D" then
+      op = op + parse_reg(q); n = n + 1
+    elseif p == "N" then
+      op = op + shl(parse_reg(q), 5); n = n + 1
+    elseif p == "M" then
+      op = op + shl(parse_reg(q), 16); n = n + 1
+    elseif p == "A" then
+      op = op + shl(parse_reg(q), 10); n = n + 1
+    elseif p == "m" then
+      -- Re-use the previous operand, does not consume an operand.
+      op = op + shl(parse_reg(params[n-1]), 16)
+
+    -- Modifiers and register type checks, none of them consume an operand.
+    elseif p == "p" then
+      -- Allow `sp' for the next operand. Note: rewrites params in place.
+      if q == "sp" then params[n] = "@x31" end
+    elseif p == "g" then
+      if parse_reg_type == "x" then
+	op = op + 0x80000000
+      elseif parse_reg_type ~= "w" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "f" then
+      if parse_reg_type == "d" then
+	op = op + 0x00400000
+      elseif parse_reg_type ~= "s" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "x" or p == "w" or p == "d" or p == "s" then
+      if parse_reg_type ~= p then
+	werror("register size mismatch")
+      end
+      parse_reg_type = false
+
+    -- Load/store addresses: these parse all remaining operands themselves.
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op)
+    elseif p == "P" then
+      op = parse_load_pair(params, nparams, n, op)
+
+    -- Label: emits a REL_* action, the branch type flag is added to the
+    -- label number.
+    elseif p == "B" then
+      local mode, v, s = parse_label(q, false); n = n + 1
+      local m = branch_type(op)
+      waction("REL_"..mode, v+m, s, 1)
+
+    -- Immediates.
+    elseif p == "I" then
+      op = op + parse_imm12(q); n = n + 1
+    elseif p == "i" then
+      op = op + parse_imm13(q); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm6(q); n = n + 1
+    elseif p == "1" then
+      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+    elseif p == "2" then
+      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+    elseif p == "5" then
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+    elseif p == "F" then
+      op = op + parse_fpimm(q); n = n + 1
+    elseif p == "Z" then
+      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+      n = n + 1
+
+    -- Shifts, extends and conditions.
+    elseif p == "S" then
+      op = op + parse_shift(q); n = n + 1
+    elseif p == "X" then
+      op = op + parse_extend(q); n = n + 1
+    elseif p == "R" then
+      op = op + parse_lslx16(q); n = n + 1
+    elseif p == "C" then
+      op = op + parse_cond(q, 0); n = n + 1
+    elseif p == "c" then
+      op = op + parse_cond(q, 1); n = n + 1
+
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+-- Assemble an instruction from a template string with `|' separated
+-- alternatives. The alternatives are tried in order until one of them
+-- parses without error. If all of them fail, the error of the last
+-- alternative is re-raised.
+-- Called without operands it returns the template with all 8 digit hex
+-- opcodes stripped (plus the gsub substitution count).
+function op_template(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  -- Remember the current state, so a failed alternative can be rolled back.
+  local lpos, apos, spos = #actlist, #actargs, secpos
+
+  local ok, err
+  for t in gmatch(template, "[^|]+") do
+    ok, err = pcall(parse_template, params, t, nparams, pos)
+    if ok then return end
+    -- Undo up to three actions and arguments added by the failed attempt.
+    secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
+    actargs[apos+1] = nil
+    actargs[apos+2] = nil
+    actargs[apos+3] = nil
+  end
+  error(err, 0)
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode marking the place where the action list gets emitted.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeactions(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode marking the place where the global enum gets emitted.
+map_op[".globals_1"] = function(params)
+  if params then
+    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobals(out, enumprefix) end)
+    return
+  end
+  return "prefix"
+end
+
+-- Pseudo-opcode marking the place where the global names get emitted.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobalnames(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode marking the place where the extern names get emitted.
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeexternnames(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for label definitions (the trailing colon form is
+-- converted to this). Extern labels cannot be defined.
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local kind, num, pcexpr = parse_label(params[1], true)
+  if kind == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..kind, num, pcexpr, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for data storage: writes each numeric operand as one word.
+-- Negative values have 2^32 added before they are written.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,word in ipairs(params) do
+    local val = tonumber(word)
+    if not val then werror("bad immediate `"..word.."'") end
+    if val < 0 then val = val + 2^32 end
+    wputw(val)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode. The operand must be a power of two in the
+-- range 2 to 256, which is checked by halving it at most eight times.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  local rest, tries = align, 8
+  while rest and tries > 0 do
+    rest = rest / 2
+    if rest == 1 then
+      waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+      return
+    end
+    tries = tries - 1
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to define a (primitive) type which maps to a C type,
+-- optionally bound to a register. Also registers `#name' as a define for
+-- the size of the C type and emits a DtN() shortcut macro.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  local entry = { ctype = ctype, reg = reg }
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Write all type definitions to `out', sorted by name. Each line shows
+-- the type name, its C type and the bound register (if any).
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do names[#names+1] = name end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i=1,#names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+-- Emits a SECTION action with the section number `num' and then flushes
+-- the pending actions.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+-- Writes a header line with arch name, version and release date taken
+-- from _info to `out', followed by the action dump.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+-- Writes types, globals and externs (in this order) to `out'. `lvl' is
+-- passed through unchanged to the individual dump functions.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+-- Stores the four callbacks handed in (used in this module as wline,
+-- werror, wfatal and wwarn) and hands back this module's wflush function.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+-- Only remembers the architecture and the options in g_arch and g_opt.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+-- Lookups missing from map_op fall back to map_coreop, lookups missing
+-- from map_def fall back to map_archdef. Returns the opcode map and the
+-- definition map.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 8 - 4
luajit.mod/luajit/dynasm/dasm_mips.h

@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
   DASM__MAX
   DASM__MAX
 };
 };
 
 
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
 	*pl = -pos;  /* Label exists now. */
 	*pl = -pos;  /* Label exists now. */
 	b[pos++] = ofs;  /* Store pass1 offset estimate. */
 	b[pos++] = ofs;  /* Store pass1 offset estimate. */
 	break;
 	break;
-      case DASM_IMM:
+      case DASM_IMM: case DASM_IMMS:
 #ifdef DASM_CHECKS
 #ifdef DASM_CHECKS
 	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
 	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
 #endif
 #endif
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMS: pos++; break;
 	}
 	}
       }
       }
       stop: (void)0;
       stop: (void)0;
@@ -350,13 +350,14 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	  break;
 	case DASM_REL_LG:
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n);
 	  n = *DASM_POS2PTR(D, n);
 	  if (ins & 2048)
 	  if (ins & 2048)
 	    n = n - (int)((char *)cp - base);
 	    n = n - (int)((char *)cp - base);
 	  else
 	  else
-	    n = (n + (int)base) & 0x0fffffff;
+	    n = (n + (int)(size_t)base) & 0x0fffffff;
 	patchrel:
 	patchrel:
 	  CK((n & 3) == 0 &&
 	  CK((n & 3) == 0 &&
 	     ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
 	     ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
@@ -367,6 +368,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
 	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
 	  break;
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_LABEL_PC: break;
+	case DASM_IMMS:
+	  cp[-1] |= ((n>>3) & 4); n &= 0x1f;
+	  /* fallthrough */
 	case DASM_IMM:
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
 	  break;

+ 70 - 15
luajit.mod/luajit/dynasm/dasm_mips.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
--- DynASM MIPS module.
+-- DynASM MIPS32/MIPS64 module.
 --
 --
 -- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local mips64 = mips64
+
 -- Module information:
 -- Module information:
 local _info = {
 local _info = {
-  arch =	"mips",
-  description =	"DynASM MIPS module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2012-01-23",
+  arch =	mips64 and "mips64" or "mips",
+  description =	"DynASM MIPS32/MIPS64 module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2016-05-24",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local match, gmatch = _s.match, _s.gmatch
 local match, gmatch = _s.match, _s.gmatch
 local concat, sort = table.concat, table.sort
 local concat, sort = table.concat, table.sort
 local bit = bit or require("bit")
 local bit = bit or require("bit")
-local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
 
 
 -- Inherited tables and callbacks.
 -- Inherited tables and callbacks.
 local g_opt, g_arch
 local g_opt, g_arch
@@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMS",
 }
 }
 
 
 -- Maximum number of section buffer positions for dasm_put().
 -- Maximum number of section buffer positions for dasm_put().
@@ -251,6 +254,10 @@ local map_op = {
   bnel_3 =	"54000000STB",
   bnel_3 =	"54000000STB",
   blezl_2 =	"58000000SB",
   blezl_2 =	"58000000SB",
   bgtzl_2 =	"5c000000SB",
   bgtzl_2 =	"5c000000SB",
+  daddi_3 =	mips64 and "60000000TSI",
+  daddiu_3 =	mips64 and "64000000TSI",
+  ldl_2 =	mips64 and "68000000TO",
+  ldr_2 =	mips64 and "6c000000TO",
   lb_2 =	"80000000TO",
   lb_2 =	"80000000TO",
   lh_2 =	"84000000TO",
   lh_2 =	"84000000TO",
   lwl_2 =	"88000000TO",
   lwl_2 =	"88000000TO",
@@ -258,23 +265,30 @@ local map_op = {
   lbu_2 =	"90000000TO",
   lbu_2 =	"90000000TO",
   lhu_2 =	"94000000TO",
   lhu_2 =	"94000000TO",
   lwr_2 =	"98000000TO",
   lwr_2 =	"98000000TO",
+  lwu_2 =	mips64 and "9c000000TO",
   sb_2 =	"a0000000TO",
   sb_2 =	"a0000000TO",
   sh_2 =	"a4000000TO",
   sh_2 =	"a4000000TO",
   swl_2 =	"a8000000TO",
   swl_2 =	"a8000000TO",
   sw_2 =	"ac000000TO",
   sw_2 =	"ac000000TO",
+  sdl_2 =	mips64 and "b0000000TO",
+  sdr_2 =	mips64 and "b1000000TO",
   swr_2 =	"b8000000TO",
   swr_2 =	"b8000000TO",
   cache_2 =	"bc000000NO",
   cache_2 =	"bc000000NO",
   ll_2 =	"c0000000TO",
   ll_2 =	"c0000000TO",
   lwc1_2 =	"c4000000HO",
   lwc1_2 =	"c4000000HO",
   pref_2 =	"cc000000NO",
   pref_2 =	"cc000000NO",
   ldc1_2 =	"d4000000HO",
   ldc1_2 =	"d4000000HO",
+  ld_2 =	mips64 and "dc000000TO",
   sc_2 =	"e0000000TO",
   sc_2 =	"e0000000TO",
   swc1_2 =	"e4000000HO",
   swc1_2 =	"e4000000HO",
+  scd_2 =	mips64 and "f0000000TO",
   sdc1_2 =	"f4000000HO",
   sdc1_2 =	"f4000000HO",
+  sd_2 =	mips64 and "fc000000TO",
 
 
   -- Opcode SPECIAL.
   -- Opcode SPECIAL.
   nop_0 =	"00000000",
   nop_0 =	"00000000",
   sll_3 =	"00000000DTA",
   sll_3 =	"00000000DTA",
+  sextw_2 =	"00000000DT",
   movf_2 =	"00000001DS",
   movf_2 =	"00000001DS",
   movf_3 =	"00000001DSC",
   movf_3 =	"00000001DSC",
   movt_2 =	"00010001DS",
   movt_2 =	"00010001DS",
@@ -285,6 +299,7 @@ local map_op = {
   sllv_3 =	"00000004DTS",
   sllv_3 =	"00000004DTS",
   srlv_3 =	"00000006DTS",
   srlv_3 =	"00000006DTS",
   rotrv_3 =	"00000046DTS",
   rotrv_3 =	"00000046DTS",
+  drotrv_3 =	mips64 and "00000056DTS",
   srav_3 =	"00000007DTS",
   srav_3 =	"00000007DTS",
   jr_1 =	"00000008S",
   jr_1 =	"00000008S",
   jalr_1 =	"0000f809S",
   jalr_1 =	"0000f809S",
@@ -300,15 +315,22 @@ local map_op = {
   mthi_1 =	"00000011S",
   mthi_1 =	"00000011S",
   mflo_1 =	"00000012D",
   mflo_1 =	"00000012D",
   mtlo_1 =	"00000013S",
   mtlo_1 =	"00000013S",
+  dsllv_3 =	mips64 and "00000014DTS",
+  dsrlv_3 =	mips64 and "00000016DTS",
+  dsrav_3 =	mips64 and "00000017DTS",
   mult_2 =	"00000018ST",
   mult_2 =	"00000018ST",
   multu_2 =	"00000019ST",
   multu_2 =	"00000019ST",
   div_2 =	"0000001aST",
   div_2 =	"0000001aST",
   divu_2 =	"0000001bST",
   divu_2 =	"0000001bST",
+  dmult_2 =	mips64 and "0000001cST",
+  dmultu_2 =	mips64 and "0000001dST",
+  ddiv_2 =	mips64 and "0000001eST",
+  ddivu_2 =	mips64 and "0000001fST",
   add_3 =	"00000020DST",
   add_3 =	"00000020DST",
-  move_2 =	"00000021DS",
+  move_2 =	mips64 and "00000025DS" or "00000021DS",
   addu_3 =	"00000021DST",
   addu_3 =	"00000021DST",
   sub_3 =	"00000022DST",
   sub_3 =	"00000022DST",
-  negu_2 =	"00000023DT",
+  negu_2 =	mips64 and "0000002fDT" or "00000023DT",
   subu_3 =	"00000023DST",
   subu_3 =	"00000023DST",
   and_3 =	"00000024DST",
   and_3 =	"00000024DST",
   or_3 =	"00000025DST",
   or_3 =	"00000025DST",
@@ -317,6 +339,10 @@ local map_op = {
   nor_3 =	"00000027DST",
   nor_3 =	"00000027DST",
   slt_3 =	"0000002aDST",
   slt_3 =	"0000002aDST",
   sltu_3 =	"0000002bDST",
   sltu_3 =	"0000002bDST",
+  dadd_3 =	mips64 and "0000002cDST",
+  daddu_3 =	mips64 and "0000002dDST",
+  dsub_3 =	mips64 and "0000002eDST",
+  dsubu_3 =	mips64 and "0000002fDST",
   tge_2 =	"00000030ST",
   tge_2 =	"00000030ST",
   tge_3 =	"00000030STZ",
   tge_3 =	"00000030STZ",
   tgeu_2 =	"00000031ST",
   tgeu_2 =	"00000031ST",
@@ -329,6 +355,14 @@ local map_op = {
   teq_3 =	"00000034STZ",
   teq_3 =	"00000034STZ",
   tne_2 =	"00000036ST",
   tne_2 =	"00000036ST",
   tne_3 =	"00000036STZ",
   tne_3 =	"00000036STZ",
+  dsll_3 =	mips64 and "00000038DTa",
+  dsrl_3 =	mips64 and "0000003aDTa",
+  drotr_3 =	mips64 and "0020003aDTa",
+  dsra_3 =	mips64 and "0000003bDTa",
+  dsll32_3 =	mips64 and "0000003cDTA",
+  dsrl32_3 =	mips64 and "0000003eDTA",
+  drotr32_3 =	mips64 and "0020003eDTA",
+  dsra32_3 =	mips64 and "0000003fDTA",
 
 
   -- Opcode REGIMM.
   -- Opcode REGIMM.
   bltz_2 =	"04000000SB",
   bltz_2 =	"04000000SB",
@@ -356,13 +390,24 @@ local map_op = {
   msubu_2 =	"70000005ST",
   msubu_2 =	"70000005ST",
   clz_2 =	"70000020DS=",
   clz_2 =	"70000020DS=",
   clo_2 =	"70000021DS=",
   clo_2 =	"70000021DS=",
+  dclz_2 =	mips64 and "70000024DS=",
+  dclo_2 =	mips64 and "70000025DS=",
   sdbbp_0 =	"7000003f",
   sdbbp_0 =	"7000003f",
   sdbbp_1 =	"7000003fY",
   sdbbp_1 =	"7000003fY",
 
 
   -- Opcode SPECIAL3.
   -- Opcode SPECIAL3.
   ext_4 =	"7c000000TSAM", -- Note: last arg is msbd = size-1
   ext_4 =	"7c000000TSAM", -- Note: last arg is msbd = size-1
+  dextm_4 =	mips64 and "7c000001TSAM", -- Args: pos    | size-1-32
+  dextu_4 =	mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
+  dext_4 =	mips64 and "7c000003TSAM", -- Args: pos    | size-1
+  zextw_2 =	mips64 and "7c00f803TS",
   ins_4 =	"7c000004TSAM", -- Note: last arg is msb = pos+size-1
   ins_4 =	"7c000004TSAM", -- Note: last arg is msb = pos+size-1
+  dinsm_4 =	mips64 and "7c000005TSAM", -- Args: pos    | pos+size-33
+  dinsu_4 =	mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
+  dins_4 =	mips64 and "7c000007TSAM", -- Args: pos    | pos+size-1
   wsbh_2 =	"7c0000a0DT",
   wsbh_2 =	"7c0000a0DT",
+  dsbh_2 =	mips64 and "7c0000a4DT",
+  dshd_2 =	mips64 and "7c000164DT",
   seb_2 =	"7c000420DT",
   seb_2 =	"7c000420DT",
   seh_2 =	"7c000620DT",
   seh_2 =	"7c000620DT",
   rdhwr_2 =	"7c00003bTD",
   rdhwr_2 =	"7c00003bTD",
@@ -370,8 +415,12 @@ local map_op = {
   -- Opcode COP0.
   -- Opcode COP0.
   mfc0_2 =	"40000000TD",
   mfc0_2 =	"40000000TD",
   mfc0_3 =	"40000000TDW",
   mfc0_3 =	"40000000TDW",
+  dmfc0_2 =	mips64 and "40200000TD",
+  dmfc0_3 =	mips64 and "40200000TDW",
   mtc0_2 =	"40800000TD",
   mtc0_2 =	"40800000TD",
   mtc0_3 =	"40800000TDW",
   mtc0_3 =	"40800000TDW",
+  dmtc0_2 =	mips64 and "40a00000TD",
+  dmtc0_3 =	mips64 and "40a00000TDW",
   rdpgpr_2 =	"41400000DT",
   rdpgpr_2 =	"41400000DT",
   di_0 =	"41606000",
   di_0 =	"41606000",
   di_1 =	"41606000T",
   di_1 =	"41606000T",
@@ -388,9 +437,11 @@ local map_op = {
 
 
   -- Opcode COP1.
   -- Opcode COP1.
   mfc1_2 =	"44000000TG",
   mfc1_2 =	"44000000TG",
+  dmfc1_2 =	mips64 and "44200000TG",
   cfc1_2 =	"44400000TG",
   cfc1_2 =	"44400000TG",
   mfhc1_2 =	"44600000TG",
   mfhc1_2 =	"44600000TG",
   mtc1_2 =	"44800000TG",
   mtc1_2 =	"44800000TG",
+  dmtc1_2 =	mips64 and "44a00000TG",
   ctc1_2 =	"44c00000TG",
   ctc1_2 =	"44c00000TG",
   mthc1_2 =	"44e00000TG",
   mthc1_2 =	"44e00000TG",
 
 
@@ -633,7 +684,7 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
   werror("bad register name `"..expr.."'")
 end
 end
 
 
-local function parse_imm(imm, bits, shift, scale, signed)
+local function parse_imm(imm, bits, shift, scale, signed, action)
   local n = tonumber(imm)
   local n = tonumber(imm)
   if n then
   if n then
     local m = sar(n, scale)
     local m = sar(n, scale)
@@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
 	 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
-    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    waction(action or "IMM",
+	    (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
     return 0
     return 0
   end
   end
 end
 end
@@ -757,12 +809,15 @@ map_op[".template__"] = function(params, template, nparams)
     elseif p == "X" then
     elseif p == "X" then
       op = op + parse_index(params[n]); n = n + 1
       op = op + parse_index(params[n]); n = n + 1
     elseif p == "B" or p == "J" then
     elseif p == "B" or p == "J" then
-      local mode, n, s = parse_label(params[n], false)
-      if p == "B" then n = n + 2048 end
-      waction("REL_"..mode, n, s, 1)
+      local mode, m, s = parse_label(params[n], false)
+      if p == "B" then m = m + 2048 end
+      waction("REL_"..mode, m, s, 1)
       n = n + 1
       n = n + 1
     elseif p == "A" then
     elseif p == "A" then
       op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
       op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+    elseif p == "a" then
+      local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
+      op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
     elseif p == "M" then
     elseif p == "M" then
       op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
       op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
     elseif p == "N" then
     elseif p == "N" then

+ 12 - 0
luajit.mod/luajit/dynasm/dasm_mips64.lua

@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM MIPS64 module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+mips64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_mips")

+ 11 - 3
luajit.mod/luajit/dynasm/dasm_ppc.h

@@ -1,5 +1,5 @@
 /*
 /*
-** DynASM PPC encoding engine.
+** DynASM PPC/PPC64 encoding engine.
 ** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
   DASM__MAX
   DASM__MAX
 };
 };
 
 
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
 #endif
 #endif
 	b[pos++] = n;
 	b[pos++] = n;
 	break;
 	break;
+      case DASM_IMMSH:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
       }
       }
     }
     }
   }
   }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMSH: pos++; break;
 	}
 	}
       }
       }
       stop: (void)0;
       stop: (void)0;
@@ -350,6 +354,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	  break;
 	case DASM_REL_LG:
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
@@ -366,6 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM:
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
 	  break;
+	case DASM_IMMSH:
+	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+	  break;
 	default: *cp++ = ins; break;
 	default: *cp++ = ins; break;
 	}
 	}
       }
       }

+ 689 - 19
luajit.mod/luajit/dynasm/dasm_ppc.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
 --
 --
 -- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Module information:
 -- Module information:
 local _info = {
 local _info = {
   arch =	"ppc",
   arch =	"ppc",
   description =	"DynASM PPC module",
   description =	"DynASM PPC module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
 }
 }
 
 
 -- Maximum number of section buffer positions for dasm_put().
 -- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
 -- Template strings for PPC instructions.
 -- Template strings for PPC instructions.
-local map_op = {
+map_op = {
   tdi_3 =	"08000000ARI",
   tdi_3 =	"08000000ARI",
   twi_3 =	"0c000000ARI",
   twi_3 =	"0c000000ARI",
   mulli_3 =	"1c000000RRI",
   mulli_3 =	"1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
   std_2 =	"f8000000RD",
   std_2 =	"f8000000RD",
   stdu_2 =	"f8000001RD",
   stdu_2 =	"f8000001RD",
 
 
+  subi_3 =	op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+  subis_3 =	op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+  subic_3 =	op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+  ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+  rotlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  rotrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+  end),
+  rotlw_3 =	op_alias("rlwnm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  slwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[4] = "0"
+  end),
+  srwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+  end),
+  clrlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "0"; p[5] = "31"
+  end),
+  clrrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+  end),
+
+  -- Primary opcode 4:
+  mulhhwu_3 =		"10000010RRR.",
+  machhwu_3 =		"10000018RRR.",
+  mulhhw_3 =		"10000050RRR.",
+  nmachhw_3 =		"1000005cRRR.",
+  machhwsu_3 =		"10000098RRR.",
+  machhws_3 =		"100000d8RRR.",
+  nmachhws_3 =		"100000dcRRR.",
+  mulchwu_3 =		"10000110RRR.",
+  macchwu_3 =		"10000118RRR.",
+  mulchw_3 =		"10000150RRR.",
+  macchw_3 =		"10000158RRR.",
+  nmacchw_3 =		"1000015cRRR.",
+  macchwsu_3 =		"10000198RRR.",
+  macchws_3 =		"100001d8RRR.",
+  nmacchws_3 =		"100001dcRRR.",
+  mullhw_3 =		"10000350RRR.",
+  maclhw_3 =		"10000358RRR.",
+  nmaclhw_3 =		"1000035cRRR.",
+  maclhwsu_3 =		"10000398RRR.",
+  maclhws_3 =		"100003d8RRR.",
+  nmaclhws_3 =		"100003dcRRR.",
+  machhwuo_3 =		"10000418RRR.",
+  nmachhwo_3 =		"1000045cRRR.",
+  machhwsuo_3 =		"10000498RRR.",
+  machhwso_3 =		"100004d8RRR.",
+  nmachhwso_3 =		"100004dcRRR.",
+  macchwuo_3 =		"10000518RRR.",
+  macchwo_3 =		"10000558RRR.",
+  nmacchwo_3 =		"1000055cRRR.",
+  macchwsuo_3 =		"10000598RRR.",
+  macchwso_3 =		"100005d8RRR.",
+  nmacchwso_3 =		"100005dcRRR.",
+  maclhwo_3 =		"10000758RRR.",
+  nmaclhwo_3 =		"1000075cRRR.",
+  maclhwsuo_3 =		"10000798RRR.",
+  maclhwso_3 =		"100007d8RRR.",
+  nmaclhwso_3 =		"100007dcRRR.",
+
+  vaddubm_3 =		"10000000VVV",
+  vmaxub_3 =		"10000002VVV",
+  vrlb_3 =		"10000004VVV",
+  vcmpequb_3 =		"10000006VVV",
+  vmuloub_3 =		"10000008VVV",
+  vaddfp_3 =		"1000000aVVV",
+  vmrghb_3 =		"1000000cVVV",
+  vpkuhum_3 =		"1000000eVVV",
+  vmhaddshs_4 =		"10000020VVVV",
+  vmhraddshs_4 =	"10000021VVVV",
+  vmladduhm_4 =		"10000022VVVV",
+  vmsumubm_4 =		"10000024VVVV",
+  vmsummbm_4 =		"10000025VVVV",
+  vmsumuhm_4 =		"10000026VVVV",
+  vmsumuhs_4 =		"10000027VVVV",
+  vmsumshm_4 =		"10000028VVVV",
+  vmsumshs_4 =		"10000029VVVV",
+  vsel_4 =		"1000002aVVVV",
+  vperm_4 =		"1000002bVVVV",
+  vsldoi_4 =		"1000002cVVVP",
+  vpermxor_4 =		"1000002dVVVV",
+  vmaddfp_4 =		"1000002eVVVV~",
+  vnmsubfp_4 =		"1000002fVVVV~",
+  vaddeuqm_4 =		"1000003cVVVV",
+  vaddecuq_4 =		"1000003dVVVV",
+  vsubeuqm_4 =		"1000003eVVVV",
+  vsubecuq_4 =		"1000003fVVVV",
+  vadduhm_3 =		"10000040VVV",
+  vmaxuh_3 =		"10000042VVV",
+  vrlh_3 =		"10000044VVV",
+  vcmpequh_3 =		"10000046VVV",
+  vmulouh_3 =		"10000048VVV",
+  vsubfp_3 =		"1000004aVVV",
+  vmrghh_3 =		"1000004cVVV",
+  vpkuwum_3 =		"1000004eVVV",
+  vadduwm_3 =		"10000080VVV",
+  vmaxuw_3 =		"10000082VVV",
+  vrlw_3 =		"10000084VVV",
+  vcmpequw_3 =		"10000086VVV",
+  vmulouw_3 =		"10000088VVV",
+  vmuluwm_3 =		"10000089VVV",
+  vmrghw_3 =		"1000008cVVV",
+  vpkuhus_3 =		"1000008eVVV",
+  vaddudm_3 =		"100000c0VVV",
+  vmaxud_3 =		"100000c2VVV",
+  vrld_3 =		"100000c4VVV",
+  vcmpeqfp_3 =		"100000c6VVV",
+  vcmpequd_3 =		"100000c7VVV",
+  vpkuwus_3 =		"100000ceVVV",
+  vadduqm_3 =		"10000100VVV",
+  vmaxsb_3 =		"10000102VVV",
+  vslb_3 =		"10000104VVV",
+  vmulosb_3 =		"10000108VVV",
+  vrefp_2 =		"1000010aV-V",
+  vmrglb_3 =		"1000010cVVV",
+  vpkshus_3 =		"1000010eVVV",
+  vaddcuq_3 =		"10000140VVV",
+  vmaxsh_3 =		"10000142VVV",
+  vslh_3 =		"10000144VVV",
+  vmulosh_3 =		"10000148VVV",
+  vrsqrtefp_2 =		"1000014aV-V",
+  vmrglh_3 =		"1000014cVVV",
+  vpkswus_3 =		"1000014eVVV",
+  vaddcuw_3 =		"10000180VVV",
+  vmaxsw_3 =		"10000182VVV",
+  vslw_3 =		"10000184VVV",
+  vmulosw_3 =		"10000188VVV",
+  vexptefp_2 =		"1000018aV-V",
+  vmrglw_3 =		"1000018cVVV",
+  vpkshss_3 =		"1000018eVVV",
+  vmaxsd_3 =		"100001c2VVV",
+  vsl_3 =		"100001c4VVV",
+  vcmpgefp_3 =		"100001c6VVV",
+  vlogefp_2 =		"100001caV-V",
+  vpkswss_3 =		"100001ceVVV",
+  vadduhs_3 =		"10000240VVV",
+  vminuh_3 =		"10000242VVV",
+  vsrh_3 =		"10000244VVV",
+  vcmpgtuh_3 =		"10000246VVV",
+  vmuleuh_3 =		"10000248VVV",
+  vrfiz_2 =		"1000024aV-V",
+  vsplth_3 =		"1000024cVV3",
+  vupkhsh_2 =		"1000024eV-V",
+  vminuw_3 =		"10000282VVV",
+  vminud_3 =		"100002c2VVV",
+  vcmpgtud_3 =		"100002c7VVV",
+  vrfim_2 =		"100002caV-V",
+  vcmpgtsb_3 =		"10000306VVV",
+  vcfux_3 =		"1000030aVVA~",
+  vaddshs_3 =		"10000340VVV",
+  vminsh_3 =		"10000342VVV",
+  vsrah_3 =		"10000344VVV",
+  vcmpgtsh_3 =		"10000346VVV",
+  vmulesh_3 =		"10000348VVV",
+  vcfsx_3 =		"1000034aVVA~",
+  vspltish_2 =		"1000034cVS",
+  vupkhpx_2 =		"1000034eV-V",
+  vaddsws_3 =		"10000380VVV",
+  vminsw_3 =		"10000382VVV",
+  vsraw_3 =		"10000384VVV",
+  vcmpgtsw_3 =		"10000386VVV",
+  vmulesw_3 =		"10000388VVV",
+  vctuxs_3 =		"1000038aVVA~",
+  vspltisw_2 =		"1000038cVS",
+  vminsd_3 =		"100003c2VVV",
+  vsrad_3 =		"100003c4VVV",
+  vcmpbfp_3 =		"100003c6VVV",
+  vcmpgtsd_3 =		"100003c7VVV",
+  vctsxs_3 =		"100003caVVA~",
+  vupklpx_2 =		"100003ceV-V",
+  vsububm_3 =		"10000400VVV",
+  ["bcdadd._4"] =	"10000401VVVy.",
+  vavgub_3 =		"10000402VVV",
+  vand_3 =		"10000404VVV",
+  ["vcmpequb._3"] =	"10000406VVV",
+  vmaxfp_3 =		"1000040aVVV",
+  vsubuhm_3 =		"10000440VVV",
+  ["bcdsub._4"] =	"10000441VVVy.",
+  vavguh_3 =		"10000442VVV",
+  vandc_3 =		"10000444VVV",
+  ["vcmpequh._3"] =	"10000446VVV",
+  vminfp_3 =		"1000044aVVV",
+  vpkudum_3 =		"1000044eVVV",
+  vsubuwm_3 =		"10000480VVV",
+  vavguw_3 =		"10000482VVV",
+  vor_3 =		"10000484VVV",
+  ["vcmpequw._3"] =	"10000486VVV",
+  vpmsumw_3 =		"10000488VVV",
+  ["vcmpeqfp._3"] =	"100004c6VVV",
+  ["vcmpequd._3"] =	"100004c7VVV",
+  vpkudus_3 =		"100004ceVVV",
+  vavgsb_3 =		"10000502VVV",
+  vavgsh_3 =		"10000542VVV",
+  vorc_3 =		"10000544VVV",
+  vbpermq_3 =		"1000054cVVV",
+  vpksdus_3 =		"1000054eVVV",
+  vavgsw_3 =		"10000582VVV",
+  vsld_3 =		"100005c4VVV",
+  ["vcmpgefp._3"] =	"100005c6VVV",
+  vpksdss_3 =		"100005ceVVV",
+  vsububs_3 =		"10000600VVV",
+  mfvscr_1 =		"10000604V--",
+  vsum4ubs_3 =		"10000608VVV",
+  vsubuhs_3 =		"10000640VVV",
+  mtvscr_1 =		"10000644--V",
+  ["vcmpgtuh._3"] =	"10000646VVV",
+  vsum4shs_3 =		"10000648VVV",
+  vupkhsw_2 =		"1000064eV-V",
+  vsubuws_3 =		"10000680VVV",
+  vshasigmaw_4 =	"10000682VVYp",
+  veqv_3 =		"10000684VVV",
+  vsum2sws_3 =		"10000688VVV",
+  vmrgow_3 =		"1000068cVVV",
+  vshasigmad_4 =	"100006c2VVYp",
+  vsrd_3 =		"100006c4VVV",
+  ["vcmpgtud._3"] =	"100006c7VVV",
+  vupklsw_2 =		"100006ceV-V",
+  vupkslw_2 =		"100006ceV-V",
+  vsubsbs_3 =		"10000700VVV",
+  vclzb_2 =		"10000702V-V",
+  vpopcntb_2 =		"10000703V-V",
+  ["vcmpgtsb._3"] =	"10000706VVV",
+  vsum4sbs_3 =		"10000708VVV",
+  vsubshs_3 =		"10000740VVV",
+  vclzh_2 =		"10000742V-V",
+  vpopcnth_2 =		"10000743V-V",
+  ["vcmpgtsh._3"] =	"10000746VVV",
+  vsubsws_3 =		"10000780VVV",
+  vclzw_2 =		"10000782V-V",
+  vpopcntw_2 =		"10000783V-V",
+  ["vcmpgtsw._3"] =	"10000786VVV",
+  vsumsws_3 =		"10000788VVV",
+  vmrgew_3 =		"1000078cVVV",
+  vclzd_2 =		"100007c2V-V",
+  vpopcntd_2 =		"100007c3V-V",
+  ["vcmpbfp._3"] =	"100007c6VVV",
+  ["vcmpgtsd._3"] =	"100007c7VVV",
+
   -- Primary opcode 19:
   -- Primary opcode 19:
   mcrf_2 =	"4c000000XX",
   mcrf_2 =	"4c000000XX",
   isync_0 =	"4c00012c",
   isync_0 =	"4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
   bclrl_2 =	"4c000021AA",
   bclrl_2 =	"4c000021AA",
   bcctr_2 =	"4c000420AA",
   bcctr_2 =	"4c000420AA",
   bcctrl_2 =	"4c000421AA",
   bcctrl_2 =	"4c000421AA",
+  bctar_2 =	"4c000460AA",
+  bctarl_2 =	"4c000461AA",
   blr_0 =	"4e800020",
   blr_0 =	"4e800020",
   blrl_0 =	"4e800021",
   blrl_0 =	"4e800021",
   bctr_0 =	"4e800420",
   bctr_0 =	"4e800420",
@@ -327,6 +585,7 @@ local map_op = {
   cmpd_3 =	"7c200000XRR",
   cmpd_3 =	"7c200000XRR",
   cmpd_2 =	"7c200000-RR",
   cmpd_2 =	"7c200000-RR",
   tw_3 =	"7c000008ARR",
   tw_3 =	"7c000008ARR",
+  lvsl_3 =	"7c00000cVRR",
   subfc_3 =	"7c000010RRR.",
   subfc_3 =	"7c000010RRR.",
   subc_3 =	"7c000010RRR~.",
   subc_3 =	"7c000010RRR~.",
   mulhdu_3 =	"7c000012RRR.",
   mulhdu_3 =	"7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
   cmplw_2 =	"7c000040-RR",
   cmplw_2 =	"7c000040-RR",
   cmpld_3 =	"7c200040XRR",
   cmpld_3 =	"7c200040XRR",
   cmpld_2 =	"7c200040-RR",
   cmpld_2 =	"7c200040-RR",
+  lvsr_3 =	"7c00004cVRR",
   subf_3 =	"7c000050RRR.",
   subf_3 =	"7c000050RRR.",
   sub_3 =	"7c000050RRR~.",
   sub_3 =	"7c000050RRR~.",
+  lbarx_3 =	"7c000068RR0R",
   ldux_3 =	"7c00006aRR0R",
   ldux_3 =	"7c00006aRR0R",
   dcbst_2 =	"7c00006c-RR",
   dcbst_2 =	"7c00006c-RR",
   lwzux_3 =	"7c00006eRR0R",
   lwzux_3 =	"7c00006eRR0R",
   cntlzd_2 =	"7c000074RR~",
   cntlzd_2 =	"7c000074RR~",
   andc_3 =	"7c000078RR~R.",
   andc_3 =	"7c000078RR~R.",
   td_3 =	"7c000088ARR",
   td_3 =	"7c000088ARR",
+  lvewx_3 =	"7c00008eVRR",
   mulhd_3 =	"7c000092RRR.",
   mulhd_3 =	"7c000092RRR.",
+  addg6s_3 =	"7c000094RRR",
   mulhw_3 =	"7c000096RRR.",
   mulhw_3 =	"7c000096RRR.",
+  dlmzb_3 =	"7c00009cRR~R.",
   ldarx_3 =	"7c0000a8RR0R",
   ldarx_3 =	"7c0000a8RR0R",
   dcbf_2 =	"7c0000ac-RR",
   dcbf_2 =	"7c0000ac-RR",
   lbzx_3 =	"7c0000aeRR0R",
   lbzx_3 =	"7c0000aeRR0R",
+  lvx_3 =	"7c0000ceVRR",
   neg_2 =	"7c0000d0RR.",
   neg_2 =	"7c0000d0RR.",
+  lharx_3 =	"7c0000e8RR0R",
   lbzux_3 =	"7c0000eeRR0R",
   lbzux_3 =	"7c0000eeRR0R",
   popcntb_2 =	"7c0000f4RR~",
   popcntb_2 =	"7c0000f4RR~",
   not_2 =	"7c0000f8RR~%.",
   not_2 =	"7c0000f8RR~%.",
   nor_3 =	"7c0000f8RR~R.",
   nor_3 =	"7c0000f8RR~R.",
+  stvebx_3 =	"7c00010eVRR",
   subfe_3 =	"7c000110RRR.",
   subfe_3 =	"7c000110RRR.",
   sube_3 =	"7c000110RRR~.",
   sube_3 =	"7c000110RRR~.",
   adde_3 =	"7c000114RRR.",
   adde_3 =	"7c000114RRR.",
   stdx_3 =	"7c00012aRR0R",
   stdx_3 =	"7c00012aRR0R",
-  stwcx_3 =	"7c00012cRR0R.",
+  ["stwcx._3"] =	"7c00012dRR0R.",
   stwx_3 =	"7c00012eRR0R",
   stwx_3 =	"7c00012eRR0R",
   prtyw_2 =	"7c000134RR~",
   prtyw_2 =	"7c000134RR~",
+  stvehx_3 =	"7c00014eVRR",
   stdux_3 =	"7c00016aRR0R",
   stdux_3 =	"7c00016aRR0R",
+  ["stqcx._3"] =	"7c00016dR:R0R.",
   stwux_3 =	"7c00016eRR0R",
   stwux_3 =	"7c00016eRR0R",
   prtyd_2 =	"7c000174RR~",
   prtyd_2 =	"7c000174RR~",
+  stvewx_3 =	"7c00018eVRR",
   subfze_2 =	"7c000190RR.",
   subfze_2 =	"7c000190RR.",
   addze_2 =	"7c000194RR.",
   addze_2 =	"7c000194RR.",
-  stdcx_3 =	"7c0001acRR0R.",
+  ["stdcx._3"] =	"7c0001adRR0R.",
   stbx_3 =	"7c0001aeRR0R",
   stbx_3 =	"7c0001aeRR0R",
+  stvx_3 =	"7c0001ceVRR",
   subfme_2 =	"7c0001d0RR.",
   subfme_2 =	"7c0001d0RR.",
   mulld_3 =	"7c0001d2RRR.",
   mulld_3 =	"7c0001d2RRR.",
   addme_2 =	"7c0001d4RR.",
   addme_2 =	"7c0001d4RR.",
   mullw_3 =	"7c0001d6RRR.",
   mullw_3 =	"7c0001d6RRR.",
   dcbtst_2 =	"7c0001ec-RR",
   dcbtst_2 =	"7c0001ec-RR",
   stbux_3 =	"7c0001eeRR0R",
   stbux_3 =	"7c0001eeRR0R",
+  bpermd_3 =	"7c0001f8RR~R",
+  lvepxl_3 =	"7c00020eVRR",
   add_3 =	"7c000214RRR.",
   add_3 =	"7c000214RRR.",
+  lqarx_3 =	"7c000228R:R0R",
   dcbt_2 =	"7c00022c-RR",
   dcbt_2 =	"7c00022c-RR",
   lhzx_3 =	"7c00022eRR0R",
   lhzx_3 =	"7c00022eRR0R",
+  cdtbcd_2 =	"7c000234RR~",
   eqv_3 =	"7c000238RR~R.",
   eqv_3 =	"7c000238RR~R.",
+  lvepx_3 =	"7c00024eVRR",
   eciwx_3 =	"7c00026cRR0R",
   eciwx_3 =	"7c00026cRR0R",
   lhzux_3 =	"7c00026eRR0R",
   lhzux_3 =	"7c00026eRR0R",
+  cbcdtd_2 =	"7c000274RR~",
   xor_3 =	"7c000278RR~R.",
   xor_3 =	"7c000278RR~R.",
   mfspefscr_1 =	"7c0082a6R",
   mfspefscr_1 =	"7c0082a6R",
   mfxer_1 =	"7c0102a6R",
   mfxer_1 =	"7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
   lhax_3 =	"7c0002aeRR0R",
   lhax_3 =	"7c0002aeRR0R",
   mftb_1 =	"7c0c42e6R",
   mftb_1 =	"7c0c42e6R",
   mftbu_1 =	"7c0d42e6R",
   mftbu_1 =	"7c0d42e6R",
+  lvxl_3 =	"7c0002ceVRR",
   lwaux_3 =	"7c0002eaRR0R",
   lwaux_3 =	"7c0002eaRR0R",
   lhaux_3 =	"7c0002eeRR0R",
   lhaux_3 =	"7c0002eeRR0R",
+  popcntw_2 =	"7c0002f4RR~",
+  divdeu_3 =	"7c000312RRR.",
+  divweu_3 =	"7c000316RRR.",
   sthx_3 =	"7c00032eRR0R",
   sthx_3 =	"7c00032eRR0R",
   orc_3 =	"7c000338RR~R.",
   orc_3 =	"7c000338RR~R.",
   ecowx_3 =	"7c00036cRR0R",
   ecowx_3 =	"7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
   mtctr_1 =	"7c0903a6R",
   mtctr_1 =	"7c0903a6R",
   dcbi_2 =	"7c0003ac-RR",
   dcbi_2 =	"7c0003ac-RR",
   nand_3 =	"7c0003b8RR~R.",
   nand_3 =	"7c0003b8RR~R.",
+  dsn_2 =	"7c0003c6-RR",
+  stvxl_3 =	"7c0003ceVRR",
   divd_3 =	"7c0003d2RRR.",
   divd_3 =	"7c0003d2RRR.",
   divw_3 =	"7c0003d6RRR.",
   divw_3 =	"7c0003d6RRR.",
+  popcntd_2 =	"7c0003f4RR~",
   cmpb_3 =	"7c0003f8RR~R.",
   cmpb_3 =	"7c0003f8RR~R.",
   mcrxr_1 =	"7c000400X",
   mcrxr_1 =	"7c000400X",
+  lbdx_3 =	"7c000406RRR",
   subfco_3 =	"7c000410RRR.",
   subfco_3 =	"7c000410RRR.",
   subco_3 =	"7c000410RRR~.",
   subco_3 =	"7c000410RRR~.",
   addco_3 =	"7c000414RRR.",
   addco_3 =	"7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
   lfsx_3 =	"7c00042eFR0R",
   lfsx_3 =	"7c00042eFR0R",
   srw_3 =	"7c000430RR~R.",
   srw_3 =	"7c000430RR~R.",
   srd_3 =	"7c000436RR~R.",
   srd_3 =	"7c000436RR~R.",
+  lhdx_3 =	"7c000446RRR",
   subfo_3 =	"7c000450RRR.",
   subfo_3 =	"7c000450RRR.",
   subo_3 =	"7c000450RRR~.",
   subo_3 =	"7c000450RRR~.",
   lfsux_3 =	"7c00046eFR0R",
   lfsux_3 =	"7c00046eFR0R",
+  lwdx_3 =	"7c000486RRR",
   lswi_3 =	"7c0004aaRR0A",
   lswi_3 =	"7c0004aaRR0A",
   sync_0 =	"7c0004ac",
   sync_0 =	"7c0004ac",
   lwsync_0 =	"7c2004ac",
   lwsync_0 =	"7c2004ac",
   ptesync_0 =	"7c4004ac",
   ptesync_0 =	"7c4004ac",
   lfdx_3 =	"7c0004aeFR0R",
   lfdx_3 =	"7c0004aeFR0R",
+  lddx_3 =	"7c0004c6RRR",
   nego_2 =	"7c0004d0RR.",
   nego_2 =	"7c0004d0RR.",
   lfdux_3 =	"7c0004eeFR0R",
   lfdux_3 =	"7c0004eeFR0R",
+  stbdx_3 =	"7c000506RRR",
   subfeo_3 =	"7c000510RRR.",
   subfeo_3 =	"7c000510RRR.",
   subeo_3 =	"7c000510RRR~.",
   subeo_3 =	"7c000510RRR~.",
   addeo_3 =	"7c000514RRR.",
   addeo_3 =	"7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
   stswx_3 =	"7c00052aRR0R",
   stswx_3 =	"7c00052aRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stfsx_3 =	"7c00052eFR0R",
   stfsx_3 =	"7c00052eFR0R",
+  sthdx_3 =	"7c000546RRR",
+  ["stbcx._3"] =	"7c00056dRRR",
   stfsux_3 =	"7c00056eFR0R",
   stfsux_3 =	"7c00056eFR0R",
+  stwdx_3 =	"7c000586RRR",
   subfzeo_2 =	"7c000590RR.",
   subfzeo_2 =	"7c000590RR.",
   addzeo_2 =	"7c000594RR.",
   addzeo_2 =	"7c000594RR.",
   stswi_3 =	"7c0005aaRR0A",
   stswi_3 =	"7c0005aaRR0A",
+  ["sthcx._3"] =	"7c0005adRRR",
   stfdx_3 =	"7c0005aeFR0R",
   stfdx_3 =	"7c0005aeFR0R",
+  stddx_3 =	"7c0005c6RRR",
   subfmeo_2 =	"7c0005d0RR.",
   subfmeo_2 =	"7c0005d0RR.",
   mulldo_3 =	"7c0005d2RRR.",
   mulldo_3 =	"7c0005d2RRR.",
   addmeo_2 =	"7c0005d4RR.",
   addmeo_2 =	"7c0005d4RR.",
   mullwo_3 =	"7c0005d6RRR.",
   mullwo_3 =	"7c0005d6RRR.",
   dcba_2 =	"7c0005ec-RR",
   dcba_2 =	"7c0005ec-RR",
   stfdux_3 =	"7c0005eeFR0R",
   stfdux_3 =	"7c0005eeFR0R",
+  stvepxl_3 =	"7c00060eVRR",
   addo_3 =	"7c000614RRR.",
   addo_3 =	"7c000614RRR.",
   lhbrx_3 =	"7c00062cRR0R",
   lhbrx_3 =	"7c00062cRR0R",
+  lfdpx_3 =	"7c00062eF:RR",
   sraw_3 =	"7c000630RR~R.",
   sraw_3 =	"7c000630RR~R.",
   srad_3 =	"7c000634RR~R.",
   srad_3 =	"7c000634RR~R.",
+  lfddx_3 =	"7c000646FRR",
+  stvepx_3 =	"7c00064eVRR",
   srawi_3 =	"7c000670RR~A.",
   srawi_3 =	"7c000670RR~A.",
   sradi_3 =	"7c000674RR~H.",
   sradi_3 =	"7c000674RR~H.",
   eieio_0 =	"7c0006ac",
   eieio_0 =	"7c0006ac",
   lfiwax_3 =	"7c0006aeFR0R",
   lfiwax_3 =	"7c0006aeFR0R",
+  divdeuo_3 =	"7c000712RRR.",
+  divweuo_3 =	"7c000716RRR.",
   sthbrx_3 =	"7c00072cRR0R",
   sthbrx_3 =	"7c00072cRR0R",
+  stfdpx_3 =	"7c00072eF:RR",
   extsh_2 =	"7c000734RR~.",
   extsh_2 =	"7c000734RR~.",
+  stfddx_3 =	"7c000746FRR",
+  divdeo_3 =	"7c000752RRR.",
+  divweo_3 =	"7c000756RRR.",
   extsb_2 =	"7c000774RR~.",
   extsb_2 =	"7c000774RR~.",
   divduo_3 =	"7c000792RRR.",
   divduo_3 =	"7c000792RRR.",
   divwou_3 =	"7c000796RRR.",
   divwou_3 =	"7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
   divwo_3 =	"7c0007d6RRR.",
   divwo_3 =	"7c0007d6RRR.",
   dcbz_2 =	"7c0007ec-RR",
   dcbz_2 =	"7c0007ec-RR",
 
 
+  ["tbegin._1"] =	"7c00051d1",
+  ["tbegin._0"] =	"7c00051d",
+  ["tend._1"] =		"7c00055dY",
+  ["tend._0"] =		"7c00055d",
+  ["tendall._0"] =	"7e00055d",
+  tcheck_1 =		"7c00059cX",
+  ["tsr._1"] =		"7c0005dd1",
+  ["tsuspend._0"] =	"7c0005dd",
+  ["tresume._0"] =	"7c2005dd",
+  ["tabortwc._3"] =	"7c00061dARR",
+  ["tabortdc._3"] =	"7c00065dARR",
+  ["tabortwci._3"] =	"7c00069dARS",
+  ["tabortdci._3"] =	"7c0006ddARS",
+  ["tabort._1"] =	"7c00071d-R-",
+  ["treclaim._1"] =	"7c00075d-R",
+  ["trechkpt._0"] =	"7c0007dd",
+
+  lxsiwzx_3 =	"7c000018QRR",
+  lxsiwax_3 =	"7c000098QRR",
+  mfvsrd_2 =	"7c000066-Rq",
+  mfvsrwz_2 =	"7c0000e6-Rq",
+  stxsiwx_3 =	"7c000118QRR",
+  mtvsrd_2 =	"7c000166QR",
+  mtvsrwa_2 =	"7c0001a6QR",
+  lxvdsx_3 =	"7c000298QRR",
+  lxsspx_3 =	"7c000418QRR",
+  lxsdx_3 =	"7c000498QRR",
+  stxsspx_3 =	"7c000518QRR",
+  stxsdx_3 =	"7c000598QRR",
+  lxvw4x_3 =	"7c000618QRR",
+  lxvd2x_3 =	"7c000698QRR",
+  stxvw4x_3 =	"7c000718QRR",
+  stxvd2x_3 =	"7c000798QRR",
+
   -- Primary opcode 30:
   -- Primary opcode 30:
   rldicl_4 =	"78000000RR~HM.",
   rldicl_4 =	"78000000RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
   rldcl_4 =	"78000010RR~RM.",
   rldcl_4 =	"78000010RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
 
 
+  rotldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = "0"
+  end),
+  rotrdi_3 =	op_alias("rldicl_4", function(p)
+    p[3] = "64-("..p[3]..")"; p[4] = "0"
+  end),
+  rotld_3 =	op_alias("rldcl_4", function(p)
+    p[4] = "0"
+  end),
+  sldi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"
+  end),
+  srdi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "64-("..p[3]..")"
+  end),
+  clrldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "0"
+  end),
+  clrrdi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"; p[3] = "0"
+  end),
+
+  -- Primary opcode 56:
+  lq_2 =	"e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+  -- Primary opcode 57:
+  lfdp_2 =	"e4000000F:D", -- NYI: displacement must be divisible by 4.
+
   -- Primary opcode 59:
   -- Primary opcode 59:
   fdivs_3 =	"ec000024FFF.",
   fdivs_3 =	"ec000024FFF.",
   fsubs_3 =	"ec000028FFF.",
   fsubs_3 =	"ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
   fmadds_4 =	"ec00003aFFFF~.",
   fmadds_4 =	"ec00003aFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
+  fcfids_2 =	"ec00069cF-F.",
+  fcfidus_2 =	"ec00079cF-F.",
+
+  dadd_3 =	"ec000004FFF.",
+  dqua_4 =	"ec000006FFFZ.",
+  dmul_3 =	"ec000044FFF.",
+  drrnd_4 =	"ec000046FFFZ.",
+  dscli_3 =	"ec000084FF6.",
+  dquai_4 =	"ec000086SF~FZ.",
+  dscri_3 =	"ec0000c4FF6.",
+  drintx_4 =	"ec0000c61F~FZ.",
+  dcmpo_3 =	"ec000104XFF",
+  dtstex_3 =	"ec000144XFF",
+  dtstdc_3 =	"ec000184XF6",
+  dtstdg_3 =	"ec0001c4XF6",
+  drintn_4 =	"ec0001c61F~FZ.",
+  dctdp_2 =	"ec000204F-F.",
+  dctfix_2 =	"ec000244F-F.",
+  ddedpd_3 =	"ec000284ZF~F.",
+  dxex_2 =	"ec0002c4F-F.",
+  dsub_3 =	"ec000404FFF.",
+  ddiv_3 =	"ec000444FFF.",
+  dcmpu_3 =	"ec000504XFF",
+  dtstsf_3 =	"ec000544XFF",
+  drsp_2 =	"ec000604F-F.",
+  dcffix_2 =	"ec000644F-F.",
+  denbcd_3 =	"ec000684YF~F.",
+  diex_3 =	"ec0006c4FFF.",
+
+  -- Primary opcode 60:
+  xsaddsp_3 =		"f0000000QQQ",
+  xsmaddasp_3 =		"f0000008QQQ",
+  xxsldwi_4 =		"f0000010QQQz",
+  xsrsqrtesp_2 =	"f0000028Q-Q",
+  xssqrtsp_2 =		"f000002cQ-Q",
+  xxsel_4 =		"f0000030QQQQ",
+  xssubsp_3 =		"f0000040QQQ",
+  xsmaddmsp_3 =		"f0000048QQQ",
+  xxpermdi_4 =		"f0000050QQQz",
+  xsresp_2 =		"f0000068Q-Q",
+  xsmulsp_3 =		"f0000080QQQ",
+  xsmsubasp_3 =		"f0000088QQQ",
+  xxmrghw_3 =		"f0000090QQQ",
+  xsdivsp_3 =		"f00000c0QQQ",
+  xsmsubmsp_3 =		"f00000c8QQQ",
+  xsadddp_3 =		"f0000100QQQ",
+  xsmaddadp_3 =		"f0000108QQQ",
+  xscmpudp_3 =		"f0000118XQQ",
+  xscvdpuxws_2 =	"f0000120Q-Q",
+  xsrdpi_2 =		"f0000124Q-Q",
+  xsrsqrtedp_2 =	"f0000128Q-Q",
+  xssqrtdp_2 =		"f000012cQ-Q",
+  xssubdp_3 =		"f0000140QQQ",
+  xsmaddmdp_3 =		"f0000148QQQ",
+  xscmpodp_3 =		"f0000158XQQ",
+  xscvdpsxws_2 =	"f0000160Q-Q",
+  xsrdpiz_2 =		"f0000164Q-Q",
+  xsredp_2 =		"f0000168Q-Q",
+  xsmuldp_3 =		"f0000180QQQ",
+  xsmsubadp_3 =		"f0000188QQQ",
+  xxmrglw_3 =		"f0000190QQQ",
+  xsrdpip_2 =		"f00001a4Q-Q",
+  xstsqrtdp_2 =		"f00001a8X-Q",
+  xsrdpic_2 =		"f00001acQ-Q",
+  xsdivdp_3 =		"f00001c0QQQ",
+  xsmsubmdp_3 =		"f00001c8QQQ",
+  xsrdpim_2 =		"f00001e4Q-Q",
+  xstdivdp_3 =		"f00001e8XQQ",
+  xvaddsp_3 =		"f0000200QQQ",
+  xvmaddasp_3 =		"f0000208QQQ",
+  xvcmpeqsp_3 =		"f0000218QQQ",
+  xvcvspuxws_2 =	"f0000220Q-Q",
+  xvrspi_2 =		"f0000224Q-Q",
+  xvrsqrtesp_2 =	"f0000228Q-Q",
+  xvsqrtsp_2 =		"f000022cQ-Q",
+  xvsubsp_3 =		"f0000240QQQ",
+  xvmaddmsp_3 =		"f0000248QQQ",
+  xvcmpgtsp_3 =		"f0000258QQQ",
+  xvcvspsxws_2 =	"f0000260Q-Q",
+  xvrspiz_2 =		"f0000264Q-Q",
+  xvresp_2 =		"f0000268Q-Q",
+  xvmulsp_3 =		"f0000280QQQ",
+  xvmsubasp_3 =		"f0000288QQQ",
+  xxspltw_3 =		"f0000290QQg~",
+  xvcmpgesp_3 =		"f0000298QQQ",
+  xvcvuxwsp_2 =		"f00002a0Q-Q",
+  xvrspip_2 =		"f00002a4Q-Q",
+  xvtsqrtsp_2 =		"f00002a8X-Q",
+  xvrspic_2 =		"f00002acQ-Q",
+  xvdivsp_3 =		"f00002c0QQQ",
+  xvmsubmsp_3 =		"f00002c8QQQ",
+  xvcvsxwsp_2 =		"f00002e0Q-Q",
+  xvrspim_2 =		"f00002e4Q-Q",
+  xvtdivsp_3 =		"f00002e8XQQ",
+  xvadddp_3 =		"f0000300QQQ",
+  xvmaddadp_3 =		"f0000308QQQ",
+  xvcmpeqdp_3 =		"f0000318QQQ",
+  xvcvdpuxws_2 =	"f0000320Q-Q",
+  xvrdpi_2 =		"f0000324Q-Q",
+  xvrsqrtedp_2 =	"f0000328Q-Q",
+  xvsqrtdp_2 =		"f000032cQ-Q",
+  xvsubdp_3 =		"f0000340QQQ",
+  xvmaddmdp_3 =		"f0000348QQQ",
+  xvcmpgtdp_3 =		"f0000358QQQ",
+  xvcvdpsxws_2 =	"f0000360Q-Q",
+  xvrdpiz_2 =		"f0000364Q-Q",
+  xvredp_2 =		"f0000368Q-Q",
+  xvmuldp_3 =		"f0000380QQQ",
+  xvmsubadp_3 =		"f0000388QQQ",
+  xvcmpgedp_3 =		"f0000398QQQ",
+  xvcvuxwdp_2 =		"f00003a0Q-Q",
+  xvrdpip_2 =		"f00003a4Q-Q",
+  xvtsqrtdp_2 =		"f00003a8X-Q",
+  xvrdpic_2 =		"f00003acQ-Q",
+  xvdivdp_3 =		"f00003c0QQQ",
+  xvmsubmdp_3 =		"f00003c8QQQ",
+  xvcvsxwdp_2 =		"f00003e0Q-Q",
+  xvrdpim_2 =		"f00003e4Q-Q",
+  xvtdivdp_3 =		"f00003e8XQQ",
+  xsnmaddasp_3 =	"f0000408QQQ",
+  xxland_3 =		"f0000410QQQ",
+  xscvdpsp_2 =		"f0000424Q-Q",
+  xscvdpspn_2 =		"f000042cQ-Q",
+  xsnmaddmsp_3 =	"f0000448QQQ",
+  xxlandc_3 =		"f0000450QQQ",
+  xsrsp_2 =		"f0000464Q-Q",
+  xsnmsubasp_3 =	"f0000488QQQ",
+  xxlor_3 =		"f0000490QQQ",
+  xscvuxdsp_2 =		"f00004a0Q-Q",
+  xsnmsubmsp_3 =	"f00004c8QQQ",
+  xxlxor_3 =		"f00004d0QQQ",
+  xscvsxdsp_2 =		"f00004e0Q-Q",
+  xsmaxdp_3 =		"f0000500QQQ",
+  xsnmaddadp_3 =	"f0000508QQQ",
+  xxlnor_3 =		"f0000510QQQ",
+  xscvdpuxds_2 =	"f0000520Q-Q",
+  xscvspdp_2 =		"f0000524Q-Q",
+  xscvspdpn_2 =		"f000052cQ-Q",
+  xsmindp_3 =		"f0000540QQQ",
+  xsnmaddmdp_3 =	"f0000548QQQ",
+  xxlorc_3 =		"f0000550QQQ",
+  xscvdpsxds_2 =	"f0000560Q-Q",
+  xsabsdp_2 =		"f0000564Q-Q",
+  xscpsgndp_3 =		"f0000580QQQ",
+  xsnmsubadp_3 =	"f0000588QQQ",
+  xxlnand_3 =		"f0000590QQQ",
+  xscvuxddp_2 =		"f00005a0Q-Q",
+  xsnabsdp_2 =		"f00005a4Q-Q",
+  xsnmsubmdp_3 =	"f00005c8QQQ",
+  xxleqv_3 =		"f00005d0QQQ",
+  xscvsxddp_2 =		"f00005e0Q-Q",
+  xsnegdp_2 =		"f00005e4Q-Q",
+  xvmaxsp_3 =		"f0000600QQQ",
+  xvnmaddasp_3 =	"f0000608QQQ",
+  ["xvcmpeqsp._3"] =	"f0000618QQQ",
+  xvcvspuxds_2 =	"f0000620Q-Q",
+  xvcvdpsp_2 =		"f0000624Q-Q",
+  xvminsp_3 =		"f0000640QQQ",
+  xvnmaddmsp_3 =	"f0000648QQQ",
+  ["xvcmpgtsp._3"] =	"f0000658QQQ",
+  xvcvspsxds_2 =	"f0000660Q-Q",
+  xvabssp_2 =		"f0000664Q-Q",
+  xvcpsgnsp_3 =		"f0000680QQQ",
+  xvnmsubasp_3 =	"f0000688QQQ",
+  ["xvcmpgesp._3"] =	"f0000698QQQ",
+  xvcvuxdsp_2 =		"f00006a0Q-Q",
+  xvnabssp_2 =		"f00006a4Q-Q",
+  xvnmsubmsp_3 =	"f00006c8QQQ",
+  xvcvsxdsp_2 =		"f00006e0Q-Q",
+  xvnegsp_2 =		"f00006e4Q-Q",
+  xvmaxdp_3 =		"f0000700QQQ",
+  xvnmaddadp_3 =	"f0000708QQQ",
+  ["xvcmpeqdp._3"] =	"f0000718QQQ",
+  xvcvdpuxds_2 =	"f0000720Q-Q",
+  xvcvspdp_2 =		"f0000724Q-Q",
+  xvmindp_3 =		"f0000740QQQ",
+  xvnmaddmdp_3 =	"f0000748QQQ",
+  ["xvcmpgtdp._3"] =	"f0000758QQQ",
+  xvcvdpsxds_2 =	"f0000760Q-Q",
+  xvabsdp_2 =		"f0000764Q-Q",
+  xvcpsgndp_3 =		"f0000780QQQ",
+  xvnmsubadp_3 =	"f0000788QQQ",
+  ["xvcmpgedp._3"] =	"f0000798QQQ",
+  xvcvuxddp_2 =		"f00007a0Q-Q",
+  xvnabsdp_2 =		"f00007a4Q-Q",
+  xvnmsubmdp_3 =	"f00007c8QQQ",
+  xvcvsxddp_2 =		"f00007e0Q-Q",
+  xvnegdp_2 =		"f00007e4Q-Q",
+
+  -- Primary opcode 61:
+  stfdp_2 =	"f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+  -- Primary opcode 62:
+  stq_2 =	"f8000002R:D", -- NYI: displacement must be divisible by 8.
 
 
   -- Primary opcode 63:
   -- Primary opcode 63:
   fdiv_3 =	"fc000024FFF.",
   fdiv_3 =	"fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
   frsp_2 =	"fc000018F-F.",
   frsp_2 =	"fc000018F-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiwz_2 =	"fc00001eF-F.",
   fctiwz_2 =	"fc00001eF-F.",
+  ftdiv_2 =	"fc000100X-F.",
+  fctiwu_2 =	"fc00011cF-F.",
+  fctiwuz_2 =	"fc00011eF-F.",
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   fnabs_2 =	"fc000110F-F.",
   fnabs_2 =	"fc000110F-F.",
+  ftsqrt_2 =	"fc000140X-F.",
   fabs_2 =	"fc000210F-F.",
   fabs_2 =	"fc000210F-F.",
   frin_2 =	"fc000310F-F.",
   frin_2 =	"fc000310F-F.",
   friz_2 =	"fc000350F-F.",
   friz_2 =	"fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   fctid_2 =	"fc00065cF-F.",
   fctid_2 =	"fc00065cF-F.",
   fctidz_2 =	"fc00065eF-F.",
   fctidz_2 =	"fc00065eF-F.",
+  fmrgow_3 =	"fc00068cFFF",
   fcfid_2 =	"fc00069cF-F.",
   fcfid_2 =	"fc00069cF-F.",
+  fctidu_2 =	"fc00075cF-F.",
+  fctiduz_2 =	"fc00075eF-F.",
+  fmrgew_3 =	"fc00078cFFF",
+  fcfidu_2 =	"fc00079cF-F.",
+
+  daddq_3 =	"fc000004F:F:F:.",
+  dquaq_4 =	"fc000006F:F:F:Z.",
+  dmulq_3 =	"fc000044F:F:F:.",
+  drrndq_4 =	"fc000046F:F:F:Z.",
+  dscliq_3 =	"fc000084F:F:6.",
+  dquaiq_4 =	"fc000086SF:~F:Z.",
+  dscriq_3 =	"fc0000c4F:F:6.",
+  drintxq_4 =	"fc0000c61F:~F:Z.",
+  dcmpoq_3 =	"fc000104XF:F:",
+  dtstexq_3 =	"fc000144XF:F:",
+  dtstdcq_3 =	"fc000184XF:6",
+  dtstdgq_3 =	"fc0001c4XF:6",
+  drintnq_4 =	"fc0001c61F:~F:Z.",
+  dctqpq_2 =	"fc000204F:-F:.",
+  dctfixq_2 =	"fc000244F:-F:.",
+  ddedpdq_3 =	"fc000284ZF:~F:.",
+  dxexq_2 =	"fc0002c4F:-F:.",
+  dsubq_3 =	"fc000404F:F:F:.",
+  ddivq_3 =	"fc000444F:F:F:.",
+  dcmpuq_3 =	"fc000504XF:F:",
+  dtstsfq_3 =	"fc000544XF:F:",
+  drdpq_2 =	"fc000604F:-F:.",
+  dcffixq_2 =	"fc000644F:-F:.",
+  denbcdq_3 =	"fc000684YF:~F:.",
+  diexq_3 =	"fc0006c4F:FF:.",
 
 
   -- Primary opcode 4, SPE APU extension:
   -- Primary opcode 4, SPE APU extension:
   evaddw_3 =		"10000200RRR",
   evaddw_3 =		"10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
 do
 do
   local t = {}
   local t = {}
   for k,v in pairs(map_op) do
   for k,v in pairs(map_op) do
-    if sub(v, -1) == "." then
+    if type(v) == "string" and sub(v, -1) == "." then
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
     end
     end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
   werror("bad register name `"..expr.."'")
 end
 end
 
 
+-- Parse a vector register operand of the form `vN' with N in 0..31 and
+-- return N as a number. Any other text is reported through werror().
+local function parse_vr(expr)
+  local digits = match(expr, "^v([1-3]?[0-9])$")
+  local regno = digits and tonumber(digits)
+  -- The pattern admits up to v39, so the upper bound is checked separately.
+  if regno and regno <= 31 then
+    return regno
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a vector-scalar register operand of the form `vsN' with N in 0..63
+-- and return N as a number. Any other text is reported through werror().
+local function parse_vs(expr)
+  local digits = match(expr, "^vs([1-6]?[0-9])$")
+  local regno = digits and tonumber(digits)
+  -- The pattern admits up to vs69, so the upper bound is checked separately.
+  if regno and regno <= 63 then
+    return regno
+  end
+  werror("bad register name `"..expr.."'")
+end
+
 local function parse_cr(expr)
 local function parse_cr(expr)
   local r = match(expr, "^cr([0-7])$")
   local r = match(expr, "^cr([0-7])$")
   if r then return tonumber(r) end
   if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
   werror("bad condition bit name `"..expr.."'")
   werror("bad condition bit name `"..expr.."'")
 end
 end
 
 
+-- Environment table handed to chunks compiled by loadenv(). It starts out
+-- empty, so global lookups made by such a chunk resolve against this table.
+local parse_ctx = {}
+
+-- Compile the string s into a chunk whose environment is parse_ctx.
+-- Returns the compiled function, or a false value if s does not compile.
+local loadenv
+if setfenv then
+  -- setfenv is available: compile with loadstring, then rebind the env.
+  loadenv = function(s)
+    local chunk = loadstring(s, "")
+    if chunk then setfenv(chunk, parse_ctx) end
+    return chunk
+  end
+else
+  -- No setfenv: pass the environment directly as load()'s fourth argument.
+  loadenv = function(s)
+    return load(s, "", nil, parse_ctx)
+  end
+end
+
+-- Turn operand text n into a number. Plain numerals go through tonumber();
+-- anything else is compiled as `return <n>' via loadenv() and evaluated under
+-- pcall, so that simple arithmetic (as produced by the alias opcodes) works.
+-- Returns the evaluated value, or nil if n neither compiles nor runs cleanly.
+local function parse_number(n)
+  local direct = tonumber(n)
+  if direct then return direct end
+  local chunk = loadenv("return "..n)
+  if not chunk then return nil end
+  local ok, value = pcall(chunk)
+  if not ok then return nil end
+  return value
+end
+
 local function parse_imm(imm, bits, shift, scale, signed)
 local function parse_imm(imm, bits, shift, scale, signed)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     local m = sar(n, scale)
     local m = sar(n, scale)
     if shl(m, scale) == n then
     if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
       end
       end
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
-  elseif match(imm, "^r([1-3]?[0-9])$") or
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+	 match(imm, "^vs([1-6]?[0-9])$") or
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
 end
 end
 
 
 local function parse_shiftmask(imm, isshift)
 local function parse_shiftmask(imm, isshift)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     if shr(n, 6) == 0 then
     if shr(n, 6) == 0 then
-      local lsb = band(imm, 31)
-      local msb = imm - lsb
+      local lsb = band(n, 31)
+      local msb = n - lsb
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
-    werror("NYI: parameterized 64 bit shift/mask")
+    waction("IMMSH", isshift and 1 or 0, imm)
+    return 0;
   end
   end
 end
 end
 
 
@@ -1011,7 +1648,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Handle opcodes defined with template strings.
 -- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
   if not params then return sub(template, 9) end
   if not params then return sub(template, 9) end
   local op = tonumber(sub(template, 1, 8), 16)
   local op = tonumber(sub(template, 1, 8), 16)
   local n, rs = 1, 26
   local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
     elseif p == "F" then
     elseif p == "F" then
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "V" then
+      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+    elseif p == "Q" then
+      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+    elseif p == "q" then
+      local vs = parse_vs(params[n]); n = n + 1
+      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
     elseif p == "A" then
     elseif p == "A" then
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
     elseif p == "S" then
     elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
     elseif p == "X" then
     elseif p == "X" then
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "1" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+    elseif p == "g" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+    elseif p == "3" then
+      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+    elseif p == "P" then
+      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "p" then
+      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "6" then
+      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+    elseif p == "Y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+    elseif p == "y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+    elseif p == "Z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+    elseif p == "z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
     elseif p == "W" then
     elseif p == "W" then
       op = op + parse_cr(params[n]); n = n + 1
       op = op + parse_cr(params[n]); n = n + 1
     elseif p == "G" then
     elseif p == "G" then
@@ -1056,9 +1722,9 @@ map_op[".template__"] = function(params, template, nparams)
     elseif p == "M" then
     elseif p == "M" then
       op = op + parse_shiftmask(params[n], false); n = n + 1
       op = op + parse_shiftmask(params[n], false); n = n + 1
     elseif p == "J" or p == "K" then
     elseif p == "J" or p == "K" then
-      local mode, n, s = parse_label(params[n], false)
-      if p == "K" then n = n + 2048 end
-      waction("REL_"..mode, n, s, 1)
+      local mode, m, s = parse_label(params[n], false)
+      if p == "K" then m = m + 2048 end
+      waction("REL_"..mode, m, s, 1)
       n = n + 1
       n = n + 1
     elseif p == "0" then
     elseif p == "0" then
       if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
       if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
       local lo = band(op, mm)
       local lo = band(op, mm)
       local hi = band(op, shl(mm, 5))
       local hi = band(op, shl(mm, 5))
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == ":" then
+      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
     elseif p == "-" then
     elseif p == "-" then
       rs = rs - 5
       rs = rs - 5
     elseif p == "." then
     elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
   wputpos(pos, op)
   wputpos(pos, op)
 end
 end
 
 
+map_op[".template__"] = op_template
+
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Pseudo-opcode to mark the position where the action list is to be emitted.
 -- Pseudo-opcode to mark the position where the action list is to be emitted.

+ 2 - 2
luajit.mod/luajit/dynasm/dasm_proto.h

@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <stdarg.h>
 
 
-#define DASM_IDENT	"DynASM 1.3.0"
-#define DASM_VERSION	10300	/* 1.3.0 */
+#define DASM_IDENT	"DynASM 1.4.0"
+#define DASM_VERSION	10400	/* 1.4.0 */
 
 
 #ifndef Dst_DECL
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst
 #define Dst_DECL	dasm_State **Dst

+ 46 - 9
luajit.mod/luajit/dynasm/dasm_x86.h

@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
   dasm_State *D = Dst_REF;
   dasm_State *D = Dst_REF;
   dasm_ActList p = D->actionlist + start;
   dasm_ActList p = D->actionlist + start;
   dasm_Section *sec = D->section;
   dasm_Section *sec = D->section;
-  int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+  int pos = sec->pos, ofs = sec->ofs, mrm = -1;
   int *b;
   int *b;
 
 
   if (pos >= sec->epos) {
   if (pos >= sec->epos) {
@@ -193,21 +193,28 @@ void dasm_put(Dst_DECL, int start, ...)
       b[pos++] = n;
       b[pos++] = n;
       switch (action) {
       switch (action) {
       case DASM_DISP:
       case DASM_DISP:
-	if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
-      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
+	if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
+	/* fallthrough */
+      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
       case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
       case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
       case DASM_IMM_D: ofs += 4; break;
       case DASM_IMM_D: ofs += 4; break;
       case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
       case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
       case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
       case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
-      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
+      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
       case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
       case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
       case DASM_SPACE: p++; ofs += n; break;
       case DASM_SPACE: p++; ofs += n; break;
       case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
       case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
-      case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
-	if (*p++ == 1 && *p == DASM_DISP) mrm = n;
+      case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+	if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+	if (*p < 0x20 && (n&7) == 4) ofs++;
+	switch ((*p++ >> 3) & 3) {
+	case 3: n |= b[pos-3]; /* fallthrough */
+	case 2: n |= b[pos-2]; /* fallthrough */
+	case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+	}
 	continue;
 	continue;
       }
       }
-      mrm = 4;
+      mrm = -1;
     } else {
     } else {
       int *pl, n;
       int *pl, n;
       switch (action) {
       switch (action) {
@@ -323,11 +330,14 @@ int dasm_link(Dst_DECL, size_t *szp)
 	  pos += 2;
 	  pos += 2;
 	  break;
 	  break;
 	}
 	}
+	  /* fallthrough */
 	case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
 	case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
+	  /* fallthrough */
 	case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
 	case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
 	case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
 	case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
 	case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
 	case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
 	case DASM_LABEL_LG: p++;
 	case DASM_LABEL_LG: p++;
+	  /* fallthrough */
 	case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
 	case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
 	case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
 	case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
 	case DASM_EXTERN: p += 2; break;
 	case DASM_EXTERN: p += 2; break;
@@ -385,17 +395,42 @@ int dasm_encode(Dst_DECL, void *buffer)
 	    if (mrm != 5) { mm[-1] -= 0x80; break; } }
 	    if (mrm != 5) { mm[-1] -= 0x80; break; } }
 	  if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
 	  if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
 	}
 	}
+	  /* fallthrough */
 	case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
 	case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
 	case DASM_IMM_DB: if (((n+128)&-256) == 0) {
 	case DASM_IMM_DB: if (((n+128)&-256) == 0) {
 	    db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
 	    db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
 	  } else mark = NULL;
 	  } else mark = NULL;
+	  /* fallthrough */
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
+	  /* fallthrough */
 	case DASM_IMM_W: dasmw(n); break;
 	case DASM_IMM_W: dasmw(n); break;
-	case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+	case DASM_VREG: {
+	  int t = *p++;
+	  unsigned char *ex = cp - (t&7);
+	  if ((n & 8) && t < 0xa0) {
+	    if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+	    n &= 7;
+	  } else if (n & 0x10) {
+	    if (*ex & 0x80) {
+	      *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+	    }
+	    while (++ex < cp) ex[-1] = *ex;
+	    if (mark) mark--;
+	    cp--;
+	    n &= 7;
+	  }
+	  if (t >= 0xc0) n <<= 4;
+	  else if (t >= 0x40) n <<= 3;
+	  else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
+	  cp[-1] ^= n;
+	  break;
+	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
-	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+	  /* fallthrough */
+	case DASM_REL_A: rel_a:
+	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
 	case DASM_REL_PC: rel_pc: {
 	case DASM_REL_PC: rel_pc: {
 	  int shrink = *b++;
 	  int shrink = *b++;
 	  int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
 	  int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
@@ -406,6 +441,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	}
 	}
 	case DASM_IMM_LG:
 	case DASM_IMM_LG:
 	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
 	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+	  /* fallthrough */
 	case DASM_IMM_PC: {
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
 	  int *pb = DASM_POS2PTR(D, n);
 	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
 	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
@@ -426,6 +462,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
 	case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
 	case DASM_MARK: mark = cp; break;
 	case DASM_MARK: mark = cp; break;
 	case DASM_ESC: action = *p++;
 	case DASM_ESC: action = *p++;
+	  /* fallthrough */
 	default: *cp++ = action; break;
 	default: *cp++ = action; break;
 	case DASM_SECTION: case DASM_STOP: goto stop;
 	case DASM_SECTION: case DASM_STOP: goto stop;
 	}
 	}

+ 510 - 96
luajit.mod/luajit/dynasm/dasm_x86.lua

@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
 local _info = {
   arch =	x64 and "x64" or "x86",
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
   description =	"DynASM x86/x64 module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
 local _s = string
 local _s = string
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
 local bit = bit or require("bit")
 local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
 
 
 -- Inherited tables and callbacks.
 -- Inherited tables and callbacks.
 local g_opt, g_arch
 local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
   -- int arg, 1 buffer pos:
   -- int arg, 1 buffer pos:
   "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
   "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
   -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
   -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
-  "VREG", "SPACE", -- !x64: VREG support NYI.
+  "VREG", "SPACE",
   -- ptrdiff_t arg, 1 buffer pos (address): !x64
   -- ptrdiff_t arg, 1 buffer pos (address): !x64
   "SETLABEL", "REL_A",
   "SETLABEL", "REL_A",
   -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
   -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
 -- Current number of section buffer positions for dasm_put().
 -- Current number of section buffer positions for dasm_put().
 local secpos = 1
 local secpos = 1
 
 
+-- VREG kind encodings, pre-shifted by 5 bits.
+local map_vreg = {  -- wvreg() adds vreg_shrink_count*8 and psz on top of these values
+  ["modrm.rm.m"] = 0x00,
+  ["modrm.rm.r"] = 0x20,
+  ["opcode"] =     0x20,  -- same code as modrm.rm.r
+  ["sib.base"] =   0x20,  -- same code as modrm.rm.r
+  ["sib.index"] =  0x40,
+  ["modrm.reg"] =  0x80,
+  ["vex.v"] =      0xa0,
+  ["imm.hi"] =     0xc0,
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage.
+local vreg_shrink_count = 0  -- bumped in wvreg() when the kind code is below sk; folded in and reset unless defer is set
+
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Compute action numbers for action names.
 -- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
   if a or num then secpos = secpos + (num or 1) end
   if a or num then secpos = secpos + (num or 1) end
 end
 end
 
 
+-- Optionally add a VREG action.
+--   kind:  key into map_vreg selecting the kind code for the emitted byte.
+--   vreg:  operand handed to the VREG action; nil/false makes this a no-op.
+--   psz:   added to the emitted kind byte (defaults to 0).
+--   sk:    kind-code threshold (defaults to 0); a kind code below it
+--          increments vreg_shrink_count.
+--   defer: when set, the accumulated shrink count is not folded into this
+--          byte and is kept for a later call.
+local function wvreg(kind, vreg, psz, sk, defer)
+  if not vreg then return end
+  waction("VREG", vreg)
+  -- Report the offending kind (not the register operand) in the message.
+  local b = assert(map_vreg[kind], "bad vreg kind `"..kind.."'")
+  if b < (sk or 0) then
+    vreg_shrink_count = vreg_shrink_count + 1
+  end
+  if not defer then
+    -- Fold the pending shrink count into the byte (count * 8) and reset it.
+    b = b + vreg_shrink_count * 8
+    vreg_shrink_count = 0
+  end
+  wputxb(b + (psz or 0))
+end
+
 -- Add call to embedded DynASM C code.
 -- Add call to embedded DynASM C code.
 local function wcall(func, args)
 local function wcall(func, args)
   wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
   wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     if needrex then map_reg_needrex[iname] = true end
     if needrex then map_reg_needrex[iname] = true end
     local name
     local name
-    if sz == "o" then name = format("xmm%d", i)
+    if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
     elseif sz == "f" then name = format("st%d", i)
     elseif sz == "f" then name = format("st%d", i)
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     map_archdef[name] = iname
     map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
 map_reg_valid_index[map_archdef.esp] = false
 map_reg_valid_index[map_archdef.esp] = false
 if x64 then map_reg_valid_index[map_archdef.rsp] = false end
 if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
 map_archdef["Ra"] = "@"..addrsize
 map_archdef["Ra"] = "@"..addrsize
 
 
 -- FP registers (internally tword sized, but use "f" as operand size).
 -- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
 -- SSE registers (oword sized, but qword and dword accessible).
 -- SSE registers (oword sized, but qword and dword accessible).
 mkrmap("o", "xmm")
 mkrmap("o", "xmm")
 
 
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
 -- Operand size prefixes to codes.
 -- Operand size prefixes to codes.
 local map_opsize = {
 local map_opsize = {
-  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
-  aword = addrsize,
+  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+  tword = "t", aword = addrsize,
 }
 }
 
 
 -- Operand size code to number.
 -- Operand size code to number.
 local map_opsizenum = {
 local map_opsizenum = {
-  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
 }
 }
 
 
 -- Operand size code to name.
 -- Operand size code to name.
 local map_opsizename = {
 local map_opsizename = {
-  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
-  f = "fpword",
+  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+  t = "tword", f = "fpword",
 }
 }
 
 
 -- Valid index register scale factors.
 -- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
 end
 end
 
 
 -- Put multi-byte opcode with operand-size dependent modifications.
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+  local psz, sk = 0, nil
+  if vex then
+    local tail
+    if vex.m == 1 and band(rex, 11) == 0 then
+      if x64 and vregxb then
+	sk = map_vreg["modrm.reg"]
+      else
+	wputb(0xc5)
+      tail = shl(bxor(band(rex, 4), 4), 5)
+      psz = 3
+      end
+    end
+    if not tail then
+      wputb(0xc4)
+      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+      tail = shl(band(rex, 8), 4)
+      psz = 4
+    end
+    local reg, vreg = 0, nil
+    if vex.v then
+      reg = vex.v.reg
+      if not reg then werror("bad vex operand") end
+      if reg < 0 then reg = 0; vreg = vex.v.vreg end
+    end
+    if sz == "y" or vex.l then tail = tail + 4 end
+    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+    wvreg("vex.v", vreg)
+    rex = 0
+    if op >= 256 then werror("bad vex opcode") end
+  else
+    if rex ~= 0 then
+      if not x64 then werror("bad operand size") end
+    elseif (vregr or vregxb) and x64 then
+      rex = 0x10
+      sk = map_vreg["vex.v"]
+    end
+  end
   local r
   local r
-  if rex ~= 0 and not x64 then werror("bad operand size") end
   if sz == "w" then wputb(102) end
   if sz == "w" then wputb(102) end
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
     if rex ~= 0 then
     if rex ~= 0 then
       local opc3 = band(op, 0xffff00)
       local opc3 = band(op, 0xffff00)
       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
-	wputb(64 + band(rex, 15)); rex = 0
+	wputb(64 + band(rex, 15)); rex = 0; psz = 2
       end
       end
     end
     end
-    wputb(shr(op, 16)); op = band(op, 0xffff)
+    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
   end
   end
   if op >= 256 then
   if op >= 256 then
     local b = shr(op, 8)
     local b = shr(op, 8)
-    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
-    wputb(b)
-    op = band(op, 255)
+    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+    wputb(b); op = band(op, 255); psz = psz + 1
   end
   end
-  if rex ~= 0 then wputb(64 + band(rex, 15)) end
+  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
   if sz == "b" then op = op - 1 end
   if sz == "b" then op = op - 1 end
   wputb(op)
   wputb(op)
+  return psz, sk
 end
 end
 
 
 -- Put ModRM or SIB formatted byte.
 -- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
 end
 end
 
 
 -- Put ModRM/SIB plus optional displacement.
 -- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
   local vreg, vxreg
   local vreg, vxreg
   local reg, xreg = t.reg, t.xreg
   local reg, xreg = t.reg, t.xreg
   if reg and reg < 0 then reg = 0; vreg = t.vreg end
   if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
   -- Register mode.
   -- Register mode.
   if sub(t.mode, 1, 1) == "r" then
   if sub(t.mode, 1, 1) == "r" then
     wputmodrm(3, s, reg)
     wputmodrm(3, s, reg)
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.r", vreg, psz+1, sk)
     return
     return
   end
   end
 
 
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
       wputmodrm(0, s, 4)
       wputmodrm(0, s, 4)
       if imark == "I" then waction("MARK") end
       if imark == "I" then waction("MARK") end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
+      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
       wputmodrm(t.xsc, xreg, 5)
       wputmodrm(t.xsc, xreg, 5)
-      if vxreg then waction("VREG", vxreg); wputxb(3) end
+      wvreg("sib.index", vxreg, psz+2, sk)
     else
     else
       -- Pure 32 bit displacement.
       -- Pure 32 bit displacement.
       if x64 and tdisp ~= "table" then
       if x64 and tdisp ~= "table" then
 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
 	if imark == "I" then waction("MARK") end
 	wputmodrm(0, 4, 5)
 	wputmodrm(0, 4, 5)
       else
       else
 	riprel = x64
 	riprel = x64
 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
 	if imark == "I" then waction("MARK") end
       end
       end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
     end
     end
     if riprel then -- Emit rip-relative displacement.
     if riprel then -- Emit rip-relative displacement.
       if match("UWSiI", imark) then
       if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
   if xreg or band(reg, 7) == 4 then
   if xreg or band(reg, 7) == 4 then
     wputmodrm(m or 2, s, 4) -- ModRM.
     wputmodrm(m or 2, s, 4) -- ModRM.
     if m == nil or imark == "I" then waction("MARK") end
     if m == nil or imark == "I" then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
-    if vxreg then waction("VREG", vxreg); wputxb(3) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("sib.index", vxreg, psz+2, sk, vreg)
+    wvreg("sib.base", vreg, psz+2, sk)
   else
   else
     wputmodrm(m or 2, s, reg) -- ModRM.
     wputmodrm(m or 2, s, reg) -- ModRM.
     if (imark == "I" and (m == 1 or m == 2)) or
     if (imark == "I" and (m == 1 or m == 2)) or
        (m == nil and (vsreg or vreg)) then waction("MARK") end
        (m == nil and (vsreg or vreg)) then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.m", vreg, psz+1, sk)
   end
   end
 
 
   -- Put displacement.
   -- Put displacement.
@@ -881,9 +952,16 @@ end
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --             The spare 3 bits are either filled with the last hex digit or
 --             The spare 3 bits are either filled with the last hex digit or
 --             the result from a previous "r"/"R". The opcode is restored.
 --             the result from a previous "r"/"R". The opcode is restored.
+--   "u"       Use VEX encoding, vvvv unused.
+--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+--             removed from the list used by future characters).
+--   "w"       Use VEX encoding, vvvv from 3rd operand.
+--   "L"       Force VEX.L
 --
 --
 -- All of the following characters force a flush of the opcode:
 -- All of the following characters force a flush of the opcode:
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+--   "s"       stores a 4 bit immediate from the last register operand,
+--             followed by 4 zero bits.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
@@ -1226,46 +1304,14 @@ local map_op = {
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   orpd_2 =	"rmo:660F56rM",
   orpd_2 =	"rmo:660F56rM",
   orps_2 =	"rmo:0F56rM",
   orps_2 =	"rmo:0F56rM",
-  packssdw_2 =	"rmo:660F6BrM",
-  packsswb_2 =	"rmo:660F63rM",
-  packuswb_2 =	"rmo:660F67rM",
-  paddb_2 =	"rmo:660FFCrM",
-  paddd_2 =	"rmo:660FFErM",
-  paddq_2 =	"rmo:660FD4rM",
-  paddsb_2 =	"rmo:660FECrM",
-  paddsw_2 =	"rmo:660FEDrM",
-  paddusb_2 =	"rmo:660FDCrM",
-  paddusw_2 =	"rmo:660FDDrM",
-  paddw_2 =	"rmo:660FFDrM",
-  pand_2 =	"rmo:660FDBrM",
-  pandn_2 =	"rmo:660FDFrM",
   pause_0 =	"F390",
   pause_0 =	"F390",
-  pavgb_2 =	"rmo:660FE0rM",
-  pavgw_2 =	"rmo:660FE3rM",
-  pcmpeqb_2 =	"rmo:660F74rM",
-  pcmpeqd_2 =	"rmo:660F76rM",
-  pcmpeqw_2 =	"rmo:660F75rM",
-  pcmpgtb_2 =	"rmo:660F64rM",
-  pcmpgtd_2 =	"rmo:660F66rM",
-  pcmpgtw_2 =	"rmo:660F65rM",
   pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
-  pmaddwd_2 =	"rmo:660FF5rM",
-  pmaxsw_2 =	"rmo:660FEErM",
-  pmaxub_2 =	"rmo:660FDErM",
-  pminsw_2 =	"rmo:660FEArM",
-  pminub_2 =	"rmo:660FDArM",
   pmovmskb_2 =	"rr/do:660FD7rM",
   pmovmskb_2 =	"rr/do:660FD7rM",
-  pmulhuw_2 =	"rmo:660FE4rM",
-  pmulhw_2 =	"rmo:660FE5rM",
-  pmullw_2 =	"rmo:660FD5rM",
-  pmuludq_2 =	"rmo:660FF4rM",
-  por_2 =	"rmo:660FEBrM",
   prefetchnta_1 = "xb:n0F180m",
   prefetchnta_1 = "xb:n0F180m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht2_1 = "xb:n0F183m",
   prefetcht2_1 = "xb:n0F183m",
-  psadbw_2 =	"rmo:660FF6rM",
   pshufd_3 =	"rmio:660F70rMU",
   pshufd_3 =	"rmio:660F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
@@ -1279,23 +1325,6 @@ local map_op = {
   psrldq_2 =	"rio:660F733mU",
   psrldq_2 =	"rio:660F733mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
-  psubb_2 =	"rmo:660FF8rM",
-  psubd_2 =	"rmo:660FFArM",
-  psubq_2 =	"rmo:660FFBrM",
-  psubsb_2 =	"rmo:660FE8rM",
-  psubsw_2 =	"rmo:660FE9rM",
-  psubusb_2 =	"rmo:660FD8rM",
-  psubusw_2 =	"rmo:660FD9rM",
-  psubw_2 =	"rmo:660FF9rM",
-  punpckhbw_2 =	"rmo:660F68rM",
-  punpckhdq_2 =	"rmo:660F6ArM",
-  punpckhqdq_2 = "rmo:660F6DrM",
-  punpckhwd_2 =	"rmo:660F69rM",
-  punpcklbw_2 =	"rmo:660F60rM",
-  punpckldq_2 =	"rmo:660F62rM",
-  punpcklqdq_2 = "rmo:660F6CrM",
-  punpcklwd_2 =	"rmo:660F61rM",
-  pxor_2 =	"rmo:660FEFrM",
   rcpps_2 =	"rmo:0F53rM",
   rcpps_2 =	"rmo:0F53rM",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rsqrtps_2 =	"rmo:0F52rM",
   rsqrtps_2 =	"rmo:0F52rM",
@@ -1413,6 +1442,327 @@ local map_op = {
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   -- popcnt is also in SSE4.2
   -- popcnt is also in SSE4.2
+
+  -- AES-NI
+  aesdec_2 =	"rmo:660F38DErM",
+  aesdeclast_2 = "rmo:660F38DFrM",
+  aesenc_2 =	"rmo:660F38DCrM",
+  aesenclast_2 = "rmo:660F38DDrM",
+  aesimc_2 =	"rmo:660F38DBrM",
+  aeskeygenassist_3 = "rmio:660F3ADFrMU",
+  pclmulqdq_3 =	"rmio:660F3A44rMU",
+
+   -- AVX FP ops
+  vaddsubpd_3 =	"rrmoy:660FVD0rM",
+  vaddsubps_3 =	"rrmoy:F20FVD0rM",
+  vandpd_3 =	"rrmoy:660FV54rM",
+  vandps_3 =	"rrmoy:0FV54rM",
+  vandnpd_3 =	"rrmoy:660FV55rM",
+  vandnps_3 =	"rrmoy:0FV55rM",
+  vblendpd_4 =	"rrmioy:660F3AV0DrMU",
+  vblendps_4 =	"rrmioy:660F3AV0CrMU",
+  vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
+  vblendvps_4 =	"rrmroy:660F3AV4ArMs",
+  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+  vcmppd_4 =	"rrmioy:660FVC2rMU",
+  vcmpps_4 =	"rrmioy:0FVC2rMU",
+  vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
+  vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
+  vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
+  vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
+  vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
+  vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
+  vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
+  vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
+  vcvtps2dq_2 =	"rmoy:660Fu5BrM",
+  vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
+  vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+  vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
+  vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+  vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+  vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
+  vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+  vdppd_4 =	"rrmio:660F3AV41rMU",
+  vdpps_4 =	"rrmioy:660F3AV40rMU",
+  vextractf128_3 = "mri/oy:660F3AuL19RmU",
+  vextractps_3 = "mri/do:660F3Au17RmU",
+  vhaddpd_3 =	"rrmoy:660FV7CrM",
+  vhaddps_3 =	"rrmoy:F20FV7CrM",
+  vhsubpd_3 =	"rrmoy:660FV7DrM",
+  vhsubps_3 =	"rrmoy:F20FV7DrM",
+  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+  vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
+  vldmxcsr_1 =	"xd:0FuAE2m",
+  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+  vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
+  vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
+  vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+  vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+  vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
+  vmovhlps_3 =	"rrro:0FV12rM",
+  vmovhpd_2 =	"xr/qo:660Fu17Rm",
+  vmovhpd_3 =	"rrx/ooq:660FV16rM",
+  vmovhps_2 =	"xr/qo:0Fu17Rm",
+  vmovhps_3 =	"rrx/ooq:0FV16rM",
+  vmovlhps_3 =	"rrro:0FV16rM",
+  vmovlpd_2 =	"xr/qo:660Fu13Rm",
+  vmovlpd_3 =	"rrx/ooq:660FV12rM",
+  vmovlps_2 =	"xr/qo:0Fu13Rm",
+  vmovlps_3 =	"rrx/ooq:0FV12rM",
+  vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
+  vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
+  vmovntpd_2 =	"xroy:660Fu2BRm",
+  vmovntps_2 =	"xroy:0Fu2BRm",
+  vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+  vmovsd_3 =	"rrro:F20FV10rM",
+  vmovshdup_2 =	"rmoy:F30Fu16rM",
+  vmovsldup_2 =	"rmoy:F30Fu12rM",
+  vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+  vmovss_3 =	"rrro:F30FV10rM",
+  vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
+  vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
+  vorpd_3 =	"rrmoy:660FV56rM",
+  vorps_3 =	"rrmoy:0FV56rM",
+  vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+  vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+  vperm2f128_4 = "rrmiy:660F3AV06rMU",
+  vptestpd_2 =	"rmoy:660F38u0FrM",
+  vptestps_2 =	"rmoy:660F38u0ErM",
+  vrcpps_2 =	"rmoy:0Fu53rM",
+  vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
+  vrsqrtps_2 =	"rmoy:0Fu52rM",
+  vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
+  vroundpd_3 =	"rmioy:660F3Au09rMU",
+  vroundps_3 =	"rmioy:660F3Au08rMU",
+  vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
+  vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
+  vshufpd_4 =	"rrmioy:660FVC6rMU",
+  vshufps_4 =	"rrmioy:0FVC6rMU",
+  vsqrtps_2 =	"rmoy:0Fu51rM",
+  vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
+  vsqrtpd_2 =	"rmoy:660Fu51rM",
+  vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
+  vstmxcsr_1 =	"xd:0FuAE3m",
+  vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
+  vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
+  vunpckhpd_3 =	"rrmoy:660FV15rM",
+  vunpckhps_3 =	"rrmoy:0FV15rM",
+  vunpcklpd_3 =	"rrmoy:660FV14rM",
+  vunpcklps_3 =	"rrmoy:0FV14rM",
+  vxorpd_3 =	"rrmoy:660FV57rM",
+  vxorps_3 =	"rrmoy:0FV57rM",
+  vzeroall_0 =	"0FuL77",
+  vzeroupper_0 = "0Fu77",
+
+  -- AVX2 FP ops
+  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+  -- *vgather* (!vsib)
+  vpermpd_3 =	"rmiy:660F3AuX01rMU",
+  vpermps_3 =	"rrmy:660F38V16rM",
+
+  -- AVX, AVX2 integer ops
+  -- In general, xmm requires AVX, ymm requires AVX2.
+  vaesdec_3 =  "rrmo:660F38VDErM",
+  vaesdeclast_3 = "rrmo:660F38VDFrM",
+  vaesenc_3 =  "rrmo:660F38VDCrM",
+  vaesenclast_3 = "rrmo:660F38VDDrM",
+  vaesimc_2 =  "rmo:660F38uDBrM",
+  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
+  vlddqu_2 =	"rxoy:F20FuF0rM",
+  vmaskmovdqu_2 = "rro:660FuF7rM",
+  vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
+  vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+  vmovntdq_2 =	"xroy:660FuE7Rm",
+  vmovntdqa_2 =	"rxoy:660F38u2ArM",
+  vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
+  vpabsb_2 =	"rmoy:660F38u1CrM",
+  vpabsd_2 =	"rmoy:660F38u1ErM",
+  vpabsw_2 =	"rmoy:660F38u1DrM",
+  vpackusdw_3 =	"rrmoy:660F38V2BrM",
+  vpalignr_4 =	"rrmioy:660F3AV0FrMU",
+  vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
+  vpblendw_4 =	"rrmioy:660F3AV0ErMU",
+  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+  vpcmpeqq_3 =	"rrmoy:660F38V29rM",
+  vpcmpestri_3 = "rmio:660F3Au61rMU",
+  vpcmpestrm_3 = "rmio:660F3Au60rMU",
+  vpcmpgtq_3 =	"rrmoy:660F38V37rM",
+  vpcmpistri_3 = "rmio:660F3Au63rMU",
+  vpcmpistrm_3 = "rmio:660F3Au62rMU",
+  vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+  vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+  vpextrd_3 =	"mri/do:660F3Au16RmU",
+  vpextrq_3 =	"mri/qo:660F3Au16RmU",
+  vphaddw_3 =	"rrmoy:660F38V01rM",
+  vphaddd_3 =	"rrmoy:660F38V02rM",
+  vphaddsw_3 =	"rrmoy:660F38V03rM",
+  vphminposuw_2 = "rmo:660F38u41rM",
+  vphsubw_3 =	"rrmoy:660F38V05rM",
+  vphsubd_3 =	"rrmoy:660F38V06rM",
+  vphsubsw_3 =	"rrmoy:660F38V07rM",
+  vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
+  vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
+  vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
+  vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
+  vpmaddubsw_3 = "rrmoy:660F38V04rM",
+  vpmaxsb_3 =	"rrmoy:660F38V3CrM",
+  vpmaxsd_3 =	"rrmoy:660F38V3DrM",
+  vpmaxuw_3 =	"rrmoy:660F38V3ErM",
+  vpmaxud_3 =	"rrmoy:660F38V3FrM",
+  vpminsb_3 =	"rrmoy:660F38V38rM",
+  vpminsd_3 =	"rrmoy:660F38V39rM",
+  vpminuw_3 =	"rrmoy:660F38V3ArM",
+  vpminud_3 =	"rrmoy:660F38V3BrM",
+  vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+  vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
+  vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
+  vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
+  vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
+  vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
+  vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
+  vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
+  vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
+  vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
+  vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
+  vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
+  vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
+  vpmuldq_3 =	"rrmoy:660F38V28rM",
+  vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
+  vpmulld_3 =	"rrmoy:660F38V40rM",
+  vpshufb_3 =	"rrmoy:660F38V00rM",
+  vpshufd_3 =	"rmioy:660Fu70rMU",
+  vpshufhw_3 =	"rmioy:F30Fu70rMU",
+  vpshuflw_3 =	"rmioy:F20Fu70rMU",
+  vpsignb_3 =	"rrmoy:660F38V08rM",
+  vpsignw_3 =	"rrmoy:660F38V09rM",
+  vpsignd_3 =	"rrmoy:660F38V0ArM",
+  vpslldq_3 =	"rrioy:660Fv737mU",
+  vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
+  vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
+  vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
+  vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
+  vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
+  vpsrldq_3 =	"rrioy:660Fv733mU",
+  vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
+  vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
+  vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
+  vptest_2 =	"rmoy:660F38u17rM",
+
+  -- AVX2 integer ops
+  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+  vextracti128_3 = "mri/oy:660F3AuL39RmU",
+  vpblendd_4 =	"rrmioy:660F3AV02rMU",
+  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+  vpermd_3 =	"rrmy:660F38V36rM",
+  vpermq_3 =	"rmiy:660F3AuX00rMU",
+  -- *vpgather* (!vsib)
+  vperm2i128_4 = "rrmiy:660F3AV46rMU",
+  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+  vpsllvd_3 =	"rrmoy:660F38V47rM",
+  vpsllvq_3 =	"rrmoy:660F38VX47rM",
+  vpsravd_3 =	"rrmoy:660F38V46rM",
+  vpsrlvd_3 =	"rrmoy:660F38V45rM",
+  vpsrlvq_3 =	"rrmoy:660F38VX45rM",
+
+  -- Intel ADX
+  adcx_2 =	"rmqd:660F38F6rM",
+  adox_2 =	"rmqd:F30F38F6rM",
+
+  -- BMI1
+  andn_3 =	"rrmqd:0F38VF2rM",
+  bextr_3 =	"rmrqd:0F38wF7rM",
+  blsi_2 =	"rmqd:0F38vF33m",
+  blsmsk_2 =	"rmqd:0F38vF32m",
+  blsr_2 =	"rmqd:0F38vF31m",
+  tzcnt_2 =	"rmqdw:F30FBCrM",
+
+  -- BMI2
+  bzhi_3 =	"rmrqd:0F38wF5rM",
+  mulx_3 =	"rrmqd:F20F38VF6rM",
+  pdep_3 =	"rrmqd:F20F38VF5rM",
+  pext_3 =	"rrmqd:F30F38VF5rM",
+  rorx_3 =	"rmSqd:F20F3AuF0rMS",
+  sarx_3 =	"rmrqd:F30F38wF7rM",
+  shrx_3 =	"rmrqd:F20F38wF7rM",
+  shlx_3 =	"rmrqd:660F38wF7rM",
+
+  -- FMA3
+  vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
+  vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
+  vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
+  vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
+  vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
+  vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
+
+  vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
+  vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
+  vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
+  vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
+  vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
+  vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
+
+  vfmadd132pd_3 = "rrmoy:660F38VX98rM",
+  vfmadd132ps_3 = "rrmoy:660F38V98rM",
+  vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
+  vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
+  vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
+  vfmadd213ps_3 = "rrmoy:660F38VA8rM",
+  vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
+  vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
+  vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
+  vfmadd231ps_3 = "rrmoy:660F38VB8rM",
+  vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
+  vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
+
+  vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
+  vfmsub132ps_3 = "rrmoy:660F38V9ArM",
+  vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
+  vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
+  vfmsub213pd_3 = "rrmoy:660F38VXAArM",
+  vfmsub213ps_3 = "rrmoy:660F38VAArM",
+  vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
+  vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
+  vfmsub231pd_3 = "rrmoy:660F38VXBArM",
+  vfmsub231ps_3 = "rrmoy:660F38VBArM",
+  vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
+  vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
+
+  vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
+  vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
+  vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
+  vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
+  vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
+  vfnmadd213ps_3 = "rrmoy:660F38VACrM",
+  vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
+  vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
+  vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
+  vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
+  vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
+  vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
+
+  vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
+  vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
+  vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
+  vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
+  vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
+  vfnmsub213ps_3 = "rrmoy:660F38VAErM",
+  vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
+  vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
+  vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
+  vfnmsub231ps_3 = "rrmoy:660F38VBErM",
+  vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
+  vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -1463,28 +1813,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
 end
 end
 
 
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 		     sub = 12, min = 13, div = 14, max = 15 } do
 		     sub = 12, min = 13, div = 14, max = 15 } do
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+  if n ~= 1 then
+    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
+    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
+    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
+    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
+  end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
+  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
 end
 end
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_vexarg = { u = false, v = 1, V = 2 }
+
 -- Process pattern string.
 -- Process pattern string.
 local function dopattern(pat, args, sz, op, needrex)
 local function dopattern(pat, args, sz, op, needrex)
-  local digit, addin
+  local digit, addin, vex
   local opcode = 0
   local opcode = 0
   local szov = sz
   local szov = sz
   local narg = 1
   local narg = 1
   local rex = 0
   local rex = 0
 
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 5 positions.
-  if secpos+5 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 6 positions.
+  if secpos+6 > maxsecpos then wflush() end
 
 
   -- Process each character.
   -- Process each character.
   for c in gmatch(pat.."|", ".") do
   for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1878,8 @@ local function dopattern(pat, args, sz, op, needrex)
       szov = nil
       szov = nil
     elseif c == "X" then	-- Force REX.W.
     elseif c == "X" then	-- Force REX.W.
       rex = 8
       rex = 8
+    elseif c == "L" then	-- Force VEX.L.
+      vex.l = true
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       if narg < 2 then narg = 2 end
       if narg < 2 then narg = 2 end
@@ -1521,21 +1903,42 @@ local function dopattern(pat, args, sz, op, needrex)
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if s > 7 then rex = rex + 4 end
       if s > 7 then rex = rex + 4 end
       if needrex then rex = rex + 16 end
       if needrex then rex = rex + 16 end
-      wputop(szov, opcode, rex); opcode = nil
+      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+      opcode = nil
       local imark = sub(pat, -1) -- Force a mark (ugly).
       local imark = sub(pat, -1) -- Force a mark (ugly).
       -- Put ModRM/SIB with regno/last digit as spare.
       -- Put ModRM/SIB with regno/last digit as spare.
-      wputmrmsib(t, imark, s, addin and addin.vreg)
+      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
       addin = nil
       addin = nil
+    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+      local b = band(opcode, 255); opcode = shr(opcode, 8)
+      local m = 1
+      if b == 0x38 then m = 2
+      elseif b == 0x3a then m = 3 end
+      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
+      if b ~= 0x0f then
+	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+	  "' in pattern `"..pat.."' for `"..op.."'")
+      end
+      local v = map_vexarg[c]
+      if v then v = remove(args, v) end
+      b = band(opcode, 255)
+      local p = 0
+      if b == 0x66 then p = 1
+      elseif b == 0xf3 then p = 2
+      elseif b == 0xf2 then p = 3 end
+      if p ~= 0 then opcode = shr(opcode, 8) end
+      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
+      vex = { m = m, p = p, v = v }
     else
     else
       if opcode then -- Flush opcode.
       if opcode then -- Flush opcode.
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if needrex then rex = rex + 16 end
 	if needrex then rex = rex + 16 end
 	if addin and addin.reg == -1 then
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode - 7, rex)
-	  waction("VREG", addin.vreg); wputxb(0)
+	  local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+	  wvreg("opcode", addin.vreg, psz, sk)
 	else
 	else
 	  if addin and addin.reg > 7 then rex = rex + 1 end
 	  if addin and addin.reg > 7 then rex = rex + 1 end
-	  wputop(szov, opcode, rex)
+	  wputop(szov, opcode, rex, vex)
 	end
 	end
 	opcode = nil
 	opcode = nil
       end
       end
@@ -1572,6 +1975,14 @@ local function dopattern(pat, args, sz, op, needrex)
 	  else
 	  else
 	    wputlabel("REL_", imm, 2)
 	    wputlabel("REL_", imm, 2)
 	  end
 	  end
+	elseif c == "s" then
+	  local reg = a.reg
+	  if reg < 0 then
+	    wputb(0)
+	    wvreg("imm.hi", a.vreg)
+	  else
+	    wputb(shl(reg, 4))
+	  end
 	else
 	else
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	end
 	end
@@ -1648,11 +2059,14 @@ map_op[".template__"] = function(params, template, nparams)
     if pat == "" then pat = lastpat else lastpat = pat end
     if pat == "" then pat = lastpat else lastpat = pat end
     if matchtm(tm, args) then
     if matchtm(tm, args) then
       local prefix = sub(szm, 1, 1)
       local prefix = sub(szm, 1, 1)
-      if prefix == "/" then -- Match both operand sizes.
-	if args[1].opsize == sub(szm, 2, 2) and
-	   args[2].opsize == sub(szm, 3, 3) then
-	  dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
-	  return
+      if prefix == "/" then -- Exactly match leading operand sizes.
+	for i = #szm,1,-1 do
+	  if i == 1 then
+	    dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+	    return
+	  elseif args[i-1].opsize ~= sub(szm, i, i) then
+	    break
+	  end
 	end
 	end
       else -- Match common operand size.
       else -- Match common operand size.
 	local szp = sz
 	local szp = sz
@@ -1717,8 +2131,8 @@ if x64 then
 	rex = a.reg > 7 and 9 or 8
 	rex = a.reg > 7 and 9 or 8
       end
       end
     end
     end
-    wputop(sz, opcode, rex)
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+    wvreg("opcode", vreg, psz, sk)
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
   end
   end

+ 3 - 3
luajit.mod/luajit/dynasm/dynasm.lua

@@ -10,9 +10,9 @@
 local _info = {
 local _info = {
   name =	"DynASM",
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   url =		"http://luajit.org/dynasm.html",
   url =		"http://luajit.org/dynasm.html",
   license =	"MIT",
   license =	"MIT",

+ 3 - 3
luajit.mod/luajit/etc/luajit.pc

@@ -1,8 +1,8 @@
 # Package information for LuaJIT to be used by pkg-config.
 # Package information for LuaJIT to be used by pkg-config.
 majver=2
 majver=2
-minver=0
-relver=5
-version=${majver}.${minver}.${relver}
+minver=1
+relver=0
+version=${majver}.${minver}.${relver}-beta3
 abiver=5.1
 abiver=5.1
 
 
 prefix=/usr/local
 prefix=/usr/local

+ 7 - 0
luajit.mod/luajit/src/.gitignore

@@ -0,0 +1,7 @@
+luajit
+lj_bcdef.h
+lj_ffdef.h
+lj_libdef.h
+lj_recdef.h
+lj_folddef.h
+lj_vm.[sS]

+ 64 - 32
luajit.mod/luajit/src/Makefile

@@ -11,8 +11,8 @@
 ##############################################################################
 ##############################################################################
 
 
 MAJVER=  2
 MAJVER=  2
-MINVER=  0
-RELVER=  5
+MINVER=  1
+RELVER=  0
 ABIVER=  5.1
 ABIVER=  5.1
 NODOTABIVER= 51
 NODOTABIVER= 51
 
 
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
 #
 #
 # Target-specific compiler options:
 # Target-specific compiler options:
 #
 #
-# x86 only: it's recommended to compile at least for i686. Better yet,
-# compile for an architecture that has SSE2, too (-msse -msse2).
-#
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # the binaries to a different machine you could also use: -march=native
 # the binaries to a different machine you could also use: -march=native
 #
 #
-CCOPT_x86= -march=i686
+CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
 CCOPT_x64=
 CCOPT_x64=
 CCOPT_arm=
 CCOPT_arm=
+CCOPT_arm64=
 CCOPT_ppc=
 CCOPT_ppc=
-CCOPT_ppcspe=
 CCOPT_mips=
 CCOPT_mips=
 #
 #
 CCDEBUG=
 CCDEBUG=
@@ -113,6 +110,9 @@ XCFLAGS=
 #XCFLAGS+= -DLUAJIT_NUMMODE=1
 #XCFLAGS+= -DLUAJIT_NUMMODE=1
 #XCFLAGS+= -DLUAJIT_NUMMODE=2
 #XCFLAGS+= -DLUAJIT_NUMMODE=2
 #
 #
+# Enable GC64 mode for x64.
+#XCFLAGS+= -DLUAJIT_ENABLE_GC64
+#
 ##############################################################################
 ##############################################################################
 
 
 ##############################################################################
 ##############################################################################
@@ -124,8 +124,8 @@ XCFLAGS=
 #
 #
 # Use the system provided memory allocator (realloc) instead of the
 # Use the system provided memory allocator (realloc) instead of the
 # bundled memory allocator. This is slower, but sometimes helpful for
 # bundled memory allocator. This is slower, but sometimes helpful for
-# debugging. This option cannot be enabled on x64, since realloc usually
-# doesn't return addresses in the right address range.
+# debugging. This option cannot be enabled on x64 without GC64, since
+# realloc usually doesn't return addresses in the right address range.
 # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
 # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
 # the only way to get useful results from it for all other architectures.
 # the only way to get useful results from it for all other architectures.
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
@@ -165,6 +165,10 @@ else
     HOST_SYS= Windows
     HOST_SYS= Windows
     HOST_MSYS= mingw
     HOST_MSYS= mingw
   endif
   endif
+  ifneq (,$(findstring MSYS,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= mingw
+  endif
   ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
   ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
     HOST_SYS= Windows
     HOST_SYS= Windows
     HOST_MSYS= cygwin
     HOST_MSYS= cygwin
@@ -186,11 +190,12 @@ endif
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
 
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 
 
 HOST_CC= $(CC)
 HOST_CC= $(CC)
-HOST_RM= rm -f
+HOST_RM?= rm -f
 # If left blank, minilua is built and used. You can supply an installed
 # If left blank, minilua is built and used. You can supply an installed
 # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
 # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
 HOST_LUA=
 HOST_LUA=
@@ -208,7 +213,7 @@ TARGET_CC= $(STATIC_CC)
 TARGET_STCC= $(STATIC_CC)
 TARGET_STCC= $(STATIC_CC)
 TARGET_DYNCC= $(DYNAMIC_CC)
 TARGET_DYNCC= $(DYNAMIC_CC)
 TARGET_LD= $(CROSS)$(CC)
 TARGET_LD= $(CROSS)$(CC)
-TARGET_AR= $(CROSS)ar rcus 2>/dev/null
+TARGET_AR= $(CROSS)ar rcus
 TARGET_STRIP= $(CROSS)strip
 TARGET_STRIP= $(CROSS)strip
 
 
 TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
 TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
@@ -225,6 +230,7 @@ TARGET_XLDFLAGS=
 TARGET_XLIBS= -lm
 TARGET_XLIBS= -lm
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -239,17 +245,29 @@ else
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= arm
   TARGET_LJARCH= arm
 else
 else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -D__AARCH64EB__=1
+  endif
+  TARGET_LJARCH= arm64
+else
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+  else
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
+  endif
   TARGET_LJARCH= ppc
   TARGET_LJARCH= ppc
 else
 else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
-  TARGET_LJARCH= ppcspe
-else
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
     TARGET_ARCH= -D__MIPSEL__=1
     TARGET_ARCH= -D__MIPSEL__=1
   endif
   endif
-  TARGET_LJARCH= mips
+  ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= mips64
+  else
+    TARGET_LJARCH= mips
+  endif
 else
 else
   $(error Unsupported target architecture)
   $(error Unsupported target architecture)
 endif
 endif
@@ -263,6 +281,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
   TARGET_SYS= PS3
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
+  TARGET_XLIBS+= -lpthread
 endif
 endif
 
 
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -293,6 +312,7 @@ ifeq (Windows,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -shared
   TARGET_XSHLDFLAGS= -shared
   TARGET_DYNXLDOPTS=
   TARGET_DYNXLDOPTS=
 else
 else
+  TARGET_AR+= 2>/dev/null
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
 endif
@@ -314,6 +334,9 @@ ifeq (iOS,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+  ifeq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -fno-omit-frame-pointer
+  endif
 else
 else
   ifneq (SunOS,$(TARGET_SYS))
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
@@ -374,6 +397,11 @@ DASM_XFLAGS=
 DASM_AFLAGS=
 DASM_AFLAGS=
 DASM_ARCH= $(TARGET_LJARCH)
 DASM_ARCH= $(TARGET_LJARCH)
 
 
+ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D ENDIAN_LE
+else
+  DASM_AFLAGS+= -D ENDIAN_BE
+endif
 ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D P64
   DASM_AFLAGS+= -D P64
 endif
 endif
@@ -406,13 +434,10 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
 ifeq (Windows,$(TARGET_SYS))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
   DASM_AFLAGS+= -D WIN
 endif
 endif
-ifeq (x86,$(TARGET_LJARCH))
-  ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-    DASM_AFLAGS+= -D SSE
-  endif
-else
 ifeq (x64,$(TARGET_LJARCH))
 ifeq (x64,$(TARGET_LJARCH))
-  DASM_ARCH= x86
+  ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+    DASM_ARCH= x86
+  endif
 else
 else
 ifeq (arm,$(TARGET_LJARCH))
 ifeq (arm,$(TARGET_LJARCH))
   ifeq (iOS,$(TARGET_SYS))
   ifeq (iOS,$(TARGET_SYS))
@@ -426,13 +451,15 @@ ifeq (ppc,$(TARGET_LJARCH))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D ROUND
     DASM_AFLAGS+= -D ROUND
   endif
   endif
-  ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D GPR64
     DASM_AFLAGS+= -D GPR64
   endif
   endif
   ifeq (PS3,$(TARGET_SYS))
   ifeq (PS3,$(TARGET_SYS))
     DASM_AFLAGS+= -D PPE -D TOC
     DASM_AFLAGS+= -D PPE -D TOC
   endif
   endif
-endif
+  ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+    DASM_ARCH= ppc64
+  endif
 endif
 endif
 endif
 endif
 endif
 endif
@@ -448,7 +475,7 @@ BUILDVM_X= $(BUILDVM_T)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 
 
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
 LJVM_O= lj_vm.o
 LJVM_O= lj_vm.o
 LJVM_BOUT= $(LJVM_S)
 LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 LJVM_MODE= elfasm
@@ -457,10 +484,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 
-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
+	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -580,6 +608,10 @@ amalg:
 clean:
 clean:
 	$(HOST_RM) $(ALL_RM)
 	$(HOST_RM) $(ALL_RM)
 
 
+libbc:
+	./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
+	$(MAKE) all
+
 depend:
 depend:
 	@for file in $(ALL_HDRGEN); do \
 	@for file in $(ALL_HDRGEN); do \
 	  test -f $$file || touch $$file; \
 	  test -f $$file || touch $$file; \
@@ -594,7 +626,7 @@ depend:
 	  test -s $$file || $(HOST_RM) $$file; \
 	  test -s $$file || $(HOST_RM) $$file; \
 	  done
 	  done
 
 
-.PHONY: default all amalg clean depend
+.PHONY: default all amalg clean libbc depend
 
 
 ##############################################################################
 ##############################################################################
 # Rules for generated files.
 # Rules for generated files.
@@ -604,7 +636,7 @@ $(MINILUA_T): $(MINILUA_O)
 	$(E) "HOSTLINK  $@"
 	$(E) "HOSTLINK  $@"
 	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
 	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
 
 
-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP)
+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
 	$(E) "DYNASM    $@"
 	$(E) "DYNASM    $@"
 	$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
 	$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
 
 
@@ -651,10 +683,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 
 
-%.o: %.s
+%.o: %.S
 	$(E) "ASM       $@"
 	$(E) "ASM       $@"
-	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+	$(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
 
 
 $(LUAJIT_O):
 $(LUAJIT_O):
 	$(E) "CC        $@"
 	$(E) "CC        $@"

+ 118 - 98
luajit.mod/luajit/src/Makefile.dep

@@ -3,45 +3,49 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
- lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_lib.h lj_libdef.h
+ lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
+ lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
+ lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
  lj_libdef.h
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \
- lj_ffdef.h lj_lib.h lj_libdef.h
-lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
- lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
- lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
- lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
+ lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
- lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
+ lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
- lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
  lj_bcdef.h
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
- lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
+ lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
+ lj_strfmt.h
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
- lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
+ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
+ lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h
  lj_traceerr.h
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,118 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_ccallback.h
  lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_clib.h
+ lj_cdata.h lj_clib.h lj_strfmt.h
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
- lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
+ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
+ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
- lj_crecord.h
+ lj_crecord.h lj_strfmt.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
+ lj_ccallback.h lj_buf.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
- lj_bc.h lj_vm.h lj_jit.h lj_ir.h
+ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
- lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
- lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_vm.h luajit.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
+ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h
+ lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_recdef.h
+ lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
- lj_ir.h lj_dispatch.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
+ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_vm.h lj_strscan.h lj_lib.h
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
- lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+ lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
- lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_vm.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h
  lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
- lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
- lj_strscan.h lj_folddef.h
+ lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strscan.h
  lj_traceerr.h lj_vm.h lj_strscan.h
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_vm.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
- lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_snap.h lj_vm.h
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_target_*.h lj_ctype.h lj_cdata.h
  lj_target_*.h lj_ctype.h lj_cdata.h
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h
+lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
+lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
  lj_char.h lj_strscan.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,26 +207,27 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_vm.h
  lj_ir.h lj_vm.h
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
- lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
- lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
- lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
- lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
- luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
- lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
- lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
- lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
- lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+ lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
+ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+ lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+ lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+ lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +239,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
- lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
+ host/buildvm_libbc.h
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c
 host/minilua.o: host/minilua.c

+ 3 - 0
luajit.mod/luajit/src/host/.gitignore

@@ -0,0 +1,3 @@
+minilua
+buildvm
+buildvm_arch.h

+ 14 - 12
luajit.mod/luajit/src/host/buildvm.c

@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_x86.h"
 #include "../dynasm/dasm_x86.h"
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #include "../dynasm/dasm_arm.h"
 #include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #include "../dynasm/dasm_ppc.h"
 #include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
 #include "../dynasm/dasm_mips.h"
 #else
 #else
@@ -110,11 +110,11 @@ static const char *sym_decorate(BuildCtx *ctx,
   if (p) {
   if (p) {
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
-      name[0] = '@';
+      name[0] = name[1] == 'R' ? '_' : '@';  /* Just for _RtlUnwind@16. */
     else
     else
       *p = '\0';
       *p = '\0';
-#elif (LJ_TARGET_PPC  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
-    /* Keep @plt. */
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+    /* Keep @plt etc. */
 #else
 #else
     *p = '\0';
     *p = '\0';
 #endif
 #endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
   ctx->nreloc = 0;
   ctx->nreloc = 0;
 
 
   ctx->globnames = globnames;
   ctx->globnames = globnames;
+  ctx->extnames = extnames;
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->nrelocsym = 0;
   ctx->nrelocsym = 0;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
   char buf[80];
   char buf[80];
   int i;
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
-  fprintf(ctx->fp, "module(...)\n\n");
+  fprintf(ctx->fp, "return {\n\n");
 
 
   fprintf(ctx->fp, "bcnames = \"");
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irnames = \"");
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irfpm = { [0]=");
   fprintf(ctx->fp, "irfpm = { [0]=");
   for (i = 0; irfpm_names[i]; i++)
   for (i = 0; irfpm_names[i]; i++)
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "irfield = { [0]=");
   fprintf(ctx->fp, "irfield = { [0]=");
   for (i = 0; irfield_names[i]; i++) {
   for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
     if (p) *p = '.';
     if (p) *p = '.';
     fprintf(ctx->fp, "\"%s\", ", buf);
     fprintf(ctx->fp, "\"%s\", ", buf);
   }
   }
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "ircall = {\n[0]=");
   fprintf(ctx->fp, "ircall = {\n[0]=");
   for (i = 0; ircall_names[i]; i++)
   for (i = 0; ircall_names[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   for (i = 0; trace_errors[i]; i++)
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 }
 }
 
 
 /* -- Argument parsing ---------------------------------------------------- */
 /* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
   case BUILD_vmdef:
   case BUILD_vmdef:
     emit_vmdef(ctx);
     emit_vmdef(ctx);
     emit_lib(ctx);
     emit_lib(ctx);
+    fprintf(ctx->fp, "}\n\n");
     break;
     break;
   case BUILD_ffdef:
   case BUILD_ffdef:
   case BUILD_libdef:
   case BUILD_libdef:

+ 1 - 0
luajit.mod/luajit/src/host/buildvm.h

@@ -82,6 +82,7 @@ typedef struct BuildCtx {
   const char *beginsym;
   const char *beginsym;
   /* Strings generated by DynASM. */
   /* Strings generated by DynASM. */
   const char *const *globnames;
   const char *const *globnames;
+  const char *const *extnames;
   const char *dasm_ident;
   const char *dasm_ident;
   const char *dasm_arch;
   const char *dasm_arch;
   /* Relocations. */
   /* Relocations. */

+ 56 - 11
luajit.mod/luajit/src/host/buildvm_asm.c

@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
 };
 };
 
 
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
 				const char *sym)
 				const char *sym)
 {
 {
   const char *opname = NULL;
   const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
     exit(1);
     exit(1);
   }
   }
   emit_asm_bytes(ctx, cp, n);
   emit_asm_bytes(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    if (ctx->mode == BUILD_elfasm) {
+      if (LJ_32)
+	fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+      fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+      if (LJ_32)
+	fprintf(ctx->fp, "#endif\n");
+      return;
+    } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+      fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+      return;
+    }
+  }
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
 }
 }
 #else
 #else
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
 {
 {
   int i;
   int i;
   for (i = 0; i < n; i += 4) {
   for (i = 0; i < n; i += 4) {
+    uint32_t ins = *(uint32_t *)(p+i);
+#if LJ_TARGET_ARM64 && LJ_BE
+    ins = lj_bswap(ins);  /* ARM64 instructions are always little-endian. */
+#endif
     if ((i & 15) == 0)
     if ((i & 15) == 0)
-      fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
+      fprintf(ctx->fp, "\t.long 0x%08x", ins);
     else
     else
-      fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
+      fprintf(ctx->fp, ",0x%08x", ins);
     if ((i & 15) == 12) putc('\n', ctx->fp);
     if ((i & 15) == 12) putc('\n', ctx->fp);
   }
   }
   if ((n & 15) != 0) putc('\n', ctx->fp);
   if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
 	    ins, sym);
 	    ins, sym);
     exit(1);
     exit(1);
   }
   }
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+  if ((ins >> 26) == 0x25u) {
+    fprintf(ctx->fp, "\tbl %s\n", sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_PPC
 #if LJ_TARGET_PS3
 #if LJ_TARGET_PS3
 #define TOCPREFIX "."
 #define TOCPREFIX "."
 #else
 #else
@@ -117,6 +144,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
   } else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+    const char *suffix = strchr(sym, '@');
+    if (suffix && suffix[1] == 'h') {
+      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+    } else if (suffix && suffix[1] == 'l') {
+      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    } else
+#endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
   } else {
   } else {
     fprintf(stderr,
     fprintf(stderr,
@@ -215,6 +250,9 @@ void emit_asm(BuildCtx *ctx)
   int i, rel;
   int i, rel;
 
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+  fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
   fprintf(ctx->fp, "\t.text\n");
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
   emit_asm_align(ctx, 4);
 
 
@@ -228,11 +266,20 @@ void emit_asm(BuildCtx *ctx)
 
 
 #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
 #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
   /* This should really be moved into buildvm_arm.dasc. */
   /* This should really be moved into buildvm_arm.dasc. */
+#if LJ_ARCH_HASFPU
+  fprintf(ctx->fp,
+	  ".fnstart\n"
+	  ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
+	  ".vsave {d8-d15}\n"
+	  ".save {r4}\n"
+	  ".pad #28\n");
+#else
   fprintf(ctx->fp,
   fprintf(ctx->fp,
 	  ".fnstart\n"
 	  ".fnstart\n"
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".pad #28\n");
 	  ".pad #28\n");
 #endif
 #endif
+#endif
 #if LJ_TARGET_MIPS
 #if LJ_TARGET_MIPS
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
 #endif
@@ -255,8 +302,9 @@ void emit_asm(BuildCtx *ctx)
       BuildReloc *r = &ctx->reloc[rel];
       BuildReloc *r = &ctx->reloc[rel];
       int n = r->ofs - ofs;
       int n = r->ofs - ofs;
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
-      if (ctx->mode == BUILD_machasm && r->type != 0) {
-	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      if (r->type != 0 &&
+	  (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+	emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       } else {
       } else {
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -290,10 +338,7 @@ void emit_asm(BuildCtx *ctx)
 #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
 #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
 #endif
-#if LJ_TARGET_PPCSPE
-    /* Soft-float ABI + SPE. */
-    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
     /* Hard-float ABI. */
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif
 #endif

+ 60 - 1
luajit.mod/luajit/src/host/buildvm_lib.c

@@ -5,7 +5,9 @@
 
 
 #include "buildvm.h"
 #include "buildvm.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_bc.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
+#include "buildvm_libbc.h"
 
 
 /* Context for library definitions. */
 /* Context for library definitions. */
 static uint8_t obuf[8192];
 static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
   regfunc = REGFUNC_OK;
   regfunc = REGFUNC_OK;
 }
 }
 
 
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+  uint32_t v = *p++;
+  if (v >= 0x80) {
+    int sh = 0; v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *vv = v;
+  return p;
+}
+
+static void libdef_fixupbc(uint8_t *p)
+{
+  uint32_t i, sizebc;
+  p += 4;
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  for (i = 0; i < sizebc; i++, p += 4) {
+    uint8_t op = p[libbc_endian ? 3 : 0];
+    uint8_t ra = p[libbc_endian ? 2 : 1];
+    uint8_t rc = p[libbc_endian ? 1 : 2];
+    uint8_t rb = p[libbc_endian ? 0 : 3];
+    if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+      op = BC_ISNUM; rc++;
+    }
+    p[LJ_ENDIAN_SELECT(0, 3)] = op;
+    p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+    p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+    p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+  }
+}
+
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int i;
+    for (i = 0; libbc_map[i].name != NULL; i++) {
+      if (!strcmp(libbc_map[i].name, p)) {
+	int ofs = libbc_map[i].ofs;
+	int len = libbc_map[i+1].ofs - ofs;
+	obuf[2]++;  /* Bump hash table size. */
+	*optr++ = LIBINIT_LUA;
+	libdef_name(p, 0);
+	memcpy(optr, libbc_code + ofs, len);
+	libdef_fixupbc(optr);
+	optr += len;
+	return;
+      }
+    }
+    fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+    exit(1);
+  }
+}
+
 static uint32_t find_rec(char *name)
 static uint32_t find_rec(char *name)
 {
 {
   char *p = (char *)obuf;
   char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "LUA(",	")",		libdef_lua,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "SET(",	")",		libdef_set,		0 },
   { "SET(",	")",		libdef_set,		0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       ffasmfunc);
       ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
   } else if (ctx->mode == BUILD_vmdef) {
-    fprintf(ctx->fp, "}\n\n");
+    fprintf(ctx->fp, "},\n\n");
   } else if (ctx->mode == BUILD_bcdef) {
   } else if (ctx->mode == BUILD_bcdef) {
     int i;
     int i;
     fprintf(ctx->fp, "\n};\n\n");
     fprintf(ctx->fp, "\n};\n\n");

+ 56 - 0
luajit.mod/luajit/src/host/buildvm_libbc.h

@@ -0,0 +1,56 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
+3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
+0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
+41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
+18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
+6,252,127,76,4,2,0,0
+#else
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
+3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
+0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
+41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
+18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
+6,252,127,76,4,2,0,0
+#endif
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",207},
+{"table_remove",226},
+{"table_move",355},
+{NULL,502}
+};
+

+ 26 - 2
luajit.mod/luajit/src/host/buildvm_peobj.c

@@ -109,6 +109,8 @@ enum {
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
   PEOBJ_SECT_PDATA,
   PEOBJ_SECT_PDATA,
   PEOBJ_SECT_XDATA,
   PEOBJ_SECT_XDATA,
+#elif LJ_TARGET_X86
+  PEOBJ_SECT_SXDATA,
 #endif
 #endif
   PEOBJ_SECT_RDATA_Z,
   PEOBJ_SECT_RDATA_Z,
   PEOBJ_NSECTIONS
   PEOBJ_NSECTIONS
@@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx)
   sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
   sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
   pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
   pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
+#elif LJ_TARGET_X86
+  memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
+  pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
+  pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
+  /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
+  pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
 #endif
 #endif
 
 
   memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
   memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx)
   nrsym = ctx->nrelocsym;
   nrsym = ctx->nrelocsym;
   pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
   pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
-  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win64. */
+  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win. */
 #endif
 #endif
 
 
   /* Write PE object header and all sections. */
   /* Write PE object header and all sections. */
@@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx)
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }
   }
+#elif LJ_TARGET_X86
+  /* Write .sxdata section. */
+  for (i = 0; i < nrsym; i++) {
+    if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
+      uint32_t symidx = 1+2+i;
+      owrite(ctx, &symidx, 4);
+      break;
+    }
+  }
+  if (i == nrsym) {
+    fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
+    exit(1);
+  }
 #endif
 #endif
 
 
   /* Write .rdata$Z section. */
   /* Write .rdata$Z section. */
@@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx)
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
-    emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
+    emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
 		   PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
 		   PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+#elif LJ_TARGET_X86
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
 #endif
 #endif
 
 
     emit_peobj_sym(ctx, ctx->beginsym, 0,
     emit_peobj_sym(ctx, ctx->beginsym, 0,

+ 197 - 0
luajit.mod/luajit/src/host/genlibbc.lua

@@ -0,0 +1,197 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+local function usage(arg)
+  io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
+		  " [-o buildvm_libbc.h] lib_*.c\n")
+  os.exit(1)
+end
+
+local function parse_arg(arg)
+  local outfile = "-"
+  if not (arg and arg[1]) then
+    usage(arg)
+  end
+  if arg[1] == "-o" then
+    outfile = arg[2]
+    if not outfile then usage(arg) end
+    table.remove(arg, 1)
+    table.remove(arg, 1)
+  end
+  return outfile
+end
+
+local function read_files(names)
+  local src = ""
+  for _,name in ipairs(names) do
+    local fp = assert(io.open(name))
+    src = src .. fp:read("*a")
+    fp:close()
+  end
+  return src
+end
+
+local function transform_lua(code)
+  local fixup = {}
+  local n = -30000
+  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
+    n = n + 1
+    fixup[n] = { "CHECK", tp }
+    return format("%s=%d", var, n)
+  end)
+  code = string.gsub(code, "PAIRS%((.-)%)", function(var)
+    fixup.PAIRS = true
+    return format("nil, %s, 0", var)
+  end)
+  return "return "..code, fixup
+end
+
+local function read_uleb128(p)
+  local v = p[0]; p = p + 1
+  if v >= 128 then
+    local sh = 7; v = v - 128
+    repeat
+      local r = p[0]
+      v = v + bit.lshift(bit.band(r, 127), sh)
+      sh = sh + 7
+      p = p + 1
+    until r < 128
+  end
+  return p, v
+end
+
+-- ORDER LJ_T
+local name2itype = {
+  str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+local BC = {}
+for i=0,#bcnames/6-1 do
+  BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
+end
+local xop, xra = isbe and 3 or 0, isbe and 2 or 1
+local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
+
+local function fixup_dump(dump, fixup)
+  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
+  local p = buf+5
+  local n, sizebc
+  p, n = read_uleb128(p)
+  local start = p
+  p = p + 4
+  p = read_uleb128(p)
+  p = read_uleb128(p)
+  p, sizebc = read_uleb128(p)
+  local rawtab = {}
+  for i=0,sizebc-1 do
+    local op = p[xop]
+    if op == BC.KSHORT then
+      local rd = p[xrc] + 256*p[xrb]
+      rd = bit.arshift(bit.lshift(rd, 16), 16)
+      local f = fixup[rd]
+      if f then
+	if f[1] == "CHECK" then
+	  local tp = f[2]
+	  if tp == "tab" then rawtab[p[xra]] = true end
+	  p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
+	  p[xrb] = 0
+	  p[xrc] = name2itype[tp]
+	else
+	  error("unhandled fixup type: "..f[1])
+	end
+      end
+    elseif op == BC.TGETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TGETR
+      end
+    elseif op == BC.TSETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TSETR
+      end
+    elseif op == BC.ITERC then
+      if fixup.PAIRS then
+	p[xop] = BC.ITERN
+      end
+    end
+    p = p + 4
+  end
+  return ffi.string(start, n)
+end
+
+local function find_defs(src)
+  local defs = {}
+  for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
+    local env = {}
+    local tcode, fixup = transform_lua(code)
+    local func = assert(load(tcode, "", nil, env))()
+    defs[name] = fixup_dump(string.dump(func, true), fixup)
+    defs[#defs+1] = name
+  end
+  return defs
+end
+
+local function gen_header(defs)
+  local t = {}
+  local function w(x) t[#t+1] = x end
+  w("/* This is a generated file. DO NOT EDIT! */\n\n")
+  w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
+  local s = ""
+  for _,name in ipairs(defs) do
+    s = s .. defs[name]
+  end
+  w("static const uint8_t libbc_code[] = {\n")
+  local n = 0
+  for i=1,#s do
+    local x = string.byte(s, i)
+    w(x); w(",")
+    n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
+    if n >= 75 then n = 0; w("\n") end
+  end
+  w("0\n};\n\n")
+  w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+  local m = 0
+  for _,name in ipairs(defs) do
+    w('{"'); w(name); w('",'); w(m) w('},\n')
+    m = m + #defs[name]
+  end
+  w("{NULL,"); w(m); w("}\n};\n\n")
+  return table.concat(t)
+end
+
+local function write_file(name, data)
+  if name == "-" then
+    assert(io.write(data))
+    assert(io.flush())
+  else
+    local fp = io.open(name)
+    if fp then
+      local old = fp:read("*a")
+      fp:close()
+      if data == old then return end
+    end
+    fp = assert(io.open(name, "w"))
+    assert(fp:write(data))
+    assert(fp:close())
+  end
+end
+
+local outfile = parse_arg(arg)
+local src = read_files(arg)
+local defs = find_defs(src)
+local hdr = gen_header(defs)
+write_file(outfile, hdr)
+

+ 1 - 0
luajit.mod/luajit/src/jit/.gitignore

@@ -0,0 +1 @@
+vmdef.lua

+ 9 - 10
luajit.mod/luajit/src/jit/bc.lua

@@ -41,7 +41,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
 local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+  line = bcline,
+  dump = bcdump,
+  targets = bctargets,
+  on = bcliston,
+  off = bclistoff,
+  start = bcliston -- For -j command line option.
+}
 
 

+ 18 - 16
luajit.mod/luajit/src/jit/bcsave.lua

@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 local bit = require("bit")
 
 
 -- Symbol name prefix for LuaJIT bytecode.
 -- Symbol name prefix for LuaJIT bytecode.
@@ -63,8 +63,8 @@ local map_type = {
 }
 }
 
 
 local map_arch = {
 local map_arch = {
-  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
-  mips = true, mipsel = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
+  ppc = true, mips = true, mipsel = true,
 }
 }
 
 
 local map_os = {
 local map_os = {
@@ -125,12 +125,12 @@ extern "C"
 #ifdef _WIN32
 #ifdef _WIN32
 __declspec(dllexport)
 __declspec(dllexport)
 #endif
 #endif
-const char %s%s[] = {
+const unsigned char %s%s[] = {
 ]], LJBC_PREFIX, ctx.modname))
 ]], LJBC_PREFIX, ctx.modname))
   else
   else
     fp:write(string.format([[
     fp:write(string.format([[
 #define %s%s_SIZE %d
 #define %s%s_SIZE %d
-static const char %s%s[] = {
+static const unsigned char %s%s[] = {
 ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
 ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
   end
   end
   local t, n, m = {}, 0, 0
   local t, n, m = {}, 0, 0
@@ -200,9 +200,9 @@ typedef struct {
 ]]
 ]]
   local symname = LJBC_PREFIX..ctx.modname
   local symname = LJBC_PREFIX..ctx.modname
   local is64, isbe = false, false
   local is64, isbe = false, false
-  if ctx.arch == "x64" then
+  if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
     is64 = true
     is64 = true
-  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+  elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
     isbe = true
   end
   end
 
 
@@ -237,7 +237,7 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.eversion = 1
   hdr.type = f16(1)
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
     hdr.flags = f32(0x50001006)
     hdr.flags = f32(0x50001006)
   end
   end
@@ -275,7 +275,7 @@ typedef struct {
   o.sect[2].size = fofs(ofs)
   o.sect[2].size = fofs(ofs)
   o.sect[3].type = f32(3) -- .strtab
   o.sect[3].type = f32(3) -- .strtab
   o.sect[3].ofs = fofs(sofs + ofs)
   o.sect[3].ofs = fofs(sofs + ofs)
-  o.sect[3].size = fofs(#symname+1)
+  o.sect[3].size = fofs(#symname+2)
   ffi.copy(o.space+ofs+1, symname)
   ffi.copy(o.space+ofs+1, symname)
   ofs = ofs + #symname + 2
   ofs = ofs + #symname + 2
   o.sect[4].type = f32(1) -- .rodata
   o.sect[4].type = f32(1) -- .rodata
@@ -477,13 +477,13 @@ typedef struct {
 } mach_obj_64;
 } mach_obj_64;
 typedef struct {
 typedef struct {
   mach_fat_header fat;
   mach_fat_header fat;
-  mach_fat_arch fat_arch[4];
+  mach_fat_arch fat_arch[2];
   struct {
   struct {
     mach_header hdr;
     mach_header hdr;
     mach_segment_command seg;
     mach_segment_command seg;
     mach_section sec;
     mach_section sec;
     mach_symtab_command sym;
     mach_symtab_command sym;
-  } arch[4];
+  } arch[2];
   mach_nlist sym_entry;
   mach_nlist sym_entry;
   uint8_t space[4096];
   uint8_t space[4096];
 } mach_fat_obj;
 } mach_fat_obj;
@@ -494,6 +494,8 @@ typedef struct {
     is64, align, mobj = true, 8, "mach_obj_64"
     is64, align, mobj = true, 8, "mach_obj_64"
   elseif ctx.arch == "arm" then
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
     isfat, mobj = true, "mach_fat_obj"
+  elseif ctx.arch == "arm64" then
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
   else
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
   end
@@ -503,8 +505,8 @@ typedef struct {
   -- Create Mach-O object and fill in header.
   -- Create Mach-O object and fill in header.
   local o = ffi.new(mobj)
   local o = ffi.new(mobj)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
-  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
-  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
   if isfat then
   if isfat then
     o.fat.magic = be32(0xcafebabe)
     o.fat.magic = be32(0xcafebabe)
     o.fat.nfat_arch = be32(#cpusubtype)
     o.fat.nfat_arch = be32(#cpusubtype)
@@ -653,7 +655,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+  start = docmd -- Process -b command line option.
+}
 
 

+ 9 - 9
luajit.mod/luajit/src/jit/dis_arm.lua

@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 16 then return map_gpr[r] end
   if r < 16 then return map_gpr[r] end
   return "d"..(r-16)
   return "d"..(r-16)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 1216 - 0
luajit.mod/luajit/src/jit/dis_arm64.lua

@@ -0,0 +1,1216 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64 disassembler module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+--
+-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+-- Sponsored by Cisco Systems, Inc.
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most user-mode AArch64 instructions.
+-- NYI: Advanced SIMD and VFP instructions.
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+local ror = bit.ror
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+local map_adr = { -- PC-relative addressing.
+  shift = 31, mask = 1,
+  [0] = "adrDBx", "adrpDBx"
+}
+
+local map_addsubi = { -- Add/subtract immediate.
+  shift = 29, mask = 3,
+  [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
+}
+
+local map_logi = { -- Logical immediate.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+  }
+}
+
+local map_movwi = { -- Move wide immediate.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+    }, false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+  },
+}
+
+local map_bitf = { -- Bitfield.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
+      "bfm|bfi|bfxilDN13w",
+      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
+    }
+  },
+  {
+    shift = 22, mask = 1,
+    {
+      shift = 29, mask = 3,
+      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
+      "bfm|bfi|bfxilDN13x",
+      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
+    }
+  }
+}
+
+local map_datai = { -- Data processing - immediate.
+  shift = 23, mask = 7,
+  [0] = map_adr, map_adr, map_addsubi, false,
+  map_logi, map_movwi, map_bitf,
+  {
+    shift = 15, mask = 0x1c0c1,
+    [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
+    [0x10081] = "extr|rorDNM4x"
+  }
+}
+
+local map_logsr = { -- Logical, shifted register.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = {
+	shift = 21, mask = 7,
+	[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+	"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+	     "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+	"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+	"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+      }
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = {
+      shift = 21, mask = 7,
+      [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+      "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+      "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+      "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+      "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+    }
+  }
+}
+
+local map_assh = {
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = {
+	shift = 22, mask = 3,
+	[0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+	      "adds|cmnD0NMSg", "adds|cmnD0NMg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+	      "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+      },
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+	    "adds|cmnD0NMg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+	    "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+    }
+  }
+}
+
+local map_addsubsh = { -- Add/subtract, shifted register.
+  shift = 22, mask = 3,
+  [0] = map_assh, map_assh, map_assh
+}
+
+local map_addsubex = { -- Add/subtract, extended register.
+  shift = 22, mask = 3,
+  [0] = {
+    shift = 29, mask = 3,
+    [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
+  }
+}
+
+local map_addsubc = { -- Add/subtract, with carry.
+  shift = 10, mask = 63,
+  [0] = {
+    shift = 29, mask = 3,
+    [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
+  }
+}
+
+local map_ccomp = {
+  shift = 4, mask = 1,
+  [0] = {
+    shift = 10, mask = 3,
+    [0] = { -- Conditional compare register.
+      shift = 29, mask = 3,
+      "ccmnNMVCg", false, "ccmpNMVCg",
+    },
+    [2] = {  -- Conditional compare immediate.
+      shift = 29, mask = 3,
+      "ccmnN5VCg", false, "ccmpN5VCg",
+    }
+  }
+}
+
+local map_csel = { -- Conditional select.
+  shift = 11, mask = 1,
+  [0] = {
+    shift = 10, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
+    },
+    {
+      shift = 29, mask = 3,
+      [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
+    }
+  }
+}
+
+local map_data1s = { -- Data processing, 1 source.
+  shift = 29, mask = 1,
+  [0] = {
+    shift = 31, mask = 1,
+    [0] = {
+      shift = 10, mask = 0x7ff,
+      [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
+    },
+    {
+      shift = 10, mask = 0x7ff,
+      [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
+    }
+  }
+}
+
+local map_data2s = { -- Data processing, 2 sources.
+  shift = 29, mask = 1,
+  [0] = {
+    shift = 10, mask = 63,
+    false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
+    "lsrDNMg", "asrDNMg", "rorDNMg"
+  }
+}
+
+local map_data3s = { -- Data processing, 3 sources.
+  shift = 29, mask = 7,
+  [0] = {
+    shift = 21, mask = 7,
+    [0] = {
+      shift = 15, mask = 1,
+      [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
+    }
+  }, false, false, false,
+  {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 21, mask = 7,
+      [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
+      false, "umaddl|umullDxNMwA0x", "umulhDNMx"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
+      false, "umsubl|umneglDxNMwA0x"
+    }
+  }
+}
+
+local map_datar = { -- Data processing, register.
+  shift = 28, mask = 1,
+  [0] = {
+    shift = 24, mask = 1,
+    [0] = map_logsr,
+    {
+      shift = 21, mask = 1,
+      [0] = map_addsubsh, map_addsubex
+    }
+  },
+  {
+    shift = 21, mask = 15,
+    [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
+    {
+      shift = 30, mask = 1,
+      [0] = map_data2s, map_data1s
+    },
+    false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
+    map_data3s, map_data3s, map_data3s
+  }
+}
+
+local map_lrl = { -- Load register, literal.
+  shift = 26, mask = 1,
+  [0] = {
+    shift = 30, mask = 3,
+    [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
+  },
+  {
+    shift = 30, mask = 3,
+    [0] = "ldrDsB", "ldrDdB"
+  }
+}
+
+local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
+  shift = 30, mask = 3,
+  [0] = {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "strDszL", "ldrDszL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strDxzL", "ldrDxzL"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "strDdzL", "ldrDdzL"
+    }
+  }
+}
+
+local map_lsriro = {
+  shift = 21, mask = 1,
+  [0] = {  -- Load/store register immediate.
+    shift = 10, mask = 3,
+    [0] = { -- Unscaled immediate.
+      shift = 26, mask = 1,
+      [0] = {
+	shift = 30, mask = 3,
+	[0] = {
+	  shift = 22, mask = 3,
+	  [0] = "sturbDwK", "ldurbDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturhDwK", "ldurhDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturDwK", "ldurDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturDxK", "ldurDxK"
+	}
+      }
+    }, map_lsriind, false, map_lsriind
+  },
+  {  -- Load/store register, register offset.
+    shift = 10, mask = 3,
+    [2] = {
+      shift = 26, mask = 1,
+      [0] = {
+	shift = 30, mask = 3,
+	[0] = {
+	  shift = 22, mask = 3,
+	  [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strDwO", "ldrDwO", "ldrswDxO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strDxO", "ldrDxO"
+	}
+      },
+      {
+	shift = 30, mask = 3,
+	[2] = {
+	  shift = 22, mask = 3,
+	  [0] = "strDsO", "ldrDsO"
+	},
+	[3] = {
+	  shift = 22, mask = 3,
+	  [0] = "strDdO", "ldrDdO"
+	}
+      }
+    }
+  }
+}
+
+local map_lsp = { -- Load/store register pair, offset.
+  shift = 22, mask = 1,
+  [0] = {
+    shift = 30, mask = 3,
+    [0] = {
+      shift = 26, mask = 1,
+      [0] = "stpDzAzwP", "stpDzAzsP",
+    },
+    {
+      shift = 26, mask = 1,
+      "stpDzAzdP"
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "stpDzAzxP"
+    }
+  },
+  {
+    shift = 30, mask = 3,
+    [0] = {
+      shift = 26, mask = 1,
+      [0] = "ldpDzAzwP", "ldpDzAzsP",
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "ldpswDAxP", "ldpDzAzdP"
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "ldpDzAzxP"
+    }
+  }
+}
+
+local map_ls = { -- Loads and stores.
+  shift = 24, mask = 0x31,
+  [0x10] = map_lrl, [0x30] = map_lsriro,
+  [0x20] = {
+    shift = 23, mask = 3,
+    map_lsp, map_lsp, map_lsp
+  },
+  [0x21] = {
+    shift = 23, mask = 3,
+    map_lsp, map_lsp, map_lsp
+  },
+  [0x31] = {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 30, mask = 3,
+      [0] = {
+	shift = 22, mask = 3,
+	[0] = "strbDwzU", "ldrbDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strhDwzU", "ldrhDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strDwzU", "ldrDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strDxzU", "ldrDxzU"
+      }
+    },
+    {
+      shift = 30, mask = 3,
+      [2] = {
+	shift = 22, mask = 3,
+	[0] = "strDszU", "ldrDszU"
+      },
+      [3] = {
+	shift = 22, mask = 3,
+	[0] = "strDdzU", "ldrDdzU"
+      }
+    }
+  },
+}
+
+local map_datafp = { -- Data processing, SIMD and FP.
+  shift = 28, mask = 7,
+  { -- 001
+    shift = 24, mask = 1,
+    [0] = {
+      shift = 21, mask = 1,
+      {
+	shift = 10, mask = 3,
+	[0] = {
+	  shift = 12, mask = 1,
+	  [0] = {
+	    shift = 13, mask = 1,
+	    [0] = {
+	      shift = 14, mask = 1,
+	      [0] = {
+		shift = 15, mask = 1,
+		[0] = { -- FP/int conversion.
+		  shift = 31, mask = 1,
+		  [0] = {
+		    shift = 16, mask = 0xff,
+		    [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
+		    [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
+		    [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
+		    [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
+		    [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
+		    [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
+		    [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
+		    [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
+		    [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
+		    [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
+		    [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
+		    [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
+		    [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
+		  },
+		  {
+		    shift = 16, mask = 0xff,
+		    [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
+		    [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
+		    [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
+		    [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
+		    [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
+		    [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
+		    [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
+		    [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
+		    [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
+		    [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
+		    [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
+		    [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
+		    [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
+		  }
+		}
+	      },
+	      { -- FP data-processing, 1 source.
+		shift = 31, mask = 1,
+		[0] = {
+		  shift = 22, mask = 3,
+		  [0] = {
+		    shift = 15, mask = 63,
+		    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+		    "fsqrtDNf", false, "fcvtDdNs", false, false,
+		    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+		    "frintaDNf", false, "frintxDNf", "frintiDNf",
+		  },
+		  {
+		    shift = 15, mask = 63,
+		    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+		    "fsqrtDNf", "fcvtDsNd", false, false, false,
+		    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+		    "frintaDNf", false, "frintxDNf", "frintiDNf",
+		  }
+		}
+	      }
+	    },
+	    { -- FP compare.
+	      shift = 31, mask = 1,
+	      [0] = {
+		shift = 14, mask = 3,
+		[0] = {
+		  shift = 23, mask = 1,
+		  [0] = {
+		    shift = 0, mask = 31,
+		    [0] = "fcmpNMf", [8] = "fcmpNZf",
+		    [16] = "fcmpeNMf", [24] = "fcmpeNZf",
+		  }
+		}
+	      }
+	    }
+	  },
+	  { -- FP immediate.
+	    shift = 31, mask = 1,
+	    [0] = {
+	      shift = 5, mask = 31,
+	      [0] = {
+		shift = 23, mask = 1,
+		[0] = "fmovDFf"
+	      }
+	    }
+	  }
+	},
+	{ -- FP conditional compare.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = {
+	      shift = 4, mask = 1,
+	      [0] = "fccmpNMVCf", "fccmpeNMVCf"
+	    }
+	  }
+	},
+	{ -- FP data-processing, 2 sources.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = {
+	      shift = 12, mask = 15,
+	      [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
+	      "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
+	      "fnmulDNMf"
+	    }
+	  }
+	},
+	{ -- FP conditional select.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = "fcselDNMCf"
+	  }
+	}
+      }
+    },
+    { -- FP data-processing, 3 sources.
+      shift = 31, mask = 1,
+      [0] = {
+	shift = 15, mask = 1,
+	[0] = {
+	  shift = 21, mask = 5,
+	  [0] = "fmaddDNMAf", "fnmaddDNMAf"
+	},
+	{
+	  shift = 21, mask = 5,
+	  [0] = "fmsubDNMAf", "fnmsubDNMAf"
+	}
+      }
+    }
+  }
+}
+
+local map_br = { -- Branches, exception generating and system instructions.
+  shift = 29, mask = 7,
+  [0] = "bB",
+  { -- Compare & branch, immediate.
+    shift = 24, mask = 3,
+    [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
+  },
+  { -- Conditional branch, immediate.
+    shift = 24, mask = 3,
+    [0] = {
+      shift = 4, mask = 1,
+      [0] = {
+	shift = 0, mask = 15,
+	[0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
+	"bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
+      }
+    }
+  }, false, "blB",
+  { -- Compare & branch, immediate.
+    shift = 24, mask = 3,
+    [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
+  },
+  {
+    shift = 24, mask = 3,
+    [0] = { -- Exception generation.
+      shift = 0, mask = 0xe0001f,
+      [0x200000] = "brkW"
+    },
+    { -- System instructions.
+      shift = 0, mask = 0x3fffff,
+      [0x03201f] = "nop"
+    },
+    { -- Unconditional branch, register.
+      shift = 0, mask = 0xfffc1f,
+      [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
+      [0x5f0000] = "retNx"
+    },
+  }
+}
+
+local map_init = {
+  shift = 25, mask = 15,
+  [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
+  map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
+}
+
+------------------------------------------------------------------------------
+
+local map_regs = { x = {}, w = {}, d = {}, s = {} }
+
+for i=0,30 do
+  map_regs.x[i] = "x"..i
+  map_regs.w[i] = "w"..i
+  map_regs.d[i] = "d"..i
+  map_regs.s[i] = "s"..i
+end
+map_regs.x[31] = "sp"
+map_regs.w[31] = "wsp"
+map_regs.d[31] = "d31"
+map_regs.s[31] = "s31"
+
+local map_cond = {
+  [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+  "hi", "ls", "ge", "lt", "gt", "le", "al",
+}
+
+local map_shift = { [0] = "lsl", "lsr", "asr", }
+
+local map_extend = {
+  [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
+}
+
+------------------------------------------------------------------------------
+
+-- Print one disassembly line: address, optional raw opcode in hex, mnemonic,
+-- comma-separated operands and, if ctx.rel maps to a known symbol, a
+-- "->symbol" annotation. Advances ctx.pos by one instruction (4 bytes).
+local function putop(ctx, text, operands)
+  local start = ctx.pos
+  local target = ctx.rel
+  local sym = target and ctx.symtab[target]
+  local note = sym and ("\t->"..sym) or ""
+  local with_hex = ctx.hexdump > 0
+  local args = concat(operands, ", ")
+  local line
+  if with_hex then
+    line = format("%08x  %s  %-5s %s%s\n",
+      ctx.addr+start, tohex(ctx.op), text, args, note)
+  else
+    line = format("%08x  %-5s %s%s\n", ctx.addr+start, text, args, note)
+  end
+  ctx.out(line)
+  ctx.pos = start + 4
+end
+
+-- Fallback for words that match no decode table entry: print the raw
+-- opcode as a ".long" directive.
+local function unknown(ctx)
+  local raw = "0x"..tohex(ctx.op)
+  return putop(ctx, ".long", { raw })
+end
+
+-- Look up a register name when the pattern carries an explicit class letter
+-- (x, w, d or s) after operand letter p, e.g. "Nx" selects map_regs.x.
+local function match_reg(p, pat, regnum)
+  local class = match(pat, p.."%w-([xwds])")
+  local names = map_regs[class]
+  return names[regnum]
+end
+
+-- Format a 32-bit value as hex: negative values go through tohex(),
+-- non-negative values are printed without leading zeros.
+local function fmt_hex32(x)
+  return x < 0 and tohex(x) or format("%x", x)
+end
+
+local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } -- Multipliers indexed by len (1..5); each copies one element pattern across 32 bits.
+
+local function decode_imm13(op) -- Decode the bitmask immediate fields of op into a hex string (no "0x" prefix).
+  local imms = band(rshift(op, 10), 63) -- bits 15..10
+  local immr = band(rshift(op, 16), 63) -- bits 21..16
+  if band(op, 0x00400000) == 0 then -- bit 22 clear: pattern element fits in 32 bits
+    local len = 5
+    if imms >= 56 then -- leading one bits of imms select a smaller len
+      if imms >= 60 then len = 1 else len = 2 end
+    elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
+    local l = lshift(1, len)-1 -- 2^len - 1, used as the field mask below
+    local s = band(imms, l)
+    local r = band(immr, l)
+    local imm = ror(rshift(-1, 31-s), r) -- s+1 low one bits, rotated right by r within 32 bits
+    if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end -- fold bits rotated out the top back into the low element
+    imm = imm * imm13_rep[len] -- replicate the element across the 32-bit word
+    local ix = fmt_hex32(imm)
+    if rshift(op, 31) ~= 0 then -- bit 31 set: output the 32-bit pattern twice (second copy as 8 hex digits)
+      return ix..tohex(imm)
+    else
+      return ix
+    end
+  else -- bit 22 set: the value is built as two 32-bit halves lo/hi
+    local lo, hi = -1, 0
+    if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end -- imms+1 low one bits spread over lo/hi
+    if immr ~= 0 then
+      lo, hi = ror(lo, immr), ror(hi, immr)
+      local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) -- bits to exchange between the halves
+      lo, hi = bxor(lo, x), bxor(hi, x)
+      if immr >= 32 then lo, hi = hi, lo end
+    end
+    if hi ~= 0 then
+      return fmt_hex32(hi)..tohex(lo)
+    else
+      return fmt_hex32(lo)
+    end
+  end
+end
+
+-- Extract the signed PC-relative byte offset encoded in op; the field
+-- layout is chosen by the mnemonic name.
+local function parse_immpc(op, name)
+  if name == "b" or name == "bl" then
+    -- 26-bit field in bits 25..0, scaled by 4.
+    return arshift(lshift(op, 6), 4)
+  end
+  if name == "tbz" or name == "tbnz" then
+    -- 14-bit field in bits 18..5, scaled by 4.
+    return lshift(arshift(lshift(op, 13), 18), 2)
+  end
+  -- 19-bit field in bits 23..5, shifted left by 2.
+  local imm19 = lshift(arshift(lshift(op, 8), 13), 2)
+  if name == "adr" or name == "adrp" then
+    -- The two low bits come from bits 30..29; adrp uses the same formula
+    -- as adr here.
+    return bor(imm19, band(rshift(op, 29), 3))
+  end
+  return imm19
+end
+
+-- Convert the 8-bit floating-point immediate of op (sign in bit 20,
+-- exponent bits above a 4-bit fraction in bits 16..13) into a Lua number.
+local function parse_fpimm8(op)
+  local mantissa = 16 + band(rshift(op, 13), 15)
+  local e = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
+  local negative = band(op, 0x100000) ~= 0
+  local sign = negative and -1 or 1
+  return sign * mantissa * 2^e
+end
+
+-- Decide whether a bitfield-move encoding should be shown under the
+-- extract alias (the a2 name picked by the caller). Returns false whenever
+-- one of the other aliases handled by the caller applies instead.
+-- sf is zero for the 32-bit form, uns is zero for the signed form.
+local function prefer_bfx(sf, uns, imms, immr)
+  if imms < immr or imms == 31 or imms == 63 then return false end
+  if immr ~= 0 then return true end
+  local byte_or_half = imms == 7 or imms == 15
+  if sf == 0 then
+    return not byte_or_half
+  end
+  if uns == 0 and (byte_or_half or imms == 31) then
+    return false
+  end
+  return true
+end
+
+-- Disassemble a single instruction.
+-- Assembles the 32-bit little-endian word at ctx.pos from ctx.code, walks
+-- the decode tables starting at map_init until an opcode pattern string is
+-- found, then expands each pattern character into an operand. The finished
+-- line is printed by putop(), which also advances ctx.pos by 4. Words that
+-- match no table entry are printed by unknown().
+local function disass_ins(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+  local operands = {}
+  local suffix = ""
+  local last, name, pat
+  local map_reg
+  ctx.op = op
+  ctx.rel = nil
+  last = nil
+  local opat
+  opat = map_init[band(rshift(op, 25), 15)]
+  -- Descend through nested tables; each level selects on its own
+  -- shift/mask and may provide a default entry under the key "_".
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+  end
+  -- Split the pattern into mnemonic, optional "|alias" names, optional
+  -- ".suffix" and the operand letters.
+  name, pat = match(opat, "^([a-z0-9]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then pat = pat2 end
+  if sub(pat, 1, 1) == "." then
+    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
+    suffix = suffix..s2
+    pat = p2
+  end
+
+  -- "g"/"f" in the pattern select one register bank for all D/N/M/A
+  -- operands, based on an opcode bit.
+  local rt = match(pat, "[gf]")
+  if rt then
+    if rt == "g" then
+      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
+    else
+      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
+    end
+  end
+
+  local second0, immr
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      local regnum = band(op, 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "N" then
+      local regnum = band(rshift(op, 5), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "M" then
+      local regnum = band(rshift(op, 16), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "A" then
+      local regnum = band(rshift(op, 10), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "B" then
+      -- Branch target: remembered in ctx.rel so putop() can add a symbol.
+      local addr = ctx.addr + pos + parse_immpc(op, name)
+      ctx.rel = addr
+      x = "0x"..tohex(addr)
+    elseif p == "T" then
+      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
+    elseif p == "V" then
+      x = band(op, 15)
+    elseif p == "C" then
+      x = map_cond[band(rshift(op, 12), 15)]
+    elseif p == "c" then
+      local rn = band(rshift(op, 5), 31)
+      local rm = band(rshift(op, 16), 31)
+      local cond = band(rshift(op, 12), 15)
+      local invc = bxor(cond, 1)
+      x = map_cond[cond]
+      if altname and cond ~= 14 and cond ~= 15 then
+	local a1, a2 = match(altname, "([^|]*)|(.*)")
+	if rn == rm then
+	  -- Same source register twice: use the alias form with the
+	  -- inverted condition and drop the redundant operand(s).
+	  local n = #operands
+	  operands[n] = nil
+	  x = map_cond[invc]
+	  if rn ~= 31 then
+	    if a1 then name = a1 else name = altname end
+	  else
+	    operands[n-1] = nil
+	    name = a2
+	  end
+	end
+      end
+    elseif p == "W" then
+      x = band(rshift(op, 5), 0xffff)
+    elseif p == "Y" then
+      x = band(rshift(op, 5), 0xffff)
+      local hw = band(rshift(op, 21), 3)
+      if altname and (hw == 0 or x ~= 0) then
+	name = altname
+      end
+    elseif p == "L" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local imm9 = arshift(lshift(op, 11), 23)
+      if band(op, 0x800) ~= 0 then
+	x = "["..rn..", #"..imm9.."]!"
+      else
+	x = "["..rn.."], #"..imm9
+      end
+    elseif p == "U" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local sz = band(rshift(op, 30), 3)
+      -- imm12 (bits 21..10) is an unsigned field: extract it with a logical
+      -- shift so offsets with the top bit set are not printed as negative.
+      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
+      if imm12 ~= 0 then
+	x = "["..rn..", #"..imm12.."]"
+      else
+	x = "["..rn.."]"
+      end
+    elseif p == "K" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local imm9 = arshift(lshift(op, 11), 23)
+      if imm9 ~= 0 then
+	x = "["..rn..", #"..imm9.."]"
+      else
+	x = "["..rn.."]"
+      end
+    elseif p == "O" then
+      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
+      local m = band(rshift(op, 13), 1)
+      if m == 0 then
+	rm = map_regs.w[band(rshift(op, 16), 31)]
+      else
+	rm = map_regs.x[band(rshift(op, 16), 31)]
+      end
+      x = "["..rn..", "..rm
+      local opt = band(rshift(op, 13), 7)
+      local s = band(rshift(op, 12), 1)
+      local sz = band(rshift(op, 30), 3)
+      -- extension to be applied
+      if opt == 3 then
+       if s == 0 then x = x.."]"
+       else x = x..", lsl #"..sz.."]" end
+      elseif opt == 2 or opt == 6 or opt == 7 then
+	if s == 0 then x = x..", "..map_extend[opt].."]"
+	else x = x..", "..map_extend[opt].." #"..sz.."]" end
+      else
+	x = x.."]"
+      end
+    elseif p == "P" then
+      local opcv, sh = rshift(op, 26), 2
+      if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local ind = band(rshift(op, 23), 3)
+      if ind == 1 then
+	x = "["..rn.."], #"..imm7
+      elseif ind == 2 then
+	if imm7 == 0 then
+	  x = "["..rn.."]"
+	else
+	  x = "["..rn..", #"..imm7.."]"
+	end
+      elseif ind == 3 then
+	x = "["..rn..", #"..imm7.."]!"
+      end
+    elseif p == "I" then
+      local shf = band(rshift(op, 22), 3)
+      local imm12 = band(rshift(op, 10), 0x0fff)
+      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
+      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
+	name = altname
+	x = nil
+      elseif shf == 0 then
+	x = imm12
+      elseif shf == 1 then
+	x = imm12..", lsl #12"
+      end
+    elseif p == "i" then
+      x = "#0x"..decode_imm13(op)
+    elseif p == "1" then
+      immr = band(rshift(op, 16), 63)
+      x = immr
+    elseif p == "2" then
+      -- Second bitfield immediate: choose among the aliases listed in
+      -- altname and adjust or drop the operands accordingly.
+      x = band(rshift(op, 10), 63)
+      if altname then
+	local a1, a2, a3, a4, a5, a6 =
+	  match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
+	local sf = band(rshift(op, 26), 32)
+	local uns = band(rshift(op, 30), 1)
+	if prefer_bfx(sf, uns, x, immr) then
+	  name = a2
+	  x = x - immr + 1
+	elseif immr == 0 and x == 7 then
+	  local n = #operands
+	  operands[n] = nil
+	  if sf ~= 0 then
+	    operands[n-1] = gsub(operands[n-1], "x", "w")
+	  end
+	  last = operands[n-1]
+	  name = a6
+	  x = nil
+	elseif immr == 0 and x == 15 then
+	  local n = #operands
+	  operands[n] = nil
+	  if sf ~= 0 then
+	    operands[n-1] = gsub(operands[n-1], "x", "w")
+	  end
+	  last = operands[n-1]
+	  name = a5
+	  x = nil
+	elseif x == 31 or x == 63 then
+	  if x == 31 and immr == 0 and name == "sbfm" then
+	    name = a4
+	    local n = #operands
+	    operands[n] = nil
+	    if sf ~= 0 then
+	      operands[n-1] = gsub(operands[n-1], "x", "w")
+	    end
+	    last = operands[n-1]
+	  else
+	    name = a3
+	  end
+	  x = nil
+	elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
+	  name = a4
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = nil
+	elseif x < immr then
+	  name = a1
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = x + 1
+	end
+      end
+    elseif p == "3" then
+      x = band(rshift(op, 10), 63)
+      if altname then
+	local a1, a2 = match(altname, "([^|]*)|(.*)")
+	if x < immr then
+	  name = a1
+	  local sf = band(rshift(op, 26), 32)
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = x + 1
+	elseif x >= immr then
+	  name = a2
+	  x = x - immr + 1
+	end
+      end
+    elseif p == "4" then
+      x = band(rshift(op, 10), 63)
+      local rn = band(rshift(op, 5), 31)
+      local rm = band(rshift(op, 16), 31)
+      if altname and rn == rm then
+	local n = #operands
+	operands[n] = nil
+	last = operands[n-1]
+	name = altname
+      end
+    elseif p == "5" then
+      x = band(rshift(op, 16), 31)
+    elseif p == "S" then
+      x = band(rshift(op, 10), 63)
+      if x == 0 then x = nil
+      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
+    elseif p == "X" then
+      local opt = band(rshift(op, 13), 7)
+      -- Width specifier <R>.
+      if opt ~= 3 and opt ~= 7 then
+	last = map_regs.w[band(rshift(op, 16), 31)]
+	operands[#operands] = last
+      end
+      x = band(rshift(op, 10), 7)
+      -- Extension.
+      if opt == 2 + band(rshift(op, 31), 1) and
+	 band(rshift(op, second0 and 5 or 0), 31) == 31 then
+	if x == 0 then x = nil
+	else x = "lsl #"..x end
+      else
+	if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
+	else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
+      end
+    elseif p == "R" then
+      x = band(rshift(op,21), 3)
+      if x == 0 then x = nil
+      else x = "lsl #"..x*16 end
+    elseif p == "z" then
+      -- The preceding register operand is the zero register here, not sp.
+      local n = #operands
+      if operands[n] == "sp" then operands[n] = "xzr"
+      elseif operands[n] == "wsp" then operands[n] = "wzr"
+      end
+    elseif p == "Z" then
+      x = 0
+    elseif p == "F" then
+      x = parse_fpimm8(op)
+    elseif p == "g" or p == "f" or p == "x" or p == "w" or
+	   p == "d" or p == "s" then
+      -- These are handled in D/N/M/A.
+    elseif p == "0" then
+      -- If the previous operand is register 31, drop it and switch to the
+      -- alias mnemonic; second0 tracks whether this is the second "0".
+      if last == "sp" or last == "wsp" then
+	local n = #operands
+	operands[n] = nil
+	last = operands[n-1]
+	if altname then
+	  local a1, a2 = match(altname, "([^|]*)|(.*)")
+	  if not a1 then
+	    name = altname
+	  elseif second0 then
+	    name, altname = a2, a1
+	  else
+	    name, altname = a1, a2
+	  end
+	end
+      end
+      second0 = true
+    else
+      assert(false)
+    end
+    if x then
+      last = x
+      -- Plain numbers become "#n" immediates.
+      if type(x) == "number" then x = "#"..x end
+      operands[#operands+1] = x
+    end
+  end
+
+  return putop(ctx, name..suffix, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code: starts at byte offset ofs (default 0) and
+-- continues until len bytes are consumed, or to the end of ctx.code when
+-- len is not given.
+local function disass_block(ctx, ofs, len)
+  local start = ofs or 0
+  local limit
+  if len then
+    limit = start + len
+  else
+    limit = #ctx.code
+  end
+  ctx.pos = start
+  ctx.rel = nil
+  -- disass_ins advances ctx.pos through putop().
+  while ctx.pos < limit do
+    disass_ins(ctx)
+  end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+-- addr defaults to 0 and out defaults to io.write.
+local function create(code, addr, out)
+  return {
+    code = code,
+    addr = addr or 0,
+    out = out or io.write,
+    symtab = {},
+    disass = disass_block,
+    hexdump = 8,
+  }
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+-- Builds a throwaway context and disassembles the whole string.
+local function disass(code, addr, out)
+  local ctx = create(code, addr, out)
+  ctx:disass()
+end
+
+-- Return register name for RID: ids below 32 map to the x registers,
+-- anything else to d register (id - 32).
+local function regname(r)
+  local is_gpr = r < 32
+  if not is_gpr then
+    return map_regs.d[r-32]
+  end
+  return map_regs.x[r]
+end
+
+-- Public module functions.
+return {
+  create = create, -- extended API: build a context, then call ctx:disass(ofs, len)
+  disass = disass, -- simple API: disassemble a whole code string in one call
+  regname = regname -- register name lookup by RID
+}
+

+ 12 - 0
luajit.mod/luajit/src/jit/dis_arm64be.lua

@@ -0,0 +1,12 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64BE disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- ARM64 instructions are always little-endian. So just forward to the
+-- common ARM64 disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Resolve the sibling module relative to this module's own package prefix.
+local prefix = string.match(..., ".*%.") or ""
+return require(prefix.."dis_arm64")
+

+ 47 - 32
luajit.mod/luajit/src/jit/dis_mips.lua

@@ -34,15 +34,17 @@ local map_special = {
   "jrS",	"jalrD1S",	"movzDST",	"movnDST",
   "jrS",	"jalrD1S",	"movzDST",	"movnDST",
   "syscallY",	"breakY",	false,		"sync",
   "syscallY",	"breakY",	false,		"sync",
   "mfhiD",	"mthiS",	"mfloD",	"mtloS",
   "mfhiD",	"mthiS",	"mfloD",	"mtloS",
-  false,	false,		false,		false,
+  "dsllvDST",	false,		"dsrlvDST",	"dsravDST",
   "multST",	"multuST",	"divST",	"divuST",
   "multST",	"multuST",	"divST",	"divuST",
-  false,	false,		false,		false,
+  "dmultST",	"dmultuST",	"ddivST",	"ddivuST",
   "addDST",	"addu|moveDST0", "subDST",	"subu|neguDS0T",
   "addDST",	"addu|moveDST0", "subDST",	"subu|neguDS0T",
-  "andDST",	"orDST",	"xorDST",	"nor|notDST0",
+  "andDST",	"or|moveDST0",	"xorDST",	"nor|notDST0",
   false,	false,		"sltDST",	"sltuDST",
   false,	false,		"sltDST",	"sltuDST",
-  false,	false,		false,		false,
+  "daddDST",	"dadduDST",	"dsubDST",	"dsubuDST",
   "tgeSTZ",	"tgeuSTZ",	"tltSTZ",	"tltuSTZ",
   "tgeSTZ",	"tgeuSTZ",	"tltSTZ",	"tltuSTZ",
-  "teqSTZ",	false,		"tneSTZ",
+  "teqSTZ",	false,		"tneSTZ",	false,
+  "dsllDTA",	false,		"dsrlDTA",	"dsraDTA",
+  "dsll32DTA",	false,		"dsrl32DTA",	"dsra32DTA",
 }
 }
 
 
 local map_special2 = {
 local map_special2 = {
@@ -60,11 +62,17 @@ local map_bshfl = {
   [24] = "sehDT",
   [24] = "sehDT",
 }
 }
 
 
+local map_dbshfl = {
+  shift = 6, mask = 31,
+  [2] = "dsbhDT",
+  [5] = "dshdDT",
+}
+
 local map_special3 = {
 local map_special3 = {
   shift = 0, mask = 63,
   shift = 0, mask = 63,
-  [0] = "extTSAK", [4] = "insTSAL",
-  [32] = map_bshfl,
-  [59] = "rdhwrTD",
+  [0]  = "extTSAK", [1]  = "dextmTSAP", [3]  = "dextTSAK",
+  [4]  = "insTSAL", [6]  = "dinsuTSEQ", [7]  = "dinsTSAL",
+  [32] = map_bshfl, [36] = map_dbshfl,  [59] = "rdhwrTD",
 }
 }
 
 
 local map_regimm = {
 local map_regimm = {
@@ -178,8 +186,8 @@ local map_cop1bc = {
 
 
 local map_cop1 = {
 local map_cop1 = {
   shift = 21, mask = 31,
   shift = 21, mask = 31,
-  [0] = "mfc1TG", false,	"cfc1TG",	"mfhc1TG",
-  "mtc1TG",	false,		"ctc1TG",	"mthc1TG",
+  [0] = "mfc1TG", "dmfc1TG",	"cfc1TG",	"mfhc1TG",
+  "mtc1TG",	"dmtc1TG",	"ctc1TG",	"mthc1TG",
   map_cop1bc,	false,		false,		false,
   map_cop1bc,	false,		false,		false,
   false,	false,		false,		false,
   false,	false,		false,		false,
   map_cop1s,	map_cop1d,	false,		false,
   map_cop1s,	map_cop1d,	false,		false,
@@ -213,16 +221,16 @@ local map_pri = {
   "andiTSU",	"ori|liTS0U",	"xoriTSU",	"luiTU",
   "andiTSU",	"ori|liTS0U",	"xoriTSU",	"luiTU",
   map_cop0,	map_cop1,	false,		map_cop1x,
   map_cop0,	map_cop1,	false,		map_cop1x,
   "beql|beqzlST0B",	"bnel|bnezlST0B",	"blezlSB",	"bgtzlSB",
   "beql|beqzlST0B",	"bnel|bnezlST0B",	"blezlSB",	"bgtzlSB",
-  false,	false,		false,		false,
-  map_special2,	false,		false,		map_special3,
+  "daddiTSI",	"daddiuTSI",	false,		false,
+  map_special2,	"jalxJ",	false,		map_special3,
   "lbTSO",	"lhTSO",	"lwlTSO",	"lwTSO",
   "lbTSO",	"lhTSO",	"lwlTSO",	"lwTSO",
   "lbuTSO",	"lhuTSO",	"lwrTSO",	false,
   "lbuTSO",	"lhuTSO",	"lwrTSO",	false,
   "sbTSO",	"shTSO",	"swlTSO",	"swTSO",
   "sbTSO",	"shTSO",	"swlTSO",	"swTSO",
   false,	false,		"swrTSO",	"cacheNSO",
   false,	false,		"swrTSO",	"cacheNSO",
   "llTSO",	"lwc1HSO",	"lwc2TSO",	"prefNSO",
   "llTSO",	"lwc1HSO",	"lwc2TSO",	"prefNSO",
-  false,	"ldc1HSO",	"ldc2TSO",	false,
+  false,	"ldc1HSO",	"ldc2TSO",	"ldTSO",
   "scTSO",	"swc1HSO",	"swc2TSO",	false,
   "scTSO",	"swc1HSO",	"swc2TSO",	false,
-  false,	"sdc1HSO",	"sdc2TSO",	false,
+  false,	"sdc1HSO",	"sdc2TSO",	"sdTSO",
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -306,6 +314,8 @@ local function disass_ins(ctx)
       x = "f"..band(rshift(op, 21), 31)
       x = "f"..band(rshift(op, 21), 31)
     elseif p == "A" then
     elseif p == "A" then
       x = band(rshift(op, 6), 31)
       x = band(rshift(op, 6), 31)
+    elseif p == "E" then
+      x = band(rshift(op, 6), 31) + 32
     elseif p == "M" then
     elseif p == "M" then
       x = band(rshift(op, 11), 31)
       x = band(rshift(op, 11), 31)
     elseif p == "N" then
     elseif p == "N" then
@@ -315,8 +325,12 @@ local function disass_ins(ctx)
       if x == 0 then x = nil end
       if x == 0 then x = nil end
     elseif p == "K" then
     elseif p == "K" then
       x = band(rshift(op, 11), 31) + 1
       x = band(rshift(op, 11), 31) + 1
+    elseif p == "P" then
+      x = band(rshift(op, 11), 31) + 33
     elseif p == "L" then
     elseif p == "L" then
       x = band(rshift(op, 11), 31) - last + 1
       x = band(rshift(op, 11), 31) - last + 1
+    elseif p == "Q" then
+      x = band(rshift(op, 11), 31) - last + 33
     elseif p == "I" then
     elseif p == "I" then
       x = arshift(lshift(op, 16), 16)
       x = arshift(lshift(op, 16), 16)
     elseif p == "U" then
     elseif p == "U" then
@@ -330,11 +344,12 @@ local function disass_ins(ctx)
     elseif p == "B" then
     elseif p == "B" then
       x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
       x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
       ctx.rel = x
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "J" then
     elseif p == "J" then
-      x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4
+      local a = ctx.addr + ctx.pos
+      x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
       ctx.rel = x
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "V" then
     elseif p == "V" then
       x = band(rshift(op, 8), 7)
       x = band(rshift(op, 8), 7)
       if x == 0 then x = nil end
       if x == 0 then x = nil end
@@ -384,7 +399,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -396,33 +411,33 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create_el_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create_el(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.get = get_le
   ctx.get = get_le
   return ctx
   return ctx
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass_el_(code, addr, out)
-  create_el_(code, addr, out):disass()
+local function disass_el(code, addr, out)
+  create_el(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+  create = create,
+  create_el = create_el,
+  disass = disass,
+  disass_el = disass_el,
+  regname = regname
+}
 
 

+ 17 - 0
luajit.mod/luajit/src/jit/dis_mips64.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the big-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Load the shared MIPS disassembler from the same package as this module.
+local prefix = string.match(..., ".*%.") or ""
+local mips = require(prefix.."dis_mips")
+
+-- Re-export its default (big-endian) entry points.
+local exports = {}
+exports.create = mips.create
+exports.disass = mips.disass
+exports.regname = mips.regname
+return exports
+

+ 17 - 0
luajit.mod/luajit/src/jit/dis_mips64el.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64EL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Load the shared MIPS disassembler from the same package as this module.
+local prefix = string.match(..., ".*%.") or ""
+local mips = require(prefix.."dis_mips")
+
+-- Re-export its little-endian entry points under the standard names.
+local exports = {}
+exports.create = mips.create_el
+exports.disass = mips.disass_el
+exports.regname = mips.regname
+return exports
+

+ 6 - 9
luajit.mod/luajit/src/jit/dis_mipsel.lua

@@ -8,13 +8,10 @@
 -- MIPS disassembler module. All the interesting stuff is there.
 -- MIPS disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
 
 

+ 9 - 9
luajit.mod/luajit/src/jit/dis_ppc.lua

@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 6 - 9
luajit.mod/luajit/src/jit/dis_x64.lua

@@ -8,13 +8,10 @@
 -- x86/x64 disassembler module. All the interesting stuff is there.
 -- x86/x64 disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+  create = dis_x86.create64,
+  disass = dis_x86.disass64,
+  regname = dis_x86.regname64
+}
 
 

+ 207 - 90
luajit.mod/luajit/src/jit/dis_x86.lua

@@ -15,19 +15,20 @@
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
 --
 --
 -- Notes:
 -- Notes:
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local type = type
 local type = type
 local sub, byte, format = string.sub, string.byte, string.format
 local sub, byte, format = string.sub, string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local lower, rep = string.lower, string.rep
 local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
 
 
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 local map_opc1_32 = {
 local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 --Cx
 --Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 --Dx
 --Dx
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
-  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+  [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
 }, { __index = map_opc1_32 })
 }, { __index = map_opc1_32 })
 
 
@@ -112,12 +113,12 @@ local map_opc2 = {
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 --1x
 --1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movlpsXmr||movlpdXmr",
 "movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movhpsXmr||movhpdXmr",
 "movhpsXmr||movhpdXmr",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movapsXrm||movapdXrm",
 "movapsXrm||movapdXrm",
 "movapsXmr||movapdXmr",
 "movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 --5x
 --5x
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
 --6x
 --6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 --7x
 --7x
-"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
-"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
+"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 nil,nil,
 nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 --8x
 --8x
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 --Cx
 --Cx
 "xaddBmr","xaddVmr",
 "xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 --Dx
 --Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
 --Ex
 --Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
 --Fx
 --Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
 }
 }
 assert(map_opc2[255] == "ud")
 assert(map_opc2[255] == "ud")
 
 
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
 local map_opc3 = {
 local map_opc3 = {
 ["38"] = { -- [66] 0f 38 xx
 ["38"] = { -- [66] 0f 38 xx
 --0x
 --0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
 --1x
 --1x
 "||pblendvbXrma",nil,nil,nil,
 "||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 --2x
 --2x
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
 --3x
 --3x
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
 --4x
 --4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
+--9x
+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
+--Ax
+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
+--Bx
+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
+--Dx
+[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
 --Fx
 --Fx
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
+[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
 },
 },
 
 
 ["3a"] = { -- [66] 0f 3a xx
 ["3a"] = { -- [66] 0f 3a xx
 --0x
 --0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
 --1x
 --1x
 nil,nil,nil,nil,
 nil,nil,nil,nil,
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
 --2x
 --2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
 --4x
 --4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x44] = "||pclmulqdqXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
 --6x
 --6x
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+[0xdf] = "||aeskeygenassistXrmu",
+--Fx
+[0xf0] = "||| rorxVrmu",
 },
 },
 }
 }
 
 
@@ -354,17 +397,19 @@ local map_regs = {
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+  Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+	"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
 }
 }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 
 
 -- Maps for size names.
 -- Maps for size names.
 local map_sz2n = {
 local map_sz2n = {
-  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
 }
 }
 local map_sz2prefix = {
 local map_sz2prefix = {
   B = "byte", W = "word", D = "dword",
   B = "byte", W = "word", D = "dword",
   Q = "qword",
   Q = "qword",
-  M = "qword", X = "xword",
+  M = "qword", X = "xword", Y = "yword",
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
 }
 }
 
 
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rex then
   if ctx.rex then
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
-	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
-    if t ~= "" then text = "rex."..t.." "..text end
+	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+	      (ctx.vexl and "l" or "")
+    if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+    if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
+    elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-    ctx.rex = false
+    ctx.rex = false; ctx.vexl = false; ctx.vexv = false
   end
   end
   if ctx.seg then
   if ctx.seg then
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
   end
   end
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.mrm = false
   ctx.mrm = false
+  ctx.vexv = false
   ctx.start = pos
   ctx.start = pos
   ctx.imm = nil
   ctx.imm = nil
 end
 end
@@ -413,7 +462,7 @@ end
 local function clearprefixes(ctx)
 local function clearprefixes(ctx)
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-  ctx.rex = false; ctx.a32 = false
+  ctx.rex = false; ctx.a32 = false; ctx.vexl = false
 end
 end
 
 
 -- Fallback for incomplete opcodes at the end.
 -- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
 -- Process pattern string and generate the operands.
 -- Process pattern string and generate the operands.
 local function putpat(ctx, name, pat)
 local function putpat(ctx, name, pat)
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
-  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+  local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
 
 
-  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
   for p in gmatch(pat, ".") do
   for p in gmatch(pat, ".") do
     local x = nil
     local x = nil
     if p == "V" or p == "U" then
     if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
     elseif p == "B" then
     elseif p == "B" then
       sz = "B"
       sz = "B"
       regs = ctx.rex and map_regs.B64 or map_regs.B
       regs = ctx.rex and map_regs.B64 or map_regs.B
-    elseif match(p, "[WDQMXFG]") then
+    elseif match(p, "[WDQMXYFG]") then
       sz = p
       sz = p
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
     elseif p == "P" then
     elseif p == "P" then
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
+    elseif p == "H" then
+      name = name..(ctx.rexw and "d" or "s")
+      ctx.rexw = false
     elseif p == "S" then
     elseif p == "S" then
       name = name..lower(sz)
       name = name..lower(sz)
     elseif p == "s" then
     elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
       local imm = getimm(ctx, pos, 1); if not imm then return end
       local imm = getimm(ctx, pos, 1); if not imm then return end
       x = format("0x%02x", imm)
       x = format("0x%02x", imm)
       pos = pos+1
       pos = pos+1
+    elseif p == "b" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = regs[imm/16+1]
+      pos = pos+1
     elseif p == "w" then
     elseif p == "w" then
       local imm = getimm(ctx, pos, 2); if not imm then return end
       local imm = getimm(ctx, pos, 2); if not imm then return end
       x = format("0x%x", imm)
       x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
 	local lo = imm % 0x1000000
 	local lo = imm % 0x1000000
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
       else
       else
-	x = format("0x%08x", imm)
+	x = "0x"..tohex(imm)
       end
       end
     elseif p == "R" then
     elseif p == "R" then
       local r = byte(code, pos-1, pos-1)%8
       local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
 	else
 	else
 	  x = "CR"..sp
 	  x = "CR"..sp
 	end
 	end
+      elseif p == "v" then
+	if ctx.vexv then
+	  x = regs[ctx.vexv+1]; ctx.vexv = false
+	end
       elseif p == "y" then x = "DR"..sp
       elseif p == "y" then x = "DR"..sp
       elseif p == "z" then x = "TR"..sp
       elseif p == "z" then x = "TR"..sp
+      elseif p == "l" then vexl = false
       elseif p == "t" then
       elseif p == "t" then
       else
       else
 	error("bad pattern `"..pat.."'")
 	error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
   B = putpat, W = putpat, D = putpat, Q = putpat,
   B = putpat, W = putpat, D = putpat, Q = putpat,
   V = putpat, U = putpat, T = putpat,
   V = putpat, U = putpat, T = putpat,
   M = putpat, X = putpat, P = putpat,
   M = putpat, X = putpat, P = putpat,
-  F = putpat, G = putpat,
+  F = putpat, G = putpat, Y = putpat,
+  H = putpat,
 
 
   -- Collect prefixes.
   -- Collect prefixes.
   [":"] = function(ctx, name, pat)
   [":"] = function(ctx, name, pat)
@@ -753,15 +817,68 @@ map_act = {
 
 
   -- REX prefix.
   -- REX prefix.
   rex = function(ctx, name, pat)
   rex = function(ctx, name, pat)
-    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
-    ctx.rex = true
+    ctx.rex = "rex"
+  end,
+
+  -- VEX prefix.
+  vex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+    ctx.rex = "vex"
+    local pos = ctx.pos
+    if ctx.mrm then
+      ctx.mrm = nil
+      pos = pos-1
+    end
+    local b = byte(ctx.code, pos, pos)
+    if not b then return incomplete(ctx) end
+    pos = pos+1
+    if b < 128 then ctx.rexr = true end
+    local m = 1
+    if pat == "3" then
+      m = b%32; b = (b-m)/32
+      local nb = b%2; b = (b-nb)/2
+      if nb == 0 then ctx.rexb = true end
+      local nx = b%2
+      if nx == 0 then ctx.rexx = true end
+      b = byte(ctx.code, pos, pos)
+      if not b then return incomplete(ctx) end
+      pos = pos+1
+      if b >= 128 then ctx.rexw = true end
+    end
+    ctx.pos = pos
+    local map
+    if m == 1 then map = map_opc2
+    elseif m == 2 then map = map_opc3["38"]
+    elseif m == 3 then map = map_opc3["3a"]
+    else return unknown(ctx) end
+    local p = b%4; b = (b-p)/4
+    if p == 1 then ctx.o16 = "o16"
+    elseif p == 2 then ctx.rep = "rep"
+    elseif p == 3 then ctx.rep = "repne" end
+    local l = b%2; b = (b-l)/2
+    if l ~= 0 then ctx.vexl = true end
+    ctx.vexv = (-1-b)%16
+    return dispatchmap(ctx, map)
   end,
   end,
 
 
   -- Special case for nop with REX prefix.
   -- Special case for nop with REX prefix.
   nop = function(ctx, name, pat)
   nop = function(ctx, name, pat)
     return dispatch(ctx, ctx.rex and pat or "nop")
     return dispatch(ctx, ctx.rex and pat or "nop")
   end,
   end,
+
+  -- Special case for 0F 77.
+  emms = function(ctx, name, pat)
+    if ctx.rex ~= "vex" then
+      return putop(ctx, "emms")
+    elseif ctx.vexl then
+      ctx.vexl = false
+      return putop(ctx, "zeroall")
+    else
+      return putop(ctx, "zeroupper")
+    end
+  end,
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = (addr or 0) - 1
   ctx.addr = (addr or 0) - 1
@@ -796,8 +913,8 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create64_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.x64 = true
   ctx.x64 = true
   ctx.map1 = map_opc1_64
   ctx.map1 = map_opc1_64
   ctx.aregs = map_regs.Q
   ctx.aregs = map_regs.Q
@@ -805,32 +922,32 @@ local function create64_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass64_(code, addr, out)
-  create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+  create64(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 8 then return map_regs.D[r+1] end
   if r < 8 then return map_regs.D[r+1] end
   return map_regs.X[r-7]
   return map_regs.X[r-7]
 end
 end
 
 
-local function regname64_(r)
+local function regname64(r)
   if r < 16 then return map_regs.Q[r+1] end
   if r < 16 then return map_regs.Q[r+1] end
   return map_regs.X[r-15]
   return map_regs.X[r-15]
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+  create = create,
+  create64 = create64,
+  disass = disass,
+  disass64 = disass64,
+  regname = regname,
+  regname64 = regname64
+}
 
 

+ 28 - 17
luajit.mod/luajit/src/jit/dump.lua

@@ -55,7 +55,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
 local bit = require("bit")
-local band, shr = bit.band, bit.rshift
+local band, shr, tohex = bit.band, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
 local sub, gsub, format = string.sub, string.gsub, string.format
 local byte, rep = string.byte, string.rep
 local byte, rep = string.byte, string.rep
 local type, tostring = type, tostring
 local type, tostring = type, tostring
@@ -85,12 +85,13 @@ local nexitsym = 0
 local function fillsymtab_tr(tr, nexit)
 local function fillsymtab_tr(tr, nexit)
   local t = {}
   local t = {}
   symtabmt.__index = t
   symtabmt.__index = t
-  if jit.arch == "mips" or jit.arch == "mipsel" then
+  if jit.arch:sub(1, 4) == "mips" then
     t[traceexitstub(tr, 0)] = "exit"
     t[traceexitstub(tr, 0)] = "exit"
     return
     return
   end
   end
   for i=0,nexit-1 do
   for i=0,nexit-1 do
     local addr = traceexitstub(tr, i)
     local addr = traceexitstub(tr, i)
+    if addr < 0 then addr = addr + 2^32 end
     t[addr] = tostring(i)
     t[addr] = tostring(i)
   end
   end
   local addr = traceexitstub(tr, nexit)
   local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
     local ircall = vmdef.ircall
     local ircall = vmdef.ircall
     for i=0,#ircall do
     for i=0,#ircall do
       local addr = ircalladdr(i)
       local addr = ircalladdr(i)
-      if addr ~= 0 then t[addr] = ircall[i] end
+      if addr ~= 0 then
+	if addr < 0 then addr = addr + 2^32 end
+	t[addr] = ircall[i]
+      end
     end
     end
   end
   end
   if nexitsym == 1000000 then -- Per-trace exit stubs.
   if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
 	nexit = 1000000
 	nexit = 1000000
 	break
 	break
       end
       end
+      if addr < 0 then addr = addr + 2^32 end
       t[addr] = tostring(i)
       t[addr] = tostring(i)
     end
     end
     nexitsym = nexit
     nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
   local mcode, addr, loop = tracemc(tr)
   local mcode, addr, loop = tracemc(tr)
   if not mcode then return end
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
   if not disass then disass = require("jit.dis_"..jit.arch) end
+  if addr < 0 then addr = addr + 2^32 end
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
   ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s
     t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
   ["FPMATH"] = vmdef.irfpm,
+  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 }
 
 
 local function ctlsub(c)
 local function ctlsub(c)
@@ -303,15 +310,17 @@ local function fmtfunc(func, pc)
   end
   end
 end
 end
 
 
-local function formatk(tr, idx)
+local function formatk(tr, idx, sn)
   local k, t, slot = tracek(tr, idx)
   local k, t, slot = tracek(tr, idx)
   local tn = type(k)
   local tn = type(k)
   local s
   local s
   if tn == "number" then
   if tn == "number" then
-    if k == 2^52+2^51 then
+    if band(sn or 0, 0x30000) ~= 0 then
+      s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
+    elseif k == 2^52+2^51 then
       s = "bias"
       s = "bias"
     else
     else
-      s = format("%+.14g", k)
+      s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
     end
     end
   elseif tn == "string" then
   elseif tn == "string" then
     s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
     s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -329,6 +338,8 @@ local function formatk(tr, idx)
   elseif t == 21 then -- int64_t
   elseif t == 21 then -- int64_t
     s = sub(tostring(k), 1, -3)
     s = sub(tostring(k), 1, -3)
     if sub(s, 1, 1) ~= "-" then s = "+"..s end
     if sub(s, 1, 1) ~= "-" then s = "+"..s end
+  elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
+    return "----" -- Special case for LJ_FR2 slot 1.
   else
   else
     s = tostring(k) -- For primitives.
     s = tostring(k) -- For primitives.
   end
   end
@@ -347,7 +358,7 @@ local function printsnap(tr, snap)
       n = n + 1
       n = n + 1
       local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
       local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
       if ref < 0 then
       if ref < 0 then
-	out:write(formatk(tr, ref))
+	out:write(formatk(tr, ref, sn))
       elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
       elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
 	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
 	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
       else
       else
@@ -545,7 +556,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
   if what == "start" then
   if what == "start" then
     if dumpmode.H then out:write('<pre class="ljdump">\n') end
     if dumpmode.H then out:write('<pre class="ljdump">\n') end
     out:write("---- TRACE ", tr, " ", what)
     out:write("---- TRACE ", tr, " ", what)
-    if otr then out:write(" ", otr, "/", oex) end
+    if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
     out:write(" ", fmtfunc(func, pc), "\n")
     out:write(" ", fmtfunc(func, pc), "\n")
   elseif what == "stop" or what == "abort" then
   elseif what == "stop" or what == "abort" then
     out:write("---- TRACE ", tr, " ", what)
     out:write("---- TRACE ", tr, " ", what)
@@ -608,7 +619,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
       end
       end
     else
     else
       for i=1,ngpr do
       for i=1,ngpr do
-	out:write(format(" %08x", regs[i]))
+	out:write(" ", tohex(regs[i]))
 	if i % 8 == 0 then out:write("\n") end
 	if i % 8 == 0 then out:write("\n") end
       end
       end
     end
     end
@@ -693,9 +704,9 @@ local function dumpon(opt, outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 311 - 0
luajit.mod/luajit/src/jit/p.lua

@@ -0,0 +1,311 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+--   luajit -jp myapp.lua
+--   luajit -jp=s myapp.lua
+--   luajit -jp=-s myapp.lua
+--   luajit -jp=vl myapp.lua
+--   luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+--   f  Stack dump: function name, otherwise module:line. Default mode.
+--   F  Stack dump: ditto, but always prepend module.
+--   l  Stack dump: module:line.
+--   <number> stack dump depth (callee < caller). Default: 1.
+--   -<number> Inverse stack dump depth (caller > callee).
+--   s  Split stack dump after first stack level. Implies abs(depth) >= 2.
+--   p  Show full path for module names.
+--   v  Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+--   z  Show zones. Can be combined with stack dumps, e.g. zf or fz.
+--   r  Show raw sample counts. Default: show percentages.
+--   a  Annotate excerpts from source code files.
+--   A  Annotate complete source code files.
+--   G  Produce raw output suitable for graphical tools (e.g. flame graphs).
+--   m<number> Minimum sample percentage to be shown. Default: 3.
+--   i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+  N = "Compiled",
+  I = "Interpreted",
+  C = "C code",
+  G = "Garbage Collector",
+  J = "JIT Compiler",
+}
+
+-- Profiler callback: tallies each sample under one key and an optional sub-key.
+local function prof_cb(th, samples, vmmode)
+  prof_samples = prof_samples + samples -- Running total over all callbacks.
+  local key_stack, key_stack2, key_state
+  -- Collect keys for sample.
+  if prof_states then
+    if prof_states == "v" then
+      key_state = map_vmmode[vmmode] or vmmode -- Unmapped modes are kept as-is.
+    else
+      key_state = zone:get() or "(none)" -- Placeholder when zone:get() yields nothing.
+    end
+  end
+  if prof_fmt then
+    key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
+    key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x) -- Name builtins.
+      return vmdef.ffnames[tonumber(x)]
+    end)
+    if prof_split == 2 then
+      local k1, k2 = key_stack:match("(.-) [<>] (.*)") -- Split at first separator.
+      if k2 then key_stack, key_stack2 = k1, k2 end -- Only if a separator was found.
+    elseif prof_split == 3 then
+      key_stack2 = profile.dumpstack(th, "l", 1) -- Sub-key: "l" format, depth 1.
+    end
+  end
+  -- Order keys.
+  local k1, k2
+  if prof_split == 1 then -- State is the primary key, stack the sub-key.
+    if key_state then
+      k1 = key_state
+      if key_stack then k2 = key_stack end
+    end
+  elseif key_stack then -- Stack is the primary key.
+    k1 = key_stack
+    if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
+  end
+  -- Coalesce samples in one or two levels.
+  if k1 then
+    local t1 = prof_count1
+    t1[k1] = (t1[k1] or 0) + samples
+    if k2 then
+      local t2 = prof_count2
+      local t3 = t2[k1]
+      if not t3 then t3 = {}; t2[k1] = t3 end -- Create sub-table on first use.
+      t3[k2] = (t3[k2] or 0) + samples
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list.
+-- Writes the entries of count1 to `out`, highest count first, and stops at
+-- the first entry whose rounded percentage of `samples` is below prof_min.
+-- If count2 has a sub-table for an entry, it is printed indented below it.
+local function prof_top(count1, count2, samples, indent)
+  local keys = {}
+  for key in pairs(count1) do keys[#keys+1] = key end
+  sort(keys, function(x, y) return count1[x] > count1[y] end)
+  for _, key in ipairs(keys) do
+    local cnt = count1[key]
+    local pct = floor(cnt*100/samples + 0.5)
+    if pct < prof_min then break end
+    local text
+    if prof_raw == "r" then
+      text = format("%s%5d  %s\n", indent, cnt, key)
+    elseif prof_raw then
+      text = format("%s %d\n", key, cnt)
+    else
+      text = format("%s%2d%%  %s\n", indent, pct, key)
+    end
+    out:write(text)
+    local sub = count2 and count2[key]
+    if sub then
+      -- Pick the prefix for the second level.
+      local prefix
+      if prof_split == 3 or prof_split == 1 then
+        prefix = "  -- "
+      elseif prof_depth < 0 then
+        prefix = "  -> "
+      else
+        prefix = "  <- "
+      end
+      prof_top(sub, nil, cnt, prefix)
+    end
+  end
+end
+
+-- Annotate source code: write sampled files to `out` with a per-line count or percentage column.
+local function prof_annotate(count1, samples)
+  local files = {} -- Array part: file names. Hash part: file name -> per-line values.
+  local ms = 0 -- Largest single count seen; sizes the raw-count column below.
+  for k, v in pairs(count1) do
+    local pct = floor(v*100/samples + 0.5)
+    ms = math.max(ms, v)
+    if pct >= prof_min then
+      local file, line = k:match("^(.*):(%d+)$")
+      if not file then file = k; line = 0 end -- No ":line" suffix: file under line 0.
+      local fl = files[file]
+      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+      line = tonumber(line)
+      fl[line] = prof_raw and v or pct
+    end
+  end
+  sort(files)
+  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
+  if prof_raw then
+    local n = math.max(5, math.ceil(math.log10(ms))) -- Column width, at least 5.
+    fmtv = "%"..n.."d | %s\n"
+    fmtn = (" "):rep(n).." | %s\n"
+  end
+  local ann = prof_ann -- 0 prints every line; otherwise the number of context lines.
+  for _, file in ipairs(files) do
+    local f0 = file:byte()
+    if f0 == 40 or f0 == 91 then -- Name starts with "(" or "[".
+      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+      break -- NOTE(review): also skips every remaining file; confirm intended.
+    end
+    local fp, err = io.open(file)
+    if not fp then
+      out:write(format("====== ERROR: %s: %s\n", file, err))
+      break -- NOTE(review): also skips every remaining file; confirm intended.
+    end
+    out:write(format("\n====== %s ======\n", file))
+    local fl = files[file]
+    local n, show = 1, false -- n: current line number. show: last line to print, or false.
+    if ann ~= 0 then
+      for i=1,ann do -- A value within the first `ann` lines: print from line 1.
+	if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
+      end
+    end
+    for line in fp:lines() do
+      if line:byte() == 27 then -- Line starts with ESC (byte 27).
+	out:write("[Cannot annotate bytecode file]\n")
+	break
+      end
+      local v = fl[n]
+      if ann ~= 0 then
+	local v2 = fl[n+ann] -- Value recorded `ann` lines ahead, if any.
+	if show then
+	  if v2 then show = n+ann elseif v then show = n
+	  elseif show+ann < n then show = false end -- Past the trailing context.
+	elseif v2 then
+	  show = n+ann
+	  out:write(format("@@ %d @@\n", n))
+	end
+	if not show then goto next end -- Skip output; n is still incremented.
+      end
+      if v then
+	out:write(format(fmtv, v, line))
+      else
+	out:write(format(fmtn, line))
+      end
+    ::next::
+      n = n + 1
+    end
+    fp:close()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result.
+-- Does nothing unless a profile is active (prof_ud is set). Otherwise stops
+-- the profiler, writes the report to `out`, and then always resets the
+-- profiler state, even when no samples were collected. An output file other
+-- than stdout is closed so its contents are flushed.
+local function prof_finish()
+  if prof_ud then
+    profile.stop()
+    local samples = prof_samples
+    if samples == 0 then
+      -- prof_raw == true is the "G" mode: keep its output free of notices.
+      if prof_raw ~= true then out:write("[No samples collected]\n") end
+    elseif prof_ann then
+      prof_annotate(prof_count1, samples)
+    else
+      prof_top(prof_count1, prof_count2, samples, "")
+    end
+    prof_count1 = nil
+    prof_count2 = nil
+    prof_ud = nil
+    if out ~= stdout then out:close() end
+  end
+end
+
+-- Start profiling: parse the mode string into the prof_* settings, then start the profiler.
+local function prof_start(mode)
+  local interval = ""
+  mode = mode:gsub("i%d*", function(s) interval = s; return "" end) -- Strip "i<number>"; kept for profile.start.
+  prof_min = 3
+  mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) -- Strip "m<number>".
+  prof_depth = 1
+  mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) -- Remaining number is the depth.
+  local m = {} -- Set of the remaining mode characters.
+  for c in mode:gmatch(".") do m[c] = c end
+  prof_states = m.z or m.v -- "z" wins if both are given.
+  if prof_states == "z" then zone = require("jit.zone") end
+  local scope = m.l or m.f or m.F or (prof_states and "" or "f") -- "" means no stack dump.
+  local flags = (m.p or "")
+  prof_raw = m.r
+  if m.s then
+    prof_split = 2
+    if prof_depth == -1 or m["-"] then prof_depth = -2 -- A "-" without digits stays in m.
+    elseif prof_depth == 1 then prof_depth = 2 end
+  elseif mode:find("[fF].*l") then -- "f" or "F" followed by "l".
+    scope = "l"
+    prof_split = 3
+  else
+    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
+  end
+  prof_ann = m.A and 0 or (m.a and 3) -- 0 for "A", 3 for "a", otherwise nil.
+  if prof_ann then -- Annotation overrides format, split and depth.
+    scope = "l"
+    prof_fmt = "pl"
+    prof_split = 0
+    prof_depth = 1
+  elseif m.G and scope ~= "" then -- Raw output mode.
+    prof_fmt = flags..scope.."Z;"
+    prof_depth = -100
+    prof_raw = true
+    prof_min = 0
+  elseif scope == "" then
+    prof_fmt = false -- prof_cb skips the stack dump.
+  else
+    local sc = prof_split == 3 and m.f or m.F or scope -- Parses as ((split == 3 and m.f) or m.F) or scope.
+    prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+  end
+  prof_count1 = {}
+  prof_count2 = {}
+  prof_samples = 0
+  profile.start(scope:lower()..interval, prof_cb)
+  prof_ud = newproxy(true) -- Proxy userdata whose __gc runs prof_finish.
+  getmetatable(prof_ud).__gc = prof_finish
+end
+
+------------------------------------------------------------------------------
+
+-- Select the output handle and start profiling.
+-- The output name comes from `outfile`, else from the LUAJIT_PROFILEFILE
+-- environment variable. A missing name or "-" means stdout. The mode
+-- defaults to "f".
+local function start(mode, outfile)
+  local target = outfile or os.getenv("LUAJIT_PROFILEFILE")
+  if not target or target == "-" then
+    out = stdout
+  else
+    out = assert(io.open(target, "w"))
+  end
+  prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+  start = start, -- For -j command line option.
+  stop = prof_finish
+}
+

+ 10 - 7
luajit.mod/luajit/src/jit/v.lua

@@ -59,7 +59,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -99,7 +99,7 @@ end
 local function dump_trace(what, tr, func, pc, otr, oex)
 local function dump_trace(what, tr, func, pc, otr, oex)
   if what == "start" then
   if what == "start" then
     startloc = fmtfunc(func, pc)
     startloc = fmtfunc(func, pc)
-    startex = otr and "("..otr.."/"..oex..") " or ""
+    startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
   else
   else
     if what == "abort" then
     if what == "abort" then
       local loc = fmtfunc(func, pc)
       local loc = fmtfunc(func, pc)
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
       if ltype == "interpreter" then
       if ltype == "interpreter" then
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	  tr, startex, startloc))
 	  tr, startex, startloc))
+      elseif ltype == "stitch" then
+	out:write(format("[TRACE %3s %s%s %s %s]\n",
+	  tr, startex, startloc, ltype, fmtfunc(func, pc)))
       elseif link == tr or link == 0 then
       elseif link == tr or link == 0 then
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	  tr, startex, startloc, ltype))
 	  tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 45 - 0
luajit.mod/luajit/src/jit/zone.lua

@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+--   local zone = require("jit.zone")
+--   zone("AI")
+--   ...
+--     zone("A*")
+--     ...
+--     print(zone:get()) --> "A*"
+--     ...
+--     zone()
+--   ...
+--   print(zone:get()) --> "AI"
+--   ...
+--   zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+-- Remove all zones from the stack.
+local function zone_flush(t)
+  local top = #t
+  for i=top,1,-1 do t[i] = nil end
+end
+
+-- Return the innermost zone, or nil if the stack is empty.
+local function zone_get(t)
+  return t[#t]
+end
+
+-- zone(name) pushes a zone; zone() pops and returns the innermost zone.
+local function zone_call(t, zone)
+  if not zone then
+    return (assert(remove(t), "empty zone stack"))
+  end
+  t[#t+1] = zone
+end
+
+return setmetatable({ flush = zone_flush, get = zone_get },
+                    { __call = zone_call })
+

+ 14 - 20
luajit.mod/luajit/src/lauxlib.h

@@ -15,9 +15,6 @@
 #include "lua.h"
 #include "lua.h"
 
 
 
 
-#define luaL_getn(L,i)          ((int)lua_objlen(L, i))
-#define luaL_setn(L,i,j)        ((void)0)  /* no op! */
-
 /* extra error code for `luaL_load' */
 /* extra error code for `luaL_load' */
 #define LUA_ERRFILE     (LUA_ERRERR+1)
 #define LUA_ERRFILE     (LUA_ERRERR+1)
 
 
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
 LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
 LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
                                    const char *const lst[]);
                                    const char *const lst[]);
 
 
+/* pre-defined references */
+#define LUA_NOREF       (-2)
+#define LUA_REFNIL      (-1)
+
 LUALIB_API int (luaL_ref) (lua_State *L, int t);
 LUALIB_API int (luaL_ref) (lua_State *L, int t);
 LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
 LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
 
 
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
 				   const char *name, const char *mode);
 				   const char *name, const char *mode);
 LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
 LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
 				int level);
 				int level);
+LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
+LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
+				   int sizehint);
+LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
+LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
 
 
 
 
 /*
 /*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
 
 
 #define luaL_opt(L,f,n,d)	(lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
 #define luaL_opt(L,f,n,d)	(lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
 
 
+/* From Lua 5.2. */
+#define luaL_newlibtable(L, l) \
+	lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
+#define luaL_newlib(L, l)	(luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
+
 /*
 /*
 ** {======================================================
 ** {======================================================
 ** Generic Buffer manipulation
 ** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
 
 
 /* }====================================================== */
 /* }====================================================== */
 
 
-
-/* compatibility with ref system */
-
-/* pre-defined references */
-#define LUA_NOREF       (-2)
-#define LUA_REFNIL      (-1)
-
-#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
-      (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
-
-#define lua_unref(L,ref)        luaL_unref(L, LUA_REGISTRYINDEX, (ref))
-
-#define lua_getref(L,ref)       lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
-
-
-#define luaL_reg	luaL_Reg
-
 #endif
 #endif

+ 46 - 28
luajit.mod/luajit/src/lib_aux.c

@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
 static int libsize(const luaL_Reg *l)
 static int libsize(const luaL_Reg *l)
 {
 {
   int size = 0;
   int size = 0;
-  for (; l->name; l++) size++;
+  for (; l && l->name; l++) size++;
   return size;
   return size;
 }
 }
 
 
+/*
+** Push the module table for modname onto the stack. Uses _LOADED[modname]
+** from the registry if it is a table. Otherwise finds or creates the table
+** via the globals and stores it in _LOADED. Raises LJ_ERR_BADMODN if the
+** lookup via the globals fails.
+*/
+LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
+{
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+  lua_getfield(L, -1, modname);
+  if (lua_istable(L, -1)) {
+    lua_remove(L, -2);  /* Already loaded: just drop _LOADED. */
+    return;
+  }
+  lua_pop(L, 1);  /* Discard the non-table lookup result. */
+  if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
+    lj_err_callerv(L, LJ_ERR_BADMODN, modname);
+  lua_pushvalue(L, -1);
+  lua_setfield(L, -3, modname);  /* _LOADED[modname] = new table. */
+  lua_remove(L, -2);  /* Remove _LOADED table. */
+}
+
 LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
 LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
 			     const luaL_Reg *l, int nup)
 			     const luaL_Reg *l, int nup)
 {
 {
   lj_lib_checkfpu(L);
   lj_lib_checkfpu(L);
   if (libname) {
   if (libname) {
-    int size = libsize(l);
-    /* check whether lib already exists */
-    luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
-    lua_getfield(L, -1, libname);  /* get _LOADED[libname] */
-    if (!lua_istable(L, -1)) {  /* not found? */
-      lua_pop(L, 1);  /* remove previous result */
-      /* try global variable (and create one if it does not exist) */
-      if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
-	lj_err_callerv(L, LJ_ERR_BADMODN, libname);
-      lua_pushvalue(L, -1);
-      lua_setfield(L, -3, libname);  /* _LOADED[libname] = new table */
-    }
-    lua_remove(L, -2);  /* remove _LOADED table */
-    lua_insert(L, -(nup+1));  /* move library table to below upvalues */
+    luaL_pushmodule(L, libname, libsize(l));
+    lua_insert(L, -(nup + 1));  /* Move module table below upvalues. */
   }
   }
-  for (; l->name; l++) {
-    int i;
-    for (i = 0; i < nup; i++)  /* copy upvalues to the top */
-      lua_pushvalue(L, -nup);
-    lua_pushcclosure(L, l->func, nup);
-    lua_setfield(L, -(nup+2), l->name);
-  }
-  lua_pop(L, nup);  /* remove upvalues */
+  if (l)
+    luaL_setfuncs(L, l, nup);
+  else
+    lua_pop(L, nup);  /* Remove upvalues. */
 }
 }
 
 
 LUALIB_API void luaL_register(lua_State *L, const char *libname,
 LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
   luaL_openlib(L, libname, l, 0);
   luaL_openlib(L, libname, l, 0);
 }
 }
 
 
+/*
+** Register every function of the NULL-name-terminated list l in the table
+** located just below the nup upvalues on the stack. Each function becomes a
+** C closure with its own copy of the upvalues. The upvalues are popped
+** afterwards.
+*/
+LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
+{
+  const luaL_Reg *reg;
+  luaL_checkstack(L, nup, "too many upvalues");
+  for (reg = l; reg->name != NULL; reg++) {
+    int left = nup;
+    while (left-- > 0)  /* Copy upvalues to the top. */
+      lua_pushvalue(L, -nup);
+    lua_pushcclosure(L, reg->func, nup);
+    lua_setfield(L, -(nup + 2), reg->name);
+  }
+  lua_pop(L, nup);  /* Remove upvalues. */
+}
+
 LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
 LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
 				 const char *p, const char *r)
 				 const char *p, const char *r)
 {
 {
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
 
 
 LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
 LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
 {
 {
-  while (l--)
-    luaL_addchar(B, *s++);
+  if (l <= bufffree(B)) {
+    memcpy(B->p, s, l);
+    B->p += l;
+  } else {
+    emptybuffer(B);
+    lua_pushlstring(B->L, s, l);
+    B->lvl++;
+    adjuststack(B);
+  }
 }
 }
 
 
 LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
 LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -302,7 +320,7 @@ static int panic(lua_State *L)
 
 
 #ifdef LUAJIT_USE_SYSMALLOC
 #ifdef LUAJIT_USE_SYSMALLOC
 
 
-#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
+#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
 #error "Must use builtin allocator for 64 bit target"
 #error "Must use builtin allocator for 64 bit target"
 #endif
 #endif
 
 
@@ -334,7 +352,7 @@ LUALIB_API lua_State *luaL_newstate(void)
   lua_State *L;
   lua_State *L;
   void *ud = lj_alloc_create();
   void *ud = lj_alloc_create();
   if (ud == NULL) return NULL;
   if (ud == NULL) return NULL;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   L = lj_state_newstate(lj_alloc_f, ud);
   L = lj_state_newstate(lj_alloc_f, ud);
 #else
 #else
   L = lua_newstate(lj_alloc_f, ud);
   L = lua_newstate(lj_alloc_f, ud);
@@ -343,7 +361,7 @@ LUALIB_API lua_State *luaL_newstate(void)
   return L;
   return L;
 }
 }
 
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
 {
 {
   UNUSED(f); UNUSED(ud);
   UNUSED(f); UNUSED(ud);

+ 64 - 68
luajit.mod/luajit/src/lib_base.c

@@ -23,6 +23,7 @@
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_frame.h"
 #if LJ_HASFFI
 #if LJ_HASFFI
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
@@ -32,6 +33,7 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_char.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* -- Base library: checks ------------------------------------------------ */
 /* -- Base library: checks ------------------------------------------------ */
@@ -40,13 +42,13 @@
 
 
 LJLIB_ASM(assert)		LJLIB_REC(.)
 LJLIB_ASM(assert)		LJLIB_REC(.)
 {
 {
-  GCstr *s;
   lj_lib_checkany(L, 1);
   lj_lib_checkany(L, 1);
-  s = lj_lib_optstr(L, 2);
-  if (s)
-    lj_err_callermsg(L, strdata(s));
-  else
+  if (L->top == L->base+1)
     lj_err_caller(L, LJ_ERR_ASSERT);
     lj_err_caller(L, LJ_ERR_ASSERT);
+  else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
+    lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
+  else
+    lj_err_run(L);
   return FFH_UNREACHABLE;
   return FFH_UNREACHABLE;
 }
 }
 
 
@@ -86,10 +88,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
   cTValue *mo = lj_meta_lookup(L, o, mm);
   cTValue *mo = lj_meta_lookup(L, o, mm);
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
     L->top = o+1;  /* Only keep one argument. */
     L->top = o+1;  /* Only keep one argument. */
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
   } else {
   } else {
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    if (LJ_FR2) { copyTV(L, o-1, o); o--; }
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     return FFH_RES(3);
     return FFH_RES(3);
@@ -100,7 +103,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
 #endif
 #endif
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs)		LJLIB_REC(xpairs 0)
 {
 {
   return ffh_pairs(L, MM_pairs);
   return ffh_pairs(L, MM_pairs);
 }
 }
@@ -113,7 +116,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
 }
 }
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs)		LJLIB_REC(.)
+LJLIB_ASM(ipairs)		LJLIB_REC(xpairs 1)
 {
 {
   return ffh_pairs(L, MM_ipairs);
   return ffh_pairs(L, MM_ipairs);
 }
 }
@@ -131,11 +134,11 @@ LJLIB_ASM(setmetatable)		LJLIB_REC(.)
     lj_err_caller(L, LJ_ERR_PROTMT);
     lj_err_caller(L, LJ_ERR_PROTMT);
   setgcref(t->metatable, obj2gco(mt));
   setgcref(t->metatable, obj2gco(mt));
   if (mt) { lj_gc_objbarriert(L, t, mt); }
   if (mt) { lj_gc_objbarriert(L, t, mt); }
-  settabV(L, L->base-1, t);
+  settabV(L, L->base-1-LJ_FR2, t);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv)		LJLIB_REC(.)
 {
 {
   GCfunc *fn;
   GCfunc *fn;
   cTValue *o = L->base;
   cTValue *o = L->base;
@@ -144,6 +147,7 @@ LJLIB_CF(getfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +169,7 @@ LJLIB_CF(setfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   if (!isluafunc(fn))
   if (!isluafunc(fn))
@@ -257,7 +262,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   if (base == 10) {
   if (base == 10) {
     TValue *o = lj_lib_checkany(L, 1);
     TValue *o = lj_lib_checkany(L, 1);
     if (lj_strscan_numberobj(o)) {
     if (lj_strscan_numberobj(o)) {
-      copyTV(L, L->base-1, o);
+      copyTV(L, L->base-1-LJ_FR2, o);
       return FFH_RES(1);
       return FFH_RES(1);
     }
     }
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -270,11 +275,11 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	  int32_t i;
 	  int32_t i;
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
-	  setintV(L->base-1, i);
+	  setintV(L->base-1-LJ_FR2, i);
 	  return FFH_RES(1);
 	  return FFH_RES(1);
 	}
 	}
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
-		       (uint8_t *)&(L->base-1)->n, o, 0);
+		       (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
 	return FFH_RES(1);
 	return FFH_RES(1);
       }
       }
     }
     }
@@ -282,53 +287,46 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   } else {
   } else {
     const char *p = strdata(lj_lib_checkstr(L, 1));
     const char *p = strdata(lj_lib_checkstr(L, 1));
     char *ep;
     char *ep;
+    unsigned int neg = 0;
     unsigned long ul;
     unsigned long ul;
     if (base < 2 || base > 36)
     if (base < 2 || base > 36)
       lj_err_arg(L, 2, LJ_ERR_BASERNG);
       lj_err_arg(L, 2, LJ_ERR_BASERNG);
-    ul = strtoul(p, &ep, base);
-    if (p != ep) {
-      while (lj_char_isspace((unsigned char)(*ep))) ep++;
-      if (*ep == '\0') {
-	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1, (int32_t)ul);
-	else
-	  setnumV(L->base-1, (lua_Number)ul);
-	return FFH_RES(1);
+    while (lj_char_isspace((unsigned char)(*p))) p++;
+    if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
+    if (lj_char_isalnum((unsigned char)(*p))) {
+      ul = strtoul(p, &ep, base);
+      if (p != ep) {
+	while (lj_char_isspace((unsigned char)(*ep))) ep++;
+	if (*ep == '\0') {
+	  if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
+	    if (neg) ul = -ul;
+	    setintV(L->base-1-LJ_FR2, (int32_t)ul);
+	  } else {
+	    lua_Number n = (lua_Number)ul;
+	    if (neg) n = -n;
+	    setnumV(L->base-1-LJ_FR2, n);
+	  }
+	  return FFH_RES(1);
+	}
       }
       }
     }
     }
   }
   }
-  setnilV(L->base-1);
+  setnilV(L->base-1-LJ_FR2);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 {
 {
   TValue *o = lj_lib_checkany(L, 1);
   TValue *o = lj_lib_checkany(L, 1);
   cTValue *mo;
   cTValue *mo;
   L->top = o+1;  /* Only keep one argument. */
   L->top = o+1;  /* Only keep one argument. */
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
-  } else {
-    GCstr *s;
-    if (tvisnumber(o)) {
-      s = lj_str_fromnumber(L, o);
-    } else if (tvispri(o)) {
-      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
-    } else {
-      if (tvisfunc(o) && isffunc(funcV(o)))
-	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
-      else
-	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
-      /* Note: lua_pushfstring calls the GC which may invalidate o. */
-      s = strV(L->top-1);
-    }
-    setstrV(L, L->base-1, s);
-    return FFH_RES(1);
   }
   }
+  lj_gc_check(L);
+  setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+  return FFH_RES(1);
 }
 }
 
 
 /* -- Base library: throw and catch errors -------------------------------- */
 /* -- Base library: throw and catch errors -------------------------------- */
@@ -357,7 +355,7 @@ LJLIB_ASM_(xpcall)		LJLIB_REC(.)
 
 
 static int load_aux(lua_State *L, int status, int envarg)
 static int load_aux(lua_State *L, int status, int envarg)
 {
 {
-  if (status == 0) {
+  if (status == LUA_OK) {
     if (tvistab(L->base+envarg-1)) {
     if (tvistab(L->base+envarg-1)) {
       GCfunc *fn = funcV(L->top-1);
       GCfunc *fn = funcV(L->top-1);
       GCtab *t = tabV(L->base+envarg-1);
       GCtab *t = tabV(L->base+envarg-1);
@@ -430,7 +428,7 @@ LJLIB_CF(dofile)
   GCstr *fname = lj_lib_optstr(L, 1);
   GCstr *fname = lj_lib_optstr(L, 1);
   setnilV(L->top);
   setnilV(L->top);
   L->top = L->base+1;
   L->top = L->base+1;
-  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
+  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
     lua_error(L);
     lua_error(L);
   lua_call(L, 0, LUA_MULTRET);
   lua_call(L, 0, LUA_MULTRET);
   return (int)(L->top - L->base) - 1;
   return (int)(L->top - L->base) - 1;
@@ -440,20 +438,20 @@ LJLIB_CF(dofile)
 
 
 LJLIB_CF(gcinfo)
 LJLIB_CF(gcinfo)
 {
 {
-  setintV(L->top++, (G(L)->gc.total >> 10));
+  setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
   return 1;
   return 1;
 }
 }
 
 
 LJLIB_CF(collectgarbage)
 LJLIB_CF(collectgarbage)
 {
 {
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
-    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
   int32_t data = lj_lib_optint(L, 2, 0);
   int32_t data = lj_lib_optint(L, 2, 0);
   if (opt == LUA_GCCOUNT) {
   if (opt == LUA_GCCOUNT) {
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
   } else {
   } else {
     int res = lua_gc(L, opt, data);
     int res = lua_gc(L, opt, data);
-    if (opt == LUA_GCSTEP)
+    if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
       setboolV(L->top, res);
       setboolV(L->top, res);
     else
     else
       setintV(L->top, res);
       setintV(L->top, res);
@@ -506,21 +504,12 @@ LJLIB_CF(print)
   }
   }
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {
+    cTValue *o = &L->base[i];
     const char *str;
     const char *str;
     size_t size;
     size_t size;
-    cTValue *o = &L->base[i];
-    if (shortcut && tvisstr(o)) {
-      str = strVdata(o);
-      size = strV(o)->len;
-    } else if (shortcut && tvisint(o)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(o));
-      size = (size_t)(buf+LJ_STR_INTBUF-p);
-      str = p;
-    } else if (shortcut && tvisnum(o)) {
-      char buf[LJ_STR_NUMBUF];
-      size = lj_str_bufnum(buf, o);
-      str = buf;
+    MSize len;
+    if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
+      size = len;
     } else {
     } else {
       copyTV(L, L->top+1, o);
       copyTV(L, L->top+1, o);
       copyTV(L, L->top, L->top-1);
       copyTV(L, L->top, L->top-1);
@@ -557,8 +546,8 @@ LJLIB_CF(coroutine_status)
   co = threadV(L->base);
   co = threadV(L->base);
   if (co == L) s = "running";
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status == LUA_YIELD) s = "suspended";
-  else if (co->status != 0) s = "dead";
-  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->status != LUA_OK) s = "dead";
+  else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
   else s = "suspended";
   lua_pushstring(L, s);
   lua_pushstring(L, s);
@@ -578,6 +567,12 @@ LJLIB_CF(coroutine_running)
 #endif
 #endif
 }
 }
 
 
+LJLIB_CF(coroutine_isyieldable)
+{
+  setboolV(L->top++, cframe_canyield(L->cframe));
+  return 1;
+}
+
 LJLIB_CF(coroutine_create)
 LJLIB_CF(coroutine_create)
 {
 {
   lua_State *L1;
   lua_State *L1;
@@ -597,11 +592,11 @@ LJLIB_ASM(coroutine_yield)
 static int ffh_resume(lua_State *L, lua_State *co, int wrap)
 static int ffh_resume(lua_State *L, lua_State *co, int wrap)
 {
 {
   if (co->cframe != NULL || co->status > LUA_YIELD ||
   if (co->cframe != NULL || co->status > LUA_YIELD ||
-      (co->status == 0 && co->top == co->base)) {
+      (co->status == LUA_OK && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
     if (wrap) lj_err_caller(L, em);
-    setboolV(L->base-1, 0);
-    setstrV(L, L->base, lj_err_str(L, em));
+    setboolV(L->base-1-LJ_FR2, 0);
+    setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
     return FFH_RES(2);
   }
   }
   lj_state_growstack(co, (MSize)(L->top - L->base));
   lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +637,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
 
 
 LJLIB_CF(coroutine_wrap)
 LJLIB_CF(coroutine_wrap)
 {
 {
+  GCfunc *fn;
   lj_cf_coroutine_create(L);
   lj_cf_coroutine_create(L);
-  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
-  setpc_wrap_aux(L, funcV(L->top-1));
+  fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, fn);
   return 1;
   return 1;
 }
 }
 
 

+ 120 - 14
luajit.mod/luajit/src/lib_bit.c

@@ -12,26 +12,99 @@
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_bit
 #define LJLIB_MODULE_bit
 
 
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
 {
 {
+  GCcdata *cd = lj_cdata_new_(L, id, 8);
+  *(uint64_t *)cdataptr(cd) = x;
+  setcdataV(L, L->base-1-LJ_FR2, cd);
+  return FFH_RES(1);
+}
+#else
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && lj_strscan_numberobj(o)))
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);
+    return i;
+  }
+}
+#endif
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
   lj_lib_checknumber(L, 1);
   lj_lib_checknumber(L, 1);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
 }
 }
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
 
 
 LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
 LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
 {
 {
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);
+  return FFH_RETRY;
+#else
   lj_lib_checknumber(L, 1);
   lj_lib_checknumber(L, 1);
-  lj_lib_checkbit(L, 2);
+  bit_checkbit(L, 2);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
 }
 }
 LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
 LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
 LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
 LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
 
 
 LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
 LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
 {
 {
+#if LJ_HASFFI
+  CTypeID id = 0;
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
   int i = 0;
   int i = 0;
   do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
   do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
 }
 }
 LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
 LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
 LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
 LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-LJLIB_CF(bit_tohex)
+LJLIB_CF(bit_tohex)		LJLIB_REC(.)
 {
 {
-  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
-  char buf[8];
-  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
-  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
-  lua_pushlstring(L, buf, (size_t)n);
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+				  (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+  uint32_t b = (uint32_t)bit_checkbit(L, 1);
+  int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+  SBuf *sb = lj_buf_tmp_(L);
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+  if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+  if (n < 8) b &= (1u << 4*n)-1;
+#endif
+  sb = lj_strfmt_putfxint(sb, sf, b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 

+ 5 - 5
luajit.mod/luajit/src/lib_debug.c

@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable)	LJLIB_REC(.)
 {
 {
   lj_lib_checkany(L, 1);
   lj_lib_checkany(L, 1);
   if (!lua_getmetatable(L, 1)) {
   if (!lua_getmetatable(L, 1)) {
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-static const char KEY_HOOK = 'h';
+#define KEY_HOOK	((void *)0x3004)
 
 
 static void hookf(lua_State *L, lua_Debug *ar)
 static void hookf(lua_State *L, lua_Debug *ar)
 {
 {
   static const char *const hooknames[] =
   static const char *const hooknames[] =
     {"call", "return", "line", "count", "tail return"};
     {"call", "return", "line", "count", "tail return"};
-  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_pushlightuserdata(L, KEY_HOOK);
   lua_rawget(L, LUA_REGISTRYINDEX);
   lua_rawget(L, LUA_REGISTRYINDEX);
   if (lua_isfunction(L, -1)) {
   if (lua_isfunction(L, -1)) {
     lua_pushstring(L, hooknames[(int)ar->event]);
     lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
     count = luaL_optint(L, arg+3, 0);
     count = luaL_optint(L, arg+3, 0);
     func = hookf; mask = makemask(smask, count);
     func = hookf; mask = makemask(smask, count);
   }
   }
-  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_pushlightuserdata(L, KEY_HOOK);
   lua_pushvalue(L, arg+1);
   lua_pushvalue(L, arg+1);
   lua_rawset(L, LUA_REGISTRYINDEX);
   lua_rawset(L, LUA_REGISTRYINDEX);
   lua_sethook(L, func, mask, count);
   lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
   if (hook != NULL && hook != hookf) {  /* external hook? */
   if (hook != NULL && hook != hookf) {  /* external hook? */
     lua_pushliteral(L, "external hook");
     lua_pushliteral(L, "external hook");
   } else {
   } else {
-    lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+    lua_pushlightuserdata(L, KEY_HOOK);
     lua_rawget(L, LUA_REGISTRYINDEX);   /* get hook */
     lua_rawget(L, LUA_REGISTRYINDEX);   /* get hook */
   }
   }
   lua_pushstring(L, unmakemask(mask, buff));
   lua_pushstring(L, unmakemask(mask, buff));

+ 37 - 16
luajit.mod/luajit/src/lib_ffi.c

@@ -29,6 +29,7 @@
 #include "lj_ccall.h"
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_ccallback.h"
 #include "lj_clib.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
       }
       }
     }
     }
     copyTV(L, base, L->top);
     copyTV(L, base, L->top);
-    tv = L->top-1;
+    tv = L->top-1-LJ_FR2;
   }
   }
   return lj_meta_tailcall(L, tv);
   return lj_meta_tailcall(L, tv);
 }
 }
@@ -193,7 +194,7 @@ LJLIB_CF(ffi_meta___eq)		LJLIB_REC(cdata_arith MM_eq)
 
 
 LJLIB_CF(ffi_meta___len)	LJLIB_REC(cdata_arith MM_len)
 LJLIB_CF(ffi_meta___len)	LJLIB_REC(cdata_arith MM_len)
 {
 {
-  return ffi_arith(L);
+  return lj_carith_len(L);
 }
 }
 
 
 LJLIB_CF(ffi_meta___lt)		LJLIB_REC(cdata_arith MM_lt)
 LJLIB_CF(ffi_meta___lt)		LJLIB_REC(cdata_arith MM_lt)
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
       }
       }
     }
     }
   }
   }
-  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+  lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
 checkgc:
 checkgc:
   lj_gc_check(L);
   lj_gc_check(L);
   return 1;
   return 1;
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   }
   }
   if (sz == CTSIZE_INVALID)
   if (sz == CTSIZE_INVALID)
     lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
     lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
-  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
-    cd = lj_cdata_new(cts, id, sz);
-  else
-    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+  cd = lj_cdata_newx(cts, id, sz, info);
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -558,6 +556,31 @@ LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
   return 1;
   return 1;
 }
 }
 
 
+/* Internal and unsupported API. */
+LJLIB_CF(ffi_typeinfo)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = (CTypeID)ffi_checkint(L, 1);
+  if (id > 0 && id < cts->top) {
+    CType *ct = ctype_get(cts, id);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+    if (ct->size != CTSIZE_INVALID)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+    if (ct->sib)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+    if (gcref(ct->name)) {
+      GCstr *s = gco2str(gcref(ct->name));
+      setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+    }
+    lj_gc_check(L);
+    return 1;
+  }
+  return 0;
+}
+
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 {
 {
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
@@ -723,8 +746,14 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 #endif
 #endif
 #if LJ_ABI_WIN
 #if LJ_ABI_WIN
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
+#endif
+#if LJ_TARGET_UWP
+  case H_(a40f0bcb,a40f0bcb): b = 1; break;  /* uwp */
 #endif
 #endif
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+#if LJ_GC64
+  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+#endif
   default:
   default:
     break;
     break;
   }
   }
@@ -768,19 +797,11 @@ LJLIB_CF(ffi_gc)	LJLIB_REC(.)
   GCcdata *cd = ffi_checkcdata(L, 1);
   GCcdata *cd = ffi_checkcdata(L, 1);
   TValue *fin = lj_lib_checkany(L, 2);
   TValue *fin = lj_lib_checkany(L, 2);
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
-  GCtab *t = cts->finalizer;
   CType *ct = ctype_raw(cts, cd->ctypeid);
   CType *ct = ctype_raw(cts, cd->ctypeid);
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
 	ctype_isrefarray(ct->info)))
 	ctype_isrefarray(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
-    copyTV(L, lj_tab_set(L, t, L->base), fin);
-    lj_gc_anybarriert(L, t);
-    if (!tvisnil(fin))
-      cd->marked |= LJ_GC_CDATA_FIN;
-    else
-      cd->marked &= ~LJ_GC_CDATA_FIN;
-  }
+  lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
   L->top = L->base+1;  /* Pass through the cdata object. */
   L->top = L->base+1;  /* Pass through the cdata object. */
   return 1;
   return 1;
 }
 }

+ 17 - 24
luajit.mod/luajit/src/lib_io.c

@@ -19,8 +19,10 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
   IOFileUD *iof = io_file_new(L);
   IOFileUD *iof = io_file_new(L);
   iof->fp = fopen(fname, mode);
   iof->fp = fopen(fname, mode);
   if (iof->fp == NULL)
   if (iof->fp == NULL)
-    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+    luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
   return iof;
   return iof;
 }
 }
 
 
@@ -97,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
     int stat = -1;
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
     stat = _pclose(iof->fp);
     stat = _pclose(iof->fp);
 #else
 #else
     lua_assert(0);
     lua_assert(0);
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   char *buf;
   char *buf;
   for (;;) {
   for (;;) {
-    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    buf = lj_buf_tmp(L, m);
     if (fgets(buf+n, m-n, fp) == NULL) break;
     if (fgets(buf+n, m-n, fp) == NULL) break;
     n += (MSize)strlen(buf+n);
     n += (MSize)strlen(buf+n);
     ok |= n;
     ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 {
 {
   MSize m, n;
   MSize m, n;
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     if (n != m) {
     if (n != m) {
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 {
 {
   if (m) {
   if (m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     MSize n = (MSize)fread(buf, 1, m, fp);
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     lj_gc_check(L);
     lj_gc_check(L);
@@ -201,13 +203,12 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
     for (n = start; nargs-- && ok; n++) {
     for (n = start; nargs-- && ok; n++) {
       if (tvisstr(L->base+n)) {
       if (tvisstr(L->base+n)) {
 	const char *p = strVdata(L->base+n);
 	const char *p = strVdata(L->base+n);
-	if (p[0] != '*')
-	  lj_err_arg(L, n+1, LJ_ERR_INVOPT);
-	if (p[1] == 'n')
+	if (p[0] == '*') p++;
+	if (p[0] == 'n')
 	  ok = io_file_readnum(L, fp);
 	  ok = io_file_readnum(L, fp);
-	else if ((p[1] & ~0x20) == 'L')
-	  ok = io_file_readline(L, fp, (p[1] == 'l'));
-	else if (p[1] == 'a')
+	else if ((p[0] & ~0x20) == 'L')
+	  ok = io_file_readline(L, fp, (p[0] == 'l'));
+	else if (p[0] == 'a')
 	  io_file_readall(L, fp);
 	  io_file_readall(L, fp);
 	else
 	else
 	  lj_err_arg(L, n+1, LJ_ERR_INVFMT);
 	  lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -230,19 +231,11 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   cTValue *tv;
   int status = 1;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
   for (tv = L->base+start; tv < L->top; tv++) {
-    if (tvisstr(tv)) {
-      MSize len = strV(tv)->len;
-      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
-    } else if (tvisint(tv)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(tv));
-      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
-      status = status && (fwrite(p, 1, len, fp) == len);
-    } else if (tvisnum(tv)) {
-      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
-    } else {
+    MSize len;
+    const char *p = lj_strfmt_wstrnum(L, tv, &len);
+    if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
-    }
+    status = status && (fwrite(p, 1, len, fp) == len);
   }
   }
   if (LJ_52 && status) {
   if (LJ_52 && status) {
     L->top = L->base+1;
     L->top = L->base+1;
@@ -413,7 +406,7 @@ LJLIB_CF(io_open)
 
 
 LJLIB_CF(io_popen)
 LJLIB_CF(io_popen)
 {
 {
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";
   const char *mode = s ? strdata(s) : "r";

+ 141 - 28
luajit.mod/luajit/src/lib_jit.c

@@ -10,13 +10,17 @@
 #include "lauxlib.h"
 #include "lauxlib.h"
 #include "lualib.h"
 #include "lualib.h"
 
 
-#include "lj_arch.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_debug.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_state.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #if LJ_HASJIT
 #if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_jit.h"
@@ -24,6 +28,7 @@
 #include "lj_iropt.h"
 #include "lj_iropt.h"
 #include "lj_target.h"
 #include "lj_target.h"
 #endif
 #endif
+#include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_vmevent.h"
 #include "lj_vmevent.h"
@@ -280,7 +285,7 @@ static GCtrace *jit_checktrace(lua_State *L)
 /* Names of link types. ORDER LJ_TRLINK */
 /* Names of link types. ORDER LJ_TRLINK */
 static const char *const jit_trlinkname[] = {
 static const char *const jit_trlinkname[] = {
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
-  "interpreter", "return"
+  "interpreter", "return", "stitch"
 };
 };
 
 
 /* local info = jit.util.traceinfo(tr) */
 /* local info = jit.util.traceinfo(tr) */
@@ -333,6 +338,13 @@ LJLIB_CF(jit_util_tracek)
       slot = ir->op2;
       slot = ir->op2;
       ir = &T->ir[ir->op1];
       ir = &T->ir[ir->op1];
     }
     }
+#if LJ_HASFFI
+    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+#endif
     lj_ir_kvalue(L, L->top-2, ir);
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     if (slot == -1)
     if (slot == -1)
@@ -417,6 +429,12 @@ LJLIB_CF(jit_util_ircalladdr)
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
 
 
+static int luaopen_jit_util(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_util);
+  return 1;
+}
+
 /* -- jit.opt module ------------------------------------------------------ */
 /* -- jit.opt module ------------------------------------------------------ */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -514,6 +532,104 @@ LJLIB_CF(jit_opt_start)
 
 
 #endif
 #endif
 
 
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+static const char KEY_PROFILE_THREAD = 't';
+static const char KEY_PROFILE_FUNC = 'f';
+
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+				 int vmstate)
+{
+  TValue key;
+  cTValue *tv;
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  tv = lj_tab_get(L, tabV(registry(L)), &key);
+  if (tvisfunc(tv)) {
+    char vmst = (char)vmstate;
+    int status;
+    setfuncV(L2, L2->top++, funcV(tv));
+    setthreadV(L2, L2->top++, L);
+    setintV(L2->top++, samples);
+    setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
+    status = lua_pcall(L2, 3, 0, 0);  /* callback(thread, samples, vmstate) */
+    if (status) {
+      if (G(L2)->panic) G(L2)->panic(L2);
+      exit(EXIT_FAILURE);
+    }
+    lj_trace_abort(G(L2));
+  }
+}
+
+/* profile.start(mode, cb) */
+LJLIB_CF(jit_profile_start)
+{
+  GCtab *registry = tabV(registry(L));
+  GCstr *mode = lj_lib_optstr(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  lua_State *L2 = lua_newthread(L);  /* Thread that runs profiler callback. */
+  TValue key;
+  /* Anchor thread and function in registry. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setthreadV(L, lj_tab_set(L, registry, &key), L2);
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setfuncV(L, lj_tab_set(L, registry, &key), func);
+  lj_gc_anybarriert(L, registry);
+  luaJIT_profile_start(L, mode ? strdata(mode) : "",
+		       (luaJIT_profile_callback)jit_profile_callback, L2);
+  return 0;
+}
+
+/* profile.stop() */
+LJLIB_CF(jit_profile_stop)
+{
+  GCtab *registry;
+  TValue key;
+  luaJIT_profile_stop(L);
+  registry = tabV(registry(L));
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setnilV(lj_tab_set(L, registry, &key));
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setnilV(lj_tab_set(L, registry, &key));
+  lj_gc_anybarriert(L, registry);
+  return 0;
+}
+
+/* dump = profile.dumpstack([thread,] fmt, depth) */
+LJLIB_CF(jit_profile_dumpstack)
+{
+  lua_State *L2 = L;
+  int arg = 0;
+  size_t len;
+  int depth;
+  GCstr *fmt;
+  const char *p;
+  if (L->top > L->base && tvisthread(L->base)) {
+    L2 = threadV(L->base);
+    arg = 1;
+  }
+  fmt = lj_lib_checkstr(L, arg+1);
+  depth = lj_lib_checkint(L, arg+2);
+  p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+  lua_pushlstring(L, p, len);
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+static int luaopen_jit_profile(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_profile);
+  return 1;
+}
+
+#endif
+
 /* -- JIT compiler initialization ----------------------------------------- */
 /* -- JIT compiler initialization ----------------------------------------- */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -539,38 +655,31 @@ static uint32_t jit_cpudetect(lua_State *L)
   uint32_t features[4];
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
 #if !LJ_HASJIT
-#define JIT_F_CMOV	1
 #define JIT_F_SSE2	2
 #define JIT_F_SSE2	2
 #endif
 #endif
-    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
 #if LJ_HASJIT
 #if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
-	flags |= JIT_F_P4;  /* Currently unused. */
-      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
 	flags |= JIT_F_LEA_AGU;
 	flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-	flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
 	flags |= JIT_F_PREFER_IMUL;
     }
     }
+    if (vendor[0] >= 7) {
+      uint32_t xfeatures[4];
+      lj_vm_cpuid(7, xfeatures);
+      flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
+    }
 #endif
 #endif
   }
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
   /* Check for required instruction set support on x86 (unnecessary on x64). */
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
-  if (!(flags & JIT_F_CMOV))
-    luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
   if (!(flags & JIT_F_SSE2))
   if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
+    luaL_error(L, "CPU with SSE2 required");
 #endif
 #endif
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -592,6 +701,8 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+  /* No optional CPU features to detect (for now). */
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #if LJ_HASJIT
 #if LJ_HASJIT
 #if LJ_ARCH_SQRT
 #if LJ_ARCH_SQRT
@@ -601,21 +712,23 @@ static uint32_t jit_cpudetect(lua_State *L)
   flags |= JIT_F_ROUND;
   flags |= JIT_F_ROUND;
 #endif
 #endif
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-  /* Nothing to do. */
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #if LJ_HASJIT
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
   /* Compile-time MIPS CPU detection. */
 #if LJ_ARCH_VERSION >= 20
 #if LJ_ARCH_VERSION >= 20
-  flags |= JIT_F_MIPS32R2;
+  flags |= JIT_F_MIPSXXR2;
 #endif
 #endif
   /* Runtime MIPS CPU detection. */
   /* Runtime MIPS CPU detection. */
 #if defined(__GNUC__)
 #if defined(__GNUC__)
-  if (!(flags & JIT_F_MIPS32R2)) {
+  if (!(flags & JIT_F_MIPSXXR2)) {
     int x;
     int x;
+#ifdef __mips16
+    x = 0;  /* Runtime detection is difficult. Ensure optimal -march flags. */
+#else
     /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
     /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
     __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
     __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
-    if (x) flags |= JIT_F_MIPS32R2;  /* Either 0x80000000 (R2) or 0 (R1). */
+#endif
+    if (x) flags |= JIT_F_MIPSXXR2;  /* Either 0x80000000 (R2) or 0 (R1). */
   }
   }
 #endif
 #endif
 #endif
 #endif
@@ -632,11 +745,7 @@ static void jit_init(lua_State *L)
   uint32_t flags = jit_cpudetect(L);
   uint32_t flags = jit_cpudetect(L);
 #if LJ_HASJIT
 #if LJ_HASJIT
   jit_State *J = L2J(L);
   jit_State *J = L2J(L);
-#if LJ_TARGET_X86
-  /* Silently turn off the JIT compiler on CPUs without SSE2. */
-  if ((flags & JIT_F_SSE2))
-#endif
-    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
   lj_dispatch_update(G(L));
 #else
 #else
@@ -646,19 +755,23 @@ static void jit_init(lua_State *L)
 
 
 LUALIB_API int luaopen_jit(lua_State *L)
 LUALIB_API int luaopen_jit(lua_State *L)
 {
 {
+  jit_init(L);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushliteral(L, LUAJIT_VERSION);
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+  lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+		tabref(L->env));
+#endif
 #ifndef LUAJIT_DISABLE_JITUTIL
 #ifndef LUAJIT_DISABLE_JITUTIL
-  LJ_LIB_REG(L, "jit.util", jit_util);
+  lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
 #endif
 #endif
 #if LJ_HASJIT
 #if LJ_HASJIT
   LJ_LIB_REG(L, "jit.opt", jit_opt);
   LJ_LIB_REG(L, "jit.opt", jit_opt);
 #endif
 #endif
   L->top -= 2;
   L->top -= 2;
-  jit_init(L);
   return 1;
   return 1;
 }
 }
 
 

+ 4 - 11
luajit.mod/luajit/src/lib_math.c

@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh)		LJLIB_REC(math_htrig IRCALL_tanh)
 LJLIB_ASM_(math_frexp)
 LJLIB_ASM_(math_frexp)
 LJLIB_ASM_(math_modf)		LJLIB_REC(.)
 LJLIB_ASM_(math_modf)		LJLIB_REC(.)
 
 
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
-
 LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 {
 {
   double x = lj_lib_checknum(L, 1);
   double x = lj_lib_checknum(L, 1);
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 #else
 #else
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
 #endif
 #endif
-    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    setnumV(L->base-1-LJ_FR2, x*y);  /* Do NOT join the expression to x / y. */
     return FFH_RES(1);
     return FFH_RES(1);
   }
   }
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
+
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 {
 {
   lj_lib_checknum(L, 1);
   lj_lib_checknum(L, 1);
@@ -224,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L)
   rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
   rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
   rs->valid = 0;  /* Use lazy initialization to save some time on startup. */
   rs->valid = 0;  /* Use lazy initialization to save some time on startup. */
   LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
   LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
-#if defined(LUA_COMPAT_MOD) && !LJ_52
-  lua_getfield(L, -1, "fmod");
-  lua_setfield(L, -2, "mod");
-#endif
   return 1;
   return 1;
 }
 }
 
 

+ 21 - 16
luajit.mod/luajit/src/lib_os.c

@@ -17,7 +17,10 @@
 #include "lualib.h"
 #include "lualib.h"
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
 #endif
 #endif
   }
   }
   if (stm == NULL) {  /* Invalid date? */
   if (stm == NULL) {  /* Invalid date? */
-    setnilV(L->top-1);
+    setnilV(L->top++);
   } else if (strcmp(s, "*t") == 0) {
   } else if (strcmp(s, "*t") == 0) {
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     setfield(L, "sec", stm->tm_sec);
     setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setboolfield(L, "isdst", stm->tm_isdst);
     setboolfield(L, "isdst", stm->tm_isdst);
-  } else {
-    char cc[3];
-    luaL_Buffer b;
-    cc[0] = '%'; cc[2] = '\0';
-    luaL_buffinit(L, &b);
-    for (; *s; s++) {
-      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
-	luaL_addchar(&b, *s);
-      } else {
-	size_t reslen;
-	char buff[200];  /* Should be big enough for any conversion result. */
-	cc[1] = *(++s);
-	reslen = strftime(buff, sizeof(buff), cc, stm);
-	luaL_addlstring(&b, buff, reslen);
+  } else if (*s) {
+    SBuf *sb = &G(L)->tmpbuf;
+    MSize sz = 0, retry = 4;
+    const char *q;
+    for (q = s; *q; q++)
+      sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
+    setsbufL(sb, L);
+    while (retry--) {  /* Limit growth for invalid format or empty result. */
+      char *buf = lj_buf_need(sb, sz);
+      size_t len = strftime(buf, sbufsz(sb), s, stm);
+      if (len) {
+	setstrV(L, L->top++, lj_str_new(L, buf, len));
+	lj_gc_check(L);
+	break;
       }
       }
+      sz += (sz|1);
     }
     }
-    luaL_pushresult(&b);
+  } else {
+    setstrV(L, L->top++, &G(L)->strempty);
   }
   }
   return 1;
   return 1;
 }
 }

+ 46 - 25
luajit.mod/luajit/src/lib_package.c

@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
 BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
 BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
 #endif
 #endif
 
 
+#if LJ_TARGET_UWP
+void *LJ_WIN_LOADLIBA(const char *path)
+{
+  DWORD err = GetLastError();
+  wchar_t wpath[256];
+  HANDLE lib = NULL;
+  if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
+    lib = LoadPackagedLibrary(wpath, 0);
+  }
+  SetLastError(err);
+  return lib;
+}
+#endif
+
 #undef setprogdir
 #undef setprogdir
 
 
 static void setprogdir(lua_State *L)
 static void setprogdir(lua_State *L)
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L)
 static void pusherror(lua_State *L)
 static void pusherror(lua_State *L)
 {
 {
   DWORD error = GetLastError();
   DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuffer[128];
+  char buffer[128*2];
+  if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+      WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
   char buffer[128];
   char buffer[128];
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
       NULL, error, 0, buffer, sizeof(buffer), NULL))
       NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
     lua_pushstring(L, buffer);
     lua_pushstring(L, buffer);
   else
   else
     lua_pushfstring(L, "system error %d\n", error);
     lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib)
 
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
 {
-  HINSTANCE lib = LoadLibraryA(path);
+  HINSTANCE lib = LJ_WIN_LOADLIBA(path);
   if (lib == NULL) pusherror(L);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   UNUSED(gl);
   return lib;
   return lib;
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
   return f;
   return f;
 }
 }
 
 
+#if LJ_TARGET_UWP
+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
+#endif
+
 static const char *ll_bcsym(void *lib, const char *sym)
 static const char *ll_bcsym(void *lib, const char *sym)
 {
 {
   if (lib) {
   if (lib) {
     return (const char *)GetProcAddress((HINSTANCE)lib, sym);
     return (const char *)GetProcAddress((HINSTANCE)lib, sym);
   } else {
   } else {
+#if LJ_TARGET_UWP
+    return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
+#else
     HINSTANCE h = GetModuleHandleA(NULL);
     HINSTANCE h = GetModuleHandleA(NULL);
     const char *p = (const char *)GetProcAddress(h, sym);
     const char *p = (const char *)GetProcAddress(h, sym);
     if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
     if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 					(const char *)ll_bcsym, &h))
 					(const char *)ll_bcsym, &h))
       p = (const char *)GetProcAddress(h, sym);
       p = (const char *)GetProcAddress(h, sym);
     return p;
     return p;
+#endif
   }
   }
 }
 }
 
 
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
     lua_pop(L, 1);
     lua_pop(L, 1);
     plib = (void **)lua_newuserdata(L, sizeof(void *));
     plib = (void **)lua_newuserdata(L, sizeof(void *));
     *plib = NULL;
     *plib = NULL;
-    luaL_getmetatable(L, "_LOADLIB");
-    lua_setmetatable(L, -2);
+    luaL_setmetatable(L, "_LOADLIB");
     lua_pushfstring(L, "LOADLIB: %s", path);
     lua_pushfstring(L, "LOADLIB: %s", path);
     lua_pushvalue(L, -2);
     lua_pushvalue(L, -2);
     lua_settable(L, LUA_REGISTRYINDEX);
     lua_settable(L, LUA_REGISTRYINDEX);
@@ -226,7 +255,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       lua_pop(L, 1);
       if (bcdata) {
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	  return PACKAGE_ERR_LOAD;
 	return 0;
 	return 0;
       }
       }
@@ -383,7 +412,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   }
   return 1;
   return 1;
@@ -391,8 +420,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-static const int sentinel_ = 0;
-#define sentinel	((void *)&sentinel_)
+#define sentinel	((void *)0x4004)
 
 
 static int lj_cf_package_require(lua_State *L)
 static int lj_cf_package_require(lua_State *L)
 {
 {
@@ -482,29 +510,19 @@ static void modinit(lua_State *L, const char *modname)
 static int lj_cf_package_module(lua_State *L)
 static int lj_cf_package_module(lua_State *L)
 {
 {
   const char *modname = luaL_checkstring(L, 1);
   const char *modname = luaL_checkstring(L, 1);
-  int loaded = lua_gettop(L) + 1;  /* index of _LOADED table */
-  lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
-  lua_getfield(L, loaded, modname);  /* get _LOADED[modname] */
-  if (!lua_istable(L, -1)) {  /* not found? */
-    lua_pop(L, 1);  /* remove previous result */
-    /* try global variable (and create one if it does not exist) */
-    if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
-      lj_err_callerv(L, LJ_ERR_BADMODN, modname);
-    lua_pushvalue(L, -1);
-    lua_setfield(L, loaded, modname);  /* _LOADED[modname] = new table */
-  }
-  /* check whether table already has a _NAME field */
+  int lastarg = (int)(L->top - L->base);
+  luaL_pushmodule(L, modname, 1);
   lua_getfield(L, -1, "_NAME");
   lua_getfield(L, -1, "_NAME");
-  if (!lua_isnil(L, -1)) {  /* is table an initialized module? */
+  if (!lua_isnil(L, -1)) {  /* Module already initialized? */
     lua_pop(L, 1);
     lua_pop(L, 1);
-  } else {  /* no; initialize it */
+  } else {
     lua_pop(L, 1);
     lua_pop(L, 1);
     modinit(L, modname);
     modinit(L, modname);
   }
   }
   lua_pushvalue(L, -1);
   lua_pushvalue(L, -1);
   setfenv(L);
   setfenv(L);
-  dooptions(L, loaded - 1);
-  return 0;
+  dooptions(L, lastarg);
+  return LJ_52;
 }
 }
 
 
 static int lj_cf_package_seeall(lua_State *L)
 static int lj_cf_package_seeall(lua_State *L)
@@ -575,13 +593,16 @@ LUALIB_API int luaopen_package(lua_State *L)
   lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
   lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
   lua_setfield(L, -2, "__gc");
   lua_setfield(L, -2, "__gc");
   luaL_register(L, LUA_LOADLIBNAME, package_lib);
   luaL_register(L, LUA_LOADLIBNAME, package_lib);
-  lua_pushvalue(L, -1);
-  lua_replace(L, LUA_ENVIRONINDEX);
+  lua_copy(L, -1, LUA_ENVIRONINDEX);
   lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
   lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
   for (i = 0; package_loaders[i] != NULL; i++) {
   for (i = 0; package_loaders[i] != NULL; i++) {
     lj_lib_pushcf(L, package_loaders[i], 1);
     lj_lib_pushcf(L, package_loaders[i], 1);
     lua_rawseti(L, -2, i+1);
     lua_rawseti(L, -2, i+1);
   }
   }
+#if LJ_52
+  lua_pushvalue(L, -1);
+  lua_setfield(L, -3, "searchers");
+#endif
   lua_setfield(L, -2, "loaders");
   lua_setfield(L, -2, "loaders");
   lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
   lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
   noenv = lua_toboolean(L, -1);
   noenv = lua_toboolean(L, -1);

+ 130 - 322
luajit.mod/luajit/src/lib_string.c

@@ -6,8 +6,6 @@
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 */
 
 
-#include <stdio.h>
-
 #define lib_string_c
 #define lib_string_c
 #define LUA_LIB
 #define LUA_LIB
 
 
@@ -18,6 +16,7 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
@@ -25,17 +24,19 @@
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_char.h"
 #include "lj_char.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_string
 #define LJLIB_MODULE_string
 
 
-LJLIB_ASM(string_len)		LJLIB_REC(.)
-{
-  lj_lib_checkstr(L, 1);
-  return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+  function(s)
+    CHECK_str(s)
+    return #s
+  end
+*/
 
 
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 {
 {
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
   lj_state_checkstack(L, (MSize)n);
   lj_state_checkstack(L, (MSize)n);
   p = (const unsigned char *)strdata(s) + start;
   p = (const unsigned char *)strdata(s) + start;
   for (i = 0; i < n; i++)
   for (i = 0; i < n; i++)
-    setintV(L->base + i-1, p[i]);
+    setintV(L->base + i-1-LJ_FR2, p[i]);
   return FFH_RES(n);
   return FFH_RES(n);
 }
 }
 
 
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char)		LJLIB_REC(.)
 {
 {
   int i, nargs = (int)(L->top - L->base);
   int i, nargs = (int)(L->top - L->base);
-  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs);
+  char *buf = lj_buf_tmp(L, (MSize)nargs);
   for (i = 1; i <= nargs; i++) {
   for (i = 1; i <= nargs; i++) {
     int32_t k = lj_lib_checkint(L, i);
     int32_t k = lj_lib_checkint(L, i);
     if (!checku8(k))
     if (!checku8(k))
       lj_err_arg(L, i, LJ_ERR_BADVAL);
       lj_err_arg(L, i, LJ_ERR_BADVAL);
     buf[i-1] = (char)k;
     buf[i-1] = (char)k;
   }
   }
-  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep)		LJLIB_REC(.)
 {
 {
   GCstr *s = lj_lib_checkstr(L, 1);
   GCstr *s = lj_lib_checkstr(L, 1);
-  int32_t k = lj_lib_checkint(L, 2);
+  int32_t rep = lj_lib_checkint(L, 2);
   GCstr *sep = lj_lib_optstr(L, 3);
   GCstr *sep = lj_lib_optstr(L, 3);
-  int32_t len = (int32_t)s->len;
-  global_State *g = G(L);
-  int64_t tlen;
-  const char *src;
-  char *buf;
-  if (k <= 0) {
-  empty:
-    setstrV(L, L->base-1, &g->strempty);
-    return FFH_RES(1);
-  }
-  if (sep) {
-    tlen = (int64_t)len + sep->len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-    tlen *= k;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  } else {
-    tlen = (int64_t)k * len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  }
-  if (tlen == 0) goto empty;
-  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
-  src = strdata(s);
-  if (sep) {
-    tlen -= sep->len;  /* Ignore trailing separator. */
-    if (k > 1) {  /* Paste one string and one separator. */
-      int32_t i;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = strdata(sep); len = sep->len;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
-    }
+  SBuf *sb = lj_buf_tmp_(L);
+  if (sep && rep > 1) {
+    GCstr *s2 = lj_buf_cat2str(L, sep, s);
+    lj_buf_reset(sb);
+    lj_buf_putstr(sb, s);
+    s = s2;
+    rep--;
   }
   }
-  do {
-    int32_t i = 0;
-    do { *buf++ = src[i++]; } while (i < len);
-  } while (--k > 0);
-  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
-  return FFH_RES(1);
+  sb = lj_buf_putstr_rep(sb, s, rep);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
 }
 }
 
 
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
 {
 {
-  GCstr *s = lj_lib_checkstr(L, 1);
-  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  lj_lib_checkstr(L, 1);
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
 {
 {
-  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
   UNUSED(L);
   UNUSED(L);
   return 0;
   return 0;
 }
 }
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
 {
 {
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
-  luaL_Buffer b;
+  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf. */
   L->top = L->base+1;
   L->top = L->base+1;
-  luaL_buffinit(L, &b);
-  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
     lj_err_caller(L, LJ_ERR_STRDUMP);
     lj_err_caller(L, LJ_ERR_STRDUMP);
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -183,7 +154,6 @@ typedef struct MatchState {
 } MatchState;
 } MatchState;
 
 
 #define L_ESC		'%'
 #define L_ESC		'%'
-#define SPECIALS	"^$*+?.([%-"
 
 
 static int check_capture(MatchState *ms, int l)
 static int check_capture(MatchState *ms, int l)
 {
 {
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
   return s;
   return s;
 }
 }
 
 
-static const char *lmemfind(const char *s1, size_t l1,
-			    const char *s2, size_t l2)
-{
-  if (l2 == 0) {
-    return s1;  /* empty strings are everywhere */
-  } else if (l2 > l1) {
-    return NULL;  /* avoids a negative `l1' */
-  } else {
-    const char *init;  /* to search for a `*s2' inside `s1' */
-    l2--;  /* 1st char will be checked by `memchr' */
-    l1 = l1-l2;  /* `s2' cannot be found after that */
-    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
-      init++;   /* 1st char is already checked */
-      if (memcmp(init, s2+1, l2) == 0) {
-	return init-1;
-      } else {  /* correct `l1' and `s1' to try again */
-	l1 -= (size_t)(init-s1);
-	s1 = init;
-      }
-    }
-    return NULL;  /* not found */
-  }
-}
-
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 {
 {
   if (i >= ms->level) {
   if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
   return nlevels;  /* number of strings pushed */
   return nlevels;  /* number of strings pushed */
 }
 }
 
 
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
-  /* relative string position: negative means back from end */
-  if (pos < 0) pos += (ptrdiff_t)len + 1;
-  return (pos >= 0) ? pos : 0;
-}
-
 static int str_find_aux(lua_State *L, int find)
 static int str_find_aux(lua_State *L, int find)
 {
 {
-  size_t l1, l2;
-  const char *s = luaL_checklstring(L, 1, &l1);
-  const char *p = luaL_checklstring(L, 2, &l2);
-  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
-  if (init < 0) {
-    init = 0;
-  } else if ((size_t)(init) > l1) {
+  GCstr *s = lj_lib_checkstr(L, 1);
+  GCstr *p = lj_lib_checkstr(L, 2);
+  int32_t start = lj_lib_optint(L, 3, 1);
+  MSize st;
+  if (start < 0) start += (int32_t)s->len; else start--;
+  if (start < 0) start = 0;
+  st = (MSize)start;
+  if (st > s->len) {
 #if LJ_52
 #if LJ_52
     setnilV(L->top-1);
     setnilV(L->top-1);
     return 1;
     return 1;
 #else
 #else
-    init = (ptrdiff_t)l1;
+    st = s->len;
 #endif
 #endif
   }
   }
-  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
-      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
-    /* do a plain search */
-    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
-    if (s2) {
-      lua_pushinteger(L, s2-s+1);
-      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
+	       !lj_str_haspattern(p))) {  /* Search for fixed string. */
+    const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+    if (q) {
+      setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
+      setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
       return 2;
       return 2;
     }
     }
-  } else {
+  } else {  /* Search for pattern. */
     MatchState ms;
     MatchState ms;
-    int anchor = (*p == '^') ? (p++, 1) : 0;
-    const char *s1=s+init;
+    const char *pstr = strdata(p);
+    const char *sstr = strdata(s) + st;
+    int anchor = 0;
+    if (*pstr == '^') { pstr++; anchor = 1; }
     ms.L = L;
     ms.L = L;
-    ms.src_init = s;
-    ms.src_end = s+l1;
-    do {
-      const char *res;
+    ms.src_init = strdata(s);
+    ms.src_end = strdata(s) + s->len;
+    do {  /* Loop through string and try to match the pattern. */
+      const char *q;
       ms.level = ms.depth = 0;
       ms.level = ms.depth = 0;
-      if ((res=match(&ms, s1, p)) != NULL) {
+      q = match(&ms, sstr, pstr);
+      if (q) {
 	if (find) {
 	if (find) {
-	  lua_pushinteger(L, s1-s+1);  /* start */
-	  lua_pushinteger(L, res-s);   /* end */
-	  return push_captures(&ms, NULL, 0) + 2;
+	  setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
+	  setintV(L->top++, (int32_t)(q-strdata(s)));
+	  return push_captures(&ms, NULL, NULL) + 2;
 	} else {
 	} else {
-	  return push_captures(&ms, s1, res);
+	  return push_captures(&ms, sstr, q);
 	}
 	}
       }
       }
-    } while (s1++ < ms.src_end && !anchor);
+    } while (sstr++ < ms.src_end && !anchor);
   }
   }
-  lua_pushnil(L);  /* not found */
+  setnilV(L->top-1);  /* Not found. */
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(string_find)
+LJLIB_CF(string_find)		LJLIB_REC(.)
 {
 {
   return str_find_aux(L, 1);
   return str_find_aux(L, 1);
 }
 }
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM	512
-/* valid flags in a format specification */
-#define FMT_FLAGS	"-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
-  GCstr *str = lj_lib_checkstr(L, arg);
-  int32_t len = (int32_t)str->len;
-  const char *s = strdata(str);
-  luaL_addchar(b, '"');
-  while (len--) {
-    uint32_t c = uchar(*s);
-    if (c == '"' || c == '\\' || c == '\n') {
-      luaL_addchar(b, '\\');
-    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
-      uint32_t d;
-      luaL_addchar(b, '\\');
-      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
-	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
-	goto tens;
-      } else if (c >= 10) {
-      tens:
-	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
-      }
-      c += '0';
-    }
-    luaL_addchar(b, c);
-    s++;
-  }
-  luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
-    lj_err_caller(L, LJ_ERR_STRFMTR);
-  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
-  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
-    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  }
-  if (lj_char_isdigit(uchar(*p)))
-    lj_err_caller(L, LJ_ERR_STRFMTW);
-  *(form++) = '%';
-  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
-  form += p - strfrmt + 1;
-  *form = '\0';
-  return p;
-}
-
-static void addintlen(char *form)
-{
-  size_t l = strlen(form);
-  char spec = form[l - 1];
-  strcpy(form + l - 1, LUA_INTFRMLEN);
-  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
-  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (LUA_INTFRM_T)intV(o);
-    else
-      return (LUA_INTFRM_T)numV(o);
-  }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (unsigned LUA_INTFRM_T)intV(o);
-    else if ((int32_t)o->u32.hi < 0)
-      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
-    else
-      return (unsigned LUA_INTFRM_T)numV(o);
-  }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
+/* Emulate tostring() inline. */
+static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
 {
 {
   TValue *o = L->base+arg-1;
   TValue *o = L->base+arg-1;
   cTValue *mo;
   cTValue *mo;
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   if (LJ_LIKELY(tvisstr(o)))
   if (LJ_LIKELY(tvisstr(o)))
     return strV(o);
     return strV(o);
-  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, o);
     copyTV(L, L->top++, o);
     lua_call(L, 1, 1);
     lua_call(L, 1, 1);
-    L->top--;
-    if (tvisstr(L->top))
-      return strV(L->top);
-    o = L->base+arg-1;
-    copyTV(L, o, L->top);
-  }
-  if (tvisnumber(o)) {
-    return lj_str_fromnumber(L, o);
-  } else if (tvisnil(o)) {
-    return lj_str_newlit(L, "nil");
-  } else if (tvisfalse(o)) {
-    return lj_str_newlit(L, "false");
-  } else if (tvistrue(o)) {
-    return lj_str_newlit(L, "true");
-  } else {
-    if (tvisfunc(o) && isffunc(funcV(o)))
-      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
-    else
-      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
-    L->top--;
-    return strV(L->top);
+    copyTV(L, L->base+arg-1, --L->top);
+    return NULL;  /* Buffer may be overwritten, retry. */
   }
   }
-}
-
-LJLIB_CF(string_format)
-{
-  int arg = 1, top = (int)(L->top - L->base);
-  GCstr *fmt = lj_lib_checkstr(L, arg);
-  const char *strfrmt = strdata(fmt);
-  const char *strfrmt_end = strfrmt + fmt->len;
-  luaL_Buffer b;
-  luaL_buffinit(L, &b);
-  while (strfrmt < strfrmt_end) {
-    if (*strfrmt != L_ESC) {
-      luaL_addchar(&b, *strfrmt++);
-    } else if (*++strfrmt == L_ESC) {
-      luaL_addchar(&b, *strfrmt++);  /* %% */
-    } else { /* format item */
-      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
-      char buff[MAX_FMTITEM];  /* to store the formatted item */
+  return lj_strfmt_obj(L, o);
+}
+
+LJLIB_CF(string_format)		LJLIB_REC(.)
+{
+  int arg, top = (int)(L->top - L->base);
+  GCstr *fmt;
+  SBuf *sb;
+  FormatState fs;
+  SFormat sf;
+  int retry = 0;
+again:
+  arg = 1;
+  sb = lj_buf_tmp_(L);
+  fmt = lj_lib_checkstr(L, arg);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
       if (++arg > top)
       if (++arg > top)
 	luaL_argerror(L, arg, lj_obj_typename[0]);
 	luaL_argerror(L, arg, lj_obj_typename[0]);
-      strfrmt = scanformat(L, strfrmt, form);
-      switch (*strfrmt++) {
-      case 'c':
-	sprintf(buff, form, lj_lib_checkint(L, arg));
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(L->base+arg-1)) {
+	  int32_t k = intV(L->base+arg-1);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	} else {
+	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+	}
 	break;
 	break;
-      case 'd':  case 'i':
-	addintlen(form);
-	sprintf(buff, form, num2intfrm(L, arg));
+      case STRFMT_UINT:
+	if (tvisint(L->base+arg-1))
+	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+	else
+	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'o':  case 'u':  case 'x':  case 'X':
-	addintlen(form);
-	sprintf(buff, form, num2uintfrm(L, arg));
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
-	TValue tv;
-	tv.n = lj_lib_checknum(L, arg);
-	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
-	  /* Canonicalize output of non-finite values. */
-	  char *p, nbuf[LJ_STR_NUMBUF];
-	  size_t len = lj_str_bufnum(nbuf, &tv);
-	  if (strfrmt[-1] < 'a') {
-	    nbuf[len-3] = nbuf[len-3] - 0x20;
-	    nbuf[len-2] = nbuf[len-2] - 0x20;
-	    nbuf[len-1] = nbuf[len-1] - 0x20;
-	  }
-	  nbuf[len] = '\0';
-	  for (p = form; *p < 'A' && *p != '.'; p++) ;
-	  *p++ = 's'; *p = '\0';
-	  sprintf(buff, form, nbuf);
-	  break;
-	}
-	sprintf(buff, form, (double)tv.n);
+      case STRFMT_STR: {
+	GCstr *str = string_fmt_tostring(L, arg, retry);
+	if (str == NULL)
+	  retry = 1;
+	else if ((sf & STRFMT_T_QUOTED))
+	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
+	else
+	  lj_strfmt_putfstr(sb, sf, str);
 	break;
 	break;
 	}
 	}
-      case 'q':
-	addquoted(L, &b, arg);
-	continue;
-      case 'p':
-	lj_str_pushf(L, "%p", lua_topointer(L, arg));
-	luaL_addvalue(&b);
-	continue;
-      case 's': {
-	GCstr *str = meta_tostring(L, arg);
-	if (!strchr(form, '.') && str->len >= 100) {
-	  /* no precision and string is too long to be formatted;
-	     keep original string */
-	  setstrV(L, L->top++, str);
-	  luaL_addvalue(&b);
-	  continue;
-	}
-	sprintf(buff, form, strdata(str));
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
 	break;
 	break;
-	}
       default:
       default:
-	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	lua_assert(0);
 	break;
 	break;
       }
       }
-      luaL_addlstring(&b, buff, strlen(buff));
     }
     }
   }
   }
-  luaL_pushresult(&b);
+  if (retry++ == 1) goto again;
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -925,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L)
   GCtab *mt;
   GCtab *mt;
   global_State *g;
   global_State *g;
   LJ_LIB_REG(L, LUA_STRLIBNAME, string);
   LJ_LIB_REG(L, LUA_STRLIBNAME, string);
-#if defined(LUA_COMPAT_GFIND) && !LJ_52
-  lua_getfield(L, -1, "gmatch");
-  lua_setfield(L, -2, "gfind");
-#endif
   mt = lj_tab_new(L, 0, 1);
   mt = lj_tab_new(L, 0, 1);
   /* NOBARRIER: basemt is a GC root. */
   /* NOBARRIER: basemt is a GC root. */
   g = G(L);
   g = G(L);

+ 107 - 80
luajit.mod/luajit/src/lib_table.c

@@ -16,57 +16,43 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_table
 #define LJLIB_MODULE_table
 
 
-LJLIB_CF(table_foreachi)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  MSize i, n = lj_tab_len(t);
-  for (i = 1; i <= n; i++) {
-    cTValue *val;
-    setfuncV(L, L->top, func);
-    setintV(L->top+1, i);
-    val = lj_tab_getint(t, (int32_t)i);
-    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for i=1,#t do
+      local r = f(i, t[i])
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_CF(table_foreach)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  L->top = L->base+3;
-  setnilV(L->top-1);
-  while (lj_tab_next(L, t, L->top-1)) {
-    copyTV(L, L->top+2, L->top);
-    copyTV(L, L->top+1, L->top-1);
-    setfuncV(L, L->top, func);
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreach) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for k, v in PAIRS(t) do
+      local r = f(k, v)
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_ASM(table_getn)		LJLIB_REC(.)
-{
-  lj_lib_checktab(L, 1);
-  return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+  function(t)
+    CHECK_tab(t)
+    return #t
+  end
+*/
 
 
 LJLIB_CF(table_maxn)
 LJLIB_CF(table_maxn)
 {
 {
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert)		LJLIB_REC(.)
   return 0;
   return 0;
 }
 }
 
 
-LJLIB_CF(table_remove)		LJLIB_REC(.)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  int32_t e = (int32_t)lj_tab_len(t);
-  int32_t pos = lj_lib_optint(L, 2, e);
-  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
-    return 0;
-  lua_rawgeti(L, 1, pos);  /* Get previous value. */
-  /* NOBARRIER: This just moves existing elements around. */
-  for (; pos < e; pos++) {
-    cTValue *src = lj_tab_getint(t, pos+1);
-    TValue *dst = lj_tab_setint(L, t, pos);
-    if (src) {
-      copyTV(L, dst, src);
-    } else {
-      setnilV(dst);
-    }
-  }
-  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
-  return 1;  /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+  function(t, pos)
+    CHECK_tab(t)
+    local len = #t
+    if pos == nil then
+      if len ~= 0 then
+	local old = t[len]
+	t[len] = nil
+	return old
+      end
+    else
+      CHECK_int(pos)
+      if pos >= 1 and pos <= len then
+	local old = t[pos]
+	for i=pos+1,len do
+	  t[i-1] = t[i]
+	end
+	t[len] = nil
+	return old
+      end
+    end
+  end
+*/
+
+LJLIB_LUA(table_move) /*
+  function(a1, f, e, t, a2)
+    CHECK_tab(a1)
+    CHECK_int(f)
+    CHECK_int(e)
+    CHECK_int(t)
+    if a2 == nil then a2 = a1 end
+    CHECK_tab(a2)
+    if e >= f then
+      local d = t - f
+      if t > e or t <= f or a2 ~= a1 then
+	for i=f,e do a2[i+d] = a1[i] end
+      else
+	for i=e,f,-1 do a2[i+d] = a1[i] end
+      end
+    end
+    return a2
+  end
+*/
 
 
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
 {
-  luaL_Buffer b;
   GCtab *t = lj_lib_checktab(L, 1);
   GCtab *t = lj_lib_checktab(L, 1);
   GCstr *sep = lj_lib_optstr(L, 2);
   GCstr *sep = lj_lib_optstr(L, 2);
-  MSize seplen = sep ? sep->len : 0;
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
   int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
 	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
 	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
-  luaL_buffinit(L, &b);
-  if (i <= e) {
-    for (;;) {
-      cTValue *o;
-      lua_rawgeti(L, 1, i);
-      o = L->top-1;
-      if (!(tvisstr(o) || tvisnumber(o)))
-	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
-      luaL_addvalue(&b);
-      if (i++ == e) break;
-      if (seplen)
-	luaL_addlstring(&b, strdata(sep), seplen);
-    }
+  SBuf *sb = lj_buf_tmp_(L);
+  SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+  if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
+    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    cTValue *o = lj_tab_getint(t, idx);
+    lj_err_callerv(L, LJ_ERR_TABCAT,
+		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
   }
   }
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sbx));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack)
 }
 }
 #endif
 #endif
 
 
+LJLIB_NOREG LJLIB_CF(table_new)		LJLIB_REC(.)
+{
+  int32_t a = lj_lib_checkint(L, 1);
+  int32_t h = lj_lib_checkint(L, 2);
+  lua_createtable(L, a, h);
+  return 1;
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear)	LJLIB_REC(.)
+{
+  lj_tab_clear(lj_lib_checktab(L, 1));
+  return 0;
+}
+
+static int luaopen_table_new(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
+}
+
+static int luaopen_table_clear(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
+}
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L)
   lua_getglobal(L, "unpack");
   lua_getglobal(L, "unpack");
   lua_setfield(L, -2, "unpack");
   lua_setfield(L, -2, "unpack");
 #endif
 #endif
+  lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+  lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
   return 1;
   return 1;
 }
 }
 
 

+ 179 - 85
luajit.mod/luajit/src/lj_alloc.c

@@ -72,13 +72,56 @@
 
 
 #define IS_DIRECT_BIT		(SIZE_T_ONE)
 #define IS_DIRECT_BIT		(SIZE_T_ONE)
 
 
+
+/* Determine system-specific block allocation method. */
 #if LJ_TARGET_WINDOWS
 #if LJ_TARGET_WINDOWS
 
 
 #define WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <windows.h>
 
 
+#define LJ_ALLOC_VIRTUALALLOC	1
+
+#if LJ_64 && !LJ_GC64
+#define LJ_ALLOC_NTAVM		1
+#endif
+
+#else
+
+#include <errno.h>
+/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
+#include <sys/mman.h>
+
+#define LJ_ALLOC_MMAP		1
+
 #if LJ_64
 #if LJ_64
 
 
+#define LJ_ALLOC_MMAP_PROBE	1
+
+#if LJ_GC64
+#define LJ_ALLOC_MBITS		47	/* 128 TB in LJ_GC64 mode. */
+#elif LJ_TARGET_X64 && LJ_HASJIT
+/* Due to limitations in the x64 compiler backend. */
+#define LJ_ALLOC_MBITS		31	/* 2 GB on x64 with !LJ_GC64. */
+#else
+#define LJ_ALLOC_MBITS		32	/* 4 GB on other archs with !LJ_GC64. */
+#endif
+
+#endif
+
+#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
+#define LJ_ALLOC_MMAP32		1
+#endif
+
+#if LJ_TARGET_LINUX
+#define LJ_ALLOC_MREMAP		1
+#endif
+
+#endif
+
+
+#if LJ_ALLOC_VIRTUALALLOC
+
+#if LJ_ALLOC_NTAVM
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
 		       size_t *size, ULONG alloctype, ULONG prot);
 		       size_t *size, ULONG alloctype, ULONG prot);
@@ -89,14 +132,15 @@ static PNTAVM ntavm;
 */
 */
 #define NTAVM_ZEROBITS		1
 #define NTAVM_ZEROBITS		1
 
 
-static void INIT_MMAP(void)
+static void init_mmap(void)
 {
 {
   ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
   ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
 				 "NtAllocateVirtualMemory");
 				 "NtAllocateVirtualMemory");
 }
 }
+#define INIT_MMAP()	init_mmap()
 
 
 /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
 /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
 {
   DWORD olderr = GetLastError();
   DWORD olderr = GetLastError();
   void *ptr = NULL;
   void *ptr = NULL;
@@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 }
 }
 
 
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *DIRECT_MMAP(size_t size)
 {
 {
   DWORD olderr = GetLastError();
   DWORD olderr = GetLastError();
   void *ptr = NULL;
   void *ptr = NULL;
@@ -119,23 +163,21 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
 
 
 #else
 #else
 
 
-#define INIT_MMAP()		((void)0)
-
 /* Win32 MMAP via VirtualAlloc */
 /* Win32 MMAP via VirtualAlloc */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
 {
   DWORD olderr = GetLastError();
   DWORD olderr = GetLastError();
-  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
   SetLastError(olderr);
   SetLastError(olderr);
   return ptr ? ptr : MFAIL;
   return ptr ? ptr : MFAIL;
 }
 }
 
 
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *DIRECT_MMAP(size_t size)
 {
 {
   DWORD olderr = GetLastError();
   DWORD olderr = GetLastError();
-  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-			   PAGE_READWRITE);
+  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+			    PAGE_READWRITE);
   SetLastError(olderr);
   SetLastError(olderr);
   return ptr ? ptr : MFAIL;
   return ptr ? ptr : MFAIL;
 }
 }
@@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
 #endif
 #endif
 
 
 /* This function supports releasing coalesced segments */
 /* This function supports releasing coalesced segments */
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+static int CALL_MUNMAP(void *ptr, size_t size)
 {
 {
   DWORD olderr = GetLastError();
   DWORD olderr = GetLastError();
   MEMORY_BASIC_INFORMATION minfo;
   MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
   return 0;
   return 0;
 }
 }
 
 
-#else
-
-#include <errno.h>
-#include <sys/mman.h>
+#elif LJ_ALLOC_MMAP
 
 
 #define MMAP_PROT		(PROT_READ|PROT_WRITE)
 #define MMAP_PROT		(PROT_READ|PROT_WRITE)
 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,105 +213,152 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
 
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
-
-#if defined(MAP_32BIT)
+#if LJ_ALLOC_MMAP_PROBE
 
 
-#if defined(__sun__)
-#define MMAP_REGION_START	((uintptr_t)0x1000)
+#ifdef MAP_TRYFIXED
+#define MMAP_FLAGS_PROBE	(MMAP_FLAGS|MAP_TRYFIXED)
 #else
 #else
-/* Actually this only gives us max. 1GB in current Linux kernels. */
-#define MMAP_REGION_START	((uintptr_t)0)
+#define MMAP_FLAGS_PROBE	MMAP_FLAGS
 #endif
 #endif
 
 
-static LJ_AINLINE void *CALL_MMAP(size_t size)
-{
-  int olderr = errno;
-  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
-  errno = olderr;
-  return ptr;
-}
+#define LJ_ALLOC_MMAP_PROBE_MAX		30
+#define LJ_ALLOC_MMAP_PROBE_LINEAR	5
 
 
-#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN
+#define LJ_ALLOC_MMAP_PROBE_LOWER	((uintptr_t)0x4000)
 
 
-/* OSX and FreeBSD mmap() use a naive first-fit linear search.
-** That's perfect for us. Except that -pagezero_size must be set for OSX,
-** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
-** to be reduced to 250MB on FreeBSD.
+/* No point in a giant ifdef mess. Just try to open /dev/urandom.
+** It doesn't really matter if this fails, since we get some ASLR bits from
+** every unsuitable allocation, too. And we prefer linear allocation, anyway.
 */
 */
-#if LJ_TARGET_OSX || defined(__DragonFly__)
-#define MMAP_REGION_START	((uintptr_t)0x10000)
-#elif LJ_TARGET_PS4
-#define MMAP_REGION_START	((uintptr_t)0x4000)
-#else
-#define MMAP_REGION_START	((uintptr_t)0x10000000)
-#endif
-#define MMAP_REGION_END		((uintptr_t)0x80000000)
+#include <fcntl.h>
+#include <unistd.h>
 
 
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-#include <sys/resource.h>
-#endif
+static uintptr_t mmap_probe_seed(void)
+{
+  uintptr_t val;
+  int fd = open("/dev/urandom", O_RDONLY);
+  if (fd != -1) {
+    int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
+    (void)close(fd);
+    if (ok) return val;
+  }
+  return 1;  /* Punt. */
+}
 
 
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *mmap_probe(size_t size)
 {
 {
-  int olderr = errno;
   /* Hint for next allocation. Doesn't need to be thread-safe. */
   /* Hint for next allocation. Doesn't need to be thread-safe. */
-  static uintptr_t alloc_hint = MMAP_REGION_START;
-  int retry = 0;
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-  static int rlimit_modified = 0;
-  if (LJ_UNLIKELY(rlimit_modified == 0)) {
-    struct rlimit rlim;
-    rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
-    setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail below. */
-    rlimit_modified = 1;
-  }
-#endif
-  for (;;) {
-    void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
-    if ((uintptr_t)p >= MMAP_REGION_START &&
-	(uintptr_t)p + size < MMAP_REGION_END) {
-      alloc_hint = (uintptr_t)p + size;
+  static uintptr_t hint_addr = 0;
+  static uintptr_t hint_prng = 0;
+  int olderr = errno;
+  int retry;
+  for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
+    void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
+    uintptr_t addr = (uintptr_t)p;
+    if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
+	((addr + size) >> LJ_ALLOC_MBITS) == 0) {
+      /* We got a suitable address. Bump the hint address. */
+      hint_addr = addr + size;
       errno = olderr;
       errno = olderr;
       return p;
       return p;
     }
     }
-    if (p != CMFAIL) munmap(p, size);
-#if defined(__sun__) || defined(__DragonFly__)
-    alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
-    if (alloc_hint + size < MMAP_REGION_END) continue;
-#endif
-    if (retry) break;
-    retry = 1;
-    alloc_hint = MMAP_REGION_START;
+    if (p != MFAIL) {
+      munmap(p, size);
+    } else if (errno == ENOMEM) {
+      return MFAIL;
+    }
+    if (hint_addr) {
+      /* First, try linear probing. */
+      if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
+	hint_addr += 0x1000000;
+	if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
+	  hint_addr = 0;
+	continue;
+      } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
+	/* Next, try a no-hint probe to get back an ASLR address. */
+	hint_addr = 0;
+	continue;
+      }
+    }
+    /* Finally, try pseudo-random probing. */
+    if (LJ_UNLIKELY(hint_prng == 0)) {
+      hint_prng = mmap_probe_seed();
+    }
+    /* The unsuitable address we got has some ASLR PRNG bits. */
+    hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
+    do {  /* The PRNG itself is very weak, but see above. */
+      hint_prng = hint_prng * 1103515245 + 12345;
+      hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
+      hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
+    } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
   }
   }
   errno = olderr;
   errno = olderr;
-  return CMFAIL;
+  return MFAIL;
 }
 }
 
 
+#endif
+
+#if LJ_ALLOC_MMAP32
+
+#if defined(__sun__)
+#define LJ_ALLOC_MMAP32_START	((uintptr_t)0x1000)
 #else
 #else
+#define LJ_ALLOC_MMAP32_START	((uintptr_t)0)
+#endif
 
 
-#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
+static void *mmap_map32(size_t size)
+{
+#if LJ_ALLOC_MMAP_PROBE
+  static int fallback = 0;
+  if (fallback)
+    return mmap_probe(size);
+#endif
+  {
+    int olderr = errno;
+    void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+    errno = olderr;
+    /* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
+#if LJ_ALLOC_MMAP_PROBE
+    if (ptr == MFAIL) {
+      fallback = 1;
+      return mmap_probe(size);
+    }
+#endif
+    return ptr;
+  }
+}
 
 
 #endif
 #endif
 
 
+#if LJ_ALLOC_MMAP32
+#define CALL_MMAP(size)		mmap_map32(size)
+#elif LJ_ALLOC_MMAP_PROBE
+#define CALL_MMAP(size)		mmap_probe(size)
 #else
 #else
-
-/* 32 bit mode is easy. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
   void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
   void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
   errno = olderr;
   errno = olderr;
   return ptr;
   return ptr;
 }
 }
-
 #endif
 #endif
 
 
-#define INIT_MMAP()		((void)0)
-#define DIRECT_MMAP(s)		CALL_MMAP(s)
+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+
+#include <sys/resource.h>
+
+static void init_mmap(void)
+{
+  struct rlimit rlim;
+  rlim.rlim_cur = rlim.rlim_max = 0x10000;
+  setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail later. */
+}
+#define INIT_MMAP()	init_mmap()
 
 
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+#endif
+
+static int CALL_MUNMAP(void *ptr, size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
   int ret = munmap(ptr, size);
   int ret = munmap(ptr, size);
@@ -280,10 +366,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
   return ret;
   return ret;
 }
 }
 
 
-#if LJ_TARGET_LINUX
+#if LJ_ALLOC_MREMAP
 /* Need to define _GNU_SOURCE to get the mremap prototype. */
 /* Need to define _GNU_SOURCE to get the mremap prototype. */
-static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
-				     int flags)
+static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
 {
 {
   int olderr = errno;
   int olderr = errno;
   ptr = mremap(ptr, osz, nsz, flags);
   ptr = mremap(ptr, osz, nsz, flags);
@@ -294,7 +379,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
@@ -303,6 +388,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 
 
 #endif
 #endif
 
 
+
+#ifndef INIT_MMAP
+#define INIT_MMAP()		((void)0)
+#endif
+
+#ifndef DIRECT_MMAP
+#define DIRECT_MMAP(s)		CALL_MMAP(s)
+#endif
+
 #ifndef CALL_MREMAP
 #ifndef CALL_MREMAP
 #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
 #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
 #endif
 #endif

+ 169 - 77
luajit.mod/luajit/src/lj_api.c

@@ -24,6 +24,7 @@
 #include "lj_trace.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 
 /* -- Common helper functions --------------------------------------------- */
 /* -- Common helper functions --------------------------------------------- */
 
 
@@ -111,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
   from->top = f;
   from->top = f;
 }
 }
 
 
+LUA_API const lua_Number *lua_version(lua_State *L)
+{
+  static const lua_Number version = LUA_VERSION_NUM;
+  UNUSED(L);
+  return &version;
+}
+
 /* -- Stack manipulation -------------------------------------------------- */
 /* -- Stack manipulation -------------------------------------------------- */
 
 
 LUA_API int lua_gettop(lua_State *L)
 LUA_API int lua_gettop(lua_State *L)
@@ -151,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx)
   copyTV(L, p, L->top);
   copyTV(L, p, L->top);
 }
 }
 
 
-LUA_API void lua_replace(lua_State *L, int idx)
+static void copy_slot(lua_State *L, TValue *f, int idx)
 {
 {
-  api_checknelems(L, 1);
   if (idx == LUA_GLOBALSINDEX) {
   if (idx == LUA_GLOBALSINDEX) {
-    api_check(L, tvistab(L->top-1));
+    api_check(L, tvistab(f));
     /* NOBARRIER: A thread (i.e. L) is never black. */
     /* NOBARRIER: A thread (i.e. L) is never black. */
-    setgcref(L->env, obj2gco(tabV(L->top-1)));
+    setgcref(L->env, obj2gco(tabV(f)));
   } else if (idx == LUA_ENVIRONINDEX) {
   } else if (idx == LUA_ENVIRONINDEX) {
     GCfunc *fn = curr_func(L);
     GCfunc *fn = curr_func(L);
     if (fn->c.gct != ~LJ_TFUNC)
     if (fn->c.gct != ~LJ_TFUNC)
       lj_err_msg(L, LJ_ERR_NOENV);
       lj_err_msg(L, LJ_ERR_NOENV);
-    api_check(L, tvistab(L->top-1));
-    setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
-    lj_gc_barrier(L, fn, L->top-1);
+    api_check(L, tvistab(f));
+    setgcref(fn->c.env, obj2gco(tabV(f)));
+    lj_gc_barrier(L, fn, f);
   } else {
   } else {
     TValue *o = index2adr(L, idx);
     TValue *o = index2adr(L, idx);
     api_checkvalidindex(L, o);
     api_checkvalidindex(L, o);
-    copyTV(L, o, L->top-1);
+    copyTV(L, o, f);
     if (idx < LUA_GLOBALSINDEX)  /* Need a barrier for upvalues. */
     if (idx < LUA_GLOBALSINDEX)  /* Need a barrier for upvalues. */
-      lj_gc_barrier(L, curr_func(L), L->top-1);
+      lj_gc_barrier(L, curr_func(L), f);
   }
   }
+}
+
+LUA_API void lua_replace(lua_State *L, int idx)
+{
+  api_checknelems(L, 1);
+  copy_slot(L, L->top - 1, idx);
   L->top--;
   L->top--;
 }
 }
 
 
+LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
+{
+  copy_slot(L, index2adr(L, fromidx), toidx);
+}
+
 LUA_API void lua_pushvalue(lua_State *L, int idx)
 LUA_API void lua_pushvalue(lua_State *L, int idx)
 {
 {
   copyTV(L, L->top, index2adr(L, idx));
   copyTV(L, L->top, index2adr(L, idx));
@@ -188,7 +206,7 @@ LUA_API int lua_type(lua_State *L, int idx)
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
   if (tvisnumber(o)) {
   if (tvisnumber(o)) {
     return LUA_TNUMBER;
     return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o)) {
   } else if (tvislightud(o)) {
     return LUA_TLIGHTUSERDATA;
     return LUA_TLIGHTUSERDATA;
 #endif
 #endif
@@ -268,7 +286,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     return 0;
     return 0;
   } else if (tvispri(o1)) {
   } else if (tvispri(o1)) {
     return o1 != niltv(L) && o2 != niltv(L);
     return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o1)) {
   } else if (tvislightud(o1)) {
     return o1->u64 == o2->u64;
     return o1->u64 == o2->u64;
 #endif
 #endif
@@ -283,8 +301,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -306,8 +324,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -324,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
     return 0;
     return 0;
 }
 }
 
 
+LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  if (LJ_LIKELY(tvisnumber(o))) {
+    if (ok) *ok = 1;
+    return numberVnum(o);
+  } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
+    if (ok) *ok = 1;
+    return numV(&tmp);
+  } else {
+    if (ok) *ok = 0;
+    return 0;
+  }
+}
+
 LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
 LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
 {
 {
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
@@ -361,9 +395,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
     if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
     if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
       return 0;
       return 0;
     if (tvisint(&tmp))
     if (tvisint(&tmp))
-      return (lua_Integer)intV(&tmp);
+      return intV(&tmp);
+    n = numV(&tmp);
+  }
+#if LJ_64
+  return (lua_Integer)n;
+#else
+  return lj_num2int(n);
+#endif
+}
+
+LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  lua_Number n;
+  if (LJ_LIKELY(tvisint(o))) {
+    if (ok) *ok = 1;
+    return intV(o);
+  } else if (LJ_LIKELY(tvisnum(o))) {
+    n = numV(o);
+  } else {
+    if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
+      if (ok) *ok = 0;
+      return 0;
+    }
+    if (tvisint(&tmp)) {
+      if (ok) *ok = 1;
+      return intV(&tmp);
+    }
     n = numV(&tmp);
     n = numV(&tmp);
   }
   }
+  if (ok) *ok = 1;
 #if LJ_64
 #if LJ_64
   return (lua_Integer)n;
   return (lua_Integer)n;
 #else
 #else
@@ -434,7 +497,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     if (len != NULL) *len = 0;
     if (len != NULL) *len = 0;
@@ -453,7 +516,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +538,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +570,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
   } else if (tvisudata(o)) {
   } else if (tvisudata(o)) {
     return udataV(o)->len;
     return udataV(o)->len;
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
-    GCstr *s = lj_str_fromnumber(L, o);
+    GCstr *s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
     return s->len;
     return s->len;
   } else {
   } else {
@@ -545,17 +608,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
 
 
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 {
 {
-  cTValue *o = index2adr(L, idx);
-  if (tvisudata(o))
-    return uddata(udataV(o));
-  else if (tvislightud(o))
-    return lightudV(o);
-  else if (tviscdata(o))
-    return cdataptr(cdataV(o));
-  else if (tvisgcv(o))
-    return gcV(o);
-  else
-    return NULL;
+  return lj_obj_ptr(index2adr(L, idx));
 }
 }
 
 
 /* -- Stack setters (object creation) ------------------------------------- */
 /* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +659,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
 				     va_list argp)
 				     va_list argp)
 {
 {
   lj_gc_check(L);
   lj_gc_check(L);
-  return lj_str_pushvf(L, fmt, argp);
+  return lj_strfmt_pushvf(L, fmt, argp);
 }
 }
 
 
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +668,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
   va_list argp;
   va_list argp;
   lj_gc_check(L);
   lj_gc_check(L);
   va_start(argp, fmt);
   va_start(argp, fmt);
-  ret = lj_str_pushvf(L, fmt, argp);
+  ret = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   va_end(argp);
   return ret;
   return ret;
 }
 }
@@ -649,10 +702,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 
 
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 {
 {
-  GCtab *t;
   lj_gc_check(L);
   lj_gc_check(L);
-  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
-  settabV(L, L->top, t);
+  settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
   incr_top(L);
   incr_top(L);
 }
 }
 
 
@@ -715,8 +766,8 @@ LUA_API void lua_concat(lua_State *L, int n)
       n -= (int)(L->top - top);
       n -= (int)(L->top - top);
       L->top = top+2;
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
       lj_vm_call(L, top, 1+1);
-      L->top--;
-      copyTV(L, L->top-1, L->top);
+      L->top -= 1+LJ_FR2;
+      copyTV(L, L->top-1, L->top+LJ_FR2);
     } while (--n > 0);
     } while (--n > 0);
   } else if (n == 0) {  /* Push empty string. */
   } else if (n == 0) {  /* Push empty string. */
     setstrV(L, L->top, &G(L)->strempty);
     setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +786,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top-1, v);
   copyTV(L, L->top-1, v);
 }
 }
@@ -751,8 +802,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top, v);
   copyTV(L, L->top, v);
   incr_top(L);
   incr_top(L);
@@ -869,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
   lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
   lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
 }
 }
 
 
-LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
 {
 {
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
   if (tvisudata(o)) {
   if (tvisudata(o)) {
@@ -878,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
     if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
     if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
       return uddata(ud);
       return uddata(ud);
   }
   }
-  lj_err_argtype(L, idx, tname);
-  return NULL;  /* unreachable */
+  return NULL;  /* value is not a userdata with a metatable */
+}
+
+LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+{
+  void *p = luaL_testudata(L, idx, tname);
+  if (!p) lj_err_argtype(L, idx, tname);
+  return p;
 }
 }
 
 
 /* -- Object setters ------------------------------------------------------ */
 /* -- Object setters ------------------------------------------------------ */
@@ -893,13 +950,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
   o = lj_meta_tset(L, t, L->top-2);
   o = lj_meta_tset(L, t, L->top-2);
   if (o) {
   if (o) {
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top-1);
     L->top -= 2;
     L->top -= 2;
+    copyTV(L, o, L->top+1);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 3;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 3+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -913,14 +971,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
   setstrV(L, &key, lj_str_newz(L, k));
   setstrV(L, &key, lj_str_newz(L, k));
   o = lj_meta_tset(L, t, &key);
   o = lj_meta_tset(L, t, &key);
   if (o) {
   if (o) {
-    L->top--;
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top);
+    copyTV(L, o, --L->top);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 2;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 2+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -987,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
   return 1;
   return 1;
 }
 }
 
 
+LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
+{
+  lua_getfield(L, LUA_REGISTRYINDEX, tname);
+  lua_setmetatable(L, -2);
+}
+
 LUA_API int lua_setfenv(lua_State *L, int idx)
 LUA_API int lua_setfenv(lua_State *L, int idx)
 {
 {
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
@@ -1027,11 +1091,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
 
 
 /* -- Calls --------------------------------------------------------------- */
 /* -- Calls --------------------------------------------------------------- */
 
 
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+  TValue *o = L->top, *base = o - nargs;
+  L->top = o+1;
+  for (; o > base; o--) copyTV(L, o, o-1);
+  setnilV(o);
+  return o+1;
+}
+#else
+#define api_call_base(L, nargs)	(L->top - (nargs))
+#endif
+
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
 {
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   api_checknelems(L, nargs+1);
-  lj_vm_call(L, L->top - nargs, nresults+1);
+  lj_vm_call(L, api_call_base(L, nargs), nresults+1);
 }
 }
 
 
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1040,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
   uint8_t oldh = hook_save(g);
   uint8_t oldh = hook_save(g);
   ptrdiff_t ef;
   ptrdiff_t ef;
   int status;
   int status;
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   api_checknelems(L, nargs+1);
   if (errfunc == 0) {
   if (errfunc == 0) {
     ef = 0;
     ef = 0;
@@ -1049,7 +1126,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
     api_checkvalidindex(L, o);
     api_checkvalidindex(L, o);
     ef = savestack(L, o);
     ef = savestack(L, o);
   }
   }
-  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
   if (status) hook_restore(g, oldh);
   if (status) hook_restore(g, oldh);
   return status;
   return status;
 }
 }
@@ -1057,12 +1134,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 {
 {
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  TValue *top = L->top;
   fn->c.f = func;
   fn->c.f = func;
-  setfuncV(L, L->top, fn);
-  setlightudV(L->top+1, checklightudptr(L, ud));
+  setfuncV(L, top++, fn);
+  if (LJ_FR2) setnilV(top++);
+  setlightudV(top++, checklightudptr(L, ud));
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
-  L->top += 2;
-  return L->top-1;  /* Now call the newly allocated C function. */
+  L->top = top;
+  return top-1;  /* Now call the newly allocated C function. */
 }
 }
 
 
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1070,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
   global_State *g = G(L);
   global_State *g = G(L);
   uint8_t oldh = hook_save(g);
   uint8_t oldh = hook_save(g);
   int status;
   int status;
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   status = lj_vm_cpcall(L, func, ud, cpcall);
   status = lj_vm_cpcall(L, func, ud, cpcall);
   if (status) hook_restore(g, oldh);
   if (status) hook_restore(g, oldh);
   return status;
   return status;
@@ -1079,10 +1158,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 {
 {
   if (luaL_getmetafield(L, idx, field)) {
   if (luaL_getmetafield(L, idx, field)) {
-    TValue *base = L->top--;
-    copyTV(L, base, index2adr(L, idx));
-    L->top = base+1;
-    lj_vm_call(L, base, 1+1);
+    TValue *top = L->top--;
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, index2adr(L, idx));
+    L->top = top;
+    lj_vm_call(L, top-1, 1+1);
     return 1;
     return 1;
   }
   }
   return 0;
   return 0;
@@ -1090,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 
 
 /* -- Coroutine yield and resume ------------------------------------------ */
 /* -- Coroutine yield and resume ------------------------------------------ */
 
 
+LUA_API int lua_isyieldable(lua_State *L)
+{
+  return cframe_canyield(L->cframe);
+}
+
 LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_yield(lua_State *L, int nresults)
 {
 {
   void *cf = L->cframe;
   void *cf = L->cframe;
@@ -1109,12 +1194,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
     } else {  /* Yield from hook: add a pseudo-frame. */
     } else {  /* Yield from hook: add a pseudo-frame. */
       TValue *top = L->top;
       TValue *top = L->top;
       hook_leave(g);
       hook_leave(g);
-      top->u64 = cframe_multres(cf);
-      setcont(top+1, lj_cont_hook);
-      setframe_pc(top+1, cframe_pc(cf)-1);
-      setframe_gc(top+2, obj2gco(L));
-      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
-      L->top = L->base = top+3;
+      (top++)->u64 = cframe_multres(cf);
+      setcont(top, lj_cont_hook);
+      if (LJ_FR2) top++;
+      setframe_pc(top, cframe_pc(cf)-1);
+      if (LJ_FR2) top++;
+      setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+      setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+1;
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
       lj_err_throw(L, LUA_YIELD);
       lj_err_throw(L, LUA_YIELD);
 #else
 #else
@@ -1131,7 +1218,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_resume(lua_State *L, int nargs)
 LUA_API int lua_resume(lua_State *L, int nargs)
 {
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
   if (L->cframe == NULL && L->status <= LUA_YIELD)
-    return lj_vm_resume(L, L->top - nargs, 0, 0);
+    return lj_vm_resume(L,
+      L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
+      0, 0);
   L->top = L->base;
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   incr_top(L);
   incr_top(L);
@@ -1161,7 +1250,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.total & 0x3ff);
     res = (int)(g->gc.total & 0x3ff);
     break;
     break;
   case LUA_GCSTEP: {
   case LUA_GCSTEP: {
-    MSize a = (MSize)data << 10;
+    GCSize a = (GCSize)data << 10;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     while (g->gc.total >= g->gc.threshold)
     while (g->gc.total >= g->gc.threshold)
       if (lj_gc_step(L) > 0) {
       if (lj_gc_step(L) > 0) {
@@ -1178,6 +1267,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.stepmul);
     res = (int)(g->gc.stepmul);
     g->gc.stepmul = (MSize)data;
     g->gc.stepmul = (MSize)data;
     break;
     break;
+  case LUA_GCISRUNNING:
+    res = (g->gc.threshold != LJ_MAX_MEM);
+    break;
   default:
   default:
     res = -1;  /* Invalid option. */
     res = -1;  /* Invalid option. */
   }
   }

+ 203 - 56
luajit.mod/luajit/src/lj_arch.h

@@ -19,12 +19,16 @@
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_arm		3
 #define LUAJIT_ARCH_arm		3
-#define LUAJIT_ARCH_PPC		4
-#define LUAJIT_ARCH_ppc		4
-#define LUAJIT_ARCH_PPCSPE	5
-#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_ARM64	4
+#define LUAJIT_ARCH_arm64	4
+#define LUAJIT_ARCH_PPC		5
+#define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
 #define LUAJIT_ARCH_mips	6
+#define LUAJIT_ARCH_MIPS32	6
+#define LUAJIT_ARCH_mips32	6
+#define LUAJIT_ARCH_MIPS64	7
+#define LUAJIT_ARCH_mips64	7
 
 
 /* Target OS. */
 /* Target OS. */
 #define LUAJIT_OS_OTHER		0
 #define LUAJIT_OS_OTHER		0
@@ -43,14 +47,14 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
-#else
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
-#endif
+#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS64
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
-#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
+#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS32
 #else
 #else
 #error "No support for this architecture (yet)"
 #error "No support for this architecture (yet)"
 #endif
 #endif
@@ -70,7 +74,7 @@
        defined(__NetBSD__) || defined(__OpenBSD__) || \
        defined(__NetBSD__) || defined(__OpenBSD__) || \
        defined(__DragonFly__)) && !defined(__ORBIS__)
        defined(__DragonFly__)) && !defined(__ORBIS__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #define LUAJIT_OS	LUAJIT_OS_BSD
-#elif (defined(__sun__) && defined(__svr4__))
+#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__)
 #define LUAJIT_OS	LUAJIT_OS_POSIX
 #define LUAJIT_OS	LUAJIT_OS_POSIX
 #elif defined(__CYGWIN__)
 #elif defined(__CYGWIN__)
 #define LJ_TARGET_CYGWIN	1
 #define LJ_TARGET_CYGWIN	1
@@ -99,7 +103,7 @@
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 
 
@@ -125,6 +129,19 @@
 #define LJ_TARGET_CONSOLE	1
 #define LJ_TARGET_CONSOLE	1
 #endif
 #endif
 
 
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE	1
+#define LJ_TARGET_CONSOLE	1
+#define LJ_TARGET_GC64		1
+#endif
+
+#ifdef _UWP
+#define LJ_TARGET_UWP		1
+#if LUAJIT_TARGET == LUAJIT_ARCH_X64
+#define LJ_TARGET_GC64		1
+#endif
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -167,6 +184,9 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+#ifdef LUAJIT_ENABLE_GC64
+#define LJ_TARGET_GC64		1
+#endif
 
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 
 
@@ -188,7 +208,7 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
 
-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
 #define LJ_ARCH_VERSION		80
 #define LJ_ARCH_VERSION		80
 #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
@@ -200,22 +220,86 @@
 #define LJ_ARCH_VERSION		50
 #define LJ_ARCH_VERSION		50
 #endif
 #endif
 
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_BITS		64
+#if defined(__AARCH64EB__)
+#define LJ_ARCH_NAME		"arm64be"
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#else
+#define LJ_ARCH_NAME		"arm64"
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#endif
+#define LJ_TARGET_ARM64		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+
+#define LJ_ARCH_VERSION		80
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
 
-#define LJ_ARCH_NAME		"ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#endif
+
 #if _LP64
 #if _LP64
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_BITS		64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME		"ppc64le"
+#else
+#define LJ_ARCH_NAME		"ppc64"
+#endif
 #else
 #else
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_BITS		32
+#define LJ_ARCH_NAME		"ppc"
+
+#if !defined(LJ_ARCH_HASFPU)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ARCH_HASFPU		0
+#else
+#define LJ_ARCH_HASFPU		1
 #endif
 #endif
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ABI_SOFTFP		1
+#else
+#define LJ_ABI_SOFTFP		0
+#endif
+#endif
+#endif
+
+#if LJ_ABI_SOFTFP
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#else
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
+#endif
+
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
+
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64	1
+#define LJ_ARCH_NOFFI		1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64		1
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
 
 
 #if _ARCH_PWR7
 #if _ARCH_PWR7
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
@@ -230,10 +314,6 @@
 #else
 #else
 #define LJ_ARCH_VERSION		0
 #define LJ_ARCH_VERSION		0
 #endif
 #endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64		1
-#define LJ_ARCH_NOFFI		1
-#endif
 #if _ARCH_PPCSQ
 #if _ARCH_PPCSQ
 #define LJ_ARCH_SQRT		1
 #define LJ_ARCH_SQRT		1
 #endif
 #endif
@@ -247,44 +327,57 @@
 #define LJ_ARCH_XENON		1
 #define LJ_ARCH_XENON		1
 #endif
 #endif
 
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME		"ppcspe"
-#define LJ_ARCH_BITS		32
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP		1
-#endif
-#define LJ_ABI_EABI		1
-#define LJ_TARGET_PPCSPE	1
-#define LJ_TARGET_EHRETREG	3
-#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT	0
-#define LJ_TARGET_MASKROT	1
-#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT		1
-
-#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
+#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
 
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_NAME		"mipsel"
 #define LJ_ARCH_NAME		"mipsel"
+#else
+#define LJ_ARCH_NAME		"mips64el"
+#endif
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #else
 #else
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_NAME		"mips"
 #define LJ_ARCH_NAME		"mips"
+#else
+#define LJ_ARCH_NAME		"mips64"
+#endif
 #define LJ_ARCH_ENDIAN		LUAJIT_BE
 #define LJ_ARCH_ENDIAN		LUAJIT_BE
 #endif
 #endif
+
+#if !defined(LJ_ARCH_HASFPU)
+#ifdef __mips_soft_float
+#define LJ_ARCH_HASFPU		0
+#else
+#define LJ_ARCH_HASFPU		1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#ifdef __mips_soft_float
+#define LJ_ABI_SOFTFP		1
+#else
+#define LJ_ABI_SOFTFP		0
+#endif
+#endif
+
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_BITS		32
+#define LJ_TARGET_MIPS32	1
+#else
+#define LJ_ARCH_BITS		64
+#define LJ_TARGET_MIPS64	1
+#define LJ_TARGET_GC64		1
+#endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4
 #define LJ_TARGET_EHRETREG	4
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
 
-#if _MIPS_ARCH_MIPS32R2
+#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
 #define LJ_ARCH_VERSION		20
 #define LJ_ARCH_VERSION		20
 #else
 #else
 #define LJ_ARCH_VERSION		10
 #define LJ_ARCH_VERSION		10
@@ -312,6 +405,16 @@
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #error "Need at least GCC 4.2 or newer"
 #error "Need at least GCC 4.2 or newer"
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
 #elif !LJ_TARGET_PS3
 #elif !LJ_TARGET_PS3
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
 #error "Need at least GCC 4.3 or newer"
@@ -335,22 +438,24 @@
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
 #endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-#error "No support for PowerPC CPUs without double-precision FPU"
+#elif LJ_TARGET_ARM64
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
 #endif
 #endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#elif LJ_TARGET_PPC
+#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)))
+#error "No support for little-endian PPC32"
 #endif
 #endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
 #endif
-#elif LJ_TARGET_MIPS
-#if defined(__mips_soft_float)
-#error "No support for MIPS CPUs without FPU"
+#elif LJ_TARGET_MIPS32
+#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
+#error "Only o32 ABI supported for MIPS32"
 #endif
 #endif
-#if defined(_LP64)
-#error "No support for MIPS64"
+#elif LJ_TARGET_MIPS64
+#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
+#error "Only n64 ABI supported for MIPS64"
 #endif
 #endif
 #endif
 #endif
 #endif
 #endif
@@ -376,6 +481,20 @@
 #endif
 #endif
 #endif
 #endif
 
 
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64			1
+#else
+#define LJ_GC64			0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2			1
+#else
+#define LJ_FR2			0
+#endif
+
 /* Disable or enable the JIT compiler. */
 /* Disable or enable the JIT compiler. */
 #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
 #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
 #define LJ_HASJIT		0
 #define LJ_HASJIT		0
@@ -390,6 +509,21 @@
 #define LJ_HASFFI		1
 #define LJ_HASFFI		1
 #endif
 #endif
 
 
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_PTHREAD	1
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1
+#else
+#define LJ_HASPROFILE		0
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #define LJ_ARCH_HASFPU		1
 #endif
 #endif
@@ -397,6 +531,7 @@
 #define LJ_ABI_SOFTFP		0
 #define LJ_ABI_SOFTFP		0
 #endif
 #endif
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
+#define LJ_SOFTFP32		(LJ_SOFTFP && LJ_32)
 
 
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #define LJ_LE			0
 #define LJ_LE			0
@@ -422,11 +557,11 @@
 #define LJ_TARGET_UNALIGNED	0
 #define LJ_TARGET_UNALIGNED	0
 #endif
 #endif
 
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #define LUAJIT_NO_LOG2
 #endif
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #define LUAJIT_NO_EXP2
 #endif
 #endif
 #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
 #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
@@ -442,6 +577,18 @@
 #define LJ_NO_UNWIND		1
 #define LJ_NO_UNWIND		1
 #endif
 #endif
 
 
+#if LJ_TARGET_WINDOWS
+#if LJ_TARGET_UWP
+#define LJ_WIN_VALLOC	VirtualAllocFromApp
+#define LJ_WIN_VPROTECT	VirtualProtectFromApp
+extern void *LJ_WIN_LOADLIBA(const char *path);
+#else
+#define LJ_WIN_VALLOC	VirtualAlloc
+#define LJ_WIN_VPROTECT	VirtualProtect
+#define LJ_WIN_LOADLIBA(path)	LoadLibraryExA((path), NULL, 0)
+#endif
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1
 #define LJ_52			1

Diferenças do arquivo suprimidas por serem muito extensas
+ 611 - 120
luajit.mod/luajit/src/lj_asm.c


+ 157 - 307
luajit.mod/luajit/src/lj_asm_arm.h

@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
   int32_t ofs = 0;
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
   Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
   UNUSED(ci);
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
       type = ra_alloc1(as, ref+1, allow);
     else
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
 
 
 static void asm_fload(ASMState *as, IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
 {
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
-  ARMIns ai = asm_fxloadins(ir);
-  int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-      return;
+  if (ir->op1 == REF_NIL) {
+    lua_assert(!ra_used(ir));  /* We can end up here if DCE is turned off. */
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    ARMIns ai = asm_fxloadins(ir);
+    int32_t ofs;
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+	return;
+      }
     }
     }
+    ofs = field_ofs[ir->op2];
+    if ((ai & 0x04000000))
+      emit_lso(as, ai, dest, idx, ofs);
+    else
+      emit_lsox(as, ai, dest, idx, ofs);
   }
   }
-  ofs = field_ofs[ir->op2];
-  if ((ai & 0x04000000))
-    emit_lso(as, ai, dest, idx, ofs);
-  else
-    emit_lsox(as, ai, dest, idx, ofs);
 }
 }
 
 
 static void asm_fstore(ASMState *as, IRIns *ir)
 static void asm_fstore(ASMState *as, IRIns *ir)
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   if (ir->r != RID_SINK) {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
     Reg src = ra_alloc1(as, ir->op2,
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1272,19 +1211,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
       ofs -= 4; ir--;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
 #endif
 #endif
 
 
@@ -1459,32 +1422,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
   asm_intop(as, ir, ai);
 }
 }
 
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1550,6 +1487,20 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
   asm_intmul(as, ir);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if !LJ_SOFTFP
+#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
 #if !LJ_SOFTFP
 #if !LJ_SOFTFP
@@ -1561,41 +1512,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
   asm_intneg(as, ir, ARMI_RSB);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 }
 
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-		RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-				   (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1612,6 +1557,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1629,6 +1578,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
   }
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
 {
   uint32_t kcmp = 0, kmov = 0;
   uint32_t kcmp = 0, kmov = 0;
@@ -1702,6 +1657,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
     asm_intmin_max(as, ir, cc);
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 /* Map of comparisons to flags. ORDER IR. */
 /* Map of comparisons to flags. ORDER IR. */
@@ -1817,6 +1775,18 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 }
 
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
 static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1891,7 +1861,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
     return;
   }
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1939,6 +1909,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1968,7 +1948,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1976,7 +1956,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
   }
 }
 }
 
 
@@ -2085,13 +2065,13 @@ static void asm_loop_fixup(ASMState *as)
 
 
 /* -- Head of trace ------------------------------------------------------- */
 /* -- Head of trace ------------------------------------------------------- */
 
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 static void asm_head_lreg(ASMState *as)
 {
 {
   IRIns *ir = IR(ASMREF_L);
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
     ra_evictk(as);
   }
   }
 }
 }
@@ -2162,143 +2142,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
   *p = 0;  /* Prevent load/store merging. */
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {

+ 2031 - 0
luajit.mod/luajit/src/lj_asm_arm64.h

@@ -0,0 +1,2031 @@
+/*
+** ARM64 IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+** Sponsored by Cisco Systems, Inc.
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint.
+**
+** If ref has no register yet, the hint is recorded on the instruction
+** (unless it already carries a hint or iscrossref() is true for it) and a
+** register is then allocated from the allow set. The resulting register is
+** passed to ra_noweak() and returned.
+*/
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions.
+**
+** The operands are ir->op1 (left) and ir->op2 (right). An operand that
+** already has a register keeps it; a missing one is allocated from allow,
+** excluding the register of the other operand. When neither operand has a
+** register, the right one is allocated first if it carries a hint,
+** otherwise the left one is allocated first.
+** Returns both registers packed as left | (right << 8).
+*/
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {  /* Left already has a register. */
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {  /* Only right has a register. */
+    ra_noweak(as, right);
+    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {  /* Neither has one: hinted right goes first. */
+    right = ra_allocref(as, ir->op2, allow);
+    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+  } else {  /* Neither has one and right is unhinted: left goes first. */
+    left = ra_allocref(as, ir->op1, allow);
+    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Setup all needed exit stubs.
+**
+** The stub group is written downwards from as->mctop; as->mctop is then
+** moved to the first instruction of the group. asm_mclimit() is called
+** beforehand if nexits + 3 instructions plus MCLIM_REDZONE would reach
+** below as->mclim.
+*/
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+  for (i = nexits-1; (int32_t)i >= 0; i--)  /* Signed cast ends the countdown (ExitNo presumably unsigned). */
+    *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));  /* Per-exit bl back to label 1. */
+  *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
+  mxp--;  /* Step down first: the bl offset below is relative to its own address. */
+  *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+  *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
+  as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+  /* Keep this in-sync with exitstub_trace_addr().
+  ** Returns the address of the per-exit bl for exitno: the +3 skips the
+  ** three leading instructions (str, bl, movz) that asm_exitstub_setup()
+  ** places at as->mctop.
+  */
+  return as->mctop + exitno + 3;
+}
+
+/* Emit conditional branch to exit for guard.
+**
+** The branch targets the exit stub of the current snapshot (as->snapno).
+** If the current machine code position equals as->invmcp, as->loopinv is
+** set, the instruction at that position is overwritten with an
+** unconditional branch to the exit stub, and a branch on the inverted
+** condition (cc^1) with target p-1 is emitted instead.
+*/
+static void asm_guardcc(ASMState *as, A64CC cc)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);  /* Unconditional branch to the exit stub. */
+    emit_cond_branch(as, cc^1, p-1);  /* Inverted condition, target is p-1. */
+    return;
+  }
+  emit_cond_branch(as, cc, target);
+}
+
+/* Emit test and branch instruction to exit for guard.
+**
+** ai is the test-and-branch instruction, r the tested register and bit the
+** tested bit number. The branch targets the exit stub of the current
+** snapshot (as->snapno). If the current machine code position equals
+** as->invmcp, as->loopinv is set, the instruction at that position is
+** overwritten with an unconditional branch to the exit stub, and the
+** instruction with bit 24 toggled is emitted with target p-1 instead.
+*/
+static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);  /* Unconditional branch to the exit stub. */
+    emit_tnb(as, ai^0x01000000u, r, bit, p-1);  /* Bit 24 toggled: presumably TBZ <-> TBNZ. */
+    return;
+  }
+  emit_tnb(as, ai, r, bit, target);
+}
+
+/* Emit compare and branch instruction to exit for guard.
+**
+** ai is the compare-and-branch instruction and r the compared register.
+** The branch targets the exit stub of the current snapshot (as->snapno).
+** If the current machine code position equals as->invmcp, as->loopinv is
+** set, the instruction at that position is overwritten with an
+** unconditional branch to the exit stub, and the instruction with bit 24
+** toggled is emitted with target p-1 instead.
+*/
+static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);  /* Unconditional branch to the exit stub. */
+    emit_cnb(as, ai^0x01000000u, r, p-1);  /* Bit 24 toggled: presumably CBZ <-> CBNZ. */
+    return;
+  }
+  emit_cnb(as, ai, r, target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{  /* Check whether ref is a constant whose value fits in an int32_t.
+   ** On success the value is stored in *k and 1 is returned. Otherwise
+   ** *k is left untouched and 0 is returned.
+   */
+  if (irref_isk(ref)) {
+    IRIns *ir = IR(ref);
+    if (ir->o == IR_KNULL || !irt_is64(ir->t)) {  /* KNULL or non-64 bit type: use ir->i. */
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_k64(ir)->u64)) {  /* 64 bit constant in int32_t range. */
+      *k = (int32_t)ir_k64(ir)->u64;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* Check if there's no conflicting instruction between curins and ref.
+**
+** Scans the IR backwards from as->curins-1 down to ref+1 for an
+** instruction with opcode `conflict`. Returns 1 if none is found. Returns
+** 0 if one is found, or if as->curins is more than CONFLICT_SEARCH_LIM
+** instructions past ref (the search is not attempted in that case).
+*/
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays.
+**
+** Returns sizeof(GCtab) if ref is a TNEW whose op1 is at most
+** LJ_MAX_COLOSIZE, neverfuse() is false and noconflict() finds no NEWREF
+** between ref and the current instruction. Returns 0 otherwise, which
+** callers in this file treat as "no fusion".
+*/
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+#define FUSE_REG	0x40000000
+
+/* Fuse array/hash/upvalue reference into register+offset operand.
+**
+** Returns the base register and stores the offset operand in *ofsp.
+** Fusion is only attempted while ref has no register:
+** - AREF with constant index: offset is 8*index, plus the colocated array
+**   base from asm_fuseabase() if that applies; used if emit_checkofs()
+**   accepts the offset for ins.
+** - AREF with variable index: *ofsp is FUSE_REG or'ed with the index
+**   register.
+** - HREFK: offset is IR(ir->op2)->op2 * sizeof(Node), if accepted by
+**   emit_checkofs().
+** - UREFC on a constant function: offset is the glofs() value for the
+**   upvalue's tv with RID_GL as base, if accepted by emit_checkofs().
+** In all other cases ref itself is allocated as the base and *ofsp is 0.
+*/
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+			  A64Ins ins)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;  /* Presumably the table behind the array pointer load; confirm. */
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;  /* Colocated: address relative to the table itself. */
+	  ofs += 8*IR(ir->op2)->i;
+	  if (emit_checkofs(ins, ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	} else {
+	  Reg base = ra_alloc1(as, ir->op1, allow);
+	  *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
+	  return base;
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (emit_checkofs(ins, ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+	int64_t ofs = glofs(as, &uv->tv);
+	if (emit_checkofs(ins, ofs)) {
+	  *ofsp = (int32_t)ofs;
+	  return RID_GL;
+	}
+      }
+    }
+  }
+  *ofsp = 0;  /* Fallback: no fusion, ref itself is the base. */
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions.
+**
+** Returns the encoded m operand bits for ref, trying in this order:
+** - the register ref already has;
+** - for a constant: an immediate from emit_isk13() when
+**   (ai & 0x1f000000) == 0x0a000000, else from emit_isk12(), provided the
+**   encoder returns non-zero;
+** - for a fusable BSHL..BSAR by a constant, or an ADD of a value to itself
+**   (handled as a left shift by 1): a shifted register. A left shift of at
+**   most 4 applied to a fusable sign-extending INT to I64 CONV uses the
+**   SXTW extended register form on the CONV source instead;
+** - for a fusable sign-extending INT to I64 CONV: the SXTW extended form;
+** - otherwise a plain register allocated for ref.
+*/
+static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_hasreg(ir->r)) {
+    ra_noweak(as, ir->r);
+    return A64F_M(ir->r);
+  } else if (irref_isk(ref)) {
+    uint32_t m;
+    int64_t k = get_k64val(ir);
+    if ((ai & 0x1f000000) == 0x0a000000)  /* Presumably the logical instruction class; confirm. */
+      m = emit_isk13(k, irt_is64(ir->t));
+    else
+      m = emit_isk12(k);
+    if (m)
+      return m;
+  } else if (mayfuse(as, ref)) {
+    if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
+	(ir->o == IR_ADD && ir->op1 == ir->op2)) {
+      A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
+		    ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
+      int shift = ir->o == IR_ADD ? 1 :
+		    (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
+      IRIns *irl = IR(ir->op1);
+      if (sh == A64SH_LSL &&
+	  irl->o == IR_CONV &&
+	  irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+	  shift <= 4 &&
+	  canfuse(as, irl)) {
+	Reg m = ra_alloc1(as, irl->op1, allow);
+	return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
+      } else {
+	Reg m = ra_alloc1(as, ir->op1, allow);
+	return A64F_M(m) | A64F_SH(sh, shift);
+      }
+    } else if (ir->o == IR_CONV &&
+	       ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
+      Reg m = ra_alloc1(as, ir->op1, allow);
+      return A64F_M(m) | A64F_EX(A64EX_SXTW);
+    }
+  }
+  return A64F_M(ra_allocref(as, ref, allow));  /* Fallback: plain register operand. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand.
+**
+** Emits the load/store instruction ai for register rd, addressing the
+** location given by ref. Fusion is attempted when ref has no register and
+** canfuse() allows it:
+** - ADD with a constant accepted by asm_isk32() and emit_checkofs(): the
+**   left operand becomes the base and the constant the offset.
+** - any other ADD: register+register form (ai^A64I_LS_R), with the left
+**   operand as index, optionally scaled (A64I_LS_SH) and/or sign-extended
+**   from 32 bit (A64I_LS_SXTWx).
+** - STRREF: sizeof(GCstr) is added to the offset. With one constant
+**   operand the other becomes the base; otherwise an ADD into rd followed
+**   by an access relative to rd is emitted.
+** In the remaining cases ref is allocated as the base and used with the
+** offset collected so far (0 if nothing was fused).
+** NOTE(review): the emit_*() calls appear to generate machine code in
+** reverse execution order, so the later call runs first. Confirm against
+** the emitter.
+*/
+static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+			 RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  int32_t ofs = 0;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    if (ir->o == IR_ADD) {
+      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
+	ref = ir->op1;
+      } else {
+	Reg rn, rm;
+	IRRef lref = ir->op1, rref = ir->op2;
+	IRIns *irl = IR(lref);
+	if (mayfuse(as, irl->op1)) {
+	  unsigned int shift = 4;  /* Presumably a value (ai >> 30) can never equal; confirm. */
+	  if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+	    shift = (IR(irl->op2)->i & 63);
+	  } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
+	    shift = 1;
+	  }
+	  if ((ai >> 30) == shift) {  /* Shift equals the top two bits of ai (presumably log2 of the access size). */
+	    lref = irl->op1;
+	    irl = IR(lref);
+	    ai |= A64I_LS_SH;
+	  }
+	}
+	if (irl->o == IR_CONV &&
+	    irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+	    canfuse(as, irl)) {
+	  lref = irl->op1;
+	  ai |= A64I_LS_SXTWx;
+	} else {
+	  ai |= A64I_LS_LSLx;
+	}
+	rm = ra_alloc1(as, lref, allow);
+	rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
+	emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
+	return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      if (asm_isk32(as, ir->op2, &ofs)) {
+	ref = ir->op1;
+      } else if (asm_isk32(as, ir->op1, &ofs)) {
+	ref = ir->op2;
+      } else {
+	Reg rn = ra_alloc1(as, ir->op1, allow);
+	IRIns *irr = IR(ir->op2);
+	uint32_t m;
+	if (irr+1 == ir && !ra_used(irr) &&
+	    irr->o == IR_ADD && irref_isk(irr->op2)) {  /* Unused ADD with constant directly before the STRREF. */
+	  ofs = sizeof(GCstr) + IR(irr->op2)->i;
+	  if (emit_checkofs(ai, ofs)) {
+	    Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
+	    m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
+	    goto skipopm;
+	  }
+	}
+	m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
+	ofs = sizeof(GCstr);
+      skipopm:
+	emit_lso(as, ai, rd, rd, ofs);  /* Access relative to the sum held in rd. */
+	emit_dn(as, A64I_ADDx^m, rd, rn);  /* rd = rn + m operand. */
+	return;
+      }
+      ofs += sizeof(GCstr);
+      if (!emit_checkofs(ai, ofs)) {  /* Offset not accepted: pass it in a register. */
+	Reg rn = ra_alloc1(as, ref, allow);
+	Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+	emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
+	return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_lso(as, ai, (rd & 31), base, ofs);
+}
+
+/* Fuse FP multiply-add/sub.
+**
+** Returns 1 if one operand of ir is a fusable MUL without a register and
+** a single emit_dnma() instruction was emitted, 0 if nothing was emitted.
+** ai is used when the MUL is the left operand, air when it is the right
+** operand; the non-MUL operand becomes the addend in both cases.
+*/
+static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, ai = air, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg left = ra_alloc2(as, irm,
+			 rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* Unpack: left in the low byte, right above it. */
+    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
+    return 1;
+  }
+  return 0;
+}
+
+/* Fuse BAND + BSHL/BSHR into UBFM.
+**
+** Returns 1 if ir (a BAND with a constant mask whose left operand is a
+** fusable BSHL/BSHR by a constant) was emitted as one UBFM instruction,
+** 0 if nothing was emitted. The mask (shifted right by the shift count
+** for BSHL) must be a non-zero run of 1-bits starting at bit 0.
+*/
+static int asm_fuseandshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  lua_assert(ir->o == IR_BAND);
+  if (canfuse(as, irl) && irref_isk(ir->op2)) {
+    uint64_t mask = get_k64val(IR(ir->op2));
+    if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
+      int32_t shmask = irt_is64(irl->t) ? 63 : 31;
+      int32_t shift = (IR(irl->op2)->i & shmask);
+      int32_t imms = shift;
+      if (irl->o == IR_BSHL) {
+	mask >>= shift;
+	shift = (shmask-shift+1) & shmask;  /* (width - shift) mod width, used as immr. */
+	imms = 0;
+      }
+      if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
+	Reg dest = ra_dest(as, ir, RSET_GPR);
+	Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
+	A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
+	imms += 63 - emit_clz64(mask);  /* Presumably the index of the top mask bit -- confirm emit_clz64(). */
+	if (imms > shmask) imms = shmask;  /* Clamp to the operand width. */
+	emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
+	return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Fuse BOR(BSHL, BSHR) into EXTR/ROR.
+**
+** Returns 1 if ir (a BOR of a fusable BSHL and a fusable BSHR, both by
+** constants, in either operand order) was emitted as one EXTR instruction,
+** 0 if nothing was emitted. The two shift counts must add up to the
+** operand width (64 or 32).
+*/
+static int asm_fuseorshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  lua_assert(ir->o == IR_BOR);
+  if (canfuse(as, irl) && canfuse(as, irr) &&
+      ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
+       (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
+    if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
+      IRRef lref = irl->op1, rref = irr->op1;
+      uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
+      if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
+	uint32_t tmp2;
+	IRRef tmp1 = lref; lref = rref; rref = tmp1;
+	tmp2 = lshift; lshift = rshift; rshift = tmp2;
+      }
+      if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
+	A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
+	Reg dest = ra_dest(as, ir, RSET_GPR);
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
+	emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
+	return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function.
+**
+** ci describes the callee, args[] holds CCI_XNARGS(ci) argument refs
+** (zero refs are skipped). The call instruction is only emitted here if
+** ci->func is non-NULL. FP arguments are assigned to REGARG_FIRSTFPR..
+** REGARG_LASTFPR and other arguments to REGARG_FIRSTGPR..REGARG_LASTGPR
+** in order; once a class runs out of registers, further arguments of that
+** class are stored to consecutive 8 byte slots via emit_spstore().
+*/
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) { /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    if (ref) {
+      if (irt_isfp(ir->t)) {
+	if (fpr <= REGARG_LASTFPR) {
+	  lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
+	  ra_leftov(as, fpr, ref);
+	  fpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);
+	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
+	  ofs += 8;
+	}
+      } else {
+	if (gpr <= REGARG_LASTGPR) {
+	  lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
+	  ofs += 8;
+	}
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs.
+**
+** All of RSET_SCRATCH except a register already assigned to ir is evicted.
+** A used result is bound to RID_RET, or to RID_FPRET for FP results. With
+** CCI_CASTU64 set, an FP result is instead moved from RID_RET into an
+** allocated FPR with FMOV.
+*/
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r); /* Dest reg handled below. */
+  ra_evictset(as, drop); /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lua_assert(!irt_ispri(ir->t));
+    if (irt_isfp(ir->t)) {
+      if (ci->flags & CCI_CASTU64) {
+	Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
+	emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
+		dest, RID_RET);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+  UNUSED(ci);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{  /* Assemble a call whose target is given by op2 (a constant address or a
+  ** register value, optionally wrapped in a CARG). Calls to a constant
+  ** address are emitted by asm_gencall(); indirect calls emit BLR here and
+  ** pass a NULL ci.func so that asm_gencall() emits no call of its own.
+  */
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(ir_k64(irf)->u64);
+  } else {  /* Need a non-argument register for indirect calls. */
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
+    emit_n(as, A64I_BLR, freg);
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot.
+**
+** op2 is a constant pointer pc. The guard compares pc against the value
+** loaded from base-8 and exits on mismatch. base is then adjusted by
+** -8*delta, written with emit_setgl(..., jit_base) and stored to the
+** spill slot of REF_BASE. (Listed in execution order, assuming the emit
+** calls below run in reverse of execution order -- confirm.)
+*/
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));  /* Uses operand A of the instruction before pc. */
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  /* Need to force a spill on REF_BASE now to update the stack slot. */
+  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_CMPx, RID_TMP,
+	  ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{  /* Guarded double to int32 conversion of FPR left into the dest of ir:
+  ** convert to int, convert the result back to double and exit if it
+  ** compares unequal to left. (Execution order, assuming the emit calls
+  ** below run in reverse of execution order -- confirm.)
+  */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
+  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
+  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{  /* Adds op1 and op2 as doubles into an FP scratch register (FADDd) and
+  ** moves the result to the GPR dest with FMOV_R_S (presumably only the
+  ** low 32 bits -- confirm against the opcode definition).
+  */
+  RegSet allow = RSET_FPR;
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
+  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{  /* Assemble a type conversion of op1. The destination type is ir->t, the
+  ** source type st comes from op2 & IRCONV_SRCMASK. Cases in order: FP
+  ** result (from FP or integer), FP to integer (guarded or not), 8/16 bit
+  ** integer extension, and 32/64 bit integer widening/narrowing/casts.
+  */
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lua_assert(irt_type(ir->t) != st);
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
+	      (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      A64Ins ai = irt_isfloat(ir->t) ?
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
+	 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
+	 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
+      emit_dn(as, ai, (dest & 31), left);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      A64Ins ai = irt_is64(ir->t) ?
+	(st == IRT_NUM ?
+	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
+	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
+	(st == IRT_NUM ?
+	 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
+	 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
+      emit_dn(as, ai, dest, (left & 31));
+    }
+  } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, lref, RSET_GPR);
+    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
+		st == IRT_U8 ? A64I_UXTBw :
+		st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
+    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+    emit_dn(as, ai, dest, left);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (irt_is64(ir->t)) {
+      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
+	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      } else {  /* 32 to 64 bit sign extension. */
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	emit_dn(as, A64I_SXTW, dest, left);
+      }
+    } else {
+      if (st64) {
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
+	*/
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	emit_dm(as, A64I_MOVw, dest, left);
+      } else {  /* 32/32 bit no-op (cast). */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      }
+    }
+  }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{  /* Calls lj_strscan_num(str, tv) with tv = sp+ofs and exits the trace if
+  ** the call returns 0 (CBZ on RID_RET). ofs is the spill slot of ir if it
+  ** has one, else 0; in the latter case a used result is loaded from
+  ** [sp, 0] into the FP dest after the call.
+  */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  Reg dest = 0, tmp;
+  int destused = ra_used(ir);
+  int32_t ofs = 0;
+  ra_evictset(as, RSET_SCRATCH);
+  if (destused) {
+    if (ra_hasspill(ir->s)) {
+      ofs = sps_scale(ir->s);
+      destused = 0;  /* Result is written straight to the spill slot. */
+      if (ra_hasreg(ir->r)) {
+	ra_free(as, ir->r);
+	ra_modified(as, ir->r);
+	emit_spload(as, ir, ir->r, ofs);
+      }
+    } else {
+      dest = ra_dest(as, ir, RSET_FPR);
+    }
+  }
+  if (destused)
+    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
+  asm_guardcnb(as, A64I_CBZ, RID_RET);
+  args[0] = ir->op1; /* GCstr *str */
+  args[1] = ASMREF_TMP1; /* TValue *n  */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Store tagged value for ref at base+ofs.
+**
+** Constants are materialized as a full 64 bit TValue via lj_ir_kvalue().
+** Otherwise the value register is combined with its type tag in RID_TMP
+** and RID_TMP is stored: integers add the zero-extended 32 bit value to
+** the constant (itype << 47); other values add (itype << 47) to the value.
+*/
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+{
+  RegSet allow = rset_exclude(RSET_GPR, base);
+  IRIns *ir = IR(ref);
+  lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+  if (irref_isk(ref)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, ir);
+    emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
+  } else {
+    Reg src = ra_alloc1(as, ref, allow);
+    rset_clear(allow, src);
+    if (irt_isinteger(ir->t)) {
+      Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
+    } else {
+      Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
+    }
+  }
+}
+
+/* Get pointer to TValue.
+**
+** dest receives the address of a TValue holding the value of ref: the
+** constant itself for number constants, the spill slot for other numbers,
+** and g->tmptv (filled via asm_tvstore64()) for everything else.
+*/
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    if (irref_isk(ref)) {
+      /* Use the number constant itself as a TValue. */
+      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+    } else {
+      /* Otherwise force a spill and use the spill slot. */
+      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+    }
+  } else {
+    /* Otherwise use g->tmptv to hold the TValue. */
+    asm_tvstore64(as, dest, 0, ref);
+    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
+  }
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{  /* dest = op1 + 8*op2. For a constant index the byte offset is folded
+  ** into a single ADD with immediate if it fits emit_isk12(). A nonzero
+  ** asm_fuseabase() offset is then applied relative to IR(op1)->op1
+  ** (presumably the table object) instead of op1.
+  */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
+    if (k) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dn(as, A64I_ADDx^k, dest, base);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);  /* base + (uint32 idx << 3). */
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+**
+** merge is IR_EQ or IR_NE if a guard on the lookup result is handled here
+** (presumably a comparison merged by the caller -- confirm): IR_NE exits
+** when the chain ends without a match, IR_EQ exits when a key matches.
+** Otherwise dest is loaded with niltv on a miss (if the result is used).
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = 0, tmp = RID_TMP;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(ir->op2);
+  IRType1 kt = irkey->t;
+  uint32_t k = 0;  /* Non-zero: key constant encoded as a 12 bit immediate. */
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+  rset_clear(allow, tab);
+
+  if (!isk) {
+    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
+    rset_clear(allow, key);
+    if (!irt_isstr(kt)) {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+  } else if (irt_isnum(kt)) {
+    int64_t val = (int64_t)ir_knum(irkey)->u64;
+    if (!(k = emit_isk12(val))) {
+      key = ra_allock(as, val, allow);
+      rset_clear(allow, key);
+    }
+  } else if (!irt_ispri(kt)) {
+    if (!(k = emit_isk12(irkey->i))) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  }
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_AL);
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;  /* Reserve one instruction slot; patched with a branch below. */
+  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
+  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_EQ);
+  else
+    emit_cond_branch(as, CC_EQ, l_end);
+
+  if (irt_isnum(kt)) {
+    if (isk) {
+      /* Assumes -0.0 is already canonicalized to +0.0. */
+      if (k)
+	emit_n(as, A64I_CMPx^k, tmp);
+      else
+	emit_nm(as, A64I_CMPx, key, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
+      Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
+      rset_clear(allow, tisnum);
+      emit_nm(as, A64I_FCMPd, key, ftmp);
+      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
+      emit_cond_branch(as, CC_LO, l_next);
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
+    }
+  } else if (irt_isaddr(kt)) {
+    Reg scr;
+    if (isk) {
+      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      scr = ra_allock(as, kk, allow);
+      emit_nm(as, A64I_CMPx, scr, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      scr = ra_scratch(as, allow);
+      emit_nm(as, A64I_CMPx, tmp, scr);
+      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+    }
+    rset_clear(allow, scr);
+  } else {
+    Reg type, scr;
+    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);  /* NOTE(review): uses ir->t, not the key type kt -- verify. */
+    scr = ra_scratch(as, rset_clear(allow, type));
+    rset_clear(allow, scr);
+    emit_nm(as, A64I_CMPw, scr, type);
+    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
+  }
+
+  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;  /* Fill the reserved slot: branch on NE to the current as->mcp. */
+  if (!isk && irt_isaddr(kt)) {
+    Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);  /* tmp = key + (itype << 47). */
+    rset_clear(allow, type);
+  }
+  /* Load main position relative to tab->node into dest. */
+  khash = isk ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
+  } else {
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
+    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
+    if (isk) {
+      Reg tmphash = ra_allock(as, khash, allow);
+      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else if (irt_isstr(kt)) {
+      /* Fetch of str->hash is cheaper than ra_allock. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
+      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
+      emit_dnm(as, A64I_EORw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
+      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
+      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
+      if (irt_isnum(kt)) {
+	emit_dnm(as, A64I_ADDw, dest, dest, dest);
+	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+	emit_dm(as, A64I_MOVw, tmp, dest);
+	emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
+      } else {
+	checkmclim(as);
+	emit_dm(as, A64I_MOVw, tmp, key);
+	emit_dnm(as, A64I_EORw, dest, dest,
+		 ra_allock(as, irt_toitype(kt) << 15, allow));
+	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+	emit_dm(as, A64I_MOVx, dest, key);
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{  /* Guarded access to a hash node at a constant slot. op2 references a
+  ** key slot instruction whose op1 is the constant key and whose op2 is
+  ** the node index. The key stored in that node is loaded and compared
+  ** with the expected 64 bit pattern; the trace exits on mismatch.
+  ** dest (if needed) is set to node + index*sizeof(Node).
+  */
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  uint64_t k;
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (bigofs) {  /* Offset rejected by emit_checkofs(): address the key via dest. */
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
+  }
+  asm_guardcc(as, CC_NE);
+  if (irt_ispri(irkey->t)) {
+    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+  } else if (irt_isnum(irkey->t)) {
+    k = ir_knum(irkey)->u64;
+  } else {
+    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
+  }
+  key = ra_scratch(as, allow);
+  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
+  emit_lso(as, A64I_LDRx, key, idx, kofs);
+  if (bigofs)
+    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{  /* dest = pointer to the value of upvalue number op2 >> 8 of function
+  ** op1. For a constant function the pointer is loaded directly from the
+  ** v field of the upvalue object. Otherwise the upvalue is fetched from
+  ** the function's uvptr array; UREFC then yields the address of uv->tv,
+  ** guarded by uv->closed == 1, and any other opcode loads uv->v.
+  */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, A64I_LDRx, dest, v);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guardcc(as, CC_NE);
+      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
+      emit_opk(as, A64I_ADDx, dest, uv,
+	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
+      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    emit_lso(as, A64I_LDRx, uv, func,
+	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{  /* Emits no code; only asserts that ir has no register or spill slot
+  ** (presumably because field references are always fused -- confirm).
+  */
+  UNUSED(as); UNUSED(ir);
+  lua_assert(!ra_used(ir));
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{  /* dest = op1 + op2 + sizeof(GCstr). A constant op2 is folded together
+  ** with the header size into one ADD immediate if emit_isk12() accepts
+  ** it; otherwise two ADDs are emitted (register, then header size).
+  */
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg base = ra_alloc1(as, ir->op1, allow);
+  IRIns *irr = IR(ir->op2);
+  int32_t ofs = sizeof(GCstr);
+  uint32_t m;
+  rset_clear(allow, base);
+  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
+    emit_dn(as, A64I_ADDx^m, dest, base);
+  } else {
+    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
+    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
+  }
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+/* Map the IR result type of ir to the matching A64 load instruction.
+** I8/I16 use LDRB/LDRH with A64I_LS_S toggled, U8/U16 the plain forms,
+** NUM/FLOAT the FP loads and every other type LDRx or LDRw by width.
+*/
+static A64Ins asm_fxloadins(IRIns *ir)
+{
+  uint32_t ty = (uint32_t)irt_type(ir->t);
+  if (ty == IRT_NUM)
+    return A64I_LDRd;
+  if (ty == IRT_FLOAT)
+    return A64I_LDRs;
+  if (ty == IRT_U8)
+    return A64I_LDRB;
+  if (ty == IRT_I8)
+    return A64I_LDRB ^ A64I_LS_S;
+  if (ty == IRT_U16)
+    return A64I_LDRH;
+  if (ty == IRT_I16)
+    return A64I_LDRH ^ A64I_LS_S;
+  /* Remaining types: pick the integer load by operand width. */
+  if (irt_is64(ir->t))
+    return A64I_LDRx;
+  return A64I_LDRw;
+}
+
+/* Map the IR type of ir to the matching A64 store instruction: STRB for
+** 8 bit, STRH for 16 bit integers, the FP stores for NUM/FLOAT and STRx
+** or STRw by width for every other type.
+*/
+static A64Ins asm_fxstoreins(IRIns *ir)
+{
+  uint32_t ty = (uint32_t)irt_type(ir->t);
+  if (ty == IRT_NUM)
+    return A64I_STRd;
+  if (ty == IRT_FLOAT)
+    return A64I_STRs;
+  if (ty == IRT_I8 || ty == IRT_U8)
+    return A64I_STRB;
+  if (ty == IRT_I16 || ty == IRT_U16)
+    return A64I_STRH;
+  /* Remaining types: pick the integer store by operand width. */
+  if (irt_is64(ir->t))
+    return A64I_STRx;
+  return A64I_STRw;
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{  /* Load field op2 of object op1 into dest. With op1 == REF_NIL the load
+  ** is relative to RID_GL at offset (op2 << 2) - GG_OFS(g). A load of
+  ** IRFL_TAB_ARRAY with a nonzero asm_fuseabase() offset is replaced by
+  ** an ADD of that offset to the object pointer.
+  */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx;
+  A64Ins ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op1 == REF_NIL) {
+    idx = RID_GL;
+    ofs = (ir->op2 << 2) - GG_OFS(g);
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
+	return;
+      }
+    }
+    ofs = field_ofs[ir->op2];
+  }
+  emit_lso(as, ai, (dest & 31), idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{  /* Store op2 to a field of an object. op1 references an instruction whose
+  ** op1 is the object and whose op2 indexes field_ofs[]. Nothing is emitted
+  ** if ir->r is RID_SINK.
+  */
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
+  }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{  /* Load from the address op1 into dest (FPR for FP types, else GPR) via
+  ** asm_fusexref(). Asserts that IRXLOAD_UNALIGNED is not set in op2.
+  */
+  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir)
+{  /* Store op2 (FPR for FP types, else GPR) to the address op1 via
+  ** asm_fusexref(). Nothing is emitted if ir->r is RID_SINK.
+  */
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+		 rset_exclude(RSET_GPR, src));  /* Address regs must differ from src. */
+  }
+}
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{  /* Load a 64 bit tagged value through the reference op1, guard on its
+  ** type and, if the result is used, untag it into dest: GC addresses are
+  ** masked with LJ_GCVMASK, numbers are moved to an FPR and integers are
+  ** truncated with a 32 bit MOV. The type guard is emitted even if the
+  ** result is unused.
+  */
+  Reg idx, tmp, type;
+  int32_t ofs = 0;
+  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+	     irt_isint(ir->t));
+  if (ra_used(ir)) {
+    Reg dest = ra_dest(as, ir, allow);
+    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
+    if (irt_isaddr(ir->t)) {
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
+    } else if (irt_isnum(ir->t)) {
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
+    } else if (irt_isint(ir->t)) {
+      emit_dm(as, A64I_MOVw, dest, dest);
+    }
+  } else {
+    tmp = ra_scratch(as, gpr);
+  }
+  type = ra_scratch(as, rset_clear(gpr, tmp));
+  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  /* Always do the type check, even if the load result is unused. */
+  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
+  if (irt_type(ir->t) >= IRT_NUM) {
+    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	    ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+  } else if (irt_isaddr(ir->t)) {
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+  } else if (irt_isnil(ir->t)) {
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+  } else {
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
+  }
+  if (ofs & FUSE_REG)  /* Low 5 bits of ofs then hold an index register. */
+    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+  else
+    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{  /* Store op2 as a tagged value through the reference op1, unless ir->r
+  ** is RID_SINK. Numbers are stored directly from an FPR. Primitive types
+  ** store a constant tag pattern. Other values are first combined with
+  ** their type tag in RID_TMP (same arithmetic as in asm_tvstore64()).
+  */
+  if (ir->r != RID_SINK) {
+    RegSet allow = RSET_GPR;
+    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
+    int32_t ofs = 0;
+    if (irt_isnum(ir->t)) {
+      src = ra_alloc1(as, ir->op2, RSET_FPR);
+      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
+      if (ofs & FUSE_REG)
+	emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
+      else
+	emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
+    } else {
+      if (!irt_ispri(ir->t)) {
+	src = ra_alloc1(as, ir->op2, allow);
+	rset_clear(allow, src);
+	if (irt_isinteger(ir->t))
+	  type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
+	else
+	  type = ra_allock(as, irt_toitype(ir->t), allow);
+      } else {
+	tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);  /* Store the constant directly. */
+      }
+      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
+			   A64I_STRx);
+      if (ofs & FUSE_REG)
+	emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+      else
+	emit_lso(as, A64I_STRx, tmp, idx, ofs);
+      if (ra_hasreg(src)) {
+	if (irt_isinteger(ir->t)) {
+	  emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
+	} else {
+	  emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
+	}
+      }
+    }
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{  /* Load the stack slot at byte offset 8*(op1-2) from REF_BASE. Flags in
+  ** op2 select a type check guard (IRSLOAD_TYPECHECK) and a conversion
+  ** between number and integer (IRSLOAD_CONVERT). With a type check the
+  ** full 64 bit slot is loaded into a GPR, checked and then untagged or
+  ** moved; without one the value is loaded directly with the matching
+  ** load instruction.
+  */
+  int32_t ofs = 8*((int32_t)ir->op1-2);
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
+    lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
+    if (irt_isaddr(t)) {
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
+    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (irt_isint(t)) {
+	emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
+	/* If value is already loaded for type check, move it to FPR. */
+	if ((ir->op2 & IRSLOAD_TYPECHECK))
+	  emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
+	else
+	  dest = tmp;
+	t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+	emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
+	dest = tmp;
+	t.irt = IRT_INT;  /* Check for original type. */
+      }
+    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+      emit_dm(as, A64I_MOVw, dest, dest);
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+  rset_clear(allow, base);
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    Reg tmp;
+    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
+      tmp = dest;  /* Reuse a GPR dest for the raw slot value. */
+    } else {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
+    /* Need type check, even if the load result is unused. */
+    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
+    if (irt_type(t) >= IRT_NUM) {
+      lua_assert(irt_isinteger(t) || irt_isnum(t));
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
+    } else if (irt_isnil(t)) {
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+    } else if (irt_ispri(t)) {
+      emit_nm(as, A64I_CMPx,
+	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
+    } else {
+      Reg type = ra_scratch(as, allow);
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
+      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+    }
+    emit_lso(as, A64I_LDRx, tmp, base, ofs);
+    return;
+  }
+  if (ra_hasreg(dest)) {  /* No type check: load directly with the matching width. */
+    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
+	     (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
+	     ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{  /* Allocate a cdata object of the constant type id in op1. CNEW with
+  ** op2 != REF_NIL calls lj_cdata_newv(L, id, sz, align). Otherwise
+  ** lj_mem_newgco(L, sz+sizeof(GCcdata)) is called and the gct and ctypeid
+  ** fields are stored inline. CNEWI additionally stores the 4 or 8 byte
+  ** value op2 right after the GCcdata header.
+  */
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[4];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
+
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCcdata * */
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);
+    Reg r = ra_alloc1(as, ir->op2, allow);
+    lua_assert(sz == 4 || sz == 8);
+    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
+  }
+
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  {
+    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);  /* Small ids are loaded into X1 with MOVZ below. */
+    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
+    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
+  }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+	       ra_releasetmp(as, ASMREF_TMP1));
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{  /* Table write barrier for the table in ir->op1.
+   ** Generated code, in execution order (the emit calls below are reversed):
+   **   mark = tab->marked; if (!(mark & LJ_GC_BLACK)) goto l_end;
+   **   link = g->gc.grayagain; mark &= ~LJ_GC_BLACK; g->gc.grayagain = tab;
+   **   tab->marked = mark; tab->gclist = link;
+   */
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
+		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
+  Reg mark = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_lso(as, A64I_STRx, tab, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
+  emit_lso(as, A64I_LDRx, link, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_cond_branch(as, CC_EQ, l_end);  /* Skip barrier if black bit clear. */
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
+  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{  /* Object write barrier; only closed upvalues (IR_UREFC) are handled.
+   ** Generated code, in execution order (the emit calls below are reversed):
+   **   load val->marked and the upvalue's marked byte;
+   **   skip to l_end unless val has a LJ_GC_WHITES bit set;
+   **   skip to l_end unless the upvalue has LJ_GC_BLACK set;
+   **   call lj_gc_barrieruv(g, tv).
+   */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  RegSet allow = RSET_GPR;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(allow, obj));
+  emit_cond_branch(as, CC_EQ, l_end);
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
+  emit_cond_branch(as, CC_EQ, l_end);
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  /* obj is addressed relative to the tv field, hence the offset difference. */
+  emit_lso(as, A64I_LDRB, tmp, obj,
+     (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Emit the binary FP instruction ai: dest = op1 <ai> op2. */
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  /* ra_alloc2 packs both registers: left in bits 0-7, right in bits 8+. */
+  right = (left >> 8); left &= 255;
+  /* Register ids are masked to the 5 bit register field of the encoding. */
+  emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Emit the unary FP instruction ai: dest = <ai> op1. */
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  /* Hint the operand towards dest's register. */
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  emit_dn(as, ai, (dest & 31), (left & 31));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{  /* Assemble IR_FPMATH; op2 selects the math operation (IRFPMathOp). */
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
+  if (fpm == IRFPM_SQRT) {
+    asm_fpunary(as, ir, A64I_FSQRTd);
+  } else if (fpm <= IRFPM_TRUNC) {
+    /* Rounding ops map to FRINTM (floor), FRINTP (ceil) or FRINTZ (rest). */
+    asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
+			fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
+    return;  /* asm_fpjoin_pow() already handled this instruction. */
+  } else {
+    /* Everything else is a call; the call id is indexed by fpm. */
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+  }
+}
+
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
+{  /* Decide whether two operands should be swapped so that a constant or
+   ** a fusable operand ends up on the right. Returns 1 to swap, else 0.
+   ** Fusable here means: a shift (IR_BSHL..IR_BSAR), an ADD of a value to
+   ** itself, or a sign-extending int -> i64 CONV.
+   */
+  IRIns *ir;
+  if (irref_isk(rref))
+    return 0;  /* Don't swap constants to the left. */
+  if (irref_isk(lref))
+    return 1;  /* But swap constants to the right. */
+  ir = IR(rref);
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+      (ir->o == IR_ADD && ir->op1 == ir->op2) ||
+      (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
+    return 0;  /* Don't swap fusable operands to the left. */
+  ir = IR(lref);
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+      (ir->o == IR_ADD && ir->op1 == ir->op2) ||
+      (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
+    return 1;  /* But swap fusable operands to the right. */
+  return 0;  /* Otherwise don't swap. */
+}
+
+static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Emit the binary integer instruction ai for ir. The right operand may
+   ** be fused into the instruction by asm_fuseopm().
+   */
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left, dest = ra_dest(as, ir, RSET_GPR);
+  uint32_t m;
+  /* Operands are never swapped for SUB (with or without the S bit). */
+  if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
+    IRRef tmp = lref; lref = rref; rref = tmp;
+  }
+  left = ra_hintalloc(as, lref, dest, RSET_GPR);
+  if (irt_is64(ir->t)) ai |= A64I_X;  /* Select the 64 bit form. */
+  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
+    asm_guardcc(as, CC_VS);  /* Exit the trace on signed overflow. */
+    ai |= A64I_S;  /* Flag-setting form is needed for the guard. */
+  }
+  emit_dn(as, ai^m, dest, left);
+}
+
+static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Like asm_intop(), but merges a following compare with zero: if
+   ** asm_intcomp() marked its compare as droppable (flagmcp == mcp), that
+   ** instruction is skipped and the flag-setting form of ai is used.
+   */
+  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
+    as->flagmcp = NULL;
+    as->mcp++;  /* Discard the most recently emitted instruction. */
+    ai |= A64I_S;
+  }
+  asm_intop(as, ir, ai);
+}
+
+static void asm_intneg(ASMState *as, IRIns *ir)
+{  /* Integer negation: dest = -op1, 32 or 64 bit depending on ir->t. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
+}
+
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k.
+**
+** Integer multiply. For the guarded variant (IR_MULOV) the generated code,
+** in execution order (the emit calls below are reversed), is:
+**   dest = SMULL(right, left)          -- full 64 bit product
+**   tmp = dest >> 32 (arithmetic)      -- high word of the product
+**   compare tmp with (w)dest ASR 31    -- sign of the low word
+**   zero-extend the low word of dest
+**   exit the trace if the comparison was not equal (overflow).
+*/
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  if (irt_isguard(ir->t)) {  /* IR_MULOV */
+    asm_guardcc(as, CC_NE);
+    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
+    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
+    emit_dnm(as, A64I_SMULL, dest, right, left);
+  } else {
+    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
+  }
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{  /* Assemble ADD (and ADDOV via the asm_addov macro). */
+  if (irt_isnum(ir->t)) {
+    /* Try a fused multiply-add first, else fall back to a plain FADD. */
+    if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
+      asm_fparith(as, ir, A64I_FADDd);
+    return;
+  }
+  asm_intop_s(as, ir, A64I_ADDw);  /* Integer add; may merge a cmp #0. */
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{  /* Assemble SUB (and SUBOV via the asm_subov macro). */
+  if (irt_isnum(ir->t)) {
+    /* Try a fused multiply-subtract first, else fall back to FSUB. */
+    if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
+      asm_fparith(as, ir, A64I_FSUBd);
+    return;
+  }
+  asm_intop_s(as, ir, A64I_SUBw);  /* Integer sub; may merge a cmp #0. */
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{  /* Assemble MUL (and MULOV via the asm_mulov macro). */
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, A64I_FMULd);
+    return;
+  }
+  asm_intmul(as, ir);  /* Handles both the plain and the guarded form. */
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{  /* Assemble DIV: FDIV for numbers; with the FFI enabled, non-number
+   ** types call the signed or unsigned 64 bit division helper.
+   */
+#if LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, A64I_FDIVd);
+}
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{  /* Assemble POW: always a call. With the FFI enabled, non-number types
+   ** call the signed or unsigned 64 bit helper; otherwise lj_vm_powi.
+   */
+#if LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_powi);
+}
+
+#define asm_addov(as, ir)	asm_add(as, ir)  /* Guard is added in asm_intop. */
+#define asm_subov(as, ir)	asm_sub(as, ir)  /* Guard is added in asm_intop. */
+#define asm_mulov(as, ir)	asm_mul(as, ir)  /* Guard is added in asm_intmul. */
+
+#define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)  /* C call. */
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)  /* C call. */
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{  /* Assemble MOD: always a call. With the FFI enabled, non-int types
+   ** call the signed or unsigned 64 bit helper; otherwise lj_vm_modi.
+   */
+#if LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{  /* Assemble NEG: FNEG for numbers, integer negation otherwise. */
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, A64I_FNEGd);
+    return;
+  }
+  asm_intneg(as, ir);
+}
+
+static void asm_band(ASMState *as, IRIns *ir)
+{  /* Assemble BAND. First tries to fuse with a shift (asm_fuseandshift);
+   ** otherwise emits AND, or ANDS if a following cmp #0 can be dropped.
+   */
+  A64Ins ai = A64I_ANDw;
+  if (asm_fuseandshift(as, ir))
+    return;
+  if (as->flagmcp == as->mcp) {
+    /* Try to drop cmp r, #0. */
+    as->flagmcp = NULL;
+    as->mcp++;  /* Discard the most recently emitted instruction. */
+    ai = A64I_ANDSw;
+  }
+  asm_intop(as, ir, ai);
+}
+
+static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Shared code for BOR and BXOR (ai is the base instruction). If one
+   ** operand is a fusable BNOT and the other is not a constant, the BNOT
+   ** is folded in by setting A64I_ON and using the BNOT's own operand as
+   ** the right-hand side (presumably giving ORN/EON -- confirm in the
+   ** instruction definitions).
+   */
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irl = IR(lref), *irr = IR(rref);
+  if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
+      (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
+    Reg left, dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m;
+    if (irl->o == IR_BNOT) {  /* Move the BNOT to the right-hand side. */
+      IRRef tmp = lref; lref = rref; rref = tmp;
+    }
+    left = ra_alloc1(as, lref, RSET_GPR);
+    ai |= A64I_ON;
+    if (irt_is64(ir->t)) ai |= A64I_X;
+    /* Fuse the operand of the BNOT, not the BNOT result itself. */
+    m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
+    emit_dn(as, ai^m, dest, left);
+  } else {
+    asm_intop(as, ir, ai);
+  }
+}
+
+static void asm_bor(ASMState *as, IRIns *ir)
+{  /* Assemble BOR: try shift fusion first, else the shared OR/XOR path. */
+  if (asm_fuseorshift(as, ir))
+    return;
+  asm_borbxor(as, ir, A64I_ORRw);
+}
+
+#define asm_bxor(as, ir)	asm_borbxor(as, ir, A64I_EORw)  /* No shift fusion. */
+
+static void asm_bnot(ASMState *as, IRIns *ir)
+{  /* Assemble BNOT as MVN; the operand is encoded via asm_fuseopm(). */
+  A64Ins ai = A64I_MVNw;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+  if (irt_is64(ir->t)) ai |= A64I_X;  /* Select the 64 bit form. */
+  emit_d(as, ai^m, dest);
+}
+
+static void asm_bswap(ASMState *as, IRIns *ir)
+{  /* Assemble BSWAP as a REV instruction, 32 or 64 bit as per ir->t. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
+{  /* Assemble a shift or rotate. ai is the 32 bit immediate-form opcode
+   ** (UBFM/SBFM/EXTR, see the macros below) and sh the kind of shift.
+   ** Constant shift counts use the immediate form, variable counts use
+   ** the shift-by-register form selected by sh.
+   */
+  int32_t shmask = irt_is64(ir->t) ? 63 : 31;
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    Reg left, dest = ra_dest(as, ir, RSET_GPR);
+    int32_t shift = (IR(ir->op2)->i & shmask);
+    IRIns *irl = IR(ir->op1);
+    /* Switch to the 64 bit encoding by adding the x/w opcode difference. */
+    if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
+
+    /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
+    if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
+      if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+	int32_t shift2 = (IR(irl->op2)->i & shmask);
+	shift = ((shift - shift2) & shmask);
+	shmask -= shift2;
+	ir = irl;  /* The source operand is now the operand of the BSHL. */
+      }
+    }
+
+    left = ra_alloc1(as, ir->op1, RSET_GPR);
+    switch (sh) {
+    case A64SH_LSL:
+      emit_dn(as, ai | A64F_IMMS(shmask-shift) |
+		  A64F_IMMR((shmask-shift+1)&shmask), dest, left);
+      break;
+    case A64SH_LSR: case A64SH_ASR:
+      emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
+      break;
+    case A64SH_ROR:
+      /* Rotate: both source operands of the EXTR are the same register. */
+      emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
+      break;
+    }
+  } else {  /* Variable-length shifts. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
+  }
+}
+
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)  /* Not expected to reach here. */
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
+{  /* Integer MIN/MAX: compare left with right, then select left if cc
+   ** holds, else right. Both instructions are the 32 bit (w) forms.
+   */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
+  emit_nm(as, A64I_CMPw, left, right);  /* Executes before the CSEL. */
+}
+
+static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
+{  /* FP MIN/MAX: FCMP left with right, then FCSEL left if fcc holds,
+   ** else right.
+   */
+  Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  /* ra_alloc2 packs both registers: left in bits 0-7, right in bits 8+. */
+  right = ((left >> 8) & 31); left &= 31;
+  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
+  emit_nm(as, A64I_FCMPd, left, right);  /* Executes before the FCSEL. */
+}
+
+static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
+{  /* Dispatch MIN/MAX by type: fcc is used for numbers, cc for integers. */
+  if (irt_isnum(ir->t))
+    asm_fpmin_max(as, ir, fcc);
+  else
+    asm_intmin_max(as, ir, cc);
+}
+
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)  /* int cc, FP cc */
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)  /* int cc, FP cc */
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Map of comparisons to flags. ORDER IR.
+** Indexed by the IR opcode. The low nibble is the condition code used for
+** integer comparisons, the high nibble the one used for FP comparisons.
+** Both are the condition under which the guard fails (e.g. LT -> CC_GE).
+** An 'x' in the 'FP swp' column marks the ops for which asm_fpcomp()
+** swaps the operands of the FP compare.
+*/
+static const uint8_t asm_compmap[IR_ABC+1] = {
+  /* op  FP swp  int cc   FP cc */
+  /* LT       */ CC_GE + (CC_HS << 4),
+  /* GE    x  */ CC_LT + (CC_HI << 4),
+  /* LE       */ CC_GT + (CC_HI << 4),
+  /* GT    x  */ CC_LE + (CC_HS << 4),
+  /* ULT   x  */ CC_HS + (CC_LS << 4),
+  /* UGE      */ CC_LO + (CC_LO << 4),
+  /* ULE   x  */ CC_HI + (CC_LO << 4),
+  /* UGT      */ CC_LS + (CC_LS << 4),
+  /* EQ       */ CC_NE + (CC_NE << 4),
+  /* NE       */ CC_EQ + (CC_EQ << 4),
+  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
+};
+
+/* FP comparisons.
+** Emits an FCMP (or a compare with zero, FCMPZ, if op2 is the constant
+** +0 and no operand swap is needed) followed by a guard using the FP
+** condition code from asm_compmap.
+*/
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+  Reg left, right;
+  A64Ins ai;
+  /* Bit trick on the opcode: 1 for the ops marked 'x' in asm_compmap. */
+  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
+  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
+    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
+    right = 0;
+    ai = A64I_FCMPZd;
+  } else {
+    left = ra_alloc2(as, ir, RSET_FPR);  /* Packed: op1 | (op2 << 8). */
+    if (swp) {
+      right = (left & 31); left = ((left >> 8) & 31);
+    } else {
+      right = ((left >> 8) & 31); left &= 31;
+    }
+    ai = A64I_FCMPd;
+  }
+  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+  emit_nm(as, ai, left, right);
+}
+
+/* Integer comparisons.
+** Emits a compare plus guard for an integer/address comparison. Special
+** cases for a comparison against the constant zero:
+** - If the left operand is an otherwise unused BAND from the directly
+**   preceding instruction, emit TBZ/TBNZ (single-bit constant mask with
+**   EQ/NE) or a TST instead of AND + CMP.
+** - EQ/NE against zero becomes CBZ/CBNZ.
+** - Otherwise, if the left operand is the directly preceding instruction
+**   and the condition permits, the CMP is marked in as->flagmcp so the
+**   producer can drop it and set the flags itself.
+*/
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+  A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
+  A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left;
+  uint32_t m;
+  int cmpprev0 = 0;
+  lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
+	     irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
+  if (asm_swapops(as, lref, rref)) {
+    IRRef tmp = lref; lref = rref; rref = tmp;
+    /* Swapped operands require the mirrored condition. */
+    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
+    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
+  }
+  oldcc = cc;  /* Condition before the PL/MI substitution below. */
+  if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
+    IRIns *irl = IR(lref);
+    if (cc == CC_GE) cc = CC_PL;
+    else if (cc == CC_LT) cc = CC_MI;
+    else if (cc > CC_NE) goto nocombine;  /* Other conds don't work with tst. */
+    cmpprev0 = (irl+1 == ir);  /* Left operand is the previous instruction? */
+    /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
+    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
+      IRRef blref = irl->op1, brref = irl->op2;
+      uint32_t m2 = 0;
+      Reg bleft;
+      if (asm_swapops(as, blref, brref)) {
+	Reg tmp = blref; blref = brref; brref = tmp;
+      }
+      if (irref_isk(brref)) {
+	uint64_t k = get_k64val(IR(brref));
+	/* Non-zero power of two: test that single bit directly. */
+	if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
+	  asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
+		       ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
+	  return;
+	}
+	m2 = emit_isk13(k, irt_is64(irl->t));  /* 0 if k is not encodable. */
+      }
+      bleft = ra_alloc1(as, blref, RSET_GPR);
+      ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
+      if (!m2)
+	m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
+      asm_guardcc(as, cc);
+      emit_n(as, ai^m2, bleft);
+      return;
+    }
+    if (cc == CC_EQ || cc == CC_NE) {
+      /* Combine cmp-bcc into cbz/cbnz. */
+      ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
+      if (irt_is64(ir->t)) ai |= A64I_X;
+      asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
+      return;
+    }
+  }
+nocombine:
+  left = ra_alloc1(as, lref, RSET_GPR);
+  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+  asm_guardcc(as, cc);
+  emit_n(as, ai^m, left);
+  /* Signed comparison with zero and referencing previous ins? */
+  if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
+    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
+}
+
+static void asm_comp(ASMState *as, IRIns *ir)
+{  /* Dispatch a comparison by result type: FP for numbers, else integer. */
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)  /* EQ/NE share the same path. */
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op.
+** This target is 64 bit, so this is only a stub that asserts if reached.
+*/
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on 64 bit. */
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* Profiler check: load the byte g->hookmask, test the HOOK_PROFILE bit
+   ** and exit the trace if it is set (guard on CC_NE).
+   */
+  uint32_t k = emit_isk13(HOOK_PROFILE, 0);
+  lua_assert(k != 0);  /* The mask must be encodable as an immediate. */
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, A64I_TSTw^k, RID_TMP);
+  emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback.
+** Generated code, in execution order (the emit calls below are reversed):
+**   [save RID_RET to [sp] if it must serve as a temporary]
+**   tmp = g->cur_L
+**   [reload pbase from the parent's spill slot]
+**   tmp = tmp->maxstack
+**   tmp = tmp - pbase
+**   cmp tmp, #8*topslot
+**   b.ls -> exit stub for exitno
+**   [restore RID_RET from [sp]]
+** irp is the parent's BASE instruction for side traces, or NULL (then
+** RID_BASE is used). allow is the set of registers that may be clobbered.
+*/
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  Reg pbase;
+  uint32_t k;
+  if (irp) {
+    if (!ra_hasspill(irp->s)) {
+      pbase = irp->r;
+      lua_assert(ra_hasreg(pbase));
+    } else if (allow) {
+      pbase = rset_pickbot(allow);
+    } else {
+      pbase = RID_RET;
+      emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
+    }
+  } else {
+    pbase = RID_BASE;
+  }
+  emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
+  k = emit_isk12((8*topslot));
+  lua_assert(k);  /* The byte offset must be encodable as an immediate. */
+  emit_n(as, A64I_CMPx^k, RID_TMP);
+  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
+  emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
+	   (int32_t)offsetof(lua_State, maxstack));
+  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
+    if (ra_hasspill(irp->s))
+      emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
+    emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
+    if (ra_hasspill(irp->s) && !allow)
+      emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
+  } else {
+    emit_getgl(as, RID_TMP, cur_L);
+  }
+}
+
+/* Restore Lua stack from on-trace state.
+** Walks the entries of the snapshot and emits a store relative to RID_BASE
+** for each slot, except those flagged SNAP_NORESTORE. Numbers are stored
+** directly from an FPR, everything else goes through asm_tvstore64().
+*/
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Byte offset from BASE. */
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
+    } else {
+      asm_tvstore64(as, RID_BASE, ofs, ref);
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps.
+** Generated code, in execution order (the emit calls below are reversed):
+**   tmp1 = g
+**   RID_TMP = g->gc.total; tmp2 = g->gc.threshold
+**   cmp RID_TMP, tmp2; b.ls l_end      -- skip the GC step
+**   tmp2 = as->gcsteps
+**   call lj_gc_step_jit(g, steps)
+**   cbnz RID_RET -> exit the trace
+** l_end:
+** Resets as->gcsteps to 0 afterwards.
+*/
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp1, tmp2;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp2, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_cond_branch(as, CC_LS, l_end);
+  emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
+  emit_lso(as, A64I_LDRx, tmp2, tmp1,
+	   (int32_t)offsetof(global_State, gc.threshold));
+  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
+	   (int32_t)offsetof(global_State, gc.total));
+  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch.
+** Patches the branch at the end of the trace (relative to as->mctop) so it
+** jumps back to the loop start at as->mcp. For an inverted loop the
+** conditional branch at p[-2] gets its offset field filled in: 14 bits if
+** it matches the tbz/tbnz opcode pattern, else 19 bits. Otherwise an
+** unconditional branch is written to p[-1].
+*/
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  if (as->loopinv) {  /* Inverted loop branch? */
+    uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
+    ptrdiff_t delta = target - (p - 2);  /* Offset in instructions. */
+    /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
+    p[-2] |= ((uint32_t)delta & mask) << 5;
+  } else {
+    ptrdiff_t delta = target - (p - 1);  /* Offset in instructions. */
+    p[-1] = A64I_B | A64F_S26(delta);
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Reload L register from g->cur_L.
+** Only emits the load if the ASMREF_L pseudo-instruction is actually used.
+*/
+static void asm_head_lreg(ASMState *as)
+{
+  IRIns *ir = IR(ASMREF_L);
+  if (ra_used(ir)) {
+    Reg r = ra_dest(as, ir, RSET_GPR);
+    emit_getgl(as, r, cur_L);
+    ra_evictk(as);
+  }
+}
+
+/* Coalesce BASE register for a root trace.
+** REF_BASE is spilled first if its register was modified on trace or the
+** instruction is marked; then its destination is forced to RID_BASE.
+*/
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir;
+  asm_head_lreg(as);
+  ir = IR(REF_BASE);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
+  ra_destreg(as, ir, RID_BASE);
+}
+
+/* Coalesce BASE register for a side trace.
+** irp is the parent's BASE instruction. If the parent spilled BASE, any
+** register from allow is picked for it. Otherwise BASE is forced into the
+** parent's register, restoring whatever value currently occupies it.
+** Returns allow with the register used for BASE removed.
+*/
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir;
+  asm_head_lreg(as);
+  ir = IR(REF_BASE);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
+  if (ra_hasspill(irp->s)) {
+    rset_clear(allow, ra_dest(as, ir, allow));
+  } else {
+    Reg r = irp->r;
+    lua_assert(ra_hasreg(r));
+    rset_clear(allow, r);
+    /* Register r is held by another value: restore that one first. */
+    if (r != ir->r && !rset_test(as->freeset, r))
+      ra_restore(as, regcost_ref(as->cost[r]));
+    ra_destreg(as, ir, r);
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code.
+** Fills in the two instruction slots at the end of the trace: the stack
+** pointer adjustment and the exit branch. lnk is the linked trace number,
+** or 0 to exit to the interpreter via lj_vm_exit_interp.
+*/
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target;
+  /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
+  int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
+  if (spadj == 0) {
+    /* No adjustment needed: shrink the trace by one slot (now a NOP). */
+    *--p = A64I_LE(A64I_NOP);
+    as->mctop = p;
+  } else {
+    /* Patch stack adjustment. */
+    uint32_t k = emit_isk12(spadj);
+    lua_assert(k);  /* The adjustment must be encodable as an immediate. */
+    p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+  }
+  /* Patch exit branch. */
+  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  p[-1] = A64I_B | A64F_S26((target-p)+1);  /* Offset relative to p-1. */
+}
+
+/* Prepare tail of code.
+** Reserves the last slot(s) below as->mctop: one for the exit branch and,
+** for traces without a loop, one more for the stack pointer adjustment
+** that asm_tail_fixup() fills in later.
+*/
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+  *p = 0;  /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments.
+** Counts the arguments that do not fit into the REGARG_NUMGPR/NUMFPR
+** argument registers and reserves two spill slots for each of them by
+** raising as->evenspill if needed. Always returns a hint for RID_RET.
+*/
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
+  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      if (nfpr > 0) nfpr--; else nslots += 2;  /* FP arg: FPR or stack. */
+    } else {
+      if (ngpr > 0) ngpr--; else nslots += 2;  /* Other arg: GPR or stack. */
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{  /* Target-specific setup: one exit stub per snapshot, plus one for side traces. */
+  /* May need extra exit for asm_stack_check on side traces. */
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+#if LJ_BE
+/* ARM64 instructions are always little-endian. Swap for ARM64BE.
+** Byte-swaps every instruction word in the size bytes starting at mcode.
+*/
+static void asm_mcode_fixup(MCode *mcode, MSize size)
+{
+  MCode *pe = (MCode *)((char *)mcode + size);  /* End of the range. */
+  while (mcode < pe) {
+    MCode ins = *mcode;
+    *mcode++ = lj_bswap(ins);
+  }
+}
+#define LJ_TARGET_MCODE_FIXUP	1
+#endif
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target.
+** Scans the machine code of trace T for branches (b.cc, b, cbz/cbnz,
+** tbz/tbnz) whose destination is the exit stub of exitno and redirects
+** them to target when the offset fits the branch's range. The branch in
+** the exit stub itself is always redirected, which covers the branches
+** that were out of range. Finally syncs the patched code range.
+** NOTE(review): the stores inside the loop wrap the instruction in
+** A64I_LE(), the final store to the exit stub does not -- confirm this is
+** intended for big-endian builds.
+*/
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *cstart = NULL;  /* First patched instruction, for the sync. */
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+  MCode *px = exitstub_trace_addr(T, exitno);
+  /* Note: this assumes a trace exit is only ever patched once. */
+  for (; p < pe; p++) {
+    /* Look for exitstub branch, replace with branch to target. */
+    ptrdiff_t delta = target - p;
+    MCode ins = A64I_LE(*p);
+    if ((ins & 0xff000000u) == 0x54000000u &&
+	((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+      /* Patch bcc, if within range. */
+      if (A64F_S_OK(delta, 19)) {
+	*p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+	if (!cstart) cstart = p;
+      }
+    } else if ((ins & 0xfc000000u) == 0x14000000u &&
+	       ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
+      /* Patch b. */
+      lua_assert(A64F_S_OK(delta, 26));
+      *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
+      if (!cstart) cstart = p;
+    } else if ((ins & 0x7e000000u) == 0x34000000u &&
+	       ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+      /* Patch cbz/cbnz, if within range. */
+      if (A64F_S_OK(delta, 19)) {
+	*p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+	if (!cstart) cstart = p;
+      }
+    } else if ((ins & 0x7e000000u) == 0x36000000u &&
+	       ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
+      /* Patch tbz/tbnz, if within range. */
+      if (A64F_S_OK(delta, 14)) {
+	*p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
+	if (!cstart) cstart = p;
+      }
+    }
+  }
+  {  /* Always patch long-range branch in exit stub itself. */
+    ptrdiff_t delta = target - px;
+    lua_assert(A64F_S_OK(delta, 26));
+    *px = A64I_B | A64F_S26(delta);
+    if (!cstart) cstart = px;
+  }
+  lj_mcode_sync(cstart, px+1);
+  lj_mcode_patch(J, mcarea, 1);
+}
+

Diferenças do arquivo suprimidas por serem muito extensas
+ 468 - 147
luajit.mod/luajit/src/lj_asm_mips.h


Diferenças do arquivo suprimidas por serem muito extensas
+ 298 - 192
luajit.mod/luajit/src/lj_asm_ppc.h


Diferenças do arquivo suprimidas por serem muito extensas
+ 373 - 163
luajit.mod/luajit/src/lj_asm_x86.h


+ 4 - 0
luajit.mod/luajit/src/lj_bc.h

@@ -89,6 +89,8 @@
   _(ISFC,	dst,	___,	var,	___) \
   _(ISFC,	dst,	___,	var,	___) \
   _(IST,	___,	___,	var,	___) \
   _(IST,	___,	___,	var,	___) \
   _(ISF,	___,	___,	var,	___) \
   _(ISF,	___,	___,	var,	___) \
+  _(ISTYPE,	var,	___,	lit,	___) \
+  _(ISNUM,	var,	___,	lit,	___) \
   \
   \
   /* Unary ops. */ \
   /* Unary ops. */ \
   _(MOV,	dst,	___,	var,	___) \
   _(MOV,	dst,	___,	var,	___) \
@@ -143,10 +145,12 @@
   _(TGETV,	dst,	var,	var,	index) \
   _(TGETV,	dst,	var,	var,	index) \
   _(TGETS,	dst,	var,	str,	index) \
   _(TGETS,	dst,	var,	str,	index) \
   _(TGETB,	dst,	var,	lit,	index) \
   _(TGETB,	dst,	var,	lit,	index) \
+  _(TGETR,	dst,	var,	var,	index) \
   _(TSETV,	var,	var,	var,	newindex) \
   _(TSETV,	var,	var,	var,	newindex) \
   _(TSETS,	var,	var,	str,	newindex) \
   _(TSETS,	var,	var,	str,	newindex) \
   _(TSETB,	var,	var,	lit,	newindex) \
   _(TSETB,	var,	var,	lit,	newindex) \
   _(TSETM,	base,	___,	num,	newindex) \
   _(TSETM,	base,	___,	num,	newindex) \
+  _(TSETR,	var,	var,	var,	newindex) \
   \
   \
   /* Calls and vararg handling. T = tail call. */ \
   /* Calls and vararg handling. T = tail call. */ \
   _(CALLM,	base,	lit,	lit,	call) \
   _(CALLM,	base,	lit,	lit,	call) \

+ 4 - 2
luajit.mod/luajit/src/lj_bcdump.h

@@ -36,14 +36,15 @@
 /* If you perform *any* kind of private modifications to the bytecode itself
 /* If you perform *any* kind of private modifications to the bytecode itself
 ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 */
 */
-#define BCDUMP_VERSION		1
+#define BCDUMP_VERSION		2
 
 
 /* Compatibility flags. */
 /* Compatibility flags. */
 #define BCDUMP_F_BE		0x01
 #define BCDUMP_F_BE		0x01
 #define BCDUMP_F_STRIP		0x02
 #define BCDUMP_F_STRIP		0x02
 #define BCDUMP_F_FFI		0x04
 #define BCDUMP_F_FFI		0x04
+#define BCDUMP_F_FR2		0x08
 
 
-#define BCDUMP_F_KNOWN		(BCDUMP_F_FFI*2-1)
+#define BCDUMP_F_KNOWN		(BCDUMP_F_FR2*2-1)
 
 
 /* Type codes for the GC constants of a prototype. Plus length for strings. */
 /* Type codes for the GC constants of a prototype. Plus length for strings. */
 enum {
 enum {
@@ -61,6 +62,7 @@ enum {
 
 
 LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
 LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
 		       void *data, int strip);
 		       void *data, int strip);
+LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
 LJ_FUNC GCproto *lj_bcread(LexState *ls);
 LJ_FUNC GCproto *lj_bcread(LexState *ls);
 
 
 #endif
 #endif

+ 62 - 81
luajit.mod/luajit/src/lj_bcread.c

@@ -9,6 +9,7 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
@@ -20,6 +21,7 @@
 #include "lj_lex.h"
 #include "lj_lex.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 
 
 /* Reuse some lexer fields for our own purposes. */
 /* Reuse some lexer fields for our own purposes. */
 #define bcread_flags(ls)	ls->level
 #define bcread_flags(ls)	ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
   const char *name = ls->chunkarg;
   const char *name = ls->chunkarg;
   if (*name == BCDUMP_HEAD1) name = "(binary)";
   if (*name == BCDUMP_HEAD1) name = "(binary)";
   else if (*name == '@' || *name == '=') name++;
   else if (*name == '@' || *name == '=') name++;
-  lj_str_pushf(L, "%s: %s", name, err2msg(em));
+  lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
   lj_err_throw(L, LUA_ERRSYNTAX);
   lj_err_throw(L, LUA_ERRSYNTAX);
 }
 }
 
 
-/* Resize input buffer. */
-static void bcread_resize(LexState *ls, MSize len)
-{
-  if (ls->sb.sz < len) {
-    MSize sz = ls->sb.sz * 2;
-    while (len > sz) sz = sz * 2;
-    lj_str_resizebuf(ls->L, &ls->sb, sz);
-    /* Caveat: this may change ls->sb.buf which may affect ls->p. */
-  }
-}
-
-/* Refill buffer if needed. */
+/* Refill buffer. */
 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
 {
 {
   lua_assert(len != 0);
   lua_assert(len != 0);
-  if (len > LJ_MAX_MEM || ls->current < 0)
+  if (len > LJ_MAX_BUF || ls->c < 0)
     bcread_error(ls, LJ_ERR_BCBAD);
     bcread_error(ls, LJ_ERR_BCBAD);
   do {
   do {
     const char *buf;
     const char *buf;
-    size_t size;
-    if (ls->n) {  /* Copy remainder to buffer. */
-      if (ls->sb.n) {  /* Move down in buffer. */
-	lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
-	if (ls->n != ls->sb.n)
-	  memmove(ls->sb.buf, ls->p, ls->n);
+    size_t sz;
+    char *p = sbufB(&ls->sb);
+    MSize n = (MSize)(ls->pe - ls->p);
+    if (n) {  /* Copy remainder to buffer. */
+      if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
+	lua_assert(ls->pe == sbufP(&ls->sb));
+	if (ls->p != p) memmove(p, ls->p, n);
       } else {  /* Copy from buffer provided by reader. */
       } else {  /* Copy from buffer provided by reader. */
-	bcread_resize(ls, len);
-	memcpy(ls->sb.buf, ls->p, ls->n);
+	p = lj_buf_need(&ls->sb, len);
+	memcpy(p, ls->p, n);
       }
       }
-      ls->p = ls->sb.buf;
+      ls->p = p;
+      ls->pe = p + n;
     }
     }
-    ls->sb.n = ls->n;
-    buf = ls->rfunc(ls->L, ls->rdata, &size);  /* Get more data from reader. */
-    if (buf == NULL || size == 0) {  /* EOF? */
+    setsbufP(&ls->sb, p + n);
+    buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
+    if (buf == NULL || sz == 0) {  /* EOF? */
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
-      ls->current = -1;  /* Only bad if we get called again. */
+      ls->c = -1;  /* Only bad if we get called again. */
       break;
       break;
     }
     }
-    if (ls->sb.n) {  /* Append to buffer. */
-      MSize n = ls->sb.n + (MSize)size;
-      bcread_resize(ls, n < len ? len : n);
-      memcpy(ls->sb.buf + ls->sb.n, buf, size);
-      ls->n = ls->sb.n = n;
-      ls->p = ls->sb.buf;
+    if (n) {  /* Append to buffer. */
+      n += (MSize)sz;
+      p = lj_buf_need(&ls->sb, n < len ? len : n);
+      memcpy(sbufP(&ls->sb), buf, sz);
+      setsbufP(&ls->sb, p + n);
+      ls->p = p;
+      ls->pe = p + n;
     } else {  /* Return buffer provided by reader. */
     } else {  /* Return buffer provided by reader. */
-      ls->n = (MSize)size;
       ls->p = buf;
       ls->p = buf;
+      ls->pe = buf + sz;
     }
     }
-  } while (ls->n < len);
+  } while (ls->p + len > ls->pe);
 }
 }
 
 
 /* Need a certain number of bytes. */
 /* Need a certain number of bytes. */
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 {
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 1);
     bcread_fill(ls, len, 1);
 }
 }
 
 
 /* Want to read up to a certain number of bytes, but may need less. */
 /* Want to read up to a certain number of bytes, but may need less. */
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 {
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 0);
     bcread_fill(ls, len, 0);
 }
 }
 
 
-#define bcread_dec(ls)		check_exp(ls->n > 0, ls->n--)
-#define bcread_consume(ls, len)	check_exp(ls->n >= (len), ls->n -= (len))
-
 /* Return memory block from buffer. */
 /* Return memory block from buffer. */
-static uint8_t *bcread_mem(LexState *ls, MSize len)
+static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
 {
 {
   uint8_t *p = (uint8_t *)ls->p;
   uint8_t *p = (uint8_t *)ls->p;
-  bcread_consume(ls, len);
-  ls->p = (char *)p + len;
+  ls->p += len;
+  lua_assert(ls->p <= ls->pe);
   return p;
   return p;
 }
 }
 
 
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
 /* Read byte from buffer. */
 /* Read byte from buffer. */
 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
 {
 {
-  bcread_dec(ls);
+  lua_assert(ls->p < ls->pe);
   return (uint32_t)(uint8_t)*ls->p++;
   return (uint32_t)(uint8_t)*ls->p++;
 }
 }
 
 
 /* Read ULEB128 value from buffer. */
 /* Read ULEB128 value from buffer. */
-static uint32_t bcread_uleb128(LexState *ls)
+static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
 {
 {
-  const uint8_t *p = (const uint8_t *)ls->p;
-  uint32_t v = *p++;
-  if (LJ_UNLIKELY(v >= 0x80)) {
-    int sh = 0;
-    v &= 0x7f;
-    do {
-     v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
-   } while (*p++ >= 0x80);
-  }
-  bcread_dec(ls);
-  ls->p = (char *)p;
+  uint32_t v = lj_buf_ruleb128(&ls->p);
+  lua_assert(ls->p <= ls->pe);
   return v;
   return v;
 }
 }
 
 
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
     v &= 0x3f;
     v &= 0x3f;
     do {
     do {
      v |= ((*p & 0x7f) << (sh += 7));
      v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
    } while (*p++ >= 0x80);
    } while (*p++ >= 0x80);
   }
   }
-  bcread_dec(ls);
   ls->p = (char *)p;
   ls->p = (char *)p;
+  lua_assert(ls->p <= ls->pe);
   return v;
   return v;
 }
 }
 
 
@@ -212,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
     o->u32.hi = bcread_uleb128(ls);
     o->u32.hi = bcread_uleb128(ls);
   } else {
   } else {
     lua_assert(tp <= BCDUMP_KTAB_TRUE);
     lua_assert(tp <= BCDUMP_KTAB_TRUE);
-    setitype(o, ~tp);
+    setpriV(o, ~tp);
   }
   }
 }
 }
 
 
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
 }
 }
 
 
 /* Read a prototype. */
 /* Read a prototype. */
-static GCproto *bcread_proto(LexState *ls)
+GCproto *lj_bcread_proto(LexState *ls)
 {
 {
   GCproto *pt;
   GCproto *pt;
   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
   MSize ofsk, ofsuv, ofsdbg;
   MSize ofsk, ofsuv, ofsdbg;
   MSize sizedbg = 0;
   MSize sizedbg = 0;
   BCLine firstline = 0, numline = 0;
   BCLine firstline = 0, numline = 0;
-  MSize len, startn;
-
-  /* Read length. */
-  if (ls->n > 0 && ls->p[0] == 0) {  /* Shortcut EOF. */
-    ls->n--; ls->p++;
-    return NULL;
-  }
-  bcread_want(ls, 5);
-  len = bcread_uleb128(ls);
-  if (!len) return NULL;  /* EOF */
-  bcread_need(ls, len);
-  startn = ls->n;
 
 
   /* Read prototype header. */
   /* Read prototype header. */
   flags = bcread_byte(ls);
   flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
     setmref(pt->uvinfo, NULL);
     setmref(pt->uvinfo, NULL);
     setmref(pt->varinfo, NULL);
     setmref(pt->varinfo, NULL);
   }
   }
-
-  if (len != startn - ls->n)
-    bcread_error(ls, LJ_ERR_BCBAD);
   return pt;
   return pt;
 }
 }
 
 
@@ -429,6 +394,7 @@ static int bcread_header(LexState *ls)
       bcread_byte(ls) != BCDUMP_VERSION) return 0;
       bcread_byte(ls) != BCDUMP_VERSION) return 0;
   bcread_flags(ls) = flags = bcread_uleb128(ls);
   bcread_flags(ls) = flags = bcread_uleb128(ls);
   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
+  if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
   if ((flags & BCDUMP_F_FFI)) {
   if ((flags & BCDUMP_F_FFI)) {
 #if LJ_HASFFI
 #if LJ_HASFFI
     lua_State *L = ls->L;
     lua_State *L = ls->L;
@@ -455,19 +421,34 @@ static int bcread_header(LexState *ls)
 GCproto *lj_bcread(LexState *ls)
 GCproto *lj_bcread(LexState *ls)
 {
 {
   lua_State *L = ls->L;
   lua_State *L = ls->L;
-  lua_assert(ls->current == BCDUMP_HEAD1);
+  lua_assert(ls->c == BCDUMP_HEAD1);
   bcread_savetop(L, ls, L->top);
   bcread_savetop(L, ls, L->top);
-  lj_str_resetbuf(&ls->sb);
+  lj_buf_reset(&ls->sb);
   /* Check for a valid bytecode dump header. */
   /* Check for a valid bytecode dump header. */
   if (!bcread_header(ls))
   if (!bcread_header(ls))
     bcread_error(ls, LJ_ERR_BCFMT);
     bcread_error(ls, LJ_ERR_BCFMT);
   for (;;) {  /* Process all prototypes in the bytecode dump. */
   for (;;) {  /* Process all prototypes in the bytecode dump. */
-    GCproto *pt = bcread_proto(ls);
-    if (!pt) break;
+    GCproto *pt;
+    MSize len;
+    const char *startp;
+    /* Read length. */
+    if (ls->p < ls->pe && ls->p[0] == 0) {  /* Shortcut EOF. */
+      ls->p++;
+      break;
+    }
+    bcread_want(ls, 5);
+    len = bcread_uleb128(ls);
+    if (!len) break;  /* EOF */
+    bcread_need(ls, len);
+    startp = ls->p;
+    pt = lj_bcread_proto(ls);
+    if (ls->p != startp + len)
+      bcread_error(ls, LJ_ERR_BCBAD);
     setprotoV(L, L->top, pt);
     setprotoV(L, L->top, pt);
     incr_top(L);
     incr_top(L);
   }
   }
-  if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))
+  if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
+      L->top-1 != bcread_oldtop(L, ls))
     bcread_error(ls, LJ_ERR_BCBAD);
     bcread_error(ls, LJ_ERR_BCBAD);
   /* Pop off last prototype. */
   /* Pop off last prototype. */
   L->top--;
   L->top--;

+ 97 - 132
luajit.mod/luajit/src/lj_bcwrite.c

@@ -8,7 +8,7 @@
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
 #if LJ_HASFFI
 #if LJ_HASFFI
 #include "lj_ctype.h"
 #include "lj_ctype.h"
@@ -17,13 +17,13 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_jit.h"
 #include "lj_jit.h"
 #endif
 #endif
+#include "lj_strfmt.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 
 
 /* Context for bytecode writer. */
 /* Context for bytecode writer. */
 typedef struct BCWriteCtx {
 typedef struct BCWriteCtx {
   SBuf sb;			/* Output buffer. */
   SBuf sb;			/* Output buffer. */
-  lua_State *L;			/* Lua state. */
   GCproto *pt;			/* Root prototype. */
   GCproto *pt;			/* Root prototype. */
   lua_Writer wfunc;		/* Writer callback. */
   lua_Writer wfunc;		/* Writer callback. */
   void *wdata;			/* Writer callback data. */
   void *wdata;			/* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
   int status;			/* Status from writer callback. */
   int status;			/* Status from writer callback. */
 } BCWriteCtx;
 } BCWriteCtx;
 
 
-/* -- Output buffer handling ---------------------------------------------- */
-
-/* Resize buffer if needed. */
-static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
-{
-  MSize sz = ctx->sb.sz * 2;
-  while (ctx->sb.n + len > sz) sz = sz * 2;
-  lj_str_resizebuf(ctx->L, &ctx->sb, sz);
-}
-
-/* Need a certain amount of buffer space. */
-static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
-{
-  if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
-    bcwrite_resize(ctx, len);
-}
-
-/* Add memory block to buffer. */
-static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
-{
-  uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
-  MSize i;
-  ctx->sb.n += len;
-  for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
-}
-
-/* Add byte to buffer. */
-static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
-{
-  ctx->sb.buf[ctx->sb.n++] = b;
-}
-
-/* Add ULEB128 value to buffer. */
-static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
-{
-  MSize n = ctx->sb.n;
-  uint8_t *p = (uint8_t *)ctx->sb.buf;
-  for (; v >= 0x80; v >>= 7)
-    p[n++] = (uint8_t)((v & 0x7f) | 0x80);
-  p[n++] = (uint8_t)v;
-  ctx->sb.n = n;
-}
-
 /* -- Bytecode writer ----------------------------------------------------- */
 /* -- Bytecode writer ----------------------------------------------------- */
 
 
 /* Write a single constant key/value of a template table. */
 /* Write a single constant key/value of a template table. */
 static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
 static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
 {
 {
-  bcwrite_need(ctx, 1+10);
+  char *p = lj_buf_more(&ctx->sb, 1+10);
   if (tvisstr(o)) {
   if (tvisstr(o)) {
     const GCstr *str = strV(o);
     const GCstr *str = strV(o);
     MSize len = str->len;
     MSize len = str->len;
-    bcwrite_need(ctx, 5+len);
-    bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len);
-    bcwrite_block(ctx, strdata(str), len);
+    p = lj_buf_more(&ctx->sb, 5+len);
+    p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
+    p = lj_buf_wmem(p, strdata(str), len);
   } else if (tvisint(o)) {
   } else if (tvisint(o)) {
-    bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-    bcwrite_uleb128(ctx, intV(o));
+    *p++ = BCDUMP_KTAB_INT;
+    p = lj_strfmt_wuleb128(p, intV(o));
   } else if (tvisnum(o)) {
   } else if (tvisnum(o)) {
     if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
     if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
       lua_Number num = numV(o);
       lua_Number num = numV(o);
       int32_t k = lj_num2int(num);
       int32_t k = lj_num2int(num);
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
-	bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-	bcwrite_uleb128(ctx, k);
+	*p++ = BCDUMP_KTAB_INT;
+	p = lj_strfmt_wuleb128(p, k);
+	setsbufP(&ctx->sb, p);
 	return;
 	return;
       }
       }
     }
     }
-    bcwrite_byte(ctx, BCDUMP_KTAB_NUM);
-    bcwrite_uleb128(ctx, o->u32.lo);
-    bcwrite_uleb128(ctx, o->u32.hi);
+    *p++ = BCDUMP_KTAB_NUM;
+    p = lj_strfmt_wuleb128(p, o->u32.lo);
+    p = lj_strfmt_wuleb128(p, o->u32.hi);
   } else {
   } else {
     lua_assert(tvispri(o));
     lua_assert(tvispri(o));
-    bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
+    *p++ = BCDUMP_KTAB_NIL+~itype(o);
   }
   }
+  setsbufP(&ctx->sb, p);
 }
 }
 
 
 /* Write a template table. */
 /* Write a template table. */
-static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
+static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
 {
 {
   MSize narray = 0, nhash = 0;
   MSize narray = 0, nhash = 0;
   if (t->asize > 0) {  /* Determine max. length of array part. */
   if (t->asize > 0) {  /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
       nhash += !tvisnil(&node[i].val);
       nhash += !tvisnil(&node[i].val);
   }
   }
   /* Write number of array slots and hash slots. */
   /* Write number of array slots and hash slots. */
-  bcwrite_uleb128(ctx, narray);
-  bcwrite_uleb128(ctx, nhash);
+  p = lj_strfmt_wuleb128(p, narray);
+  p = lj_strfmt_wuleb128(p, nhash);
+  setsbufP(&ctx->sb, p);
   if (narray) {  /* Write array entries (may contain nil). */
   if (narray) {  /* Write array entries (may contain nil). */
     MSize i;
     MSize i;
     TValue *o = tvref(t->array);
     TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
   for (i = 0; i < sizekgc; i++, kr++) {
   for (i = 0; i < sizekgc; i++, kr++) {
     GCobj *o = gcref(*kr);
     GCobj *o = gcref(*kr);
     MSize tp, need = 1;
     MSize tp, need = 1;
+    char *p;
     /* Determine constant type and needed size. */
     /* Determine constant type and needed size. */
     if (o->gch.gct == ~LJ_TSTR) {
     if (o->gch.gct == ~LJ_TSTR) {
       tp = BCDUMP_KGC_STR + gco2str(o)->len;
       tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
       need = 1+2*5;
       need = 1+2*5;
     }
     }
     /* Write constant type. */
     /* Write constant type. */
-    bcwrite_need(ctx, need);
-    bcwrite_uleb128(ctx, tp);
+    p = lj_buf_more(&ctx->sb, need);
+    p = lj_strfmt_wuleb128(p, tp);
     /* Write constant data (if any). */
     /* Write constant data (if any). */
     if (tp >= BCDUMP_KGC_STR) {
     if (tp >= BCDUMP_KGC_STR) {
-      bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len);
+      p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
     } else if (tp == BCDUMP_KGC_TAB) {
     } else if (tp == BCDUMP_KGC_TAB) {
-      bcwrite_ktab(ctx, gco2tab(o));
+      bcwrite_ktab(ctx, p, gco2tab(o));
+      continue;
 #if LJ_HASFFI
 #if LJ_HASFFI
     } else if (tp != BCDUMP_KGC_CHILD) {
     } else if (tp != BCDUMP_KGC_CHILD) {
-      cTValue *p = (TValue *)cdataptr(gco2cd(o));
-      bcwrite_uleb128(ctx, p[0].u32.lo);
-      bcwrite_uleb128(ctx, p[0].u32.hi);
+      cTValue *q = (TValue *)cdataptr(gco2cd(o));
+      p = lj_strfmt_wuleb128(p, q[0].u32.lo);
+      p = lj_strfmt_wuleb128(p, q[0].u32.hi);
       if (tp == BCDUMP_KGC_COMPLEX) {
       if (tp == BCDUMP_KGC_COMPLEX) {
-	bcwrite_uleb128(ctx, p[1].u32.lo);
-	bcwrite_uleb128(ctx, p[1].u32.hi);
+	p = lj_strfmt_wuleb128(p, q[1].u32.lo);
+	p = lj_strfmt_wuleb128(p, q[1].u32.hi);
       }
       }
 #endif
 #endif
     }
     }
+    setsbufP(&ctx->sb, p);
   }
   }
 }
 }
 
 
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 {
 {
   MSize i, sizekn = pt->sizekn;
   MSize i, sizekn = pt->sizekn;
   cTValue *o = mref(pt->k, TValue);
   cTValue *o = mref(pt->k, TValue);
-  bcwrite_need(ctx, 10*sizekn);
+  char *p = lj_buf_more(&ctx->sb, 10*sizekn);
   for (i = 0; i < sizekn; i++, o++) {
   for (i = 0; i < sizekn; i++, o++) {
     int32_t k;
     int32_t k;
     if (tvisint(o)) {
     if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 	k = lj_num2int(num);
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	save_int:
 	save_int:
-	  bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
-	  if (k < 0) {
-	    char *p = &ctx->sb.buf[ctx->sb.n-1];
-	    *p = (*p & 7) | ((k>>27) & 0x18);
-	  }
+	  p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
+	  if (k < 0)
+	    p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
 	  continue;
 	  continue;
 	}
 	}
       }
       }
-      bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
-      if (o->u32.lo >= 0x80000000u) {
-	char *p = &ctx->sb.buf[ctx->sb.n-1];
-	*p = (*p & 7) | ((o->u32.lo>>27) & 0x18);
-      }
-      bcwrite_uleb128(ctx, o->u32.hi);
+      p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
+      if (o->u32.lo >= 0x80000000u)
+	p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
+      p = lj_strfmt_wuleb128(p, o->u32.hi);
     }
     }
   }
   }
+  setsbufP(&ctx->sb, p);
 }
 }
 
 
 /* Write bytecode instructions. */
 /* Write bytecode instructions. */
-static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt)
+static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
 {
 {
   MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
   MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
 #if LJ_HASJIT
 #if LJ_HASJIT
-  uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n];
+  uint8_t *q = (uint8_t *)p;
 #endif
 #endif
-  bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  UNUSED(ctx);
 #if LJ_HASJIT
 #if LJ_HASJIT
   /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
   /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
   if ((pt->flags & PROTO_ILOOP) || pt->trace) {
   if ((pt->flags & PROTO_ILOOP) || pt->trace) {
-    jit_State *J = L2J(ctx->L);
+    jit_State *J = L2J(sbufL(&ctx->sb));
     MSize i;
     MSize i;
-    for (i = 0; i < nbc; i++, p += sizeof(BCIns)) {
-      BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)];
+    for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
+      BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
 	  op == BC_JFORI) {
 	  op == BC_JFORI) {
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
       } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
       } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
-	BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8);
+	BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
 	BCIns ins = traceref(J, rd)->startins;
 	BCIns ins = traceref(J, rd)->startins;
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
-	p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
-	p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
+	q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
       }
       }
     }
     }
   }
   }
 #endif
 #endif
+  return p;
 }
 }
 
 
 /* Write prototype. */
 /* Write prototype. */
 static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 {
 {
   MSize sizedbg = 0;
   MSize sizedbg = 0;
+  char *p;
 
 
   /* Recursively write children of prototype. */
   /* Recursively write children of prototype. */
   if ((pt->flags & PROTO_CHILD)) {
   if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
   }
   }
 
 
   /* Start writing the prototype info to a buffer. */
   /* Start writing the prototype info to a buffer. */
-  lj_str_resetbuf(&ctx->sb);
-  ctx->sb.n = 5;  /* Leave room for final size. */
-  bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p = lj_buf_need(&ctx->sb,
+		  5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p += 5;  /* Leave room for final size. */
 
 
   /* Write prototype header. */
   /* Write prototype header. */
-  bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)));
-  bcwrite_byte(ctx, pt->numparams);
-  bcwrite_byte(ctx, pt->framesize);
-  bcwrite_byte(ctx, pt->sizeuv);
-  bcwrite_uleb128(ctx, pt->sizekgc);
-  bcwrite_uleb128(ctx, pt->sizekn);
-  bcwrite_uleb128(ctx, pt->sizebc-1);
+  *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
+  *p++ = pt->numparams;
+  *p++ = pt->framesize;
+  *p++ = pt->sizeuv;
+  p = lj_strfmt_wuleb128(p, pt->sizekgc);
+  p = lj_strfmt_wuleb128(p, pt->sizekn);
+  p = lj_strfmt_wuleb128(p, pt->sizebc-1);
   if (!ctx->strip) {
   if (!ctx->strip) {
     if (proto_lineinfo(pt))
     if (proto_lineinfo(pt))
       sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
       sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
-    bcwrite_uleb128(ctx, sizedbg);
+    p = lj_strfmt_wuleb128(p, sizedbg);
     if (sizedbg) {
     if (sizedbg) {
-      bcwrite_uleb128(ctx, pt->firstline);
-      bcwrite_uleb128(ctx, pt->numline);
+      p = lj_strfmt_wuleb128(p, pt->firstline);
+      p = lj_strfmt_wuleb128(p, pt->numline);
     }
     }
   }
   }
 
 
   /* Write bytecode instructions and upvalue refs. */
   /* Write bytecode instructions and upvalue refs. */
-  bcwrite_bytecode(ctx, pt);
-  bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2);
+  p = bcwrite_bytecode(ctx, p, pt);
+  p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
+  setsbufP(&ctx->sb, p);
 
 
   /* Write constants. */
   /* Write constants. */
   bcwrite_kgc(ctx, pt);
   bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 
 
   /* Write debug info, if not stripped. */
   /* Write debug info, if not stripped. */
   if (sizedbg) {
   if (sizedbg) {
-    bcwrite_need(ctx, sizedbg);
-    bcwrite_block(ctx, proto_lineinfo(pt), sizedbg);
+    p = lj_buf_more(&ctx->sb, sizedbg);
+    p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
+    setsbufP(&ctx->sb, p);
   }
   }
 
 
   /* Pass buffer to writer function. */
   /* Pass buffer to writer function. */
   if (ctx->status == 0) {
   if (ctx->status == 0) {
-    MSize n = ctx->sb.n - 5;
+    MSize n = sbuflen(&ctx->sb) - 5;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
-    ctx->sb.n = 5 - nn;
-    bcwrite_uleb128(ctx, n);  /* Fill in final size. */
-    lua_assert(ctx->sb.n == 5);
-    ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata);
+    char *q = sbufB(&ctx->sb) + (5 - nn);
+    p = lj_strfmt_wuleb128(q, n);  /* Fill in final size. */
+    lua_assert(p == sbufB(&ctx->sb) + 5);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
   }
   }
 }
 }
 
 
@@ -338,20 +303,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
   GCstr *chunkname = proto_chunkname(ctx->pt);
   GCstr *chunkname = proto_chunkname(ctx->pt);
   const char *name = strdata(chunkname);
   const char *name = strdata(chunkname);
   MSize len = chunkname->len;
   MSize len = chunkname->len;
-  lj_str_resetbuf(&ctx->sb);
-  bcwrite_need(ctx, 5+5+len);
-  bcwrite_byte(ctx, BCDUMP_HEAD1);
-  bcwrite_byte(ctx, BCDUMP_HEAD2);
-  bcwrite_byte(ctx, BCDUMP_HEAD3);
-  bcwrite_byte(ctx, BCDUMP_VERSION);
-  bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) +
-		   (LJ_BE ? BCDUMP_F_BE : 0) +
-		   ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
+  char *p = lj_buf_need(&ctx->sb, 5+5+len);
+  *p++ = BCDUMP_HEAD1;
+  *p++ = BCDUMP_HEAD2;
+  *p++ = BCDUMP_HEAD3;
+  *p++ = BCDUMP_VERSION;
+  *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
+	 LJ_BE*BCDUMP_F_BE +
+	 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
+	 LJ_FR2*BCDUMP_F_FR2;
   if (!ctx->strip) {
   if (!ctx->strip) {
-    bcwrite_uleb128(ctx, len);
-    bcwrite_block(ctx, name, len);
+    p = lj_strfmt_wuleb128(p, len);
+    p = lj_buf_wmem(p, name, len);
   }
   }
-  ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata);
+  ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
+			   (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
 }
 }
 
 
 /* Write footer of bytecode dump. */
 /* Write footer of bytecode dump. */
@@ -359,7 +325,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 {
 {
   if (ctx->status == 0) {
   if (ctx->status == 0) {
     uint8_t zero = 0;
     uint8_t zero = 0;
-    ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
   }
   }
 }
 }
 
 
@@ -367,8 +333,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
 static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
 {
 {
   BCWriteCtx *ctx = (BCWriteCtx *)ud;
   BCWriteCtx *ctx = (BCWriteCtx *)ud;
-  UNUSED(dummy);
-  lj_str_resizebuf(L, &ctx->sb, 1024);  /* Avoids resize for most prototypes. */
+  UNUSED(L); UNUSED(dummy);
+  lj_buf_need(&ctx->sb, 1024);  /* Avoids resize for most prototypes. */
   bcwrite_header(ctx);
   bcwrite_header(ctx);
   bcwrite_proto(ctx, ctx->pt);
   bcwrite_proto(ctx, ctx->pt);
   bcwrite_footer(ctx);
   bcwrite_footer(ctx);
@@ -381,16 +347,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
 {
 {
   BCWriteCtx ctx;
   BCWriteCtx ctx;
   int status;
   int status;
-  ctx.L = L;
   ctx.pt = pt;
   ctx.pt = pt;
   ctx.wfunc = writer;
   ctx.wfunc = writer;
   ctx.wdata = data;
   ctx.wdata = data;
   ctx.strip = strip;
   ctx.strip = strip;
   ctx.status = 0;
   ctx.status = 0;
-  lj_str_initbuf(&ctx.sb);
+  lj_buf_init(L, &ctx.sb);
   status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
   status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
   if (status == 0) status = ctx.status;
   if (status == 0) status = ctx.status;
-  lj_str_freebuf(G(ctx.L), &ctx.sb);
+  lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
   return status;
   return status;
 }
 }
 
 

+ 232 - 0
luajit.mod/luajit/src/lj_buf.c

@@ -0,0 +1,232 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_buf_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_strfmt.h"
+
+/* -- Buffer management --------------------------------------------------- */
+
+static void buf_grow(SBuf *sb, MSize sz)
+{
+  MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
+  char *b;
+  if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
+  while (nsz < sz) nsz += nsz;
+  b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
+  setmref(sb->b, b);
+  setmref(sb->p, b + len);
+  setmref(sb->e, b + nsz);
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
+{
+  lua_assert(sz > sbufsz(sb));
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, sz);
+  return sbufB(sb);
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
+{
+  MSize len = sbuflen(sb);
+  lua_assert(sz > sbufleft(sb));
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, len + sz);
+  return sbufP(sb);
+}
+
+void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
+{
+  char *b = sbufB(sb);
+  MSize osz = (MSize)(sbufE(sb) - b);
+  if (osz > 2*LJ_MIN_SBUF) {
+    MSize n = (MSize)(sbufP(sb) - b);
+    b = lj_mem_realloc(L, b, osz, (osz >> 1));
+    setmref(sb->b, b);
+    setmref(sb->p, b + n);
+    setmref(sb->e, b + (osz >> 1));
+  }
+}
+
+char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
+{
+  SBuf *sb = &G(L)->tmpbuf;
+  setsbufL(sb, L);
+  return lj_buf_need(sb, sz);
+}
+
+/* -- Low-level buffer put operations ------------------------------------- */
+
+SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
+{
+  char *p = lj_buf_more(sb, len);
+  p = lj_buf_wmem(p, q, len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+  char *p = lj_buf_more(sb, 1);
+  *p++ = (char)c;
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len);
+  p = lj_buf_wmem(p, strdata(s), len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* -- High-level buffer put operations ------------------------------------ */
+
+SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s)+len-1;
+  while (p < e)
+    *p++ = *q--;
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c + ((c >= 'A' && c <= 'Z') << 5);
+#else
+    if (c >= 'A' && c <= 'Z') c += 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c - ((c >= 'a' && c <= 'z') << 5);
+#else
+    if (c >= 'a' && c <= 'z') c -= 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
+{
+  MSize len = s->len;
+  if (rep > 0 && len) {
+    uint64_t tlen = (uint64_t)rep * len;
+    char *p;
+    if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
+      lj_err_mem(sbufL(sb));
+    p = lj_buf_more(sb, (MSize)tlen);
+    if (len == 1) {  /* Optimize a common case. */
+      uint32_t c = strdata(s)[0];
+      do { *p++ = c; } while (--rep > 0);
+    } else {
+      const char *e = strdata(s) + len;
+      do {
+	const char *q = strdata(s);
+	do { *p++ = *q++; } while (q < e);
+      } while (--rep > 0);
+    }
+    setsbufP(sb, p);
+  }
+  return sb;
+}
+
+SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
+{
+  MSize seplen = sep ? sep->len : 0;
+  if (i <= e) {
+    for (;;) {
+      cTValue *o = lj_tab_getint(t, i);
+      char *p;
+      if (!o) {
+      badtype:  /* Error: bad element type. */
+	setsbufP(sb, (void *)(intptr_t)i);  /* Store failing index. */
+	return NULL;
+      } else if (tvisstr(o)) {
+	MSize len = strV(o)->len;
+	p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+      } else if (tvisint(o)) {
+	p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+      } else if (tvisnum(o)) {
+	p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
+      } else {
+	goto badtype;
+      }
+      if (i++ == e) {
+	setsbufP(sb, p);
+	break;
+      }
+      if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
+      setsbufP(sb, p);
+    }
+  }
+  return sb;
+}
+
+/* -- Miscellaneous buffer operations ------------------------------------- */
+
+GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
+{
+  return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
+}
+
+/* Concatenate two strings. */
+GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
+{
+  MSize len1 = s1->len, len2 = s2->len;
+  char *buf = lj_buf_tmp(L, len1 + len2);
+  memcpy(buf, strdata(s1), len1);
+  memcpy(buf+len1, strdata(s2), len2);
+  return lj_str_new(L, buf, len1 + len2);
+}
+
+/* Read ULEB128 from buffer. */
+uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
+{
+  const uint8_t *p = (const uint8_t *)*pp;
+  uint32_t v = *p++;
+  if (LJ_UNLIKELY(v >= 0x80)) {
+    int sh = 0;
+    v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *pp = (const char *)p;
+  return v;
+}
+

+ 103 - 0
luajit.mod/luajit/src/lj_buf.h

@@ -0,0 +1,103 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BUF_H
+#define _LJ_BUF_H
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_str.h"
+
+/* Resizable string buffers. Struct definition in lj_obj.h.
+**
+** The accessors below read the b, p, e and L references of an SBuf. As the
+** size macros show, b is the start of the buffer, p the current write
+** position and e the end of the buffer.
+*/
+#define sbufB(sb)	(mref((sb)->b, char))
+#define sbufP(sb)	(mref((sb)->p, char))
+#define sbufE(sb)	(mref((sb)->e, char))
+#define sbufL(sb)	(mref((sb)->L, lua_State))
+#define sbufsz(sb)	((MSize)(sbufE((sb)) - sbufB((sb))))	/* Total size. */
+#define sbuflen(sb)	((MSize)(sbufP((sb)) - sbufB((sb))))	/* Bytes written. */
+#define sbufleft(sb)	((MSize)(sbufE((sb)) - sbufP((sb))))	/* Bytes left. */
+#define setsbufP(sb, q)	(setmref((sb)->p, (q)))
+#define setsbufL(sb, l)	(setmref((sb)->L, (l)))
+
+/* Buffer management */
+LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
+LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
+LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
+LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
+
+/* Initialize a buffer: all three buffer pointers are cleared and the
+** buffer is bound to L.
+*/
+static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
+{
+  setmref(sb->b, NULL);
+  setmref(sb->p, NULL);
+  setmref(sb->e, NULL);
+  setsbufL(sb, L);
+}
+
+static LJ_AINLINE void lj_buf_reset(SBuf *sb)
+{  /* Rewind the write position to the buffer start; b and e are untouched. */
+  setmrefr(sb->p, sb->b);
+}
+
+/* Return the temporary buffer of the global state, rewound to its start
+** and bound to L.
+*/
+static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
+{
+  SBuf *tmp = &G(L)->tmpbuf;
+  lj_buf_reset(tmp);
+  setsbufL(tmp, L);
+  return tmp;
+}
+
+static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
+{  /* Hand the whole buffer (start pointer and total size) to lj_mem_free(). */
+  lj_mem_free(g, sbufB(sb), sbufsz(sb));
+}
+
+/* Return the buffer start if the total buffer size is already at least sz.
+** Otherwise take the slow path through lj_buf_need2().
+*/
+static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
+{
+  if (LJ_LIKELY(sz <= sbufsz(sb)))
+    return sbufB(sb);
+  return lj_buf_need2(sb, sz);
+}
+
+/* Return the write position if at least sz bytes are left in the buffer.
+** Otherwise take the slow path through lj_buf_more2().
+*/
+static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
+{
+  if (LJ_LIKELY(sz <= sbufleft(sb)))
+    return sbufP(sb);
+  return lj_buf_more2(sb, sz);
+}
+
+/* Low-level buffer put operations */
+LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
+
+/* Copy len bytes from q to p and return the position behind the copy.
+** The destination space is not checked here.
+*/
+static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
+{
+  memcpy(p, q, len);
+  return p + len;
+}
+
+/* Append a single byte to the buffer, making room for it first. */
+static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
+{
+  char *w = lj_buf_more(sb, 1);
+  w[0] = (char)c;
+  setsbufP(sb, w + 1);
+}
+
+/* High-level buffer put operations */
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
+LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
+LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
+			    int32_t i, int32_t e);
+
+/* Miscellaneous buffer operations */
+LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
+LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
+LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
+
+/* Create a string from the current buffer contents, using the explicitly
+** passed lua_State.
+*/
+static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
+{
+  const char *data = sbufB(sb);
+  MSize len = sbuflen(sb);
+  return lj_str_new(L, data, len);
+}
+
+#endif

+ 84 - 0
luajit.mod/luajit/src/lj_carith.c

@@ -11,10 +11,12 @@
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
+#include "lj_ir.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_cdata.h"
 #include "lj_carith.h"
 #include "lj_carith.h"
+#include "lj_strscan.h"
 
 
 /* -- C data arithmetic --------------------------------------------------- */
 /* -- C data arithmetic --------------------------------------------------- */
 
 
@@ -272,6 +274,88 @@ int lj_carith_op(lua_State *L, MMS mm)
   return lj_carith_meta(L, cts, &ca, mm);
   return lj_carith_meta(L, cts, &ca, mm);
 }
 }
 
 
+/* No built-in functionality for length of cdata.
+**
+** The length operator is always forwarded to lj_carith_meta() with MM_len
+** and its result is returned. The return value of carith_checkarg() is
+** ignored here; it is called for its effect on ca (NOTE(review): it
+** presumably also validates the arguments -- confirm against its
+** definition).
+*/
+int lj_carith_len(lua_State *L)
+{
+  CTState *cts = ctype_cts(L);
+  CDArith ca;
+  carith_checkarg(L, cts, &ca);
+  return lj_carith_meta(L, cts, &ca, MM_len);
+}
+
+/* -- 64 bit bit operations helpers --------------------------------------- */
+
+#if LJ_64  /* Static inline helpers, private to this file. */
+#define B64DEF(name) \
+  static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
+#else
+/* Not inlined on 32 bit archs, since some of these are quite lengthy.
+** In this case the helpers are plain external functions.
+*/
+#define B64DEF(name) \
+  uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
+#endif
+
+B64DEF(shl64) { return x << (sh&63); }  /* Shift count is masked to 0..63. */
+B64DEF(shr64) { return x >> (sh&63); }  /* Unsigned (logical) right shift. */
+B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }  /* Signed right shift. */
+B64DEF(rol64) { return lj_rol(x, (sh&63)); }
+B64DEF(ror64) { return lj_ror(x, (sh&63)); }
+
+#undef B64DEF
+
+/* Apply a 64 bit shift or rotate to x. The operation is selected by op,
+** which is the IR opcode relative to IR_BSHL. An unknown op triggers the
+** assertion and returns x unchanged.
+*/
+uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+{
+  if (op == IR_BSHL-IR_BSHL)
+    return lj_carith_shl64(x, sh);
+  if (op == IR_BSHR-IR_BSHL)
+    return lj_carith_shr64(x, sh);
+  if (op == IR_BSAR-IR_BSHL)
+    return lj_carith_sar64(x, sh);
+  if (op == IR_BROL-IR_BSHL)
+    return lj_carith_rol64(x, sh);
+  if (op == IR_BROR-IR_BSHL)
+    return lj_carith_ror64(x, sh);
+  lua_assert(0);
+  return x;
+}
+
+/* Equivalent to lj_lib_checkbit(), but handles cdata.
+**
+** Fetches argument narg as a 64 bit value. For a cdata argument the value
+** is converted to the type given by *id, which is updated first: it becomes
+** CTID_UINT64 if the argument is an unsigned 8 byte integer, else
+** CTID_INT64 unless *id is already set. Numbers, and strings accepted by
+** lj_strscan_number(), leave *id alone and yield a zero-extended 32 bit
+** value. Everything else ends up in lj_err_argt().
+*/
+uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+{
+  TValue *o = L->base + narg-1;
+  if (o >= L->top) {  /* Argument slot is beyond the stack top. */
+  err:
+    lj_err_argt(L, narg, LUA_TNUMBER);  /* NOTE(review): presumably does not return. */
+  } else if (LJ_LIKELY(tvisnumber(o))) {
+    /* Handled below. */
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(L);
+    uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
+    CTypeID sid = cdataV(o)->ctypeid;
+    CType *s = ctype_get(cts, sid);
+    uint64_t x;
+    if (ctype_isref(s->info)) {  /* Reference: load the pointer, use the referenced type. */
+      sp = *(void **)sp;
+      sid = ctype_cid(s->info);
+    }
+    s = ctype_raw(cts, sid);
+    if (ctype_isenum(s->info)) s = ctype_child(cts, s);  /* Enum: use its child type. */
+    if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
+      *id = CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    else if (!*id)
+      *id = CTID_INT64;  /* Use int64_t, unless already set. */
+    lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
+		   (uint8_t *)&x, sp, CCF_ARG(narg));
+    return x;
+  } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {  /* The scanned number is written to o. */
+    goto err;
+  }
+  if (LJ_LIKELY(tvisint(o))) {
+    return (uint32_t)intV(o);  /* Zero-extended to 64 bit. */
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);  /* Write the integer back to the slot. */
+    return (uint32_t)i;
+  }
+}
+
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
 
 #if LJ_32 && LJ_HASJIT
 #if LJ_32 && LJ_HASJIT

+ 11 - 0
luajit.mod/luajit/src/lj_carith.h

@@ -11,6 +11,17 @@
 #if LJ_HASFFI
 #if LJ_HASFFI
 
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
+LJ_FUNC int lj_carith_len(lua_State *L);
+
+#if LJ_32
+LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
+#endif
+LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
+LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
 
 
 #if LJ_32 && LJ_HASJIT
 #if LJ_32 && LJ_HASJIT
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);

+ 330 - 47
luajit.mod/luajit/src/lj_ccall.c

@@ -9,7 +9,6 @@
 
 
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
@@ -291,56 +290,84 @@
 #define CCALL_HANDLE_RET \
 #define CCALL_HANDLE_RET \
   if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
   if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
 
 
-#elif LJ_TARGET_PPC
-/* -- PPC calling conventions --------------------------------------------- */
+#elif LJ_TARGET_ARM64
+/* -- ARM64 calling conventions ------------------------------------------- */
 
 
 #define CCALL_HANDLE_STRUCTRET \
 #define CCALL_HANDLE_STRUCTRET \
-  cc->retref = 1;  /* Return all structs by reference. */ \
-  cc->gpr[ngpr++] = (GPRArg)dp;
+  cc->retref = !ccall_classify_struct(cts, ctr); \
+  if (cc->retref) cc->retp = dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
+    CTSize i = (cl >> 8) - 1; \
+    do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
+  } else { \
+    if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
+    memcpy(dp, sp, ctr->size); \
+  }
 
 
 #define CCALL_HANDLE_COMPLEXRET \
 #define CCALL_HANDLE_COMPLEXRET \
-  /* Complex values are returned in 2 or 4 GPRs. */ \
+  /* Complex values are returned in one or two FPRs. */ \
   cc->retref = 0;
   cc->retref = 0;
 
 
 #define CCALL_HANDLE_COMPLEXRET2 \
 #define CCALL_HANDLE_COMPLEXRET2 \
-  memcpy(dp, sp, ctr->size);  /* Copy complex from GPRs. */
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
 
 
 #define CCALL_HANDLE_STRUCTARG \
 #define CCALL_HANDLE_STRUCTARG \
-  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-  sz = CTSIZE_PTR;  /* Pass all structs by reference. */
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  if (cl == 0) {  /* Pass struct by reference. */ \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } else if (cl > 1) {  /* Pass struct in FPRs or on stack. */ \
+    isfp = (cl & 4) ? 2 : 1; \
+  }  /* else: Pass struct in GPRs or on stack. */
 
 
 #define CCALL_HANDLE_COMPLEXARG \
 #define CCALL_HANDLE_COMPLEXARG \
-  /* Pass complex by value in 2 or 4 GPRs. */
+  /* Pass complex by value in separate (!) FPRs or on stack. */ \
+  isfp = sz == 2*sizeof(float) ? 2 : 1;
 
 
 #define CCALL_HANDLE_REGARG \
 #define CCALL_HANDLE_REGARG \
-  if (isfp) {  /* Try to pass argument in FPRs. */ \
-    if (nfpr + 1 <= CCALL_NARG_FPR) { \
+  if (LJ_TARGET_IOS && isva) { \
+    /* IOS: All variadic arguments are on the stack. */ \
+  } else if (isfp) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
+    if (nfpr + n2 <= CCALL_NARG_FPR) { \
       dp = &cc->fpr[nfpr]; \
       dp = &cc->fpr[nfpr]; \
-      nfpr += 1; \
-      d = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+      nfpr += n2; \
       goto done; \
       goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
     } \
     } \
   } else {  /* Try to pass argument in GPRs. */ \
   } else {  /* Try to pass argument in GPRs. */ \
-    if (n > 1) { \
-      lua_assert(n == 2 || n == 4);  /* int64_t or complex (float). */ \
-      if (ctype_isinteger(d->info)) \
-	ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
-      else if (ngpr + n > maxgpr) \
-	ngpr = maxgpr;  /* Prevent reordering. */ \
-    } \
+    if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
     if (ngpr + n <= maxgpr) { \
       dp = &cc->gpr[ngpr]; \
       dp = &cc->gpr[ngpr]; \
       ngpr += n; \
       ngpr += n; \
       goto done; \
       goto done; \
+    } else { \
+      ngpr = maxgpr;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
     } \
     } \
   }
   }
 
 
+#if LJ_BE
 #define CCALL_HANDLE_RET \
 #define CCALL_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
-    ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
+    sp = (uint8_t *)&cc->fpr[0].f;
+#endif
 
 
-#elif LJ_TARGET_PPCSPE
-/* -- PPC/SPE calling conventions ----------------------------------------- */
+
+#elif LJ_TARGET_PPC
+/* -- PPC calling conventions --------------------------------------------- */
 
 
 #define CCALL_HANDLE_STRUCTRET \
 #define CCALL_HANDLE_STRUCTRET \
   cc->retref = 1;  /* Return all structs by reference. */ \
   cc->retref = 1;  /* Return all structs by reference. */ \
@@ -360,12 +387,12 @@
 #define CCALL_HANDLE_COMPLEXARG \
 #define CCALL_HANDLE_COMPLEXARG \
   /* Pass complex by value in 2 or 4 GPRs. */
   /* Pass complex by value in 2 or 4 GPRs. */
 
 
-/* PPC/SPE has a softfp ABI. */
-#define CCALL_HANDLE_REGARG \
-  if (n > 1) {  /* Doesn't fit in a single GPR? */ \
-    lua_assert(n == 2 || n == 4);  /* int64_t, double or complex (float). */ \
-    if (n == 2) \
-      ngpr = (ngpr + 1u) & ~1u;  /* Only align 64 bit value to regpair. */ \
+#define CCALL_HANDLE_GPR \
+  /* Try to pass argument in GPRs. */ \
+  if (n > 1) { \
+    lua_assert(n == 2 || n == 4);  /* int64_t or complex (float). */ \
+    if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
     else if (ngpr + n > maxgpr) \
     else if (ngpr + n > maxgpr) \
       ngpr = maxgpr;  /* Prevent reordering. */ \
       ngpr = maxgpr;  /* Prevent reordering. */ \
   } \
   } \
@@ -373,10 +400,32 @@
     dp = &cc->gpr[ngpr]; \
     dp = &cc->gpr[ngpr]; \
     ngpr += n; \
     ngpr += n; \
     goto done; \
     goto done; \
+  } \
+
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_REGARG  CCALL_HANDLE_GPR
+#else
+#define CCALL_HANDLE_REGARG \
+  if (isfp) {  /* Try to pass argument in FPRs. */ \
+    if (nfpr + 1 <= CCALL_NARG_FPR) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += 1; \
+      d = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+      goto done; \
+    } \
+  } else { \
+    CCALL_HANDLE_GPR \
   }
   }
+#endif
 
 
-#elif LJ_TARGET_MIPS
-/* -- MIPS calling conventions -------------------------------------------- */
+#if !LJ_ABI_SOFTFP
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
+#endif
+
+#elif LJ_TARGET_MIPS32
+/* -- MIPS o32 calling conventions ---------------------------------------- */
 
 
 #define CCALL_HANDLE_STRUCTRET \
 #define CCALL_HANDLE_STRUCTRET \
   cc->retref = 1;  /* Return all structs by reference. */ \
   cc->retref = 1;  /* Return all structs by reference. */ \
@@ -386,6 +435,18 @@
   /* Complex values are returned in 1 or 2 FPRs. */ \
   /* Complex values are returned in 1 or 2 FPRs. */ \
   cc->retref = 0;
   cc->retref = 0;
 
 
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+    ((intptr_t *)dp)[1] = cc->gpr[1]; \
+  } else {  /* Copy complex double from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+    ((intptr_t *)dp)[1] = cc->gpr[1]; \
+    ((intptr_t *)dp)[2] = cc->gpr[2]; \
+    ((intptr_t *)dp)[3] = cc->gpr[3]; \
+  }
+#else
 #define CCALL_HANDLE_COMPLEXRET2 \
 #define CCALL_HANDLE_COMPLEXRET2 \
   if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
   if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
     ((float *)dp)[0] = cc->fpr[0].f; \
     ((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +455,7 @@
     ((double *)dp)[0] = cc->fpr[0].d; \
     ((double *)dp)[0] = cc->fpr[0].d; \
     ((double *)dp)[1] = cc->fpr[1].d; \
     ((double *)dp)[1] = cc->fpr[1].d; \
   }
   }
+#endif
 
 
 #define CCALL_HANDLE_STRUCTARG \
 #define CCALL_HANDLE_STRUCTARG \
   /* Pass all structs by value in registers and/or on the stack. */
   /* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +463,22 @@
 #define CCALL_HANDLE_COMPLEXARG \
 #define CCALL_HANDLE_COMPLEXARG \
   /* Pass complex by value in 2 or 4 GPRs. */
   /* Pass complex by value in 2 or 4 GPRs. */
 
 
+#define CCALL_HANDLE_GPR \
+  if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
+    ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+  if (ngpr < maxgpr) { \
+    dp = &cc->gpr[ngpr]; \
+    if (ngpr + n > maxgpr) { \
+     nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
+     if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+     ngpr = maxgpr; \
+    } else { \
+     ngpr += n; \
+    } \
+    goto done; \
+  }
+
+#if !LJ_ABI_SOFTFP	/* MIPS32 hard-float */
 #define CCALL_HANDLE_REGARG \
 #define CCALL_HANDLE_REGARG \
   if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
   if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
     /* Try to pass argument in FPRs. */ \
     /* Try to pass argument in FPRs. */ \
@@ -409,25 +487,91 @@
     goto done; \
     goto done; \
   } else {  /* Try to pass argument in GPRs. */ \
   } else {  /* Try to pass argument in GPRs. */ \
     nfpr = CCALL_NARG_FPR; \
     nfpr = CCALL_NARG_FPR; \
-    if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
-      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
-    if (ngpr < maxgpr) { \
-      dp = &cc->gpr[ngpr]; \
-      if (ngpr + n > maxgpr) { \
-	nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-	if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
-	ngpr = maxgpr; \
-      } else { \
-	ngpr += n; \
-      } \
-      goto done; \
-    } \
+    CCALL_HANDLE_GPR \
+  }
+#else			/* MIPS32 soft-float */
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#endif
+
+#if !LJ_ABI_SOFTFP
+/* On MIPS64 soft-float, position of float return values is endian-dependent. */
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)&cc->fpr[0].f;
+#endif
+
+#elif LJ_TARGET_MIPS64
+/* -- MIPS n64 calling conventions ---------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = !(sz <= 16); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 1 or 2 FPRs. */ \
+  cc->retref = 0;
+
+#if LJ_ABI_SOFTFP	/* MIPS64 soft-float */
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+  } else {  /* Copy complex double from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+    ((intptr_t *)dp)[1] = cc->gpr[1]; \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in 2 or 4 GPRs. */
+
+/* Position of soft-float 'float' return value depends on endianness. */
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
+
+#else			/* MIPS64 hard-float */
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  if (sz == 2*sizeof(float)) { \
+    isfp = 2; \
+    if (ngpr < maxgpr) \
+      sz *= 2; \
   }
   }
 
 
 #define CCALL_HANDLE_RET \
 #define CCALL_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     sp = (uint8_t *)&cc->fpr[0].f;
     sp = (uint8_t *)&cc->fpr[0].f;
 
 
+#endif
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass all structs by value in registers and/or on the stack. */
+
+#define CCALL_HANDLE_REGARG \
+  if (ngpr < maxgpr) { \
+    dp = &cc->gpr[ngpr]; \
+    if (ngpr + n > maxgpr) { \
+      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      ngpr = maxgpr; \
+    } else { \
+      ngpr += n; \
+    } \
+    goto done; \
+  }
+
 #else
 #else
 #error "Missing calling convention definitions for this architecture"
 #error "Missing calling convention definitions for this architecture"
 #endif
 #endif
@@ -621,6 +765,125 @@ noth:  /* Not a homogeneous float/double aggregate. */
 
 
 #endif
 #endif
 
 
+/* -- ARM64 ABI struct classification ------------------------------------- */
+
+#if LJ_TARGET_ARM64
+
+/* Classify a struct based on its fields.
+**
+** Return value:
+**   0                 Not a homogeneous float/double aggregate and larger
+**                     than 16 bytes.
+**   1                 Not a homogeneous float/double aggregate and at most
+**                     16 bytes.
+**   size + (n << 8)   Homogeneous aggregate of n <= 4 elements, where size
+**                     is the element size: 4 or 8.
+*/
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+  while (ct->sib) {  /* Walk the sibling chain of the struct. */
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;  /* Mixed sizes fail the r == 4 || r == 8 check below. */
+	if (!isu) n++; else if (n == 0) n = 1;  /* Union: maximum instead of sum. */
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);  /* Element size is half the complex size. */
+	if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {  /* Zero-sized sub-structs are ignored. */
+	unsigned int s = ccall_classify_struct(cts, sct);
+	if (s <= 1) goto noth;  /* Sub-struct is not homogeneous itself. */
+	r |= (s & 255);  /* Low byte: element size. */
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);  /* Upper bits: count. */
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
+/* -- MIPS64 ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_MIPS64
+
+#define FTYPE_FLOAT	1
+#define FTYPE_DOUBLE	2
+
+/* Classify FP fields (max. 2) and their types.
+**
+** Returns a bit set with two bits per FP field: the type of the k-th field
+** (FTYPE_FLOAT for size 4, else FTYPE_DOUBLE) is stored at bit 2*k.
+** Returns 0 if ctf has CTF_VARARG set, if ct is a union, if any member is
+** not an FP field (including bitfields and sub-type attributes) or if
+** another member follows after two FP fields.
+*/
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
+{
+  int n = 0, ft = 0;  /* Number of FP fields seen and their packed types. */
+  if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
+    goto noth;
+  while (ct->sib) {
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (n == 2) {  /* Already two FP fields and there is another sibling. */
+      goto noth;
+    } else if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
+	n++;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info) ||
+	       ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      goto noth;
+    }
+  }
+  if (n <= 2)
+    return ft;
+noth:  /* Not a homogeneous float/double aggregate. */
+  return 0;  /* Struct is in GPRs. */
+}
+
+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
+			      int ft)
+{  /* Copy a struct result of type ctr to dp. ft holds two bits per FP field
+   ** (FTYPE_FLOAT or FTYPE_DOUBLE). The field-by-field path is taken for any
+   ** FP field with soft-float, but only if the first or second field is a
+   ** float with hard-float. Otherwise ctr->size bytes are copied in one go.
+   */
+  if (LJ_ABI_SOFTFP ? ft :
+      ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
+    int i, ofs = 0;  /* Field index and byte offset into dp. */
+    for (i = 0; ft != 0; i++, ft >>= 2) {
+      if ((ft & 3) == FTYPE_FLOAT) {
+#if LJ_ABI_SOFTFP
+	/* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
+	memcpy((uint8_t *)dp + ofs,
+	       (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
+#else
+	*(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
+#endif
+	ofs += 4;
+      } else {
+	ofs = (ofs + 7) & ~7;  /* 64 bit alignment. */
+#if LJ_ABI_SOFTFP
+	*(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
+#else
+	*(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
+#endif
+	ofs += 8;
+      }
+    }
+  } else {
+#if !LJ_ABI_SOFTFP
+    if (ft) sp = (uint8_t *)&cc->fpr[0];  /* FP fields without a float: copy from the FPR array. */
+#endif
+    memcpy(dp, sp, ctr->size);
+  }
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 /* -- Common C call handling ---------------------------------------------- */
 
 
 /* Infer the destination CTypeID for a vararg argument. */
 /* Infer the destination CTypeID for a vararg argument. */
@@ -788,6 +1051,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 	*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
 	*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
 					(int32_t)*(int16_t *)dp;
 					(int32_t)*(int16_t *)dp;
     }
     }
+#if LJ_TARGET_ARM64 && LJ_BE
+    if (isfp && d->size == sizeof(float))
+      ((float *)dp)[1] = ((float *)dp)[0];  /* Floats occupy high slot. */
+#endif
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+    if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
+#if LJ_TARGET_MIPS64
+	 || (isfp && nsp == 0)
+#endif
+	 ) && d->size <= 4) {
+      *(int64_t *)dp = (int64_t)*(int32_t *)dp;  /* Sign-extend to 64 bit. */
+    }
+#endif
 #if LJ_TARGET_X64 && LJ_ABI_WIN
 #if LJ_TARGET_X64 && LJ_ABI_WIN
     if (isva) {  /* Windows/x64 mirrors varargs in both register sets. */
     if (isva) {  /* Windows/x64 mirrors varargs in both register sets. */
       if (nfpr == ngpr)
       if (nfpr == ngpr)
@@ -803,13 +1079,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
       cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
       cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
       cc->fpr[nfpr-2].d[1] = 0;
       cc->fpr[nfpr-2].d[1] = 0;
     }
     }
+#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
+    if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
+      /* Split float HFA or complex float into separate registers. */
+      CTSize i = (sz >> 2) - 1;
+      do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+    }
 #else
 #else
     UNUSED(isfp);
     UNUSED(isfp);
 #endif
 #endif
   }
   }
   if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
   if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
 
 
-#if LJ_TARGET_X64 || LJ_TARGET_PPC
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
   cc->nfpr = nfpr;  /* Required for vararg functions. */
   cc->nfpr = nfpr;  /* Required for vararg functions. */
 #endif
 #endif
   cc->nsp = nsp;
   cc->nsp = nsp;
@@ -844,7 +1126,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
     CCALL_HANDLE_COMPLEXRET2
     CCALL_HANDLE_COMPLEXRET2
     return 1;  /* One GC step. */
     return 1;  /* One GC step. */
   }
   }
-  if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR)
+  if (LJ_BE && ctr->size < CTSIZE_PTR &&
+      (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
     sp += (CTSIZE_PTR - ctr->size);
     sp += (CTSIZE_PTR - ctr->size);
 #if CCALL_NUM_FPR
 #if CCALL_NUM_FPR
   if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
   if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))

+ 36 - 13
luajit.mod/luajit/src/lj_ccall.h

@@ -68,35 +68,56 @@ typedef union FPRArg {
   float f[2];
   float f[2];
 } FPRArg;
 } FPRArg;
 
 
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
 
 
 #define CCALL_NARG_GPR		8
 #define CCALL_NARG_GPR		8
+#define CCALL_NRET_GPR		2
 #define CCALL_NARG_FPR		8
 #define CCALL_NARG_FPR		8
+#define CCALL_NRET_FPR		4
+#define CCALL_SPS_FREE		0
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+  struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
+} FPRArg;
+
+#elif LJ_TARGET_PPC
+
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		(LJ_ABI_SOFTFP ? 0 : 8)
 #define CCALL_NRET_GPR		4	/* For complex double. */
 #define CCALL_NRET_GPR		4	/* For complex double. */
-#define CCALL_NRET_FPR		1
+#define CCALL_NRET_FPR		(LJ_ABI_SOFTFP ? 0 : 1)
 #define CCALL_SPS_EXTRA		4
 #define CCALL_SPS_EXTRA		4
 #define CCALL_SPS_FREE		0
 #define CCALL_SPS_FREE		0
 
 
 typedef intptr_t GPRArg;
 typedef intptr_t GPRArg;
 typedef double FPRArg;
 typedef double FPRArg;
 
 
-#elif LJ_TARGET_PPCSPE
+#elif LJ_TARGET_MIPS32
 
 
-#define CCALL_NARG_GPR		8
-#define CCALL_NARG_FPR		0
-#define CCALL_NRET_GPR		4	/* For softfp complex double. */
-#define CCALL_NRET_FPR		0
-#define CCALL_SPS_FREE		0	/* NYI */
+#define CCALL_NARG_GPR		4
+#define CCALL_NARG_FPR		(LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_NRET_GPR		(LJ_ABI_SOFTFP ? 4 : 2)
+#define CCALL_NRET_FPR		(LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_SPS_EXTRA		7
+#define CCALL_SPS_FREE		1
 
 
 typedef intptr_t GPRArg;
 typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+} FPRArg;
 
 
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS64
 
 
-#define CCALL_NARG_GPR		4
-#define CCALL_NARG_FPR		2
+/* FP args are positional and overlay the GPR array. */
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		0
 #define CCALL_NRET_GPR		2
 #define CCALL_NRET_GPR		2
-#define CCALL_NRET_FPR		2
-#define CCALL_SPS_EXTRA		7
+#define CCALL_NRET_FPR		(LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_SPS_EXTRA		3
 #define CCALL_SPS_FREE		1
 #define CCALL_SPS_FREE		1
 
 
 typedef intptr_t GPRArg;
 typedef intptr_t GPRArg;
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   uint8_t nfpr;			/* Number of arguments in FPRs. */
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #elif LJ_TARGET_X86
 #elif LJ_TARGET_X86
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_ARM64
+  void *retp;			/* Aggregate return pointer in x8. */
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
   uint8_t nfpr;			/* Number of arguments in FPRs. */
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #endif
 #endif

+ 191 - 48
luajit.mod/luajit/src/lj_ccallback.c

@@ -27,7 +27,7 @@
 
 
 #if LJ_OS_NOJIT
 #if LJ_OS_NOJIT
 
 
-/* Disabled callback support. */
+/* Callbacks disabled. */
 #define CALLBACK_SLOT2OFS(slot)	(0*(slot))
 #define CALLBACK_SLOT2OFS(slot)	(0*(slot))
 #define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
 #define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
 #define CALLBACK_MAX_SLOT	0
 #define CALLBACK_MAX_SLOT	0
@@ -35,7 +35,7 @@
 #elif LJ_TARGET_X86ORX64
 #elif LJ_TARGET_X86ORX64
 
 
 #define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
 #define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
-#define CALLBACK_MCODE_GROUP	(-2+1+2+5+(LJ_64 ? 6 : 5))
+#define CALLBACK_MCODE_GROUP	(-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
 
 
 #define CALLBACK_SLOT2OFS(slot) \
 #define CALLBACK_SLOT2OFS(slot) \
   (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
   (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 
 
 #define CALLBACK_MCODE_HEAD		32
 #define CALLBACK_MCODE_HEAD		32
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_MCODE_HEAD		32
 
 
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 
 
 #define CALLBACK_MCODE_HEAD		24
 #define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 
 
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS32
 
 
-#define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#define CALLBACK_MCODE_HEAD		20
+
+#elif LJ_TARGET_MIPS64
+
+#define CALLBACK_MCODE_HEAD		52
 
 
 #else
 #else
 
 
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 
 #endif
 #endif
 
 
+#ifndef CALLBACK_SLOT2OFS
+#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
+#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
+#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#endif
+
 /* Convert callback slot number to callback function pointer. */
 /* Convert callback slot number to callback function pointer. */
 static void *callback_slot2ptr(CTState *cts, MSize slot)
 static void *callback_slot2ptr(CTState *cts, MSize slot)
 {
 {
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
       /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
       /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
       *p++ = XI_PUSH + RID_EBP;
       *p++ = XI_PUSH + RID_EBP;
       *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
       *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
+#if LJ_GC64
+      *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
+      *(uint64_t *)p = (uint64_t)(g); p += 8;
+#else
       *p++ = XI_MOVri | RID_EBP;
       *p++ = XI_MOVri | RID_EBP;
       *(int32_t *)p = i32ptr(g); p += 4;
       *(int32_t *)p = i32ptr(g); p += 4;
+#endif
 #if LJ_64
 #if LJ_64
       /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
       /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
       *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
       *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -157,6 +167,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
   }
   }
   lua_assert(p - page <= CALLBACK_MCODE_SIZE);
   lua_assert(p - page <= CALLBACK_MCODE_SIZE);
 }
 }
+#elif LJ_TARGET_ARM64
+static void callback_mcode_init(global_State *g, uint32_t *page)
+{
+  uint32_t *p = page;
+  void *target = (void *)lj_vm_ffi_callback;
+  MSize slot;
+  *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
+  *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
+  *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
+  *p++ = A64I_LE(A64I_NOP);
+  ((void **)p)[0] = target;
+  ((void **)p)[1] = g;
+  p += 4;
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+    *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
+    *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
+    p++;
+  }
+  lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+}
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 static void callback_mcode_init(global_State *g, uint32_t *page)
 static void callback_mcode_init(global_State *g, uint32_t *page)
 {
 {
@@ -180,14 +210,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
 static void callback_mcode_init(global_State *g, uint32_t *page)
 static void callback_mcode_init(global_State *g, uint32_t *page)
 {
 {
   uint32_t *p = page;
   uint32_t *p = page;
-  void *target = (void *)lj_vm_ffi_callback;
+  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+  uintptr_t ug = (uintptr_t)(void *)g;
   MSize slot;
   MSize slot;
-  *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0;
-  *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16);
-  *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16);
-  *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff);
+#if LJ_TARGET_MIPS32
+  *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
+  *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
+#else
+  *p++ = MIPSI_LUI  | MIPSF_T(RID_R3) | (target >> 48);
+  *p++ = MIPSI_LUI  | MIPSF_T(RID_R2) | (ug >> 48);
+  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
+  *p++ = MIPSI_ORI  | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
+  *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
+  *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
+  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
+  *p++ = MIPSI_ORI  | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
+  *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
+  *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
+#endif
+  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
   *p++ = MIPSI_JR | MIPSF_S(RID_R3);
   *p++ = MIPSI_JR | MIPSF_S(RID_R3);
-  *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff);
+  *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
     *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
     *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
     p++;
     p++;
@@ -224,7 +267,7 @@ static void callback_mcode_new(CTState *cts)
   if (CALLBACK_MAX_SLOT == 0)
   if (CALLBACK_MAX_SLOT == 0)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
 #if LJ_TARGET_WINDOWS
 #if LJ_TARGET_WINDOWS
-  p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
   if (!p)
   if (!p)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
 #elif LJ_TARGET_POSIX
 #elif LJ_TARGET_POSIX
@@ -242,7 +285,7 @@ static void callback_mcode_new(CTState *cts)
 #if LJ_TARGET_WINDOWS
 #if LJ_TARGET_WINDOWS
   {
   {
     DWORD oprot;
     DWORD oprot;
-    VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot);
+    LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
   }
   }
 #elif LJ_TARGET_POSIX
 #elif LJ_TARGET_POSIX
   mprotect(p, sz, (PROT_READ|PROT_EXEC));
   mprotect(p, sz, (PROT_READ|PROT_EXEC));
@@ -351,33 +394,77 @@ void lj_ccallback_mcode_free(CTState *cts)
     goto done; \
     goto done; \
   } CALLBACK_HANDLE_REGARG_FP2
   } CALLBACK_HANDLE_REGARG_FP2
 
 
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
 
 
 #define CALLBACK_HANDLE_REGARG \
 #define CALLBACK_HANDLE_REGARG \
   if (isfp) { \
   if (isfp) { \
-    if (nfpr + 1 <= CCALL_NARG_FPR) { \
-      sp = &cts->cb.fpr[nfpr++]; \
-      cta = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
       goto done; \
       goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
     } \
     } \
-  } else {  /* Try to pass argument in GPRs. */ \
-    if (n > 1) { \
-      lua_assert(ctype_isinteger(cta->info) && n == 2);  /* int64_t. */ \
-      ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
-    } \
+  } else { \
+    if (!LJ_TARGET_IOS && n > 1) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
     if (ngpr + n <= maxgpr) { \
       sp = &cts->cb.gpr[ngpr]; \
       sp = &cts->cb.gpr[ngpr]; \
       ngpr += n; \
       ngpr += n; \
       goto done; \
       goto done; \
+    } else { \
+      ngpr = CCALL_NARG_GPR;  /* Prevent reordering. */ \
+    } \
+  }
+
+#elif LJ_TARGET_PPC
+
+#define CALLBACK_HANDLE_GPR \
+  if (n > 1) { \
+    lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) ||  /* double. */ \
+		ctype_isinteger(cta->info)) && n == 2);  /* int64_t. */ \
+    ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
+  } \
+  if (ngpr + n <= maxgpr) { \
+    sp = &cts->cb.gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
+  }
+
+#if LJ_ABI_SOFTFP
+#define CALLBACK_HANDLE_REGARG \
+  CALLBACK_HANDLE_GPR \
+  UNUSED(isfp);
+#else
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr + 1 <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr++]; \
+      cta = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+      goto done; \
     } \
     } \
+  } else {  /* Try to pass argument in GPRs. */ \
+    CALLBACK_HANDLE_GPR \
   }
   }
+#endif
 
 
+#if !LJ_ABI_SOFTFP
 #define CALLBACK_HANDLE_RET \
 #define CALLBACK_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     *(double *)dp = *(float *)dp;  /* FPRs always hold doubles. */
     *(double *)dp = *(float *)dp;  /* FPRs always hold doubles. */
+#endif
 
 
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS32
 
 
+#define CALLBACK_HANDLE_GPR \
+  if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+  if (ngpr + n <= maxgpr) { \
+    sp = &cts->cb.gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
+  }
+
+#if !LJ_ABI_SOFTFP	/* MIPS32 hard-float */
 #define CALLBACK_HANDLE_REGARG \
 #define CALLBACK_HANDLE_REGARG \
   if (isfp && nfpr < CCALL_NARG_FPR) {  /* Try to pass argument in FPRs. */ \
   if (isfp && nfpr < CCALL_NARG_FPR) {  /* Try to pass argument in FPRs. */ \
     sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
     sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +472,36 @@ void lj_ccallback_mcode_free(CTState *cts)
     goto done; \
     goto done; \
   } else {  /* Try to pass argument in GPRs. */ \
   } else {  /* Try to pass argument in GPRs. */ \
     nfpr = CCALL_NARG_FPR; \
     nfpr = CCALL_NARG_FPR; \
-    if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
-    if (ngpr + n <= maxgpr) { \
-      sp = &cts->cb.gpr[ngpr]; \
-      ngpr += n; \
-      goto done; \
-    } \
+    CALLBACK_HANDLE_GPR \
+  }
+#else			/* MIPS32 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+  CALLBACK_HANDLE_GPR \
+  UNUSED(isfp);
+#endif
+
+#define CALLBACK_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ((float *)dp)[1] = *(float *)dp;
+
+#elif LJ_TARGET_MIPS64
+
+#if !LJ_ABI_SOFTFP	/* MIPS64 hard-float */
+#define CALLBACK_HANDLE_REGARG \
+  if (ngpr + n <= maxgpr) { \
+    sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
   }
   }
+#else			/* MIPS64 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+  if (ngpr + n <= maxgpr) { \
+    UNUSED(isfp); \
+    sp = (void*) &cts->cb.gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
+  }
+#endif
 
 
 #define CALLBACK_HANDLE_RET \
 #define CALLBACK_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +521,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   int gcsteps = 0;
   int gcsteps = 0;
   CType *ct;
   CType *ct;
   GCfunc *fn;
   GCfunc *fn;
+  int fntp;
   MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
   MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
 #if CCALL_NARG_FPR
 #if CCALL_NARG_FPR
   MSize nfpr = 0;
   MSize nfpr = 0;
@@ -421,18 +532,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 
 
   if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
   if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
     ct = ctype_get(cts, id);
     ct = ctype_get(cts, id);
-    rid = ctype_cid(ct->info);
+    rid = ctype_cid(ct->info);  /* Return type. x86: +(spadj<<16). */
     fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
     fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+    fntp = LJ_TFUNC;
   } else {  /* Must set up frame first, before throwing the error. */
   } else {  /* Must set up frame first, before throwing the error. */
     ct = NULL;
     ct = NULL;
     rid = 0;
     rid = 0;
     fn = (GCfunc *)L;
     fn = (GCfunc *)L;
+    fntp = LJ_TTHREAD;
+  }
+  /* Continuation returns from callback. */
+  if (LJ_FR2) {
+    (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+    (o++)->u64 = rid;
+    o++;
+  } else {
+    o->u32.lo = LJ_CONT_FFI_CALLBACK;
+    o->u32.hi = rid;
+    o++;
   }
   }
-  o->u32.lo = LJ_CONT_FFI_CALLBACK;  /* Continuation returns from callback. */
-  o->u32.hi = rid;  /* Return type. x86: +(spadj<<16). */
-  o++;
-  setframe_gc(o, obj2gco(fn));
-  setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+  setframe_gc(o, obj2gco(fn), fntp);
+  setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
   L->top = L->base = ++o;
   L->top = L->base = ++o;
   if (!ct)
   if (!ct)
     lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
     lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -474,7 +594,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
       nsp += n;
       nsp += n;
 
 
     done:
     done:
-      if (LJ_BE && cta->size < CTSIZE_PTR)
+      if (LJ_BE && cta->size < CTSIZE_PTR
+#if LJ_TARGET_MIPS64
+	  && !(isfp && nsp)
+#endif
+	 )
 	sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
 	sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
       gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
       gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
     }
     }
@@ -483,8 +607,13 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   L->top = o;
   L->top = o;
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
   /* Store stack adjustment for returns from non-cdecl callbacks. */
   /* Store stack adjustment for returns from non-cdecl callbacks. */
-  if (ctype_cconv(ct->info) != CTCC_CDECL)
+  if (ctype_cconv(ct->info) != CTCC_CDECL) {
+#if LJ_FR2
+    (L->base-3)->u64 |= (nsp << (16+2));
+#else
     (L->base-2)->u32.hi |= (nsp << (16+2));
     (L->base-2)->u32.hi |= (nsp << (16+2));
+#endif
+  }
 #endif
 #endif
   while (gcsteps-- > 0)
   while (gcsteps-- > 0)
     lj_gc_check(L);
     lj_gc_check(L);
@@ -493,7 +622,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 /* Convert Lua object to callback result. */
 /* Convert Lua object to callback result. */
 static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 {
 {
+#if LJ_FR2
+  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
+#else
   CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
   CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#endif
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
   cts->cb.gpr[2] = 0;
   cts->cb.gpr[2] = 0;
 #endif
 #endif
@@ -502,6 +635,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 #if CCALL_NUM_FPR
 #if CCALL_NUM_FPR
     if (ctype_isfp(ctr->info))
     if (ctype_isfp(ctr->info))
       dp = (uint8_t *)&cts->cb.fpr[0];
       dp = (uint8_t *)&cts->cb.fpr[0];
+#endif
+#if LJ_TARGET_ARM64 && LJ_BE
+    if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
+      dp = (uint8_t *)&cts->cb.fpr[0].f[1];
 #endif
 #endif
     lj_cconv_ct_tv(cts, ctr, dp, o, 0);
     lj_cconv_ct_tv(cts, ctr, dp, o, 0);
 #ifdef CALLBACK_HANDLE_RET
 #ifdef CALLBACK_HANDLE_RET
@@ -516,6 +653,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 	*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
 	*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
 					  (int32_t)*(int16_t *)dp;
 					  (int32_t)*(int16_t *)dp;
     }
     }
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+    /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
+    if (ctr->size <= 4 &&
+	(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
+      *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+#endif
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
     if (ctype_isfp(ctr->info))
     if (ctype_isfp(ctr->info))
       cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
       cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
@@ -529,7 +672,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
   lua_State *L = cts->L;
   lua_State *L = cts->L;
   global_State *g = cts->g;
   global_State *g = cts->g;
   lua_assert(L != NULL);
   lua_assert(L != NULL);
-  if (gcref(g->jit_L)) {
+  if (tvref(g->jit_base)) {
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
     if (g->panic) g->panic(L);
     if (g->panic) g->panic(L);
     exit(EXIT_FAILURE);
     exit(EXIT_FAILURE);
@@ -562,9 +705,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
   }
   }
   callback_conv_result(cts, L, o);
   callback_conv_result(cts, L, o);
   /* Finally drop C frame and continuation frame. */
   /* Finally drop C frame and continuation frame. */
-  L->cframe = cframe_prev(L->cframe);
-  L->top -= 2;
+  L->top -= 2+2*LJ_FR2;
   L->base = obase;
   L->base = obase;
+  L->cframe = cframe_prev(L->cframe);
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
 }
 }
 
 

+ 3 - 1
luajit.mod/luajit/src/lj_cconv.c

@@ -448,8 +448,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
 	setintV(o, (int32_t)val);
 	setintV(o, (int32_t)val);
     }
     }
   } else {
   } else {
+    uint32_t b = (val >> pos) & 1;
     lua_assert(bsz == 1);
     lua_assert(bsz == 1);
-    setboolV(o, (val >> pos) & 1);
+    setboolV(o, b);
+    setboolV(&cts->g->tmptv2, b);  /* Remember for trace recorder. */
   }
   }
   return 0;  /* No GC step needed. */
   return 0;  /* No GC step needed. */
 }
 }

+ 27 - 13
luajit.mod/luajit/src/lj_cdata.c

@@ -9,7 +9,6 @@
 
 
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
 }
 }
 
 
 /* Allocate variable-sized or specially aligned C data object. */
 /* Allocate variable-sized or specially aligned C data object. */
-GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
+GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
 {
 {
   global_State *g;
   global_State *g;
   MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
   MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
 		(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
 		(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
-  char *p = lj_mem_newt(cts->L, extra + sz, char);
+  char *p = lj_mem_newt(L, extra + sz, char);
   uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
   uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
   uintptr_t almask = (1u << align) - 1u;
   uintptr_t almask = (1u << align) - 1u;
   GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
   GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
   cdatav(cd)->offset = (uint16_t)((char *)cd - p);
   cdatav(cd)->offset = (uint16_t)((char *)cd - p);
   cdatav(cd)->extra = extra;
   cdatav(cd)->extra = extra;
   cdatav(cd)->len = sz;
   cdatav(cd)->len = sz;
-  g = cts->g;
+  g = G(L);
   setgcrefr(cd->nextgc, g->gc.root);
   setgcrefr(cd->nextgc, g->gc.root);
   setgcref(g->gc.root, obj2gco(cd));
   setgcref(g->gc.root, obj2gco(cd));
   newwhite(g, obj2gco(cd));
   newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
   return cd;
   return cd;
 }
 }
 
 
+/* Allocate arbitrary C data object. */
+GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
+{
+  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
+    return lj_cdata_new(cts, id, sz);
+  else
+    return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
+}
+
 /* Free a C data object. */
 /* Free a C data object. */
 void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
 void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
 {
 {
@@ -76,21 +84,22 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
   }
   }
 }
 }
 
 
-TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd)
+void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
 {
 {
-  global_State *g = G(L);
-  GCtab *t = ctype_ctsG(g)->finalizer;
+  GCtab *t = ctype_ctsG(G(L))->finalizer;
   if (gcref(t->metatable)) {
   if (gcref(t->metatable)) {
     /* Add cdata to finalizer table, if still enabled. */
     /* Add cdata to finalizer table, if still enabled. */
     TValue *tv, tmp;
     TValue *tv, tmp;
     setcdataV(L, &tmp, cd);
     setcdataV(L, &tmp, cd);
     lj_gc_anybarriert(L, t);
     lj_gc_anybarriert(L, t);
     tv = lj_tab_set(L, t, &tmp);
     tv = lj_tab_set(L, t, &tmp);
-    cd->marked |= LJ_GC_CDATA_FIN;
-    return tv;
-  } else {
-    /* Otherwise return dummy TValue. */
-    return &g->tmptv;
+    if (it == LJ_TNIL) {
+      setnilV(tv);
+      cd->marked &= ~LJ_GC_CDATA_FIN;
+    } else {
+      setgcV(L, tv, obj, it);
+      cd->marked |= LJ_GC_CDATA_FIN;
+    }
   }
   }
 }
 }
 
 
@@ -123,7 +132,12 @@ collect_attrib:
     idx = (ptrdiff_t)intV(key);
     idx = (ptrdiff_t)intV(key);
     goto integer_key;
     goto integer_key;
   } else if (tvisnum(key)) {  /* Numeric key. */
   } else if (tvisnum(key)) {  /* Numeric key. */
-    idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key));
+#ifdef _MSC_VER
+    /* Workaround for MSVC bug. */
+    volatile
+#endif
+    lua_Number n = numV(key);
+    idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
   integer_key:
   integer_key:
     if (ctype_ispointer(ct->info)) {
     if (ctype_ispointer(ct->info)) {
       CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info));  /* Element size. */
       CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info));  /* Element size. */

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff